In [1]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# IRIS Dataset

In [2]:
dataset = r".\IRIS.csv"

In [3]:
df = pd.read_csv(dataset)
df.sample(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
5,6,5.4,3.9,1.7,0.4,Iris-setosa
56,57,6.3,3.3,4.7,1.6,Iris-versicolor
123,124,6.3,2.7,4.9,1.8,Iris-virginica
127,128,6.1,3.0,4.9,1.8,Iris-virginica
16,17,5.4,3.9,1.3,0.4,Iris-setosa


In [4]:
X = df.drop(columns=['Species'])
y = df[['Species']]

In [None]:
splits = range(2,8)
random_states = [0, 42]
result = []
models = []
for split_size in splits:
    split_size = split_size / 10
    for random_state in random_states:
        X_train, X_test, y_train, y_test = train_test_split(X, le_y, test_size=split_size, random_state=random_state , shuffle=True)
        model = RandomForestClassifier()
        model.fit(X_train, y_train)
        models.append(model)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        result.append(["RandomForest","iris", split_size, random_state, accuracy])

result_df = pd.DataFrame(result, columns=["model","dataset","test_size","random_state","accuracy_score"])
result_df

Unnamed: 0,model,dataset,test_size,random_state,accuracy_score
0,RandomForest,iris,0.2,0,1.0
1,RandomForest,iris,0.2,42,1.0
2,RandomForest,iris,0.3,0,1.0
3,RandomForest,iris,0.3,42,1.0
4,RandomForest,iris,0.4,0,1.0
5,RandomForest,iris,0.4,42,1.0
6,RandomForest,iris,0.5,0,0.986667
7,RandomForest,iris,0.5,42,1.0
8,RandomForest,iris,0.6,0,1.0
9,RandomForest,iris,0.6,42,1.0


# Voting Classifier

In [None]:
splits = range(2,8)
random_states = [0, 42]
models = []
for split_size in splits:
    split_size = split_size / 10
    for random_state in random_states:
        X_train, X_test, y_train, y_test = train_test_split(X, le_y, test_size=split_size, random_state=random_state , shuffle=True)
        
        
        model1 = SVC()
        model2 = LogisticRegression()
        model3 = GaussianNB()

        model = VotingClassifier(
            estimators=[('svc', model1), ('lr', model2), ('gnb', model3)],
            voting = 'hard'
        )
        
        model.fit(X_train, y_train)
        models.append(model)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        result.append(["VotingClassifier","iris", split_size, random_state, accuracy])

res_df = pd.DataFrame(result, columns=["model","dataset","test_size","random_state","accuracy_score"])
res_df

# Wine dataset

In [None]:
dataset = r".\Wine.csv"

In [None]:
df = pd.read_csv(dataset)
df.sample(5)

Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline,Customer_Segment
102,12.34,2.45,2.46,21.0,98,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438,2
42,13.88,1.89,2.59,15.0,101,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095,1
132,12.81,2.31,2.4,24.0,98,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560,3
36,13.28,1.64,2.84,15.5,110,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880,1
169,13.4,4.6,2.86,25.0,112,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630,3


In [None]:
X = df.drop(columns=['Customer_Segment'])
y = df[['Customer_Segment']]

In [None]:
splits = range(2,8)
random_states = [0, 42]
models = []
for split_size in splits:
    split_size = split_size / 10
    for random_state in random_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_size, random_state=random_state , shuffle=True)
        
        
        model1 = SVC()
        model2 = LogisticRegression()
        model3 = GaussianNB()

        model = VotingClassifier(
            estimators=[('svc', model1), ('lr', model2), ('gnb', model3)],
            voting = 'hard'
        )
        
        model.fit(X_train, y_train)
        models.append(model)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        result.append(["VotingClassifier","wine", split_size, random_state, accuracy])

res_df = pd.DataFrame(result, columns=["model","dataset","test_size","random_state","accuracy_score"])
res_df

In [None]:
res_df.to_excel("./output.xlsx")