In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.preprocessing import scale
import numpy as np
import pandas as pd

In [2]:
# Breast-cancer dataset bundled with scikit-learn.
cancer = load_breast_cancer()
Y_cancer = cancer.target
# Standardise the features.
# NOTE(review): `scale` is applied to the full dataset before any train/test
# split, so held-out rows contribute to the scaling statistics.
X_cancer = scale(cancer.data)

In [3]:
# Abalone table, read from the current working directory.
abalone = pd.read_csv('abalone.csv')
# Target: the 'Rings' column, used directly as the class label.
Y_abalone = abalone['Rings']
# Features: every column except the last one, dummy-encoded by pandas and then
# standardised.
# NOTE(review): assumes 'Rings' is the last column of the CSV; otherwise the
# target would remain among the features -- TODO confirm.
X_abalone = scale(pd.get_dummies(abalone.iloc[:, :-1]))


In [4]:
# Character features: space-separated files without a header row.
character_X_test = pd.read_csv('x_test', sep=' ', header=None)
character_X_train = pd.read_csv('x_train', sep=' ', header=None)
# Drop the last column of each feature file.
# NOTE(review): presumably an empty column produced by a trailing separator
# on every line -- confirm against the raw files.
character_X_train = character_X_train.drop(columns=character_X_train.columns[-1])
character_X_test = character_X_test.drop(columns=character_X_test.columns[-1])

# The provided train/test partitions are merged (train rows first) because the
# experiments below draw their own splits.  `DataFrame.append` was removed in
# pandas 2.0; `pd.concat` is the supported equivalent and keeps the same row
# order and index labels.
X_character = pd.concat([character_X_train, character_X_test])

character_Y_test = pd.read_csv('y_test', sep=' ', header=None)
character_Y_train = pd.read_csv('y_train', sep=' ', header=None)

Y_character = pd.concat([character_Y_train, character_Y_test])
# Collapse each label row to the column label holding its largest value.
# NOTE(review): presumably the label files are one-hot encoded -- confirm.
Y_character = Y_character.idxmax(axis=1)


In [6]:
def model(X, Y, test, iteration, classifier, random_state=None):
    """Estimate accuracy by repeated random hold-out evaluation.

    For each of ``iteration`` rounds the data is split with
    ``train_test_split``, ``classifier`` is fitted on the training part and
    scored with ``accuracy_score`` on the held-out part.

    Parameters
    ----------
    X, Y : array-like
        Features and labels, passed straight to ``train_test_split``.
    test : float or int
        Forwarded as ``test_size`` to ``train_test_split``.
    iteration : int
        Number of independent splits to average over (must be >= 1).
    classifier : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.  It is refitted in
        place on every round.
    random_state : int, numpy.random.RandomState or None, optional
        Seed or generator for the splits.  ``None`` (default) keeps the
        previous behaviour of relying on NumPy's global random state.

    Returns
    -------
    float
        Mean accuracy over all rounds, in percent.

    Raises
    ------
    ValueError
        If ``iteration`` is smaller than 1 (the mean would be undefined).
    """
    if iteration < 1:
        raise ValueError("iteration must be >= 1, got %r" % (iteration,))

    # A single generator is shared by all rounds so that a fixed seed still
    # produces a different split on every round.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    scores = []
    for _ in range(iteration):
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test, random_state=rng
        )
        classifier.fit(X_train, Y_train)
        Y_pred = classifier.predict(X_test)
        scores.append(accuracy_score(Y_test, Y_pred) * 100)

    return np.mean(scores)


In [5]:
def validate_model(X, Y, test, classifier):
    """Return the mean cross-validation score of ``classifier`` in percent.

    The number of folds is ``int(1 / test)``: the reciprocal of the requested
    test fraction, truncated toward zero (0.25 -> 4 folds, 0.33 -> 3 folds).

    Parameters
    ----------
    X, Y : array-like
        Features and labels, passed to ``cross_val_score``.
    test : float
        Test fraction from which the fold count is derived.
    classifier : estimator
        Estimator handed to ``cross_val_score``.

    Returns
    -------
    float
        Mean of the per-fold scores multiplied by 100.
    """
    n_folds = int(1 / test)
    fold_scores = cross_val_score(classifier, X, Y, cv=n_folds)
    return fold_scores.mean() * 100


In [7]:
# Estimators under comparison, keyed by the row label used in the result tables.
classifier = {'NaiveBayes' : GaussianNB(), 'KNN': KNeighborsClassifier(3), 'DecisionTree' : DecisionTreeClassifier()}
# Datasets as [features, labels], keyed by the column label used in the result tables.
# NOTE: the key 'Charcter' (sic) is left untouched so the labels keep matching
# the result tables already rendered in this notebook.
dataset = {'Breast_Cancer' : [X_cancer, Y_cancer], 'Abalone' : [X_abalone, Y_abalone], 'Charcter' : [X_character , Y_character]}


def _empty_results():
    """Return a classifier x dataset frame pre-filled with NaN.

    NaN (rather than ``np.empty``'s uninitialised memory) makes any cell that
    was never computed obvious, and the shape follows the two dicts above
    instead of a hard-coded 3x3.
    """
    return pd.DataFrame(np.nan, index=list(classifier), columns=list(dataset), dtype=float)


# Suffix 1 -> test fraction 0.25, suffix 2 -> test fraction 0.33.
df_holdout1, df_random1, df_validate1 = (_empty_results() for _ in range(3))
df_holdout2, df_random2, df_validate2 = (_empty_results() for _ in range(3))

# Test fraction -> [single hold-out, 10x repeated hold-out, cross-validation] tables.
test = {0.25 : [df_holdout1, df_random1, df_validate1] , 0.33 : [df_holdout2, df_random2, df_validate2]}

for test_size, (holdout_table, random_table, validate_table) in test.items():
    for clf_name, clf in classifier.items():
        for data_name, (features, labels) in dataset.items():
            # One random split.
            holdout_table.loc[clf_name, data_name] = model(features, labels, test_size, 1, clf)
            # Mean over ten random splits.
            random_table.loc[clf_name, data_name] = model(features, labels, test_size, 10, clf)
            # Cross-validation with int(1 / test_size) folds.
            validate_table.loc[clf_name, data_name] = validate_model(features, labels, test_size, clf)

In [8]:
df_holdout1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.307692,8.516746,83.185185
KNN,98.601399,19.617225,91.555556
DecisionTree,95.104895,19.617225,96.740741


In [9]:
df_holdout2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,91.489362,11.167513,83.950617
KNN,96.808511,20.884699,90.460157
DecisionTree,93.617021,19.579405,96.352413


In [10]:
df_random1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.307692,10.373206,82.533333
KNN,96.643357,20.066986,91.866667
DecisionTree,91.818182,20.401914,96.992593


In [11]:
df_random2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,93.510638,10.797679,82.648709
KNN,96.968085,20.630892,91.228956
DecisionTree,91.755319,20.203046,96.503928


In [12]:
df_validate1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.974983,10.390864,82.925926
KNN,96.485029,20.446021,91.592593
DecisionTree,92.271989,19.607921,96.814815


In [13]:
df_validate2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.970389,10.294992,82.648148
KNN,95.95563,20.660895,91.111111
DecisionTree,90.679476,19.488022,96.518519
