In [277]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.preprocessing import scale
import numpy as np
import pandas as pd

In [278]:
# Breast-cancer dataset bundled with scikit-learn: labels are taken as-is,
# features are standardised with sklearn.preprocessing.scale.
# NOTE(review): scaling is applied to the full dataset before any train/test
# split, so test-set statistics influence the scaling - confirm this is acceptable.
cancer = load_breast_cancer()
Y_cancer = cancer.target
X_cancer = scale(cancer.data)

In [279]:
# Abalone data: the 'Rings' column is the target; every column except the last
# is used as a feature (presumably 'Rings' is that last column - confirm
# against the CSV).
abalone = pd.read_csv('abalone.csv')
Y_abalone = abalone['Rings']

# One-hot encode the non-numeric feature columns, then standardise.
X_abalone = scale(pd.get_dummies(abalone.iloc[:, :-1]))


In [280]:
# Character data ships as separate, space-separated train/test files without a
# header row. The evaluation code does its own splitting, so both parts are
# stacked back into a single feature table and a single label vector.
character_X_train = pd.read_csv('x_train', sep=' ', header=None)
character_X_test = pd.read_csv('x_test', sep=' ', header=None)

# Drop the last column of each feature file (presumably an empty column
# produced by a trailing separator - confirm against the raw files).
# Reassigning instead of inplace=True keeps the cell safe to reason about.
character_X_train = character_X_train.drop(columns=[character_X_train.columns[-1]])
character_X_test = character_X_test.drop(columns=[character_X_test.columns[-1]])

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Row labels are kept as-is (train first, then test), exactly as append did.
X_character = pd.concat([character_X_train, character_X_test])

character_Y_train = pd.read_csv('y_train', sep=' ', header=None)
character_Y_test = pd.read_csv('y_test', sep=' ', header=None)

# For every row take the label of the column holding the largest value
# (presumably decoding one-hot encoded classes - confirm the file format).
Y_character = pd.concat([character_Y_train, character_Y_test]).idxmax(axis=1)


In [281]:
def model(X, Y, test, iteration, classifier, random_state=None):
    """Estimate accuracy by repeated random train/test splitting.

    For each of ``iteration`` rounds the data is split with
    ``train_test_split``, ``classifier`` is fitted on the training part and
    scored with ``accuracy_score`` on the held-out part.

    Parameters
    ----------
    X, Y : features and labels, passed straight to ``train_test_split``.
    test : value forwarded as ``test_size`` to ``train_test_split``.
    iteration : int
        Number of independent splits to average over (must be >= 1).
    classifier : object with ``fit(X, y)`` and ``predict(X)``.
        It is fitted in place, so after the call it holds the fit from the
        last split.
    random_state : None, int or RandomState-like, optional
        Controls the splits only; any randomness inside ``classifier`` is not
        touched here. ``None`` (default) keeps the previous unseeded
        behaviour. An int seeds one generator that is shared by all rounds,
        so the rounds use different splits but the whole run is repeatable.

    Returns
    -------
    Mean accuracy over all rounds, in percent.

    Raises
    ------
    ValueError
        If ``iteration`` is smaller than 1 (previously this produced NaN
        together with a NumPy RuntimeWarning).
    """
    if iteration < 1:
        raise ValueError("iteration must be >= 1, got %r" % (iteration,))

    # Build a single generator from an integer seed: passing the bare int to
    # every train_test_split call would yield the same split each round.
    if isinstance(random_state, (int, np.integer)):
        split_rng = np.random.RandomState(random_state)
    else:
        split_rng = random_state

    scores = []
    for _ in range(iteration):
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test, random_state=split_rng
        )
        classifier.fit(X_train, Y_train)
        Y_pred = classifier.predict(X_test)
        scores.append(accuracy_score(Y_test, Y_pred) * 100)

    return np.mean(scores)


In [282]:
def validate_model(X, Y, test, classifier):
    """Return the mean cross-validated score of ``classifier``, in percent.

    The number of folds is derived from the test fraction as ``int(1 / test)``
    (truncated), e.g. 0.25 -> 4 folds and 0.33 -> 3 folds. Scoring itself is
    delegated to ``cross_val_score``.
    """
    n_folds = int(1 / test)
    fold_scores = cross_val_score(classifier, X, Y, cv=n_folds)
    return fold_scores.mean() * 100


In [283]:
# Estimators under comparison. Each instance is reused for every dataset and
# protocol; it is simply re-fitted on each call.
classifier = {
    'NaiveBayes': GaussianNB(),
    'KNN': KNeighborsClassifier(3),
    'DecisionTree': DecisionTreeClassifier(),
}

# Dataset name -> [features, labels].
# The 'Charcter' spelling is kept on purpose: it is the column label used in
# the result tables displayed below.
dataset = {
    'Breast_Cancer': [X_cancer, Y_cancer],
    'Abalone': [X_abalone, Y_abalone],
    'Charcter': [X_character, Y_character],
}

# One result table per (protocol, test fraction): rows are classifiers,
# columns are datasets. The tables are sized from the dicts above and start
# NaN-filled, so adding an estimator or dataset needs no further change and a
# cell that was never computed is visible as NaN rather than as garbage.
(df_holdout1, df_holdout2,
 df_random1, df_random2,
 df_validate1, df_validate2) = (
    pd.DataFrame(np.nan, index=list(classifier), columns=list(dataset))
    for _ in range(6)
)

# Test fraction -> [single hold-out, 10 random splits averaged, cross-validation].
test = {
    0.25: [df_holdout1, df_random1, df_validate1],
    0.33: [df_holdout2, df_random2, df_validate2],
}

for test_size, (holdout_df, random_df, validate_df) in test.items():
    for clf_name, clf in classifier.items():
        for data_name, (features, labels) in dataset.items():
            holdout_df.loc[clf_name, data_name] = model(features, labels, test_size, 1, clf)
            random_df.loc[clf_name, data_name] = model(features, labels, test_size, 10, clf)
            validate_df.loc[clf_name, data_name] = validate_model(features, labels, test_size, clf)

In [284]:
# Accuracy (%) from one random train/test split with test_size=0.25 (rows: classifiers, columns: datasets).
df_holdout1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,96.503497,8.421053,82.740741
KNN,95.804196,21.435407,91.111111
DecisionTree,90.909091,18.277512,96.592593


In [285]:
# Accuracy (%) from one random train/test split with test_size=0.33 (rows: classifiers, columns: datasets).
df_holdout2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,93.617021,9.64467,82.828283
KNN,97.340426,21.537346,91.021324
DecisionTree,94.148936,19.796954,95.510662


In [286]:
# Mean accuracy (%) over 10 random train/test splits with test_size=0.25 (rows: classifiers, columns: datasets).
df_random1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,93.146853,10.114833,82.288889
KNN,96.573427,20.91866,91.118519
DecisionTree,92.937063,20.62201,97.133333


In [287]:
# Mean accuracy (%) over 10 random train/test splits with test_size=0.33 (rows: classifiers, columns: datasets).
df_random2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,93.297872,10.195794,82.867565
KNN,97.074468,20.500363,90.768799
DecisionTree,92.606383,19.95649,96.683502


In [288]:
# Mean cross_val_score (%) with cv=int(1/0.25)=4 folds (rows: classifiers, columns: datasets).
df_validate1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.974983,10.205745,82.925926
KNN,96.485029,20.528196,91.592593
DecisionTree,92.446814,18.99885,96.777778


In [289]:
# Mean cross_val_score (%) with cv=int(1/0.33)=3 folds (rows: classifiers, columns: datasets).
df_validate2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.970389,10.20762,82.648148
KNN,95.95563,20.711904,91.111111
DecisionTree,89.624988,20.21185,96.666667
