In [290]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.preprocessing import scale
import numpy as np
import pandas as pd

In [291]:
# Breast-cancer dataset bundled with scikit-learn: features are standardised
# column-wise with `scale`, labels are taken unchanged from the bunch.
# NOTE(review): `scale` is applied to the full dataset, so if this array is
# split into train/test afterwards the test rows have influenced the scaling
# statistics -- confirm this is acceptable for the comparison.
cancer = load_breast_cancer()
Y_cancer = cancer.target
X_cancer = scale(cancer.data)

In [292]:
# Abalone data: 'Rings' is the prediction target, every other column is a feature.
abalone = pd.read_csv('abalone.csv')
Y_abalone = abalone['Rings']
# Select the features by dropping the target by name rather than by position,
# so the target cannot leak into X if 'Rings' is not the last column of the CSV.
X_abalone = abalone.drop(columns='Rings')
# One-hot encode any non-numeric columns, then standardise every feature column.
X_abalone = pd.get_dummies(X_abalone)
X_abalone = scale(X_abalone)


In [293]:
# Character data is stored as four header-less, space-separated files
# (train/test features and train/test labels). Train and test parts are
# stacked into a single feature table and a single label series.

# The last column of each feature file is discarded.
# NOTE(review): presumably an empty column produced by a trailing separator
# on each line -- confirm against the raw files.
character_X_test = pd.read_csv('x_test', sep=' ', header=None).iloc[:, :-1]
character_X_train = pd.read_csv('x_train', sep=' ', header=None).iloc[:, :-1]

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the supported
# equivalent and, like `append`, keeps each part's original row index.
X_character = pd.concat([character_X_train, character_X_test])

character_Y_test = pd.read_csv('y_test', sep=' ', header=None)
character_Y_train = pd.read_csv('y_train', sep=' ', header=None)

Y_character = pd.concat([character_Y_train, character_Y_test])
# Collapse each label row to the column position of its largest value.
# NOTE(review): this assumes the label files are one-hot encoded -- confirm.
Y_character = Y_character.idxmax(axis=1)


In [294]:
def model(X, Y, test, iteration, classifier, random_state=None):
    """Return the mean hold-out accuracy (in percent) over repeated random splits.

    Parameters
    ----------
    X, Y : features and labels, passed straight to ``train_test_split``.
    test : value forwarded as ``test_size`` to ``train_test_split``.
    iteration : int
        Number of independent random splits to evaluate; must be >= 1.
    classifier : estimator exposing ``fit`` and ``predict``.
        The same instance is refit on every split.
    random_state : int or None, optional
        Base seed for the splits. Split ``i`` uses ``random_state + i`` so the
        splits differ from each other but are reproducible across runs.
        ``None`` (the default) leaves the splits unseeded.

    Returns
    -------
    float
        Mean of ``accuracy_score * 100`` over all splits.

    Raises
    ------
    ValueError
        If ``iteration`` is smaller than 1 (the mean of zero scores would
        otherwise silently come back as NaN).
    """
    if iteration < 1:
        raise ValueError("iteration must be at least 1, got %r" % (iteration,))

    # Collect scores in a plain list; np.append would reallocate on every pass.
    scores = []
    for i in range(iteration):
        seed = None if random_state is None else random_state + i
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test, random_state=seed
        )
        classifier.fit(X_train, Y_train)
        Y_pred = classifier.predict(X_test)
        scores.append(accuracy_score(Y_test, Y_pred) * 100)

    return np.mean(scores)


In [295]:
def validate_model(X, Y, test, classifier):
    """Return the mean cross-validation score of ``classifier`` in percent.

    The number of folds is derived from the hold-out fraction ``test`` as
    ``int(1 / test)`` (e.g. 0.25 -> 4 folds, 0.33 -> 3 folds), and the
    per-fold scores returned by ``cross_val_score`` are averaged.
    """
    n_folds = int(1/test)
    fold_scores = cross_val_score(classifier, X, Y, cv=n_folds)
    return fold_scores.mean() * 100


In [296]:
# Models under comparison; the keys become the row labels of every result table.
classifier = {'NaiveBayes' : GaussianNB(), 'KNN': KNeighborsClassifier(3), 'DecisionTree' : DecisionTreeClassifier()}
# Datasets as [features, labels] pairs; the keys become the column labels.
dataset = {'Breast_Cancer' : [X_cancer, Y_cancer], 'Abalone' : [X_abalone, Y_abalone], 'Charcter' : [X_character , Y_character]}

# Number of random splits averaged for the "random subsampling" tables.
RANDOM_SPLIT_REPEATS = 10


def _blank_results():
    """Return a NaN-filled table with one row per classifier and one column per dataset."""
    # Sized from the dicts (not a hard-coded 3x3) and NaN-initialised, so a
    # cell that is never filled shows up as NaN instead of uninitialised memory.
    return pd.DataFrame(np.nan, index=list(classifier), columns=list(dataset))


df_holdout1, df_random1, df_validate1 = _blank_results(), _blank_results(), _blank_results()
df_holdout2, df_random2, df_validate2 = _blank_results(), _blank_results(), _blank_results()

# Test fraction -> [single hold-out table, repeated-split table, cross-validation table].
test = {0.25 : [df_holdout1, df_random1, df_validate1] , 0.33 : [df_holdout2, df_random2, df_validate2]}

for test_size, (holdout_df, random_df, validate_df) in test.items():
    for clf_name, clf in classifier.items():
        for data_name, (features, labels) in dataset.items():
            # One random split, then the mean over repeated random splits,
            # then cross-validation with int(1 / test_size) folds.
            holdout_df.loc[clf_name, data_name] = model(features, labels, test_size, 1, clf)
            random_df.loc[clf_name, data_name] = model(features, labels, test_size, RANDOM_SPLIT_REPEATS, clf)
            validate_df.loc[clf_name, data_name] = validate_model(features, labels, test_size, clf)

In [297]:
# Accuracy (%) from a single random hold-out split with test_size=0.25 (rows: classifiers, columns: datasets).
df_holdout1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,95.804196,10.62201,83.037037
KNN,98.601399,20.76555,92.296296
DecisionTree,93.706294,20.861244,96.888889


In [298]:
# Accuracy (%) from a single random hold-out split with test_size=0.33 (rows: classifiers, columns: datasets).
df_holdout2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,89.893617,9.862219,82.267116
KNN,95.744681,22.84264,91.189675
DecisionTree,92.021277,21.174764,97.362514


In [299]:
# Mean accuracy (%) over 10 random splits with test_size=0.25 (rows: classifiers, columns: datasets).
df_random1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,93.006993,10.411483,82.222222
KNN,96.713287,20.086124,91.392593
DecisionTree,91.678322,19.454545,96.933333


In [300]:
# Mean accuracy (%) over 10 random splits with test_size=0.33 (rows: classifiers, columns: datasets).
df_random2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,94.361702,10.478608,82.227834
KNN,96.43617,20.065265,90.965208
DecisionTree,93.031915,20.174039,96.694725


In [301]:
# Mean cross-validation score (%) with int(1/0.25) = 4 folds (rows: classifiers, columns: datasets).
df_validate1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.974983,10.205745,82.925926
KNN,96.485029,20.528196,91.592593
DecisionTree,91.394169,19.114804,96.722222


In [302]:
# Mean cross-validation score (%) with int(1/0.33) = 3 folds (rows: classifiers, columns: datasets).
df_validate2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.970389,10.20762,82.648148
KNN,95.95563,20.711904,91.111111
DecisionTree,89.097744,19.923837,96.574074
