In [74]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.preprocessing import scale
import numpy as np
import pandas as pd

In [75]:
# Breast-cancer dataset: keep the loaded bunch around, take the target as-is
# and standardise the feature matrix in a single step.
cancer = load_breast_cancer()
Y_cancer = cancer.target
X_cancer = scale(cancer.data)

In [76]:
# Abalone dataset: the target is the 'Rings' column; the features are every
# column except the last one, one-hot encoded and then standardised.
abalone = pd.read_csv('abalone.csv')
Y_abalone = abalone['Rings']
X_abalone = scale(pd.get_dummies(abalone.iloc[:, :-1]))


In [92]:
# Character dataset: features and labels come as separate space-delimited
# train/test files without a header row.
character_X_test = pd.read_csv('x_test', sep=' ', header=None)
character_X_train = pd.read_csv('x_train', sep=' ', header=None)

# Discard the last column of each feature file (reassigning instead of
# mutating in place).
# NOTE(review): presumably an empty column caused by a trailing separator - confirm.
character_X_train = character_X_train.drop(columns=character_X_train.columns[-1])
character_X_test = character_X_test.drop(columns=character_X_test.columns[-1])

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# Row labels are kept exactly as append kept them, so each part retains the
# row numbering of the file it was read from.
X_character = pd.concat([character_X_train, character_X_test])

character_Y_test = pd.read_csv('y_test', sep=' ', header=None)
character_Y_train = pd.read_csv('y_train', sep=' ', header=None)

Y_character = pd.concat([character_Y_train, character_Y_test])

Y_character

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
4495,0,0,0,0,0,1,0,0,0,0
4496,0,0,0,0,0,0,1,0,0,0
4497,0,0,0,0,0,0,0,0,1,0
4498,0,0,0,0,0,0,0,1,0,0


In [77]:
def model(X, Y, test, iteration, classifier, random_state=None):
    """Return the mean hold-out accuracy, in percent, over repeated random splits.

    For each of ``iteration`` rounds the data is split with
    ``train_test_split``, ``classifier`` is fitted on the training part and
    scored with ``accuracy_score`` on the held-out part.

    Parameters
    ----------
    X, Y :
        Features and targets, forwarded unchanged to ``train_test_split``.
    test :
        Forwarded as ``test_size`` to ``train_test_split``.
    iteration : int
        Number of independent splits to average over; must be at least 1.
    classifier :
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refitted on
        every round, so the caller's instance is left fitted on the last split.
    random_state : int, numpy.random.RandomState or None, optional
        Seed (or generator) used to draw the splits. With the default ``None``
        nothing is passed on and the splits are drawn as before.

    Returns
    -------
    float
        Mean of the per-round accuracies, scaled to the 0-100 range.

    Raises
    ------
    ValueError
        If ``iteration`` is smaller than 1 (the mean of zero rounds is undefined).
    """
    if iteration < 1:
        raise ValueError(f"iteration must be >= 1, got {iteration!r}")

    # One generator shared by all rounds: every round draws a different split,
    # yet the whole sequence is repeatable for a given seed.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    scores = []
    for _ in range(iteration):
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test, random_state=rng
        )
        classifier.fit(X_train, Y_train)
        Y_pred = classifier.predict(X_test)
        scores.append(accuracy_score(Y_test, Y_pred) * 100)

    return np.mean(scores)


In [78]:
def validate_model(X, Y, test, classifier):
    """Return the mean cross-validated score of ``classifier``, in percent.

    The number of folds is derived from the requested test fraction as
    ``int(1 / test)`` (truncated), e.g. 0.25 -> 4 folds and 0.33 -> 3 folds.

    Parameters
    ----------
    X, Y :
        Features and targets, forwarded unchanged to ``cross_val_score``.
    test : float
        Desired test fraction; must satisfy ``0 < test <= 0.5`` so that at
        least two folds result.
    classifier :
        Estimator forwarded to ``cross_val_score``.

    Returns
    -------
    float
        Mean of the per-fold scores, scaled by 100.

    Raises
    ------
    ValueError
        If ``test`` lies outside ``(0, 0.5]``.
    """
    # Outside this range 1/test either divides by zero or truncates to fewer
    # than two folds, which cross-validation cannot use.
    if not 0 < test <= 0.5:
        raise ValueError(f"test must be in the interval (0, 0.5], got {test!r}")

    n_folds = int(1 / test)
    fold_scores = cross_val_score(classifier, X, Y, cv=n_folds)
    return np.mean(fold_scores) * 100


In [79]:
# Seed NumPy's global generator so that a fresh "Restart & Run All" reproduces
# the tables below; scikit-learn draws from this generator whenever an
# estimator or splitter is given no random_state of its own.
np.random.seed(0)

# Estimators and datasets under comparison, keyed by the labels used in the result tables.
classifier = {'NaiveBayes' : GaussianNB(), 'KNN': KNeighborsClassifier(3), 'DecisionTree' : DecisionTreeClassifier()}
dataset = {'Breast_Cancer' : [X_cancer, Y_cancer], 'Abalone' : [X_abalone, Y_abalone]}


def _blank_results():
    """Return a NaN-filled table with one row per classifier and one column per dataset."""
    return pd.DataFrame(np.nan, index=list(classifier), columns=list(dataset))


# One table per evaluation protocol and test fraction:
#   holdout  - a single random train/test split
#   random   - the mean over 10 random train/test splits
#   validate - k-fold cross-validation with the fold count derived from the fraction
df_holdout1, df_random1, df_validate1 = (_blank_results() for _ in range(3))
df_holdout2, df_random2, df_validate2 = (_blank_results() for _ in range(3))

# Test fraction -> [holdout, random, validate] tables to fill for that fraction.
test = {0.25 : [df_holdout1, df_random1, df_validate1] , 0.33 : [df_holdout2, df_random2, df_validate2]}

for test_size, (holdout_df, random_df, validate_df) in test.items():
    for clf_name, clf in classifier.items():
        for data_name, (features, target) in dataset.items():
            holdout_df.loc[clf_name, data_name] = model(features, target, test_size, 1, clf)
            random_df.loc[clf_name, data_name] = model(features, target, test_size, 10, clf)
            validate_df.loc[clf_name, data_name] = validate_model(features, target, test_size, clf)

In [80]:
df_holdout1

Unnamed: 0,Breast_Cancer,Abalone
NaiveBayes,97.902098,10.334928
KNN,99.300699,21.052632
DecisionTree,94.405594,22.200957


In [81]:
df_holdout2

Unnamed: 0,Breast_Cancer,Abalone
NaiveBayes,91.489362,11.457578
KNN,96.276596,19.361856
DecisionTree,92.021277,19.361856


In [82]:
df_random1

Unnamed: 0,Breast_Cancer,Abalone
NaiveBayes,93.286713,9.980861
KNN,96.013986,20.373206
DecisionTree,91.678322,19.69378


In [83]:
df_random2

Unnamed: 0,Breast_Cancer,Abalone
NaiveBayes,92.978723,10.812183
KNN,95.638298,20.493111
DecisionTree,93.031915,19.448876


In [84]:
df_validate1

Unnamed: 0,Breast_Cancer,Abalone
NaiveBayes,92.974983,10.205745
KNN,96.485029,20.528196
DecisionTree,91.040825,18.858146


In [85]:
df_validate2

Unnamed: 0,Breast_Cancer,Abalone
NaiveBayes,92.970389,10.20762
KNN,95.95563,20.711904
DecisionTree,89.450478,19.419983
