In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.preprocessing import scale
import numpy as np
import pandas as pd

In [16]:
# Breast-cancer dataset shipped with scikit-learn (sklearn.datasets).
cancer = load_breast_cancer()
Y_cancer = cancer.target
# Features are standardised with sklearn.preprocessing.scale.
# NOTE(review): scaling is applied to the full data set, i.e. before any
# train/test split is made further down, so held-out rows contribute to the
# scaling statistics.
X_cancer = scale(cancer.data)

In [17]:
# Abalone data: the target is the 'Rings' column, the features are every
# column except the last one (assumes 'Rings' is the last column - TODO confirm).
abalone = pd.read_csv('abalone.csv')
Y_abalone = abalone['Rings']
# One-hot encode non-numeric feature columns, then standardise the result.
X_abalone = scale(pd.get_dummies(abalone.iloc[:, :-1]))

In [4]:
# Character data set: features and labels come pre-split into train/test
# files; they are merged back into one pool because the experiments below
# create their own splits.
#
# The last parsed column of each feature file is discarded.
# NOTE(review): presumably an empty column produced by a trailing separator
# on every line - confirm against the raw files.
character_X_test = pd.read_csv('x_test', sep=' ', header=None).iloc[:, :-1]
character_X_train = pd.read_csv('x_train', sep=' ', header=None).iloc[:, :-1]

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Row order (train first, then test) and the original row labels are kept,
# exactly as append() did.
X_character = pd.concat([character_X_train, character_X_test])

character_Y_test = pd.read_csv('y_test', sep=' ', header=None)
character_Y_train = pd.read_csv('y_train', sep=' ', header=None)

Y_character = pd.concat([character_Y_train, character_Y_test])
# Collapse each label row to the column label holding its maximum value
# (assumes the label files are one-hot encoded - TODO confirm).
Y_character = Y_character.idxmax(axis=1)


In [5]:
def model(X, Y, test, iteration, classifier, random_state=None):
    """Return the mean hold-out accuracy (in percent) over repeated random splits.

    Parameters
    ----------
    X, Y : array-like
        Features and targets, forwarded to ``train_test_split``.
    test : float or int
        Forwarded as ``test_size`` to ``train_test_split``.
    iteration : int
        Number of independent split / fit / score repetitions (must be >= 1).
    classifier : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refitted in
        place on every repetition.
    random_state : int, numpy.random.RandomState or None, optional
        Seed for the splits. ``None`` (default) keeps the original unseeded
        behaviour. An int seeds one generator that is shared by all
        repetitions, so a run is reproducible while each repetition still
        gets a different partition.

    Returns
    -------
    float
        Mean of ``accuracy_score * 100`` across the repetitions.

    Raises
    ------
    ValueError
        If ``iteration`` is smaller than 1 (the mean would be undefined).
    """
    if iteration < 1:
        raise ValueError("iteration must be >= 1, got %r" % (iteration,))

    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    scores = []
    for _ in range(iteration):
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test, random_state=rng
        )
        classifier.fit(X_train, Y_train)
        Y_pred = classifier.predict(X_test)
        scores.append(accuracy_score(Y_test, Y_pred) * 100)

    return np.mean(scores)


In [6]:
def validate_model(X, Y, test, classifier):
    """Return the mean cross-validation score of ``classifier`` in percent.

    The number of folds is derived from the test fraction as
    ``int(1 / test)``; the truncation means e.g. ``test=0.25`` gives 4 folds
    and ``test=0.33`` gives 3 folds.

    Parameters
    ----------
    X, Y : array-like
        Features and targets, forwarded to ``cross_val_score``.
    test : float
        Test fraction used only to compute the fold count.
    classifier : estimator
        Estimator forwarded to ``cross_val_score``.

    Returns
    -------
    float
        Mean of the per-fold scores multiplied by 100.
    """
    n_folds = int(1 / test)
    fold_scores = cross_val_score(classifier, X, Y, cv=n_folds)
    return fold_scores.mean() * 100


In [7]:
# Estimators and data sets under comparison. The dict keys become the row
# (classifier) and column (data set) labels of every result table.
classifier = {'NaiveBayes' : GaussianNB(), 'KNN': KNeighborsClassifier(3), 'DecisionTree' : DecisionTreeClassifier()}
dataset = {'Breast_Cancer' : [X_cancer, Y_cancer], 'Abalone' : [X_abalone, Y_abalone], 'Charcter' : [X_character , Y_character]}

# One table per (evaluation scheme, test fraction). Each is sized from the
# dicts above and pre-filled with NaN, so adding a classifier or data set
# needs no further change and an unfilled cell is visible as NaN rather than
# as uninitialised memory.
(df_holdout1, df_holdout2,
 df_random1, df_random2,
 df_validate1, df_validate2) = (
    pd.DataFrame(np.nan, index=list(classifier), columns=list(dataset))
    for _ in range(6)
)

# test fraction -> [single hold-out, mean of 10 random splits, cross-validation]
test = {0.25 : [df_holdout1, df_random1, df_validate1] , 0.33 : [df_holdout2, df_random2, df_validate2]}

for test_size, (holdout_df, random_df, validate_df) in test.items():
    for clf_name, clf in classifier.items():
        for data_name, (X_data, Y_data) in dataset.items():
            holdout_df.loc[clf_name, data_name] = model(X_data, Y_data, test_size, 1, clf)
            random_df.loc[clf_name, data_name] = model(X_data, Y_data, test_size, 10, clf)
            validate_df.loc[clf_name, data_name] = validate_model(X_data, Y_data, test_size, clf)

In [8]:
# Accuracy (%) from a single random hold-out split with test_size=0.25.
df_holdout1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,93.706294,11.100478,83.259259
KNN,97.202797,18.086124,91.62963
DecisionTree,95.104895,19.330144,96.666667


In [9]:
# Accuracy (%) from a single random hold-out split with test_size=0.33.
df_holdout2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,95.212766,11.312545,83.2211
KNN,96.276596,23.132705,91.638608
DecisionTree,93.085106,20.377085,96.91358


In [10]:
# Mean accuracy (%) over 10 random splits with test_size=0.25.
df_random1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,93.636364,10.411483,82.888889
KNN,96.643357,20.574163,91.940741
DecisionTree,93.356643,19.244019,96.866667


In [11]:
# Mean accuracy (%) over 10 random splits with test_size=0.33.
df_random2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.606383,9.963742,82.805836
KNN,96.542553,20.62364,91.487093
DecisionTree,92.978723,20.471356,96.548822


In [12]:
# Mean cross-validation score (%) with cv = int(1 / 0.25) = 4 folds.
df_validate1

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.974983,10.390864,82.925926
KNN,96.485029,20.446021,91.592593
DecisionTree,91.040825,19.344397,96.888889


In [13]:
# Mean cross-validation score (%) with cv = int(1 / 0.33) = 3 folds.
df_validate2

Unnamed: 0,Breast_Cancer,Abalone,Charcter
NaiveBayes,92.970389,10.294992,82.648148
KNN,95.95563,20.660895,91.111111
DecisionTree,89.974937,19.320432,96.796296
