In [23]:
# import the required libraries
import os
import itertools
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model
from sklearn import tree
from sklearn.neural_network import MLPClassifier

# Seed NumPy's global random state once, before any split or model fit, so
# that a fresh top-to-bottom run is repeatable.
np.random.seed(seed=300)

# Load train and test data. The path is handed to np.loadtxt directly so that
# numpy opens *and closes* the file; the previous open(...) handles were never
# closed.
train = np.loadtxt(os.path.join('input', 'optdigits.tra'), delimiter=",")
test = np.loadtxt(os.path.join('input', 'optdigits.tes'), delimiter=",")

# Tick labels used on both axes of the confusion matrix figures
tick_label = ['0','1','2','3','4','5','6','7','8','9']

# Columns 0..63 are used as features, column 64 as the label
X_train = train[:,:64]
y_train = train[:,64]
X_test = test[:,:64]
y_test = test[:,64]


# Split the train set into 90% train and 10% validation.
# NOTE: a later cell re-assigns Xtr/Xval/ytr/yval after removing noisy
# instances, so re-run this cell before re-running the first tuning cells.
Xtr, Xval, ytr, yval = train_test_split(X_train, y_train, test_size=0.10)



# http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
# define a function to plot confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix, then save the figure to disk.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are drawn on the 'True label' axis and columns
        on the 'Predicted label' axis.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, optional
        When True every row is divided by its row sum before printing and
        plotting. Rows whose sum is zero are left as zeros.
    title : str, optional
        Figure title. It is also printed (when ``normalize`` is False) and
        passed unchanged to ``plt.savefig`` as the output file name.
    cmap : matplotlib colormap, optional
        Colormap used for the image.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing so the image, the colour bar and the cell text
    # all show the same values (previously the image used the raw counts).
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print(title)

    print(cm)

    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are written as-is; fractional values get two decimals so
    # the annotations stay readable inside the cells.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are taken into account.
    plt.tight_layout()
    plt.savefig(title)



# compute accuracy per class
def acc_per_class(conf_train, conf_test, flag=False):
    """
    Print a per-class accuracy table for two confusion matrices.

    For class ``i`` the accuracy is the diagonal entry ``conf[i, i]`` divided
    by the sum of row ``i``.

    Parameters
    ----------
    conf_train : 2-D array-like
        Confusion matrix shown in the first value column.
    conf_test : 2-D array-like
        Confusion matrix shown in the second value column.
    flag : bool, optional
        Selects the header only: True prints the train/test header, False
        prints the before/after removal header.

    Notes
    -----
    The number of classes is taken from the matrices (the smaller row count of
    the two) instead of being fixed at 10. A row whose sum is zero is reported
    as ``nan`` instead of dividing by zero.
    """
    conf_train = np.asarray(conf_train)
    conf_test = np.asarray(conf_test)

    if flag:
        print('Class    train accuracy     test accuracy')
    else:
        print('Class    Before Removal   After Removal')

    def _row_accuracy(conf, i):
        # Fraction of row i that lies on the diagonal; nan for an empty row.
        total = np.sum(conf[i, :])
        return float(conf[i, i]) / total if total else float('nan')

    n_classes = min(conf_train.shape[0], conf_test.shape[0])
    for i in range(n_classes):
        train_acc = _row_accuracy(conf_train, i)
        test_acc = _row_accuracy(conf_test, i)
        print(' {}       {:.4f}           {:.4f}'.format(i, train_acc, test_acc))


In [2]:

##################################################
#                                                #
# Tuning the hyper-parameter k of KNN Classifier #
#                                                #
##################################################

# candidate k values (1, 3, 5, 7, ... , 19)
k_array = np.arange(1,20,2)

# validation accuracy of each candidate, in the order of k_array
val_scores_knn = []

# fit one KNN per k on the 90% split and score it on the validation split
print("\n"+50*"#")
print('Hyper-parameters tunning for knn...')
print(50*"#")
best_knn = None
best_acc_knn = -1
for k in k_array:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(Xtr, ytr)
    val_acc_knn = knn.score(Xval, yval) # accuracy for validation set
    val_scores_knn.append(val_acc_knn)
    # strict '>' keeps the first (smallest) k when several k tie
    if val_acc_knn > best_acc_knn:
        best_knn = knn
        best_acc_knn = val_acc_knn

# np.argmax returns the first maximum, which matches the tie rule above
best_k = k_array[int(np.argmax(val_scores_knn))]


print ("Best n_neighbors: {}\n".format(best_k))

# Validation accuracy of the best model; already computed in the loop, so it
# is not re-scored here.
print("Validation accuracy (KNN): {:.4f}".format(best_acc_knn))



# Refit with the chosen k on the full train set and time the fit.
# time.perf_counter() is used because it is meant for short intervals.
print('\nComputing knn training time...')
start = time.perf_counter()
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(X_train, y_train)
elapsed = time.perf_counter() - start
print("n_neighbors: {}, training took {:.4f} seconds.\n".format(best_k, elapsed))

# train set confusion matrix
y_pred_train_knn = knn.predict(X_train)
conf_knn_train = confusion_matrix(y_train, y_pred_train_knn)
plot_confusion_matrix(conf_knn_train, classes=tick_label, title="Confusion Matrix of KNN (Train)")



# Time prediction on the test set (the timer starts after the banner print
# so that only predict() is measured).
print('\nComputing knn test time...')
start = time.perf_counter()
y_pred_test_knn = knn.predict(X_test)
elapsed = time.perf_counter() - start
print("n_neighbors: {} test took {:.4f} seconds.\n".format(best_k, elapsed))
# Accuracy is derived from the predictions just made; calling knn.score()
# would run the (slow) KNN prediction a second time.
print("Test accuracy (KNN): {:.4f}\n".format(np.mean(y_pred_test_knn == y_test)))

# plot test set confusion matrix
conf_knn_test = confusion_matrix(y_test, y_pred_test_knn)
plot_confusion_matrix(conf_knn_test, classes=tick_label, title="Confusion Matrix of KNN (Test)")




##################################################
Hyper-parameters tunning for knn...
##################################################
Best n_neighbors: 1

Validation accuracy (KNN): 0.9896

Computing knn training time...
n_neighbors: 1, training took 0.0064 seconds.

Confusion Matrix of KNN (Train)
[[376   0   0   0   0   0   0   0   0   0]
 [  0 389   0   0   0   0   0   0   0   0]
 [  0   0 380   0   0   0   0   0   0   0]
 [  0   0   0 389   0   0   0   0   0   0]
 [  0   0   0   0 387   0   0   0   0   0]
 [  0   0   0   0   0 376   0   0   0   0]
 [  0   0   0   0   0   0 377   0   0   0]
 [  0   0   0   0   0   0   0 387   0   0]
 [  0   0   0   0   0   0   0   0 380   0]
 [  0   0   0   0   0   0   0   0   0 382]]

Computing knn test time...
n_neighbors: 1 test took 0.5024 seconds.

Test accuracy (KNN): 0.9800

Confusion Matrix of KNN (Test)
[[178   0   0   0   0   0   0   0   0   0]
 [  0 181   0   0   0   0   0   0   1   0]
 [  0   2 175   0   0   0   0   0   0   0]
 [  0

In [3]:

##################################################
#                                                #
#    Tuning the hyper-parameter: max depth of    #
#                Decision Tree                   #
#                                                #
##################################################

# candidate max depth values (1 .. 50)
max_depth_array = np.arange(1,51)

# validation accuracy of each candidate, in the order of max_depth_array
val_scores_dtree = []

# fit one tree per depth on the 90% split and score it on the validation split
print("\n\n"+50*"#")
print('Hyper-parameters tunning for Decision Tree...')
print(50*"#")
best_dtree = None
best_acc_dtree = -1
for d in max_depth_array:
    dtree = tree.DecisionTreeClassifier(max_depth=d)
    dtree.fit(Xtr, ytr)
    val_acc_dtree = dtree.score(Xval, yval) # accuracy for validation set
    val_scores_dtree.append(val_acc_dtree)
    # strict '>' keeps the first (shallowest) depth when several depths tie
    if val_acc_dtree > best_acc_dtree:
        best_dtree = dtree
        best_acc_dtree = val_acc_dtree

# np.argmax returns the first maximum, which matches the tie rule above
best_depth = max_depth_array[int(np.argmax(val_scores_dtree))]


print ("Best max_depth: {}\n".format(best_depth))


# Validation accuracy of the best model; already computed in the loop
print("Validation accuracy (Decision Tree): {:.4f}\n".format(best_acc_dtree))


# Refit with the chosen depth on the full train set and time the fit
print('\nComputing Decision Tree training time...')
start = time.perf_counter()
dtree = tree.DecisionTreeClassifier(max_depth=best_depth)
dtree.fit(X_train, y_train)
elapsed = time.perf_counter() - start
print("max_depth: {}, training took {:.4f} seconds.\n".format(best_depth, elapsed))


# train set confusion matrix
y_pred_train_dtree = dtree.predict(X_train)
conf_dtree_train = confusion_matrix(y_train, y_pred_train_dtree)
plot_confusion_matrix(conf_dtree_train, classes=tick_label, title="Confusion Matrix of Decision Tree (Train)")


# Time prediction on the test set (timer starts after the banner print)
print('\nComputing Decision Tree test time...')
start = time.perf_counter()
y_pred_test_dtree = dtree.predict(X_test)
elapsed = time.perf_counter() - start
print("max_depth: {}, test took {:.4f} seconds.\n".format(best_depth, elapsed))
# accuracy from the predictions above instead of predicting again via score()
print("Test accuracy (Decision Tree): {:.4f}\n".format(np.mean(y_pred_test_dtree == y_test)))

# plot test set confusion matrix
conf_dtree_test = confusion_matrix(y_test, y_pred_test_dtree)
plot_confusion_matrix(conf_dtree_test, classes=tick_label, title="Confusion Matrix of Decision Tree (Test)")




##################################################
Hyper-parameters tunning for Decision Tree...
##################################################
Best max_depth: 33

Validation accuracy (Decision Tree): 0.9217


Computing Decision Tree training time...
max_depth: 33, training took 0.0393 seconds.

Confusion Matrix of Decision Tree (Train)
[[376   0   0   0   0   0   0   0   0   0]
 [  0 389   0   0   0   0   0   0   0   0]
 [  0   0 380   0   0   0   0   0   0   0]
 [  0   0   0 389   0   0   0   0   0   0]
 [  0   0   0   0 387   0   0   0   0   0]
 [  0   0   0   0   0 376   0   0   0   0]
 [  0   0   0   0   0   0 377   0   0   0]
 [  0   0   0   0   0   0   0 387   0   0]
 [  0   0   0   0   0   0   0   0 380   0]
 [  0   0   0   0   0   0   0   0   0 382]]

Computing Decision Tree test time...
max_depth: 33, test took 0.0007 seconds.

Test accuracy (Decision Tree): 0.8559

Confusion Matrix of Decision Tree (Test)
[[173   0   1   0   2   2   0   0   0   0]
 [  0 161   0   7   1

In [4]:

##################################################
#                                                #
#        Tuning the hyper-parameter for          #
#            linear discrimination:              #
#            regularization penalty              #
#                                                #
##################################################

# candidate regularization strengths (SGDClassifier alpha)
regs = [0.0001, 0.001, 0.01, 0.1, 1, 10]

# validation accuracy of each candidate, in the order of regs
val_scores_sgd = []

# fit one linear classifier per alpha and score it on the validation split
print('\n'+50*'#')
print('Hyper-parameters tunning for linear classifier...')
print(50*'#')
best_sgd = None
best_acc_sgd = -1
for reg in regs:
    sgd = linear_model.SGDClassifier(alpha=reg)
    sgd.fit(Xtr, ytr)
    val_acc_sgd = sgd.score(Xval, yval) # accuracy for validation set
    val_scores_sgd.append(val_acc_sgd)
    # strict '>' keeps the first (smallest) alpha when several alphas tie
    if val_acc_sgd > best_acc_sgd:
        best_sgd = sgd
        best_acc_sgd = val_acc_sgd

# np.argmax returns the first maximum, which matches the tie rule above
best_reg = regs[int(np.argmax(val_scores_sgd))]
print ("Best alpha: {}\n".format(best_reg))


# Validation predictions and accuracy of the best model (the accuracy was
# already computed in the loop, so it is not re-scored)
y_pred_val_sgd = best_sgd.predict(Xval)
print("Validation accuracy (Linear classifier): {:.4f}".format(best_acc_sgd))


# Refit with the chosen alpha on the full train set and time the fit
print('\nComputing linear classifier training time...')
start = time.perf_counter()
sgd = linear_model.SGDClassifier(alpha=best_reg)
sgd.fit(X_train, y_train)
elapsed = time.perf_counter() - start
print("alpha: {}, training took {:.4f} seconds.\n".format(best_reg, elapsed))

# train set confusion matrix
y_pred_train_sgd = sgd.predict(X_train)
conf_sgd_train = confusion_matrix(y_train, y_pred_train_sgd)
plot_confusion_matrix(conf_sgd_train, classes=tick_label,title="Confusion Matrix of linear classifier (Train)")


# Time prediction on the test set (timer starts after the banner print)
print('\nComputing linear classifier test time...')
start = time.perf_counter()
y_pred_test_sgd = sgd.predict(X_test)
elapsed = time.perf_counter() - start
print("alpha: {}, test took {:.4f} seconds.\n".format(best_reg, elapsed))
# accuracy from the predictions above instead of predicting again via score()
print("Test accuracy (linear classifier): {:.4f}\n".format(np.mean(y_pred_test_sgd == y_test)))

# plot test set confusion matrix
conf_sgd_test = confusion_matrix(y_test, y_pred_test_sgd)
plot_confusion_matrix(conf_sgd_test, classes=tick_label,title="Confusion Matrix of linear classifier (Test)")




##################################################
Hyper-parameters tunning for linear classifier...
##################################################
Best alpha: 1

Validation accuracy (Linear classifier): 0.9504

Computing linear classifier training time...
alpha: 1, training took 0.0263 seconds.

Confusion Matrix of linear classifier (Train)
[[374   0   0   0   1   0   1   0   0   0]
 [  0 364   5   0   0   0   2   2  12   4]
 [  0   1 369   2   0   0   1   1   5   1]
 [  0   0   3 371   0   5   0   1   3   6]
 [  1   1   0   0 369   0   3   0   5   8]
 [  0   0   3   1   0 364   0   0   0   8]
 [  1   2   0   0   1   0 373   0   0   0]
 [  0   0   1   1   2   0   0 382   1   0]
 [  1  12   3   1   5   2   4   0 351   1]
 [  1   7   0   2   7   4   0   5   5 351]]

Computing linear classifier test time...
alpha: 1, test took 0.0010 seconds.

Test accuracy (linear classifier): 0.9349

Confusion Matrix of linear classifier (Test)
[[175   0   0   0   1   2   0   0   0   0]
 [  0 161 

In [6]:

##################################################
#                                                #
#        Tuning the hyper-parameters:            #
#           Multilayer perceptron                #
#                                                #
##################################################


# grid search over hidden layer sizes and regularization strength (alpha)
print('\n'+50*'#')
print('Hyper-parameters tunning for MLP...')
print(50*'#')

best_mlp = None
best_acc_mlp = -1
best_hls = None
# NOTE: best_reg and regs are the same names the linear classifier cell uses;
# after this cell they hold the MLP values.
best_reg = None
# validation accuracy of every (hidden_layer_sizes, alpha) pair, in grid order
val_scores_mlp = []
hls = [64, 128, 256, (64,64), (128,128), (256,256)]
regs = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]
for hl in hls:
    for reg in regs:
        mlp = MLPClassifier(solver='adam', alpha=reg, hidden_layer_sizes=hl)
        mlp.fit(Xtr, ytr)
        val_acc_mlp = mlp.score(Xval, yval) # accuracy for validation set
        # Record the MLP score. This line used to append the stale
        # val_acc_sgd to val_scores_sgd, which stored nothing about the MLP
        # and polluted the linear classifier's score list.
        val_scores_mlp.append(val_acc_mlp)
        # strict '>' keeps the first grid point when several tie
        if val_acc_mlp > best_acc_mlp:
            best_mlp = mlp
            best_acc_mlp = val_acc_mlp
            best_hls = hl
            best_reg = reg

# print best hyper-parameters
print ("Best hidden_layer_sizes: {}, Best alpha: {}\n".format(best_hls, best_reg))


# Validation accuracy of the best model; already computed in the loop
print("Validation accuracy (MLP): {:.4f}".format(best_acc_mlp))


# Refit with the chosen hyper-parameters on the full train set and time it
print('\nComputing MLP training time...')
start = time.perf_counter()
mlp = MLPClassifier(solver='adam', alpha=best_reg, hidden_layer_sizes=best_hls)
mlp.fit(X_train, y_train)
elapsed = time.perf_counter() - start
print("hidden_layer_sizes: {}, alpha: {}, training took {:.4f} seconds.\n".format(best_hls, best_reg,
                                                                                   elapsed))


# plot train set confusion matrix
y_pred_train_mlp = mlp.predict(X_train)
conf_mlp_train = confusion_matrix(y_train, y_pred_train_mlp)
plot_confusion_matrix(conf_mlp_train, classes=tick_label,title="Confusion Matrix of MLP (Train)")


# Time prediction on the test set (timer starts after the banner print)
print('\nComputing MLP test time...')
start = time.perf_counter()
y_pred_test_mlp = mlp.predict(X_test)
elapsed = time.perf_counter() - start
print("hidden_layer_sizes: {}, alpha: {}, test took {:.4f} seconds.".format(best_hls, best_reg,
                                                                            elapsed))
# test accuracy from the predictions above instead of predicting again
print("Test accuracy (MLP): {:.4f}\n".format(np.mean(y_pred_test_mlp == y_test)))

# plot test set confusion matrix
conf_mlp_test = confusion_matrix(y_test, y_pred_test_mlp)
plot_confusion_matrix(conf_mlp_test, classes=tick_label,title="Confusion Matrix of MLP (Test)")



##################################################
Hyper-parameters tunning for MLP...
##################################################
Best hidden_layer_sizes: 64, Best alpha: 0.1

Validation accuracy (MLP): 0.9948

Computing MLP training time...
hidden_layer_sizes: 64, alpha: 0.1, training took 2.1639 seconds.

Confusion Matrix of MLP (Train)
[[376   0   0   0   0   0   0   0   0   0]
 [  0 389   0   0   0   0   0   0   0   0]
 [  0   0 380   0   0   0   0   0   0   0]
 [  0   0   0 389   0   0   0   0   0   0]
 [  0   0   0   0 387   0   0   0   0   0]
 [  0   0   0   0   0 376   0   0   0   0]
 [  0   0   0   0   0   0 377   0   0   0]
 [  0   0   0   0   0   0   0 387   0   0]
 [  0   0   0   0   0   0   0   0 380   0]
 [  0   0   0   0   0   0   0   0   0 382]]

Computing MLP test time...
hidden_layer_sizes: 64, alpha: 0.1, test took 0.0030 seconds.
Test accuracy (MLP): 0.9599

Confusion Matrix of MLP (Test)
[[177   0   0   0   0   0   1   0   0   0]
 [  0 179   0   0   0   0 

In [15]:



# Per-class accuracy on the train and test confusion matrices, one table per
# classifier. Each entry is (name shown in the heading, train matrix, test
# matrix); the tables are printed in this order.
for model_name, cm_train, cm_test in (
        ('KNN', conf_knn_train, conf_knn_test),
        ('Decision Tree', conf_dtree_train, conf_dtree_test),
        ('Linear Classifier', conf_sgd_train, conf_sgd_test),
        ('MLP', conf_mlp_train, conf_mlp_test),
):
    print('\n\n'+40*'#')
    print('Accuracy per class ({})'.format(model_name))
    acc_per_class(cm_train, cm_test, flag=True)





########################################
Accuracy per class (KNN)
Class    train accuracy     test accuracy
 0       1.0000           1.0000
 1       1.0000           0.9945
 2       1.0000           0.9887
 3       1.0000           0.9781
 4       1.0000           0.9834
 5       1.0000           0.9835
 6       1.0000           1.0000
 7       1.0000           0.9888
 8       1.0000           0.9425
 9       1.0000           0.9389


########################################
Accuracy per class (Decision Tree)
Class    train accuracy     test accuracy
 0       1.0000           0.9719
 1       1.0000           0.8846
 2       1.0000           0.8023
 3       1.0000           0.8306
 4       1.0000           0.7790
 5       1.0000           0.8791
 6       1.0000           0.9503
 7       1.0000           0.7542
 8       1.0000           0.8391
 9       1.0000           0.8667


########################################
Accuracy per class (Linear Classifier)
Class    train accuracy     

In [16]:

##################################################
#                                                #
#           Removing noisy instances             #
#                                                #
##################################################

# A train instance counts as "noisy" for a model when that model's prediction
# on the train set differs from the label. Each mask is kept wrapped in a
# one-element list.
# NOTE: this reads the y_pred_train_* arrays produced by the tuning cells on
# the full train set; they must have the same length as y_train.
idx_knn_noisy = [np.not_equal(y_train, y_pred_train_knn)]
idx_dtree_noisy = [np.not_equal(y_train, y_pred_train_dtree)]
idx_linear_noisy = [np.not_equal(y_train, y_pred_train_sgd)]
idx_mlp_noisy = [np.not_equal(y_train, y_pred_train_mlp)]

print('\n\n\n'+50*'#')
print('Number of noisy instances per model.')
for model_label, model_mask in (
        ('knn', idx_knn_noisy),
        ('decision tree', idx_dtree_noisy),
        ('linear classifier', idx_linear_noisy),
        ('mlp', idx_mlp_noisy),
):
    print('{}: {}'.format(model_label, np.sum(model_mask)))


# An instance is noisy overall when at least one model misclassifies it
idx_noisy = [np.logical_or.reduce([
    idx_linear_noisy[0],
    idx_dtree_noisy[0],
    idx_knn_noisy[0],
    idx_mlp_noisy[0],
])]

total_noisy = np.sum(idx_noisy)
print('\nTotal number of missclassified instaces: {}'.format(total_noisy))

# Share of the train set that is misclassified by at least one model
noisy_percent = total_noisy/float(len(y_train))*100
print('{:.2f} percent of train data are missclasified.'.format(noisy_percent))

# Keep only the instances that every model classified correctly
idx_correct = np.logical_not(idx_noisy[0])
X_train_new = X_train[idx_correct]
y_train_new = y_train[idx_correct]


# Split the cleaned train set into 90% train and 10% validation.
# This re-assigns Xtr/Xval/ytr/yval used by the tuning cells.
Xtr, Xval, ytr, yval = train_test_split(X_train_new, y_train_new, test_size=0.10)






##################################################
Number of noisy instances per model.
knn: 0
decision tree: 0
linear classifier: 155
mlp: 0

Total number of missclassified instaces: 155
4.05 percent of train data are missclasified.


In [18]:

##################################################
#                                                #
# Tuning the hyper-parameter k of KNN Classifier #
#                After Removal                   #
#                                                #
##################################################

# NOTE: this cell re-assigns k_array, val_scores_knn, best_knn, best_k, knn,
# y_pred_train_knn and y_pred_test_knn from the before-removal KNN cell.
# y_pred_train_knn then has the length of y_train_new, so the cell that
# compares it against y_train must not be re-run afterwards without first
# re-running the before-removal cells.

# candidate k values (1, 3, 5, 7, ... , 19)
k_array = np.arange(1,20,2)

# validation accuracy of each candidate, in the order of k_array
val_scores_knn = []

# fit one KNN per k on the current Xtr/ytr split and score it on Xval/yval
print("\n"+50*"#")
print('Hyper-parameters tunning for knn after removal...')
print(50*"#")
best_knn = None
best_acc_knn = -1
for k in k_array:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(Xtr, ytr)
    val_acc_knn = knn.score(Xval, yval) # accuracy for validation set
    val_scores_knn.append(val_acc_knn)
    # strict '>' keeps the first (smallest) k when several k tie
    if val_acc_knn > best_acc_knn:
        best_knn = knn
        best_acc_knn = val_acc_knn

# np.argmax returns the first maximum, which matches the tie rule above
best_k = k_array[int(np.argmax(val_scores_knn))]


print ("Best n_neighbors after removal: {}\n".format(best_k))

# Validation accuracy of the best model; already computed in the loop
print("Validation accuracy after removal (KNN): {:.4f}".format(best_acc_knn))



# Refit with the chosen k on the cleaned train set and time the fit
print('\nComputing knn training time after removal ...')
start = time.perf_counter()
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(X_train_new, y_train_new)
elapsed = time.perf_counter() - start
print("n_neighbors: {}, training took {:.4f} seconds.\n".format(best_k, elapsed))

# confusion matrix on the cleaned train set
y_pred_train_knn = knn.predict(X_train_new)
conf_knn_train_re = confusion_matrix(y_train_new, y_pred_train_knn)
plot_confusion_matrix(conf_knn_train_re, classes=tick_label, title="Confusion Matrix of KNN after removal (Train)")



# Time prediction on the test set (timer starts after the banner print)
print('\nComputing knn test time after removal ...')
start = time.perf_counter()
y_pred_test_knn = knn.predict(X_test)
elapsed = time.perf_counter() - start
print("n_neighbors: {} test took {:.4f} seconds.\n".format(best_k, elapsed))
# Accuracy from the predictions above; knn.score() would run the (slow) KNN
# prediction a second time.
print("Test accuracy after removal (KNN): {:.4f}\n".format(np.mean(y_pred_test_knn == y_test)))

# plot test set confusion matrix
conf_knn_test_re = confusion_matrix(y_test, y_pred_test_knn)
plot_confusion_matrix(conf_knn_test_re, classes=tick_label, title="Confusion Matrix of KNN after removal (Test)")




##################################################
Hyper-parameters tunning for knn after removal...
##################################################
Best n_neighbors after removal: 5

Validation accuracy after removal (KNN): 0.9973

Computing knn training time after removal ...
n_neighbors: 5, training took 0.0062 seconds.

Confusion Matrix of KNN after removal (Train)
[[374   0   0   0   0   0   0   0   0   0]
 [  0 364   0   0   0   0   0   0   0   0]
 [  0   0 369   0   0   0   0   0   0   0]
 [  0   0   0 370   0   0   0   0   0   1]
 [  0   0   0   0 368   0   0   1   0   0]
 [  0   0   0   1   0 363   0   0   0   0]
 [  0   0   0   0   0   0 373   0   0   0]
 [  0   2   0   0   0   0   0 380   0   0]
 [  0   5   0   0   0   0   0   0 346   0]
 [  0   0   0   1   0   0   0   0   0 350]]

Computing knn test time after removal ...
n_neighbors: 5 test took 0.5295 seconds.

Test accuracy after removal (KNN): 0.9750

Confusion Matrix of KNN after removal (Test)
[[178   0   0   0   

In [19]:

##################################################
#                                                #
#    Tuning the hyper-parameter: max depth of    #
#          Decision Tree After removal           #
#                                                #
##################################################

# NOTE: this cell re-assigns the tuning variables and the
# y_pred_train_dtree / y_pred_test_dtree arrays of the before-removal
# Decision Tree cell; y_pred_train_dtree then has the length of y_train_new.

# candidate max depth values (1 .. 50)
max_depth_array = np.arange(1,51)

# validation accuracy of each candidate, in the order of max_depth_array
val_scores_dtree = []

# fit one tree per depth on the current Xtr/ytr split and score it on Xval/yval
print("\n\n"+50*"#")
print('Hyper-parameters tunning for Decision Tree after removal...')
print(50*"#")
best_dtree = None
best_acc_dtree = -1
for d in max_depth_array:
    dtree = tree.DecisionTreeClassifier(max_depth=d)
    dtree.fit(Xtr, ytr)
    val_acc_dtree = dtree.score(Xval, yval) # accuracy for validation set
    val_scores_dtree.append(val_acc_dtree)
    # strict '>' keeps the first (shallowest) depth when several depths tie
    if val_acc_dtree > best_acc_dtree:
        best_dtree = dtree
        best_acc_dtree = val_acc_dtree

# np.argmax returns the first maximum, which matches the tie rule above
best_depth = max_depth_array[int(np.argmax(val_scores_dtree))]


print ("Best max_depth after removal: {}\n".format(best_depth))


# Validation accuracy of the best model; already computed in the loop
print("Validation accuracy (Decision Tree) after removal: {:.4f}\n".format(best_acc_dtree))


# Refit with the chosen depth on the cleaned train set and time the fit
print('\nComputing Decision Tree training time after removal ...')
start = time.perf_counter()
dtree = tree.DecisionTreeClassifier(max_depth=best_depth)
dtree.fit(X_train_new, y_train_new)
elapsed = time.perf_counter() - start
print("max_depth: {}, training took {:.4f} seconds.\n".format(best_depth, elapsed))


# confusion matrix on the cleaned train set
y_pred_train_dtree = dtree.predict(X_train_new)
conf_dtree_train_re = confusion_matrix(y_train_new, y_pred_train_dtree)
plot_confusion_matrix(conf_dtree_train_re, classes=tick_label, title="Confusion Matrix of Decision Tree after removal (Train)")


# Time prediction on the test set (timer starts after the banner print)
print('\nComputing Decision Tree test time after removal ...')
start = time.perf_counter()
y_pred_test_dtree = dtree.predict(X_test)
elapsed = time.perf_counter() - start
print("max_depth: {}, test took {:.4f} seconds.\n".format(best_depth, elapsed))
# accuracy from the predictions above instead of predicting again via score()
print("Test accuracy (Decision Tree) after removal: {:.4f}\n".format(np.mean(y_pred_test_dtree == y_test)))

# plot test set confusion matrix
conf_dtree_test_re = confusion_matrix(y_test, y_pred_test_dtree)
plot_confusion_matrix(conf_dtree_test_re, classes=tick_label, title="Confusion Matrix of Decision Tree after removal (Test)")





##################################################
Hyper-parameters tunning for Decision Tree after removal...
##################################################
Best max_depth after removal: 25

Validation accuracy (Decision Tree) after removal: 0.9428


Computing Decision Tree training time after removal ...
max_depth: 25, training took 0.0363 seconds.

Confusion Matrix of Decision Tree after removal (Train)
[[374   0   0   0   0   0   0   0   0   0]
 [  0 364   0   0   0   0   0   0   0   0]
 [  0   0 369   0   0   0   0   0   0   0]
 [  0   0   0 371   0   0   0   0   0   0]
 [  0   0   0   0 369   0   0   0   0   0]
 [  0   0   0   0   0 364   0   0   0   0]
 [  0   0   0   0   0   0 373   0   0   0]
 [  0   0   0   0   0   0   0 382   0   0]
 [  0   0   0   0   0   0   0   0 351   0]
 [  0   0   0   0   0   0   0   0   0 351]]

Computing Decision Tree test time after removal ...
max_depth: 25, test took 0.0008 seconds.

Test accuracy (Decision Tree) after removal: 0.8637

Confu

In [20]:


##################################################
#                                                #
#        Tuning the hyper-parameter for          #
#      linear discrimination after removal:      #
#            regularization penalty              #
#                                                #
##################################################

# NOTE: this cell re-assigns the tuning variables and the
# y_pred_train_sgd / y_pred_test_sgd arrays of the before-removal linear
# classifier cell; y_pred_train_sgd then has the length of y_train_new.

# candidate regularization strengths (SGDClassifier alpha)
regs = [0.0001, 0.001, 0.01, 0.1, 1, 10]

# validation accuracy of each candidate, in the order of regs
val_scores_sgd = []

# fit one linear classifier per alpha on the current Xtr/ytr split
print('\n'+50*'#')
print('Hyper-parameters tunning for linear classifier after removal...')
print(50*'#')
best_sgd = None
best_acc_sgd = -1
for reg in regs:
    sgd = linear_model.SGDClassifier(alpha=reg)
    sgd.fit(Xtr, ytr)
    val_acc_sgd = sgd.score(Xval, yval) # accuracy for validation set
    val_scores_sgd.append(val_acc_sgd)
    # strict '>' keeps the first (smallest) alpha when several alphas tie
    if val_acc_sgd > best_acc_sgd:
        best_sgd = sgd
        best_acc_sgd = val_acc_sgd

# np.argmax returns the first maximum, which matches the tie rule above
best_reg = regs[int(np.argmax(val_scores_sgd))]
print ("Best alpha: {}\n".format(best_reg))


# Validation predictions and accuracy of the best model (the accuracy was
# already computed in the loop, so it is not re-scored)
y_pred_val_sgd = best_sgd.predict(Xval)
print("Validation accuracy (Linear classifier) after removal: {:.4f}".format(best_acc_sgd))


# Refit with the chosen alpha on the cleaned train set and time the fit
print('\nComputing linear classifier training time after removal...')
start = time.perf_counter()
sgd = linear_model.SGDClassifier(alpha=best_reg)
sgd.fit(X_train_new, y_train_new)
elapsed = time.perf_counter() - start
print("alpha: {}, training took {:.4f} seconds.\n".format(best_reg, elapsed))

# confusion matrix on the cleaned train set
y_pred_train_sgd = sgd.predict(X_train_new)
conf_sgd_train_re = confusion_matrix(y_train_new, y_pred_train_sgd)
plot_confusion_matrix(conf_sgd_train_re, classes=tick_label,title="Confusion Matrix of linear classifier after removal (Train)")


# Time prediction on the test set (timer starts after the banner print)
print('\nComputing linear classifier test time after removal...')
start = time.perf_counter()
y_pred_test_sgd = sgd.predict(X_test)
elapsed = time.perf_counter() - start
print("alpha: {}, test took {:.4f} seconds.\n".format(best_reg, elapsed))
# accuracy from the predictions above instead of predicting again via score()
print("Test accuracy after removal (linear classifier): {:.4f}\n".format(np.mean(y_pred_test_sgd == y_test)))

# plot test set confusion matrix
conf_sgd_test_re = confusion_matrix(y_test, y_pred_test_sgd)
plot_confusion_matrix(conf_sgd_test_re, classes=tick_label,title="Confusion Matrix of linear classifier after removal (Test)")




##################################################
Hyper-parameters tunning for linear classifier after removal...
##################################################
Best alpha: 0.0001

Validation accuracy (Linear classifier) after removal: 0.9837

Computing linear classifier training time after removal...
alpha: 0.0001, training took 0.0246 seconds.

Confusion Matrix of linear classifier after removal (Train)
[[370   0   1   0   1   0   0   0   0   2]
 [  0 364   0   0   0   0   0   0   0   0]
 [  0   1 363   0   0   0   0   0   1   4]
 [  0   5   0 355   0   0   0   0   0  11]
 [  0   4   0   0 357   0   0   0   0   8]
 [  0   3   1   0   0 339   6   0   0  15]
 [  0   0   0   0   0   0 373   0   0   0]
 [  0   1   0   0   0   0   0 376   0   5]
 [  0  17   0   0   0   0   5   0 327   2]
 [  0   0   0   0   0   0   0   0   0 351]]

Computing linear classifier test time after removal...
alpha: 0.0001, test took 0.0010 seconds.

Test accuracy after removal (linear classifier): 0.9215


In [21]:

##################################################
#                                                #
#   Tuning the hyper-parameters after removal:   #
#           Multilayer perceptron                #
#                                                #
##################################################

# NOTE: this cell re-assigns the tuning variables and the
# y_pred_train_mlp / y_pred_test_mlp arrays of the before-removal MLP cell;
# y_pred_train_mlp then has the length of y_train_new. best_reg and regs are
# also the names used by the linear classifier cells.

# grid search over hidden layer sizes and regularization strength (alpha)
print('\n'+50*'#')
print('Hyper-parameters tunning for MLP after removal...')
print(50*'#')
best_mlp = None
best_acc_mlp = -1
best_hls = None
best_reg = None
# validation accuracy of every (hidden_layer_sizes, alpha) pair, in grid order
val_scores_mlp = []
hls = [64, 128, 256, (64,64), (128,128), (256,256)]
regs = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]
for hl in hls:
    for reg in regs:
        mlp = MLPClassifier(solver='adam', alpha=reg, hidden_layer_sizes=hl)
        mlp.fit(Xtr, ytr)
        val_acc_mlp = mlp.score(Xval, yval) # accuracy for validation set
        # Record the MLP score. This line used to append the stale
        # val_acc_sgd to val_scores_sgd, which stored nothing about the MLP
        # and polluted the linear classifier's score list.
        val_scores_mlp.append(val_acc_mlp)
        # strict '>' keeps the first grid point when several tie
        if val_acc_mlp > best_acc_mlp:
            best_mlp = mlp
            best_acc_mlp = val_acc_mlp
            best_hls = hl
            best_reg = reg

# print best hyper-parameters
print ("Best hidden_layer_sizes: {}, Best alpha: {}\n".format(best_hls, best_reg))


# Validation accuracy of the best model; already computed in the loop
print("Validation accuracy after removal (MLP): {:.4f}".format(best_acc_mlp))


# Refit with the chosen hyper-parameters on the cleaned train set and time it
print('\nComputing MLP training time after removal ...')
start = time.perf_counter()
mlp = MLPClassifier(solver='adam', alpha=best_reg, hidden_layer_sizes=best_hls)
mlp.fit(X_train_new, y_train_new)
elapsed = time.perf_counter() - start
print("hidden_layer_sizes: {}, alpha: {}, training took {:.4f} seconds.\n".format(best_hls, best_reg,
                                                                                   elapsed))


# confusion matrix on the cleaned train set
y_pred_train_mlp = mlp.predict(X_train_new)
conf_mlp_train_re = confusion_matrix(y_train_new, y_pred_train_mlp)
plot_confusion_matrix(conf_mlp_train_re, classes=tick_label,title="Confusion Matrix of MLP after removal (Train)")


# Time prediction on the test set (timer starts after the banner print)
print('\nComputing MLP test time after removal ...')
start = time.perf_counter()
y_pred_test_mlp = mlp.predict(X_test)
elapsed = time.perf_counter() - start
print("hidden_layer_sizes: {}, alpha: {}, test took {:.4f} seconds.".format(best_hls, best_reg,
                                                                            elapsed))
# test accuracy from the predictions above instead of predicting again
print("Test accuracy (MLP): {:.4f}\n".format(np.mean(y_pred_test_mlp == y_test)))

# plot test set confusion matrix
conf_mlp_test_re = confusion_matrix(y_test, y_pred_test_mlp)
plot_confusion_matrix(conf_mlp_test_re, classes=tick_label,title="Confusion Matrix of MLP after removal (Test)")




##################################################
Hyper-parameters tunning for MLP after removal...
##################################################
Best hidden_layer_sizes: 64, Best alpha: 1

Validation accuracy after removal (MLP): 1.0000

Computing MLP training time after removal ...
hidden_layer_sizes: 64, alpha: 1, training took 4.4081 seconds.

Confusion Matrix of MLP after removal (Train)
[[374   0   0   0   0   0   0   0   0   0]
 [  0 364   0   0   0   0   0   0   0   0]
 [  0   0 369   0   0   0   0   0   0   0]
 [  0   0   0 371   0   0   0   0   0   0]
 [  0   0   0   0 369   0   0   0   0   0]
 [  0   0   0   0   0 364   0   0   0   0]
 [  0   0   0   0   0   0 373   0   0   0]
 [  0   0   0   0   0   0   0 382   0   0]
 [  0   0   0   0   0   0   0   0 351   0]
 [  0   0   0   0   0   0   0   0   0 351]]

Computing MLP test time after removal ...
hidden_layer_sizes: 64, alpha: 1, test took 0.0035 seconds.
Test accuracy (MLP): 0.9527

Confusion Matrix of MLP after remo

In [26]:
# Per-class test accuracy before and after removing noisy instances, one
# table per classifier. Each entry is (name shown in the heading, test matrix
# before removal, test matrix after removal); tables print in this order.
for model_name, cm_before, cm_after in (
        ('KNN', conf_knn_test, conf_knn_test_re),
        ('Decision Tree', conf_dtree_test, conf_dtree_test_re),
        ('Linear Classifier', conf_sgd_test, conf_sgd_test_re),
        ('MLP', conf_mlp_test, conf_mlp_test_re),
):
    print('\n'+40*'#')
    print('Accuracy per class ({})'.format(model_name))
    acc_per_class(cm_before, cm_after)




########################################
Accuracy per class (KNN)
Class    Before Removal   After Removal
 0       1.0000           1.0000
 1       0.9835           0.9835
 2       0.9831           0.9831
 3       0.9727           0.9727
 4       0.9779           0.9779
 5       0.9890           0.9890
 6       0.9945           0.9945
 7       0.9665           0.9665
 8       0.9253           0.9253
 9       0.9556           0.9556

########################################
Accuracy per class (Decision Tree)
Class    Before Removal   After Removal
 0       0.9719           0.9551
 1       0.8846           0.9286
 2       0.8023           0.7740
 3       0.8306           0.8033
 4       0.7790           0.8508
 5       0.8791           0.9121
 6       0.9503           0.9558
 7       0.7542           0.8045
 8       0.8391           0.8391
 9       0.8667           0.8111

########################################
Accuracy per class (Linear Classifier)
Class    Before Removal   After Rem