# Imports

In [1]:
%matplotlib inline

import util
import numpy as np
import zipfile
import os
import doctest
import matplotlib.pyplot as plt
import copy

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Constants

In [2]:
# Module-level default image dimensions, in pixels.
DATUM_WIDTH, DATUM_HEIGHT = 0, 0

# Module Classes

In [3]:
# A datum is a pixel-level encoding of digits or face/non-face edge maps.

# Digits are from the MNIST dataset and face images are from the 
# easy-faces and background categories of the Caltech 101 dataset.


# Each digit is 28x28 pixels, and each face/non-face image is 60x70
# pixels (width x height); each pixel can take the following values:
# 0: no edge (blank)
# 1: gray pixel (+) [used for digits only]
# 2: edge [for face] or black pixel [for digit] (#)

# Pixel data is stored in the 2-dimensional array pixels, which
# maps to pixels on a plane according to standard euclidean axes
# with the first dimension denoting the horizontal and the second
# the vertical coordinate:

# 27 # # # #      #  #
# 26 # # # #      #  #
#  .
#  .
#  .
#  3 # # + #      #  #
#  2 # # # #      #  #
#  1 # # # #      #  #
#  0 # # # #      #  #
#    0 1 2 3 ... 26 27

# For example, the + in the above diagram is stored in pixels[2][3], or
# more generally pixels[column][row].

# The contents of the representation can be accessed directly
# via the getPixel and getPixels methods.

In [4]:
class Datum:
    """Pixel-level encoding of a single image (a digit or a face/non-face edge map).

    ``pixels`` is indexed as ``pixels[column][row]`` and holds the integer
    codes produced by ``convertToInteger``: 0 (blank), 1 ('+') or 2 ('#').
    """

    def __init__(self, data, width, height):
        """Create a new datum from file input.

        Args:
            data: Rows of characters as read from an image file (``height``
                rows of ``width`` characters), or None for a blank image.
            width: Image width in pixels.
            height: Image height in pixels.
        """
        self.height = height
        self.width = width
        # Identity test rather than ``== None``: equality would be evaluated
        # element-wise (and then fail inside ``if``) for array-like input.
        if data is None:
            data = [[' ' for _ in range(width)] for _ in range(height)]
        # NOTE(review): util.arrayInvert is defined outside this file; presumably
        # it swaps rows and columns so the result is indexed [column][row] - confirm.
        self.pixels = util.arrayInvert(convertToInteger(data))

    def getPixel(self, column, row):
        """Return the pixel code (0, 1 or 2) stored at ``column``, ``row``."""
        return self.pixels[column][row]

    def getPixels(self):
        """Return all pixels as a list of lists, indexed [column][row]."""
        return self.pixels

    def setPixels(self, pixels):
        """Replace the stored pixel grid with ``pixels`` (no copy is made)."""
        self.pixels = pixels

    def getAsciiString(self):
        """Render the datum as an ASCII image, one text line per pixel row."""
        rendered_rows = [
            "".join(map(asciiGrayscaleConversionFunction, row))
            for row in util.arrayInvert(self.pixels)
        ]
        return "\n".join(rendered_rows)

    def __str__(self):
        return self.getAsciiString()

# Data processing, cleanup and display functions

In [5]:
def loadDataFile(filename, n, width, height):
    """Read up to ``n`` images from ``filename`` and return a list of Datum objects.

    Each image occupies ``height`` consecutive lines of the file. Fewer than
    ``n`` items are returned when the end of the file is reached first.

    Args:
        filename: Path handed to ``readlines``.
        n: Maximum number of images to read.
        width: Image width in pixels (characters per line).
        height: Image height in pixels (lines per image).
    """
    lines = readlines(filename)
    items = []
    for i in range(n):
        first = i * height
        rows = lines[first:first + height]
        # Stop cleanly when the file runs out of complete images instead of
        # failing with an IndexError; a too-short first row also marks the end.
        if len(rows) < height or len(rows[0]) < width - 1:
            print ("Truncating at %d examples (maximum)" % i)
            break
        items.append(Datum([list(row) for row in rows], width, height))
    return items

In [6]:
def readlines(filename):
    """Return the lines of ``filename`` without their line terminators.

    The file is read from disk when it exists; otherwise an entry with the
    same name is read from the archive ``./data.zip``.
    """
    if os.path.exists(filename):
        with open(filename) as handle:
            # Strip only the newline: slicing off the last character would
            # drop real data when the final line has no trailing newline.
            return [line.rstrip("\n") for line in handle]
    # Diagnostic output kept from the original implementation.
    print(os.getcwd())
    with zipfile.ZipFile('./data.zip') as archive:
        liste = archive.read(filename).decode("utf-8").split("\n")
    print(len(liste))
    return liste

In [7]:
def loadLabelsFile(filename, n):
    """Read at most ``n`` labels from ``filename`` and return them as a list of ints.

    Reading stops at the first empty line.
    """
    lines = readlines(filename)
    window = lines[:min(n, len(lines))]
    # Everything from the first blank line onwards is ignored.
    end = window.index('') if '' in window else len(window)
    return [int(text) for text in window[:end]]

In [8]:
def asciiGrayscaleConversionFunction(value):
    """Map a pixel code to its display character: 0 -> ' ', 1 -> '+', 2 -> '#'.

    Any other value matches nothing and yields None.
    """
    for code, glyph in ((0, ' '), (1, '+'), (2, '#')):
        if value == code:
            return glyph
    return None

In [9]:
# #   Helper function for file reading.
# def IntegerConversionFunction(character):
#     if(character == ' '):
#         return 0
#     elif(character == '+'):
#         return 1
#     elif(character == '#'):
#         return 2    

In [10]:
def IntegerConversionFunction(character):
    """Map an image-file character to its pixel code.

    '+' becomes 1, '#' becomes 2, and every other input becomes 0.
    """
    if character == '+':
        return 1
    return 2 if character == '#' else 0

In [11]:
def convertToInteger(data):
    """Recursively convert a (nested) list of characters to pixel codes.

    Plain lists are traversed element by element; anything that is not
    exactly a list is passed to ``IntegerConversionFunction``.
    """
    if type(data) == type([]):
        return [convertToInteger(element) for element in data]
    return IntegerConversionFunction(data)

In [12]:
def fixFaceDataset(items):
    """Return deep copies of ``items`` with 10 all-zero columns appended to each.

    For every item the first 60 entries of ``pixels`` are kept and 10 lists
    of 70 zeros are appended. Presumably this pads the 60x70 face images to
    a square 70x70 grid - confirm against the dataset. The input items are
    not modified.

    Args:
        items: Sequence of objects exposing ``pixels`` and ``setPixels``.

    Returns:
        A new list of padded copies, in the same order as ``items``.
    """
    kept_columns = 60
    pad_columns = 10
    pad_length = 70

    new_items = copy.deepcopy(items)
    for item in new_items:
        # Slice the copy (not the original) so no row is shared with ``items``.
        padded = item.pixels[0:kept_columns]
        # A fresh zero list per column: reusing one list object would make
        # every padding column, in every item, alias the same data.
        padded.extend([0] * pad_length for _ in range(pad_columns))
        item.setPixels(padded)
    return new_items

In [13]:
def predictModel(model, data, labels, dataName, modelName):
    """Score ``model`` on a dataset, print the accuracy and return it.

    Args:
        model: Fitted estimator exposing ``score(data, labels)``.
        data: Feature rows to evaluate.
        labels: Expected labels for ``data``.
        dataName: Dataset name used in the printed message.
        modelName: Model name used in the printed message.

    Returns:
        The value of ``model.score`` multiplied by 100.
    """
    accuracy = model.score(data, labels) * 100
    headline = "Accuracy of the {} on the {}".format(modelName, dataName)
    detail = "dataset is {0:.2f} %".format(accuracy)
    print(headline, detail)
    return accuracy

In [14]:
def getModelPredictions(model, data, labels, dataName, modelName):
    """Predict every sample in ``data``, print the accuracy and return both.

    Args:
        model: Fitted estimator exposing ``predict(rows)``.
        data: Feature rows to predict.
        labels: Expected labels, indexable in step with ``data``.
        dataName: Dataset name used in the printed message.
        modelName: Model name used in the printed message.

    Returns:
        A ``(prediction, accuracy)`` tuple: a flat NumPy array of the
        predictions and the percentage of samples predicted correctly
        (0.0 for an empty dataset).
    """
    total = len(data)
    if total == 0:
        # Nothing to predict; avoid dividing by zero below.
        prediction = np.array([])
        accuracy = 0.0
    else:
        # One batched call instead of one predict() call per sample.
        predicted = model.predict(data)
        # Appending to an empty float array reproduces the dtype that the
        # original element-by-element accumulation yielded.
        prediction = np.append(np.array([]), predicted)
        correct_prediction = sum(
            1 for i in range(total) if predicted[i] == labels[i]
        )
        accuracy = correct_prediction * 100 / total

    print ("Accuracy of the {} on the {}".format(modelName, dataName),
           "dataset is {0:.2f} %".format(accuracy))

    return prediction, accuracy

In [15]:
def getGridSearch(params, modelEstimator, features, labels, n_jobs =os.cpu_count()):
    """Run a 10-fold cross-validated grid search and return the fitted search object.

    Args:
        params: Parameter grid passed as ``param_grid``.
        modelEstimator: Estimator instance to tune.
        features: Training feature rows.
        labels: Training labels.
        n_jobs: Number of parallel jobs; the default is ``os.cpu_count()``
            evaluated once, when this function is defined.
    """
    search_options = {
        "estimator": modelEstimator,
        "param_grid": params,
        "cv": 10,
        "n_jobs": n_jobs,
        "verbose": 4,
    }
    grid = GridSearchCV(**search_options)
    grid.fit(features, labels)
    return grid

# Digit Dataset

## Training Dataset

In [16]:
# Load up to 5000 labelled 28x28 digit images for training.
digit_training_amount = 5000
digit_training_labels = loadLabelsFile(
    "./data/digitdata/traininglabels",
    digit_training_amount,
)
digit_training_items = loadDataFile(
    "./data/digitdata/trainingimages",
    digit_training_amount,
    28,
    28,
)
# One flattened pixel vector per image.
digit_training_features = np.array(
    [np.asarray(item.getPixels()).ravel() for item in digit_training_items]
)

## Validation Dataset

In [17]:
# Load up to 1000 labelled 28x28 digit images for validation.
digit_validation_amount = 1000
digit_validation_labels = loadLabelsFile(
    "./data/digitdata/validationlabels",
    digit_validation_amount,
)
digit_validation_items = loadDataFile(
    "./data/digitdata/validationimages",
    digit_validation_amount,
    28,
    28,
)
# One flattened pixel vector per image.
digit_validation_features = np.array(
    [np.asarray(item.getPixels()).ravel() for item in digit_validation_items]
)

## Test Dataset

In [18]:
# Load up to 1000 labelled 28x28 digit images for testing.
digit_test_amount = 1000
digit_test_labels = loadLabelsFile(
    "./data/digitdata/testlabels",
    digit_test_amount,
)
digit_test_items = loadDataFile(
    "./data/digitdata/testimages",
    digit_test_amount,
    28,
    28,
)
# One flattened pixel vector per image.
digit_test_features = np.array(
    [np.asarray(item.getPixels()).ravel() for item in digit_test_items]
)

# Face Dataset

## Training Dataset

In [22]:
# Load up to 451 labelled face/non-face images (read as 60 wide, 70 high)
# for training, then pad them with fixFaceDataset.
face_training_amount = 451
face_training_labels = loadLabelsFile(
    "./data/facedata/facedatatrainlabels",
    face_training_amount,
)
face_training_items = fixFaceDataset(
    loadDataFile("./data/facedata/facedatatrain", face_training_amount, 60, 70)
)
# One flattened pixel vector per image.
face_training_features = np.array(
    [np.asarray(item.getPixels()).ravel() for item in face_training_items]
)

## Validation Dataset

In [23]:
# Load up to 301 labelled face/non-face images (read as 60 wide, 70 high)
# for validation, then pad them with fixFaceDataset.
face_validation_amount = 301
face_validation_labels = loadLabelsFile(
    "./data/facedata/facedatavalidationlabels",
    face_validation_amount,
)
face_validation_items = fixFaceDataset(
    loadDataFile("./data/facedata/facedatavalidation", face_validation_amount, 60, 70)
)
# One flattened pixel vector per image.
face_validation_features = np.array(
    [np.asarray(item.getPixels()).ravel() for item in face_validation_items]
)

## Test Dataset

In [24]:
# Load up to 150 labelled face/non-face images (read as 60 wide, 70 high)
# for testing, then pad them with fixFaceDataset.
face_test_amount = 150
face_test_labels = loadLabelsFile(
    "./data/facedata/facedatatestlabels",
    face_test_amount,
)
face_test_items = fixFaceDataset(
    loadDataFile("./data/facedata/facedatatest", face_test_amount, 60, 70)
)
# One flattened pixel vector per image.
face_test_features = np.array(
    [np.asarray(item.getPixels()).ravel() for item in face_test_items]
)

# Naive Bayes (NB)

## Digit Dataset

In [23]:
# var_smoothing is GaussianNB's variance-smoothing hyperparameter: it keeps the
# per-feature variances away from zero so the class likelihoods stay computable.
# Try a handful of magnitudes and keep the cross-validated best.
params = dict(var_smoothing=[1e-9, 1e-10, 1e-8, 1e-6, 1e-4])
digit_NB_Grid = getGridSearch(
    params,
    GaussianNB(),
    digit_training_features,
    digit_training_labels,
    n_jobs=1,
)
(digit_NB_Grid.best_score_, digit_NB_Grid.best_params_)

(0.7038, {'var_smoothing': 0.0001})

In [22]:
# Gaussian Naive Bayes with var_smoothing fixed at 1e-4, the best value
# reported by the digit grid search above.
digit_NB_model = GaussianNB(var_smoothing= 0.0001)

# Fit on the digit training split; the fitted estimator is the cell output.
digit_NB_model.fit(digit_training_features, digit_training_labels)

In [24]:
# digit_training_prediction = getModelPredictions(digit_NB_model, digit_training_features, digit_training_labels,
#                                                 "Training", "Naive Bayes")[0]

# digit_validation_prediction = getModelPredictions(digit_NB_model, digit_validation_features, 
#                                                 digit_validation_labels, "Validation", "Naive Bayes")[0]

# digit_test_prediction = getModelPredictions(digit_NB_model, digit_test_features, digit_test_labels, "Test", "Naive Bayes")[0]

In [25]:
# Print the Naive Bayes accuracy on each digit split. The value returned by the
# last call (the test accuracy) is also shown as the cell output.
predictModel(digit_NB_model, digit_training_features, digit_training_labels, "Training", "Naive Bayes")
predictModel(digit_NB_model, digit_validation_features, digit_validation_labels, "Validation", "Naive Bayes")
predictModel(digit_NB_model, digit_test_features, digit_test_labels, "Test", "Naive Bayes")

Accuracy of the Naive Bayes on the Training dataset is 72.96 %
Accuracy of the Naive Bayes on the Validation dataset is 67.10 %
Accuracy of the Naive Bayes on the Test dataset is 62.40 %


62.4

## Face Dataset

In [28]:
# var_smoothing is GaussianNB's variance-smoothing hyperparameter: it keeps the
# per-feature variances away from zero so the class likelihoods stay computable.
# Try a handful of magnitudes and keep the cross-validated best.
params = dict(var_smoothing=[1e-9, 1e-10, 1e-8, 1e-6, 1e-4])
face_NB_Grid = getGridSearch(
    params,
    GaussianNB(),
    face_training_features,
    face_training_labels,
    n_jobs=1,
)
(face_NB_Grid.best_score_, face_NB_Grid.best_params_)

(0.8403864734299518, {'var_smoothing': 1e-09})

In [25]:
# Gaussian Naive Bayes with default hyperparameters. The face grid search above
# reported var_smoothing=1e-09 as best; NOTE(review): that should equal
# scikit-learn's default for var_smoothing - confirm for the installed version.
face_NB_model = GaussianNB()

# Fit on the face training split; the fitted estimator is the cell output.
face_NB_model.fit(face_training_features, face_training_labels)

In [61]:
# face_training_prediction = getModelPredictions(face_NB_model, face_training_features, face_training_labels,
#                                                "Training", "Naive Bayes")
# face_validation_prediction = getModelPredictions(face_NB_model, face_validation_features, face_validation_labels,
#                                                  "Validation", "Naive Bayes")
# face_test_prediction = getModelPredictions(face_NB_model, face_test_features, face_test_labels,
#                                            "Test", "Naive Bayes")

In [30]:
# Print the Naive Bayes accuracy on each face split. The value returned by the
# last call (the test accuracy) is also shown as the cell output.
predictModel(face_NB_model, face_training_features, face_training_labels, "Training", "Naive Bayes")
predictModel(face_NB_model, face_validation_features, face_validation_labels, "Validation", "Naive Bayes")
predictModel(face_NB_model, face_test_features, face_test_labels, "Test", "Naive Bayes")

Accuracy of the Naive Bayes on the Training dataset is 94.01 %
Accuracy of the Naive Bayes on the Validation dataset is 84.72 %
Accuracy of the Naive Bayes on the Test dataset is 88.00 %


88.0

# KNN (K Nearest Neighbors)

## Digit Dataset

In [82]:
# Cross-validated search over every neighbourhood size from 2 to 388.
params = dict(n_neighbors=list(range(2, 389)))
digit_KNN_Grid = getGridSearch(
    params,
    KNeighborsClassifier(),
    digit_training_features,
    digit_training_labels,
)
(digit_KNN_Grid.best_score_, digit_KNN_Grid.best_params_)

(0.938, {'n_neighbors': 3})

In [83]:
# Manual sweep over k = 2..388: refit KNN for each k, report the accuracy on
# all three digit splits and track the best k under two summed scores.
# NOTE(review): both scores include the test accuracy, so a k picked from this
# sweep is tuned on the test split - confirm this is intended.
highestfull_k = highest_k = 2
highestfull_accuarcy = highest_accuarcy = 0

for k in range(2, 389):
    digit_KNN_model = KNeighborsClassifier(n_neighbors=k)
    digit_KNN_model.fit(digit_training_features, digit_training_labels)

    print("For k={} :".format(k))
    acc1 = predictModel(digit_KNN_model, digit_training_features, digit_training_labels, "Training", "KNN")
    acc2 = predictModel(digit_KNN_model, digit_validation_features, digit_validation_labels, "Validation", "KNN")
    acc3 = predictModel(digit_KNN_model, digit_test_features, digit_test_labels, "Test", "KNN")

    # Best k by training + validation + test accuracy (out of 300).
    if acc1+acc2+acc3 > highestfull_accuarcy:
        highestfull_k, highestfull_accuarcy = k, acc1+acc2+acc3

    # Best k by validation + test accuracy only (out of 200).
    if acc2+acc3 > highest_accuarcy:
        highest_k, highest_accuarcy = k, acc2 + acc3

    print ("k={} with highest accuarcy of {}/200".format(highest_k, highest_accuarcy))
    print ("k={} with Full accuarcy of {}/300".format(highestfull_k, highestfull_accuarcy))

    print("\n \n")

For k=2 :
Accuracy of the KNN on the Training dataset is 97.26 %
Accuracy of the KNN on the Validation dataset is 90.50 %
Accuracy of the KNN on the Test dataset is 89.50 %
k=2 with highest accuarcy of 180.0/200
k=2 with Full accuarcy of 277.26/300

 

For k=3 :
Accuracy of the KNN on the Training dataset is 97.52 %
Accuracy of the KNN on the Validation dataset is 91.40 %
Accuracy of the KNN on the Test dataset is 89.80 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=4 :
Accuracy of the KNN on the Training dataset is 96.56 %
Accuracy of the KNN on the Validation dataset is 90.90 %
Accuracy of the KNN on the Test dataset is 90.00 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=5 :
Accuracy of the KNN on the Training dataset is 96.42 %
Accuracy of the KNN on the Validation dataset is 90.60 %
Accuracy of the KNN on the Test dataset is 89.50 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278

Accuracy of the KNN on the Test dataset is 86.30 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=35 :
Accuracy of the KNN on the Training dataset is 91.70 %
Accuracy of the KNN on the Validation dataset is 87.40 %
Accuracy of the KNN on the Test dataset is 86.00 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=36 :
Accuracy of the KNN on the Training dataset is 91.66 %
Accuracy of the KNN on the Validation dataset is 87.20 %
Accuracy of the KNN on the Test dataset is 86.30 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=37 :
Accuracy of the KNN on the Training dataset is 91.46 %
Accuracy of the KNN on the Validation dataset is 87.00 %
Accuracy of the KNN on the Test dataset is 85.60 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=38 :
Accuracy of the KNN on the Training dataset is 91.30 %
Accuracy of the KNN on the Validation dat

Accuracy of the KNN on the Training dataset is 88.96 %
Accuracy of the KNN on the Validation dataset is 85.20 %
Accuracy of the KNN on the Test dataset is 82.70 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=68 :
Accuracy of the KNN on the Training dataset is 88.84 %
Accuracy of the KNN on the Validation dataset is 84.90 %
Accuracy of the KNN on the Test dataset is 82.80 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=69 :
Accuracy of the KNN on the Training dataset is 88.64 %
Accuracy of the KNN on the Validation dataset is 84.90 %
Accuracy of the KNN on the Test dataset is 82.60 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=70 :
Accuracy of the KNN on the Training dataset is 88.64 %
Accuracy of the KNN on the Validation dataset is 84.80 %
Accuracy of the KNN on the Test dataset is 82.60 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

Accuracy of the KNN on the Test dataset is 80.00 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=100 :
Accuracy of the KNN on the Training dataset is 86.88 %
Accuracy of the KNN on the Validation dataset is 82.60 %
Accuracy of the KNN on the Test dataset is 80.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=101 :
Accuracy of the KNN on the Training dataset is 86.82 %
Accuracy of the KNN on the Validation dataset is 82.30 %
Accuracy of the KNN on the Test dataset is 80.30 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=102 :
Accuracy of the KNN on the Training dataset is 86.66 %
Accuracy of the KNN on the Validation dataset is 82.40 %
Accuracy of the KNN on the Test dataset is 80.30 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=103 :
Accuracy of the KNN on the Training dataset is 86.64 %
Accuracy of the KNN on the Validation

Accuracy of the KNN on the Training dataset is 85.10 %
Accuracy of the KNN on the Validation dataset is 80.80 %
Accuracy of the KNN on the Test dataset is 77.80 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=133 :
Accuracy of the KNN on the Training dataset is 85.02 %
Accuracy of the KNN on the Validation dataset is 80.80 %
Accuracy of the KNN on the Test dataset is 77.70 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=134 :
Accuracy of the KNN on the Training dataset is 84.96 %
Accuracy of the KNN on the Validation dataset is 80.40 %
Accuracy of the KNN on the Test dataset is 77.70 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=135 :
Accuracy of the KNN on the Training dataset is 84.96 %
Accuracy of the KNN on the Validation dataset is 80.40 %
Accuracy of the KNN on the Test dataset is 77.70 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/

Accuracy of the KNN on the Test dataset is 75.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=165 :
Accuracy of the KNN on the Training dataset is 83.52 %
Accuracy of the KNN on the Validation dataset is 79.80 %
Accuracy of the KNN on the Test dataset is 75.10 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=166 :
Accuracy of the KNN on the Training dataset is 83.40 %
Accuracy of the KNN on the Validation dataset is 79.80 %
Accuracy of the KNN on the Test dataset is 75.10 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=167 :
Accuracy of the KNN on the Training dataset is 83.46 %
Accuracy of the KNN on the Validation dataset is 79.70 %
Accuracy of the KNN on the Test dataset is 75.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=168 :
Accuracy of the KNN on the Training dataset is 83.32 %
Accuracy of the KNN on the Validation

Accuracy of the KNN on the Training dataset is 81.96 %
Accuracy of the KNN on the Validation dataset is 78.20 %
Accuracy of the KNN on the Test dataset is 73.40 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=198 :
Accuracy of the KNN on the Training dataset is 81.90 %
Accuracy of the KNN on the Validation dataset is 78.00 %
Accuracy of the KNN on the Test dataset is 73.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=199 :
Accuracy of the KNN on the Training dataset is 81.88 %
Accuracy of the KNN on the Validation dataset is 77.70 %
Accuracy of the KNN on the Test dataset is 73.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=200 :
Accuracy of the KNN on the Training dataset is 81.82 %
Accuracy of the KNN on the Validation dataset is 77.90 %
Accuracy of the KNN on the Test dataset is 73.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/

Accuracy of the KNN on the Test dataset is 72.10 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=230 :
Accuracy of the KNN on the Training dataset is 80.46 %
Accuracy of the KNN on the Validation dataset is 76.30 %
Accuracy of the KNN on the Test dataset is 72.10 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=231 :
Accuracy of the KNN on the Training dataset is 80.46 %
Accuracy of the KNN on the Validation dataset is 76.30 %
Accuracy of the KNN on the Test dataset is 71.90 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=232 :
Accuracy of the KNN on the Training dataset is 80.52 %
Accuracy of the KNN on the Validation dataset is 76.10 %
Accuracy of the KNN on the Test dataset is 72.00 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=233 :
Accuracy of the KNN on the Training dataset is 80.50 %
Accuracy of the KNN on the Validation

Accuracy of the KNN on the Training dataset is 79.80 %
Accuracy of the KNN on the Validation dataset is 74.70 %
Accuracy of the KNN on the Test dataset is 70.50 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=263 :
Accuracy of the KNN on the Training dataset is 79.70 %
Accuracy of the KNN on the Validation dataset is 74.60 %
Accuracy of the KNN on the Test dataset is 70.70 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=264 :
Accuracy of the KNN on the Training dataset is 79.64 %
Accuracy of the KNN on the Validation dataset is 74.50 %
Accuracy of the KNN on the Test dataset is 70.60 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=265 :
Accuracy of the KNN on the Training dataset is 79.62 %
Accuracy of the KNN on the Validation dataset is 74.50 %
Accuracy of the KNN on the Test dataset is 70.50 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/

Accuracy of the KNN on the Test dataset is 69.10 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=295 :
Accuracy of the KNN on the Training dataset is 78.38 %
Accuracy of the KNN on the Validation dataset is 73.80 %
Accuracy of the KNN on the Test dataset is 68.90 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=296 :
Accuracy of the KNN on the Training dataset is 78.32 %
Accuracy of the KNN on the Validation dataset is 74.00 %
Accuracy of the KNN on the Test dataset is 69.10 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=297 :
Accuracy of the KNN on the Training dataset is 78.26 %
Accuracy of the KNN on the Validation dataset is 74.00 %
Accuracy of the KNN on the Test dataset is 69.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=298 :
Accuracy of the KNN on the Training dataset is 78.26 %
Accuracy of the KNN on the Validation

Accuracy of the KNN on the Training dataset is 77.00 %
Accuracy of the KNN on the Validation dataset is 73.00 %
Accuracy of the KNN on the Test dataset is 67.80 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=328 :
Accuracy of the KNN on the Training dataset is 76.96 %
Accuracy of the KNN on the Validation dataset is 73.00 %
Accuracy of the KNN on the Test dataset is 67.70 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=329 :
Accuracy of the KNN on the Training dataset is 76.94 %
Accuracy of the KNN on the Validation dataset is 73.00 %
Accuracy of the KNN on the Test dataset is 67.70 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=330 :
Accuracy of the KNN on the Training dataset is 76.84 %
Accuracy of the KNN on the Validation dataset is 72.90 %
Accuracy of the KNN on the Test dataset is 67.60 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/

Accuracy of the KNN on the Test dataset is 66.30 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=360 :
Accuracy of the KNN on the Training dataset is 76.08 %
Accuracy of the KNN on the Validation dataset is 72.00 %
Accuracy of the KNN on the Test dataset is 66.20 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=361 :
Accuracy of the KNN on the Training dataset is 76.06 %
Accuracy of the KNN on the Validation dataset is 72.00 %
Accuracy of the KNN on the Test dataset is 66.10 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=362 :
Accuracy of the KNN on the Training dataset is 76.08 %
Accuracy of the KNN on the Validation dataset is 71.90 %
Accuracy of the KNN on the Test dataset is 66.00 %
k=3 with highest accuarcy of 181.2/200
k=3 with Full accuarcy of 278.72/300

 

For k=363 :
Accuracy of the KNN on the Training dataset is 76.08 %
Accuracy of the KNN on the Validation

In [19]:
# k = 3 is the value reported as best by both the grid search and the manual
# sweep above.
k = 3
# Create a KNN Classifier
digit_KNN_model = KNeighborsClassifier(n_neighbors=k)

# Fit on the digit training split; the fitted estimator is the cell output.
digit_KNN_model.fit(digit_training_features, digit_training_labels)

In [32]:
# print("For k={} :".format(k))
# digit_training_prediction = getModelPredictions(digit_KNN_model, digit_training_features, digit_training_labels,
#                                                 "Training", "KNN")[0]
# digit_validation_prediction = getModelPredictions(digit_KNN_model, digit_validation_features, digit_validation_labels,
#                                                   "Validation", "KNN")[0]
# digit_test_prediction = getModelPredictions(digit_KNN_model, digit_test_features, digit_test_labels, "Test", "KNN")[0]

In [20]:
# Print the KNN accuracy on each digit split for the chosen k. The value
# returned by the last call (the test accuracy) is also the cell output.
print("For k={} :".format(k))
predictModel(digit_KNN_model, digit_training_features, digit_training_labels, "Training", "KNN")
predictModel(digit_KNN_model, digit_validation_features, digit_validation_labels, "Validation", "KNN")
predictModel(digit_KNN_model, digit_test_features, digit_test_labels, "Test", "KNN")

For k=3 :
Accuracy of the KNN on the Training dataset is 97.52 %
Accuracy of the KNN on the Validation dataset is 91.40 %
Accuracy of the KNN on the Test dataset is 89.80 %


89.8

## Face Dataset

In [34]:
# params =  {
#     'n_neighbors': list(range(2, 451)) 
# }
# face_KNN_Grid = getGridSearch(params, KNeighborsClassifier(), face_training_features, face_training_labels)
# face_KNN_Grid.best_score_, face_KNN_Grid.best_params_

In [35]:
# NOTE(review): k is hard-coded; presumably it was picked from the manual sweep
# in the following cell - confirm and record how it was chosen.
k = 101

# Create a KNN Classifier
face_KNN_model = KNeighborsClassifier(n_neighbors=k)

# Train the model using the training sets
face_KNN_model.fit(face_training_features, face_training_labels)

# Print the accuracy on each face split; the value returned by the last call
# (the test accuracy) is also the cell output.
print("For k={} :".format(k))
predictModel(face_KNN_model, face_training_features, face_training_labels, "Training", "KNN")
predictModel(face_KNN_model, face_validation_features, face_validation_labels, "Validation", "KNN")
predictModel(face_KNN_model, face_test_features, face_test_labels, "Test", "KNN")

For k=101 :
Accuracy of the KNN on the Training dataset is 80.04 %
Accuracy of the KNN on the Validation dataset is 75.42 %
Accuracy of the KNN on the Test dataset is 81.33 %


81.33333333333333

In [77]:
# Manual sweep over k = 2..face_training_amount-1: refit KNN for each k, report
# the accuracy on all three face splits and track the best k under two summed
# scores.
# NOTE(review): both scores include the test accuracy, so a k picked from this
# sweep is tuned on the test split - confirm this is intended.
highestfull_k = highest_k = 2
highestfull_accuarcy = highest_accuarcy = 0

for k in range(2, face_training_amount):
    face_KNN_model = KNeighborsClassifier(n_neighbors=k)
    face_KNN_model.fit(face_training_features, face_training_labels)

    print("For k={} :".format(k))
    acc1 = predictModel(face_KNN_model, face_training_features, face_training_labels, "Training", "KNN")
    acc2 = predictModel(face_KNN_model, face_validation_features, face_validation_labels, "Validation", "KNN")
    acc3 = predictModel(face_KNN_model, face_test_features, face_test_labels, "Test", "KNN")

    # Best k by training + validation + test accuracy (out of 300).
    if acc1+acc2+acc3 > highestfull_accuarcy:
        highestfull_k, highestfull_accuarcy = k, acc1+acc2+acc3

    # Best k by validation + test accuracy only (out of 200).
    if acc2+acc3 > highest_accuarcy:
        highest_k, highest_accuarcy = k, acc2 + acc3

    print ("k={} with highest accuarcy of {}/200".format(highest_k, highest_accuarcy))
    print ("k={} with Full accuarcy of {}/300".format(highestfull_k, highestfull_accuarcy))

    print("\n \n")

For k=2 :
Accuracy of the KNN on the Training dataset is 51.88 %
Accuracy of the KNN on the Validation dataset is 51.83 %
Accuracy of the KNN on the Test dataset is 51.33 %
k=2 with highest accuarcy of 103.16057585825027/200
k=2 with Full accuarcy of 155.04527652343876/300

 

For k=3 :
Accuracy of the KNN on the Training dataset is 64.52 %
Accuracy of the KNN on the Validation dataset is 54.15 %
Accuracy of the KNN on the Test dataset is 54.00 %
k=3 with highest accuarcy of 108.15282392026577/200
k=3 with Full accuarcy of 172.6761055167181/300

 

For k=4 :
Accuracy of the KNN on the Training dataset is 53.66 %
Accuracy of the KNN on the Validation dataset is 52.16 %
Accuracy of the KNN on the Test dataset is 51.33 %
k=3 with highest accuarcy of 108.15282392026577/200
k=3 with Full accuarcy of 172.6761055167181/300

 

For k=5 :
Accuracy of the KNN on the Training dataset is 60.53 %
Accuracy of the KNN on the Validation dataset is 55.48 %
Accuracy of the KNN on the Test dataset is 57.

Accuracy of the KNN on the Validation dataset is 56.81 %
Accuracy of the KNN on the Test dataset is 54.00 %
k=7 with highest accuarcy of 114.15060908084165/200
k=5 with Full accuarcy of 173.3472116841374/300

 

For k=34 :
Accuracy of the KNN on the Training dataset is 53.66 %
Accuracy of the KNN on the Validation dataset is 54.15 %
Accuracy of the KNN on the Test dataset is 52.00 %
k=7 with highest accuarcy of 114.15060908084165/200
k=5 with Full accuarcy of 173.3472116841374/300

 

For k=35 :
Accuracy of the KNN on the Training dataset is 54.32 %
Accuracy of the KNN on the Validation dataset is 55.15 %
Accuracy of the KNN on the Test dataset is 54.00 %
k=7 with highest accuarcy of 114.15060908084165/200
k=5 with Full accuarcy of 173.3472116841374/300

 

For k=36 :
Accuracy of the KNN on the Training dataset is 54.10 %
Accuracy of the KNN on the Validation dataset is 53.49 %
Accuracy of the KNN on the Test dataset is 52.67 %
k=7 with highest accuarcy of 114.15060908084165/200
k=5 wi

Accuracy of the KNN on the Training dataset is 56.76 %
Accuracy of the KNN on the Validation dataset is 57.14 %
Accuracy of the KNN on the Test dataset is 56.00 %
k=63 with highest accuarcy of 116.46954595791806/200
k=63 with Full accuarcy of 174.78440183374954/300

 

For k=65 :
Accuracy of the KNN on the Training dataset is 59.42 %
Accuracy of the KNN on the Validation dataset is 60.13 %
Accuracy of the KNN on the Test dataset is 59.33 %
k=65 with highest accuarcy of 119.46622369878185/200
k=65 with Full accuarcy of 178.8897270247242/300

 

For k=66 :
Accuracy of the KNN on the Training dataset is 57.43 %
Accuracy of the KNN on the Validation dataset is 58.47 %
Accuracy of the KNN on the Test dataset is 58.00 %
k=65 with highest accuarcy of 119.46622369878185/200
k=65 with Full accuarcy of 178.8897270247242/300

 

For k=67 :
Accuracy of the KNN on the Training dataset is 59.87 %
Accuracy of the KNN on the Validation dataset is 60.80 %
Accuracy of the KNN on the Test dataset is 60.6

Accuracy of the KNN on the Training dataset is 70.51 %
Accuracy of the KNN on the Validation dataset is 69.77 %
Accuracy of the KNN on the Test dataset is 70.67 %
k=93 with highest accuarcy of 145.42857142857144/200
k=93 with Full accuarcy of 218.15584415584416/300

 

For k=95 :
Accuracy of the KNN on the Training dataset is 73.39 %
Accuracy of the KNN on the Validation dataset is 71.43 %
Accuracy of the KNN on the Test dataset is 75.33 %
k=95 with highest accuarcy of 146.76190476190476/200
k=95 with Full accuarcy of 220.154365959244/300

 

For k=96 :
Accuracy of the KNN on the Training dataset is 72.06 %
Accuracy of the KNN on the Validation dataset is 70.10 %
Accuracy of the KNN on the Test dataset is 74.00 %
k=95 with highest accuarcy of 146.76190476190476/200
k=95 with Full accuarcy of 220.154365959244/300

 

For k=97 :
Accuracy of the KNN on the Training dataset is 77.16 %
Accuracy of the KNN on the Validation dataset is 72.76 %
Accuracy of the KNN on the Test dataset is 76.67 

Accuracy of the KNN on the Training dataset is 81.82 %
Accuracy of the KNN on the Validation dataset is 77.74 %
Accuracy of the KNN on the Test dataset is 74.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=125 :
Accuracy of the KNN on the Training dataset is 78.71 %
Accuracy of the KNN on the Validation dataset is 73.75 %
Accuracy of the KNN on the Test dataset is 71.33 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=126 :
Accuracy of the KNN on the Training dataset is 78.49 %
Accuracy of the KNN on the Validation dataset is 74.42 %
Accuracy of the KNN on the Test dataset is 73.33 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=127 :
Accuracy of the KNN on the Training dataset is 75.61 %
Accuracy of the KNN on the Validation dataset is 71.76 %
Accuracy of the KNN on the Test dataset

Accuracy of the KNN on the Training dataset is 55.21 %
Accuracy of the KNN on the Validation dataset is 52.16 %
Accuracy of the KNN on the Test dataset is 56.00 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=154 :
Accuracy of the KNN on the Training dataset is 56.32 %
Accuracy of the KNN on the Validation dataset is 52.49 %
Accuracy of the KNN on the Test dataset is 57.33 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=155 :
Accuracy of the KNN on the Training dataset is 54.55 %
Accuracy of the KNN on the Validation dataset is 51.83 %
Accuracy of the KNN on the Test dataset is 56.00 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=156 :
Accuracy of the KNN on the Training dataset is 54.99 %
Accuracy of the KNN on the Validation dataset is 52.49 %
Accuracy of the KNN on the Test dataset

For k=184 :
Accuracy of the KNN on the Training dataset is 48.78 %
Accuracy of the KNN on the Validation dataset is 49.17 %
Accuracy of the KNN on the Test dataset is 50.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=185 :
Accuracy of the KNN on the Training dataset is 48.56 %
Accuracy of the KNN on the Validation dataset is 49.17 %
Accuracy of the KNN on the Test dataset is 50.00 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=186 :
Accuracy of the KNN on the Training dataset is 48.56 %
Accuracy of the KNN on the Validation dataset is 49.17 %
Accuracy of the KNN on the Test dataset is 50.00 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=187 :
Accuracy of the KNN on the Training dataset is 48.34 %
Accuracy of the KNN on the Validation dataset is 48.84 %
Accuracy of the KNN on the 

Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=215 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=216 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=217 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset

Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=245 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=246 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=247 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550

Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=276 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=277 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=278 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset

Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=307 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=308 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=309 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset

Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=336 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=337 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=338 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset

Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=365 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=366 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=367 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550

Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=395 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=396 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=397 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550

Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=425 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=426 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550387595/200
k=109 with Full accuarcy of 248.9844101823682/300

 

For k=427 :
Accuracy of the KNN on the Training dataset is 48.12 %
Accuracy of the KNN on the Validation dataset is 48.17 %
Accuracy of the KNN on the Test dataset is 48.67 %
k=109 with highest accuarcy of 164.06201550

In [26]:
# Neighbour count for the final face KNN model; the sweep output recorded
# above reports k=109 as its best value.
k = 109

# Build the classifier and fit it on the face training split in one expression.
# fit() returns the estimator itself, so the name is bound to the fitted model.
face_KNN_model = KNeighborsClassifier(n_neighbors=k).fit(
    face_training_features, face_training_labels
)

# Show the fitted estimator as the cell result.
face_KNN_model

In [37]:
# Disabled cell: every line below is commented out, so running it does nothing.
# If re-enabled it would print the "For k=..." header and keep element [0] of
# each getModelPredictions(...) result for the face training, validation and
# test splits.
# NOTE(review): getModelPredictions is not defined in this part of the
# notebook -- confirm it still exists before uncommenting.
# print("For k={} :".format(k))
# face_training_prediction = getModelPredictions(face_KNN_model, face_training_features, face_training_labels,
#                                                "Training", "KNN")[0]
# face_validation_prediction = getModelPredictions(face_KNN_model, face_validation_features, face_validation_labels,
#                                                  "Validation", "KNN")[0]
# face_test_prediction = getModelPredictions(face_KNN_model, face_test_features, face_test_labels, "Test", "KNN")[0]

In [27]:
# Header naming the k that the fitted face KNN model was built with.
print("For k={} :".format(k))

# Score the model on each split in turn (training, validation, test).
# The value returned by the last call is what the cell displays.
predictModel(
    face_KNN_model,
    face_training_features, face_training_labels,
    "Training", "KNN",
)
predictModel(
    face_KNN_model,
    face_validation_features, face_validation_labels,
    "Validation", "KNN",
)
predictModel(
    face_KNN_model,
    face_test_features, face_test_labels,
    "Test", "KNN",
)

For k=109 :
Accuracy of the KNN on the Training dataset is 84.92 %
Accuracy of the KNN on the Validation dataset is 81.40 %
Accuracy of the KNN on the Test dataset is 82.67 %


82.66666666666667

# Multilayer perceptron (MLP)

## Digit dataset

In [39]:
# Disabled hyper-parameter search for the digit MLP: every line is commented
# out, so running this cell does nothing.
# The values used by the digit MLP model cell that follows
# (hidden_layer_sizes=(100,), max_iter=40, learning_rate_init=0.1275,
# solver='sgd', random_state=0) all appear in this grid.
# NOTE(review): the meaning of the trailing positional argument `1` passed to
# getGridSearch is not visible here -- confirm against its definition.
# params =  {
#     'hidden_layer_sizes': [(50,), (100,)],
#     'max_iter': [10, 20, 40],
#     'learning_rate_init': [0.1, 0.5, 0.125, 0.1275, 0.25],
#     'solver':['sgd'],
#     'random_state': [0]
# }
# digit_MLP_Grid = getGridSearch(params, MLPClassifier(), digit_training_features, digit_training_labels, 1)
# digit_MLP_Grid.best_score_, digit_MLP_Grid.best_params_

In [28]:
# Digit MLP: one hidden layer of 100 units trained with SGD.
# random_state is pinned so repeated runs start from the same initial weights.
digit_MLP_model = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=40,
    learning_rate_init=0.1275,
    solver='sgd',
    random_state=0,
)

# Fit on the digit training split; the fitted estimator is the cell result.
digit_MLP_model.fit(digit_training_features, digit_training_labels)

In [41]:
# Disabled cell: every line below is commented out, so running it does nothing.
# If re-enabled it would keep element [0] of each getModelPredictions(...)
# result for the digit training, validation and test splits (MLP model).
# NOTE(review): getModelPredictions is not defined in this part of the
# notebook -- confirm it still exists before uncommenting.
# digit_training_prediction = getModelPredictions(digit_MLP_model, digit_training_features, digit_training_labels,
#                                               "Training", "MLP")[0]
# digit_validation_prediction = getModelPredictions(digit_MLP_model, digit_validation_features, digit_validation_labels,
#                                                 "Validation", "MLP")[0]
# digit_test_prediction = getModelPredictions(digit_MLP_model, digit_test_features, digit_test_labels, "Test", "MLP")[0]

In [42]:
# Score the digit MLP on each split in turn (training, validation, test).
# The value returned by the last call is what the cell displays.
predictModel(
    digit_MLP_model,
    digit_training_features, digit_training_labels,
    "Training", "MLP",
)
predictModel(
    digit_MLP_model,
    digit_validation_features, digit_validation_labels,
    "Validation", "MLP",
)
predictModel(
    digit_MLP_model,
    digit_test_features, digit_test_labels,
    "Test", "MLP",
)

Accuracy of the MLP on the Training dataset is 100.00 %
Accuracy of the MLP on the Validation dataset is 93.80 %
Accuracy of the MLP on the Test dataset is 92.60 %


92.60000000000001

## Face dataset

In [51]:
# Disabled hyper-parameter search for the face MLP: every line is commented
# out, so running this cell does nothing.
# NOTE(review): the face MLP model cell that follows uses max_iter=20, which is
# not among the max_iter values listed here (10, 25, 40) -- confirm where that
# value came from. Its other settings (hidden_layer_sizes=(100,),
# learning_rate_init=0.051, solver='sgd', random_state=0) do appear in this grid.
# NOTE(review): the meaning of the trailing positional argument `1` passed to
# getGridSearch is not visible here -- confirm against its definition.
# params =  {
#     'hidden_layer_sizes': [(50,), (100,)],
#     'max_iter': [10, 25, 40],
#     'learning_rate_init': [0.1, 0.051, 0.071, 0.5],
#     'solver':['sgd'],
#     'random_state': [0]
# }
# face_MLP_Grid = getGridSearch(params, MLPClassifier(), face_training_features, face_training_labels, 1)
# face_MLP_Grid.best_score_, face_MLP_Grid.best_params_

In [29]:
# Face MLP: one hidden layer of 100 units trained with SGD.
# random_state is pinned so repeated runs start from the same initial weights.
face_MLP_model = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=20,
    learning_rate_init=0.051,
    solver='sgd',
    random_state=0,
)

# Fit on the face training split; the fitted estimator is the cell result.
face_MLP_model.fit(face_training_features, face_training_labels)



In [53]:
# Disabled cell: every line below is commented out, so running it does nothing.
# If re-enabled it would keep element [0] of each getModelPredictions(...)
# result for the face training, validation and test splits (MLP model).
# NOTE(review): getModelPredictions is not defined in this part of the
# notebook -- confirm it still exists before uncommenting.
# face_training_prediction = getModelPredictions(face_MLP_model, face_training_features, face_training_labels,
#                                                "Training", "MLP")[0]
# face_validation_prediction = getModelPredictions(face_MLP_model, face_validation_features, face_validation_labels,
#                                                  "Validation", "MLP")[0]
# face_test_prediction = getModelPredictions(face_MLP_model, face_test_features, face_test_labels, "Test", "MLP")[0]

In [54]:
# Score the face MLP on each split in turn (training, validation, test).
# The value returned by the last call is what the cell displays.
predictModel(
    face_MLP_model,
    face_training_features, face_training_labels,
    "Training", "MLP",
)
predictModel(
    face_MLP_model,
    face_validation_features, face_validation_labels,
    "Validation", "MLP",
)
predictModel(
    face_MLP_model,
    face_test_features, face_test_labels,
    "Test", "MLP",
)

Accuracy of the MLP on the Training dataset is 100.00 %
Accuracy of the MLP on the Validation dataset is 89.70 %
Accuracy of the MLP on the Test dataset is 90.67 %


90.66666666666666

# Support Vector Machine (SVM)

## Digit dataset

In [47]:
# Candidate SVC settings: every kernel paired with both gamma heuristics
# (4 x 2 = 8 combinations).
params = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    gamma=['scale', 'auto'],
)

# Run the search on the digit training split.
digit_SVM_Grid = getGridSearch(
    params, SVC(), digit_training_features, digit_training_labels
)

# Show the best cross-validation score together with the parameters that achieved it.
(digit_SVM_Grid.best_score_, digit_SVM_Grid.best_params_)

Fitting 10 folds for each of 8 candidates, totalling 80 fits


(0.952, {'gamma': 'scale', 'kernel': 'rbf'})

In [30]:
# Digit SVM with the settings the grid search above reported as best
# (kernel='rbf', gamma='scale'). fit() returns the estimator itself, so the
# name is bound to the fitted model.
digit_SVM_model = SVC(kernel='rbf', gamma='scale').fit(
    digit_training_features, digit_training_labels
)

# Show the fitted estimator as the cell result.
digit_SVM_model

In [49]:
# Disabled cell: every line below is commented out, so running it does nothing.
# If re-enabled it would keep element [0] of each getModelPredictions(...)
# result for the digit training, validation and test splits (SVM model).
# NOTE(review): getModelPredictions is not defined in this part of the
# notebook -- confirm it still exists before uncommenting.
# digit_training_prediction = getModelPredictions(digit_SVM_model, digit_training_features, digit_training_labels,
#                                                 "Training", "SVM")[0]
# digit_validation_prediction = getModelPredictions(digit_SVM_model, digit_validation_features, digit_validation_labels,
#                                                   "Validation", "SVM")[0]
# digit_test_prediction = getModelPredictions(digit_SVM_model, digit_test_features, digit_test_labels, "Test", "SVM")[0]

In [66]:
# Score the digit SVM on each split in turn (training, validation, test).
# The value returned by the last call is what the cell displays.
predictModel(
    digit_SVM_model,
    digit_training_features, digit_training_labels,
    "Training", "SVM",
)
predictModel(
    digit_SVM_model,
    digit_validation_features, digit_validation_labels,
    "Validation", "SVM",
)
predictModel(
    digit_SVM_model,
    digit_test_features, digit_test_labels,
    "Test", "SVM",
)

Accuracy of the SVM on the Training dataset is 98.70 %
Accuracy of the SVM on the Validation dataset is 93.30 %
Accuracy of the SVM on the Test dataset is 92.10 %


92.10000000000001

## Face dataset

In [73]:
# Disabled hyper-parameter search for the face SVM: every line is commented
# out, so running this cell does nothing and no search result is recorded.
# NOTE(review): the face SVM model cell that follows uses kernel='rbf',
# gamma='auto'; both values are in this grid, but with the search disabled
# there is no output here confirming they were the best combination.
# params =  {
#     'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
#     'gamma': ['scale', 'auto']
# }
# face_SVM_Grid = getGridSearch(params, SVC(), face_training_features, face_training_labels)
# face_SVM_Grid.best_score_, face_SVM_Grid.best_params_

In [70]:
# Face SVM: RBF kernel with gamma='auto'. fit() returns the estimator itself,
# so the name is bound to the fitted model.
face_SVM_model = SVC(kernel='rbf', gamma='auto').fit(
    face_training_features, face_training_labels
)

# Show the fitted estimator as the cell result.
face_SVM_model

In [71]:
# Disabled cell: every line below is commented out, so running it does nothing.
# If re-enabled it would keep element [0] of each getModelPredictions(...)
# result for the face training, validation and test splits (SVM model).
# NOTE(review): getModelPredictions is not defined in this part of the
# notebook -- confirm it still exists before uncommenting.
# face_training_prediction = getModelPredictions(face_SVM_model, face_training_features, face_training_labels,
#                                                "Training", "SVM")[0]
# face_validation_prediction = getModelPredictions(face_SVM_model, face_validation_features, face_validation_labels,
#                                                  "Validation", "SVM")[0]
# face_test_prediction = getModelPredictions(face_SVM_model, face_test_features, face_test_labels, "Test", "SVM")[0]

In [72]:
# Score the face SVM on each split in turn (training, validation, test).
# The value returned by the last call is what the cell displays.
predictModel(
    face_SVM_model,
    face_training_features, face_training_labels,
    "Training", "SVM",
)
predictModel(
    face_SVM_model,
    face_validation_features, face_validation_labels,
    "Validation", "SVM",
)
predictModel(
    face_SVM_model,
    face_test_features, face_test_labels,
    "Test", "SVM",
)

Accuracy of the SVM on the Training dataset is 99.78 %
Accuracy of the SVM on the Validation dataset is 91.69 %
Accuracy of the SVM on the Test dataset is 92.00 %


92.0

# Decision Tree (DT)

## Digit dataset

In [75]:
# Exhaustive search over tree depth and per-split feature count for the digit
# decision tree (50 x 50 = 2500 parameter combinations).
params =  {
    'max_depth': list(range(1, 51)),
    'max_features': list(range(1, 51))
}

# Seed the estimator. When max_features is smaller than the number of features,
# each split considers a randomly drawn feature subset, so an unseeded search
# can report different "best" parameters on every run.
# NOTE(review): assumes getGridSearch hands the estimator to the search
# unchanged, so the seed is kept -- confirm against its definition.
digit_DT_Grid = getGridSearch(params, DecisionTreeClassifier(random_state=0),
                              digit_training_features, digit_training_labels)

# Show the best cross-validation score together with the parameters that achieved it.
digit_DT_Grid.best_score_, digit_DT_Grid.best_params_

(0.7686000000000001, {'max_depth': 14, 'max_features': 50})

In [76]:
# Create the Decision Tree classifier with the grid search parameters.
# random_state is fixed because max_features=50 makes every split consider a
# randomly chosen subset of features; without a seed the fitted tree, and the
# accuracies printed below, change from run to run.
digit_DT_model = DecisionTreeClassifier(max_depth=14, max_features=50, random_state=0)

# Train the model using the training sets
digit_DT_model.fit(digit_training_features, digit_training_labels)

# Report accuracy on each split; the value of the last call is the cell result.
predictModel(digit_DT_model, digit_training_features, digit_training_labels, "Training", "Decision Tree")
predictModel(digit_DT_model, digit_validation_features, digit_validation_labels, "Validation", "Decision Tree")
predictModel(digit_DT_model, digit_test_features, digit_test_labels, "Test", "Decision Tree")

Accuracy of the Decision Tree on the Training dataset is 98.64 %
Accuracy of the Decision Tree on the Validation dataset is 70.50 %
Accuracy of the Decision Tree on the Test dataset is 68.40 %


68.4

In [31]:
# Create the Decision Tree classifier with default hyper-parameters.
# This rebinds digit_DT_model, replacing the grid-search-tuned tree built in
# the previous model cell.
# random_state is fixed for reproducibility, matching the seeded MLP cells:
# scikit-learn permutes the features at every split, so equally good splits
# can otherwise be resolved differently from run to run.
digit_DT_model = DecisionTreeClassifier(random_state=0)

# Train the model using the training sets
digit_DT_model.fit(digit_training_features, digit_training_labels)

In [78]:
# Disabled cell: every line below is commented out, so running it does nothing.
# If re-enabled it would keep element [0] of each getModelPredictions(...)
# result for the digit training, validation and test splits (Decision Tree model).
# NOTE(review): getModelPredictions is not defined in this part of the
# notebook -- confirm it still exists before uncommenting.
# digit_training_prediction = getModelPredictions(digit_DT_model, digit_training_features, digit_training_labels, "Training", "Decision Tree")[0]
# digit_validation_prediction = getModelPredictions(digit_DT_model, digit_validation_features, digit_validation_labels, "Validation", "Decision Tree")[0]
# digit_test_prediction = getModelPredictions(digit_DT_model, digit_test_features, digit_test_labels, "Test", "Decision Tree")[0]

In [79]:
# Score the digit decision tree on each split in turn (training, validation, test).
# The value returned by the last call is what the cell displays.
predictModel(
    digit_DT_model,
    digit_training_features, digit_training_labels,
    "Training", "Decision Tree",
)
predictModel(
    digit_DT_model,
    digit_validation_features, digit_validation_labels,
    "Validation", "Decision Tree",
)
predictModel(
    digit_DT_model,
    digit_test_features, digit_test_labels,
    "Test", "Decision Tree",
)

Accuracy of the Decision Tree on the Training dataset is 100.00 %
Accuracy of the Decision Tree on the Validation dataset is 76.10 %
Accuracy of the Decision Tree on the Test dataset is 73.50 %


73.5

## Face dataset

In [83]:
# Exhaustive search over tree depth and per-split feature count for the face
# decision tree (50 x 50 = 2500 parameter combinations; the recorded output
# shows 10 folds each, i.e. 25000 fits).
params =  {
    'max_depth': list(range(1, 51)),
    'max_features': list(range(1, 51))
}

# Seed the estimator. When max_features is smaller than the number of features,
# each split considers a randomly drawn feature subset, so an unseeded search
# can report different "best" parameters on every run.
# NOTE(review): assumes getGridSearch hands the estimator to the search
# unchanged, so the seed is kept -- confirm against its definition.
face_DT_Grid = getGridSearch(params, DecisionTreeClassifier(random_state=0),
                             face_training_features, face_training_labels)

# Show the best cross-validation score together with the parameters that achieved it.
face_DT_Grid.best_score_, face_DT_Grid.best_params_

Fitting 10 folds for each of 2500 candidates, totalling 25000 fits


(0.6806763285024153, {'max_depth': 39, 'max_features': 48})

In [84]:
# Create the Decision Tree classifier with the grid search parameters.
# random_state is fixed because max_features=48 makes every split consider a
# randomly chosen subset of features; without a seed the fitted tree, and the
# accuracies printed below, change from run to run.
face_DT_model = DecisionTreeClassifier(max_depth=39, max_features=48, random_state=0)

# Train the model using the training sets
face_DT_model.fit(face_training_features, face_training_labels)

# Report accuracy on each split; the value of the last call is the cell result.
predictModel(face_DT_model, face_training_features, face_training_labels, "Training", "Decision Tree")
predictModel(face_DT_model, face_validation_features, face_validation_labels, "Validation", "Decision Tree")
predictModel(face_DT_model, face_test_features, face_test_labels, "Test", "Decision Tree")

Accuracy of the Decision Tree on the Training dataset is 100.00 %
Accuracy of the Decision Tree on the Validation dataset is 58.80 %
Accuracy of the Decision Tree on the Test dataset is 64.67 %


64.66666666666666

In [32]:
# Create the Decision Tree classifier with default hyper-parameters.
# This rebinds face_DT_model, replacing the grid-search-tuned tree built in
# the previous model cell.
# random_state is fixed for reproducibility, matching the seeded MLP cells:
# scikit-learn permutes the features at every split, so equally good splits
# can otherwise be resolved differently from run to run.
face_DT_model = DecisionTreeClassifier(random_state=0)

# Train the model using the training sets
face_DT_model.fit(face_training_features, face_training_labels)

In [87]:
# Disabled cell: every line below is commented out, so running it does nothing.
# If re-enabled it would keep element [0] of each getModelPredictions(...)
# result for the face training, validation and test splits (Decision Tree model).
# NOTE(review): getModelPredictions is not defined in this part of the
# notebook -- confirm it still exists before uncommenting.
# face_training_prediction = getModelPredictions(face_DT_model, face_training_features, face_training_labels, "Training", "Decision Tree")[0]
# face_validation_prediction = getModelPredictions(face_DT_model, face_validation_features, face_validation_labels, "Validation", "Decision Tree")[0]
# face_test_prediction = getModelPredictions(face_DT_model, face_test_features, face_test_labels, "Test", "Decision Tree")[0]

In [86]:
# Score the face decision tree on each split in turn (training, validation, test).
# The value returned by the last call is what the cell displays.
predictModel(
    face_DT_model,
    face_training_features, face_training_labels,
    "Training", "Decision Tree",
)
predictModel(
    face_DT_model,
    face_validation_features, face_validation_labels,
    "Validation", "Decision Tree",
)
predictModel(
    face_DT_model,
    face_test_features, face_test_labels,
    "Test", "Decision Tree",
)

Accuracy of the Decision Tree on the Training dataset is 100.00 %
Accuracy of the Decision Tree on the Validation dataset is 73.09 %
Accuracy of the Decision Tree on the Test dataset is 66.67 %


66.66666666666666