In [101]:
import numpy as np
import pandas as pd
import sklearn
import math
import matplotlib.pyplot as plt
import seaborn as sn

from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix

In [102]:
train_data =pd.read_csv("basesDatosPr3IMC/train_optdigits.csv", header=None)
test_data = pd.read_csv("basesDatosPr3IMC/test_optdigits.csv", header = None)

# Normalize the values and get the X_train and Y_train

In [103]:
scaler = MinMaxScaler()

normalized_train_data = scaler.fit_transform(train_data)

X_train = normalized_train_data[:,:-1]
Y_train = normalized_train_data[:, -1]

normalized_test_data = scaler.fit_transform(test_data)

X_test = normalized_test_data[:, :-1]
Y_test = normalized_test_data[:, -1]

print(Y_train)

[0.         0.         0.77777778 ... 0.66666667 0.66666667 0.77777778]


In [104]:
label_e = preprocessing.LabelEncoder()
label_e.fit(Y_train)

Y_train_encoded = label_e.transform(Y_train)
Y_test_encoded = label_e.transform(Y_test)

print(Y_test_encoded)

[0 1 2 ... 8 9 8]


# 1. KNeighbours Classifier

In [60]:
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train_encoded)
print(knn)


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')


In [62]:
prediction_test = knn.predict(X_test)
print(prediction_test)

[0 1 1 ... 1 9 8]


In [66]:
precission = knn.score(X_test, Y_test_encoded)
print("The CCR is: ", precission)

The CCR is:  0.9788536449638287


In [83]:
cm = confusion_matrix(Y_test_encoded, prediction_test)
print("Confusion matrix:\n", cm)

Confusion matrix:
 [[178   0   0   0   0   0   0   0   0   0]
 [  0 181   0   0   0   0   1   0   0   0]
 [  0   4 172   0   0   0   0   1   0   0]
 [  0   1   0 178   0   1   0   2   1   0]
 [  0   2   0   0 179   0   0   0   0   0]
 [  0   0   0   0   1 179   0   0   0   2]
 [  0   0   0   0   0   1 180   0   0   0]
 [  0   0   0   0   0   0   0 175   1   3]
 [  0  11   0   2   0   0   0   0 159   2]
 [  0   2   0   2   0   1   0   0   2 173]]


#### Let's try with different configurations

In [137]:
for nn in range(1,9):
    knn = KNeighborsClassifier(n_neighbors=nn)
    knn.fit(X_train, Y_train_encoded)
    
    prediction_train = knn.predict(X_train)
    prediction_test = knn.predict(X_test)
    
    precission_train = knn.score(X_train, Y_train_encoded)
    precission_test = knn.score(X_test, Y_test_encoded)
    
    print("The accuray with ", nn, "neightbours is: Train:", precission_train*100, "Test: ", precission_test*100)

The accuray with  1 neightbours is: Train: 100.0 Test:  97.8297161936561
The accuray with  2 neightbours is: Train: 99.18911849332984 Test:  97.44017807456873
The accuray with  3 neightbours is: Train: 99.29374836515825 Test:  97.88536449638286
The accuray with  4 neightbours is: Train: 99.08448862150145 Test:  97.8297161936561
The accuray with  5 neightbours is: Train: 98.97985874967303 Test:  97.88536449638286
The accuray with  6 neightbours is: Train: 98.87522887784462 Test:  97.60712298274903
The accuray with  7 neightbours is: Train: 98.92754381375883 Test:  97.55147468002225
The accuray with  8 neightbours is: Train: 98.84907140988753 Test:  97.60712298274903


# 2. Logistic Regression

In [109]:
clf = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial').fit(X_train, Y_train_encoded)

prediction_train = clf.predict(X_train)
prediction_test = clf.predict(X_test)

precission_train = clf.score(X_train, Y_train_encoded)
precission_test = clf.score(X_test, Y_test_encoded)

cm = confusion_matrix(Y_test_encoded, prediction_test)
print("Confusion matrix:\n", cm)

print("The accuray is: \n Train:", precission_train*100, "\n Test: ", precission_test*100)

Confusion matrix:
 [[174   0   0   0   1   3   0   0   0   0]
 [  0 169   0   0   0   1   0   0   5   7]
 [  0   3 172   0   0   0   0   1   1   0]
 [  0   0   4 168   0   3   0   2   3   3]
 [  0   1   0   0 176   0   0   1   3   0]
 [  0   0   1   0   0 178   1   0   0   2]
 [  0   1   0   0   2   0 177   0   1   0]
 [  0   0   0   0   2   5   0 163   2   7]
 [  0   9   0   0   0   3   0   0 156   6]
 [  0   1   0   1   4   2   0   0   2 170]]
The accuray is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392


#### Let's try with different configuration

In [135]:
for state in range(0,9):
    clf = LogisticRegression(random_state=state, solver=solver_t, multi_class='multinomial').fit(X_train, Y_train_encoded)

    prediction_train = clf.predict(X_train)
    prediction_test = clf.predict(X_test)

    precission_train = clf.score(X_train, Y_train_encoded)
    precission_test = clf.score(X_test, Y_test_encoded)

    print("The accuray with the state", state, " is: \n Train:", precission_train*100, "\n Test: ", precission_test*100)

The accuray with the state 0  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 1  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 2  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 3  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 4  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 5  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 6  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 7  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
The accuray with the state 8  is: 
 Train: 98.03818990321737 
 Test:  94.76905954368392
