# KNN vs Logistic Regression in Mnist Dataset

In [1]:
from sklearn.datasets import load_digits

In [2]:
data = load_digits().data
images = load_digits().images
targets = load_digits().target

In [3]:
X = images.reshape(-1,64)
y = targets

## Implement KNN

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.5,random_state=7)
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [5]:
model.score(X_test,y_test)

0.978865406006674

In [6]:
from sklearn.metrics import confusion_matrix
c_matrix = confusion_matrix(y_test,model.predict(X_test))
c_matrix

array([[95,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 89,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  1, 85,  0,  0,  0,  0,  0,  2,  0],
       [ 0,  0,  0, 86,  0,  1,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 85,  0,  0,  1,  0,  0],
       [ 0,  0,  0,  0,  0, 82,  0,  0,  0,  1],
       [ 0,  1,  0,  0,  0,  0, 91,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 91,  0,  0],
       [ 0,  4,  1,  2,  0,  0,  0,  0, 93,  0],
       [ 0,  0,  0,  3,  0,  1,  0,  0,  1, 83]], dtype=int64)

In [10]:
from sklearn import metrics
print("Accuracy: ", metrics.accuracy_score(y_test,model.predict(X_test)))

for i in range(len(c_matrix)):
    precision = c_matrix[i][i] / sum(c_matrix[i][:])
    print("Precision {}: {}".format(i,precision))

def sum_num(i):
    total = 0
    for j in range(10):
        total = total + c_matrix[j][i]
    return total

for i in range(len(c_matrix)):
    recall = c_matrix[i][i] / sum_num(i)
    print("Recall {}: {}".format(i,recall))


Accuracy:  0.978865406006674
Precision 0: 1.0
Precision 1: 1.0
Precision 2: 0.9659090909090909
Precision 3: 0.9885057471264368
Precision 4: 0.9883720930232558
Precision 5: 0.9879518072289156
Precision 6: 0.9891304347826086
Precision 7: 1.0
Precision 8: 0.93
Precision 9: 0.9431818181818182
Recall 0: 1.0
Recall 1: 0.9368421052631579
Recall 2: 0.9883720930232558
Recall 3: 0.945054945054945
Recall 4: 1.0
Recall 5: 0.9761904761904762
Recall 6: 1.0
Recall 7: 0.9891304347826086
Recall 8: 0.96875
Recall 9: 0.9880952380952381


## Implement Logistic Regression

In [11]:
from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.3,random_state = 7)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [12]:
model.score(X_test,y_test)

0.95

In [13]:
from sklearn.metrics import confusion_matrix
c_matrix = confusion_matrix(y_test,model.predict(X_test))
print(c_matrix)

[[61  0  0  0  0  0  0  0  0  0]
 [ 0 49  0  1  0  1  0  0  1  3]
 [ 0  0 53  0  0  0  0  0  0  0]
 [ 0  0  1 49  0  2  0  0  0  0]
 [ 0  0  0  0 56  0  1  1  0  0]
 [ 0  0  0  0  0 47  0  0  1  0]
 [ 0  0  0  0  0  0 47  0  1  0]
 [ 0  0  0  0  0  0  0 49  0  0]
 [ 0  3  1  0  1  4  0  1 52  0]
 [ 0  0  0  1  0  3  0  0  0 50]]


In [14]:
from sklearn import metrics
print("Accuracy: ", metrics.accuracy_score(y_test,model.predict(X_test)))

for i in range(c_matrix.shape[0]):
    precision = c_matrix[i][i] / sum(c_matrix[i][:])
    print("Precision {}: {}".format(i,precision))

def sum_num(i):
    total = 0
    for j in range(10):
        total = total + c_matrix[j][i]
    return total

for i in range(c_matrix.shape[0]):
    recall = c_matrix[i][i] / sum_num(i)
    print("Recall {}: {}".format(i,recall))

Accuracy:  0.95
Precision 0: 1.0
Precision 1: 0.8909090909090909
Precision 2: 1.0
Precision 3: 0.9423076923076923
Precision 4: 0.9655172413793104
Precision 5: 0.9791666666666666
Precision 6: 0.9791666666666666
Precision 7: 1.0
Precision 8: 0.8387096774193549
Precision 9: 0.9259259259259259
Recall 0: 1.0
Recall 1: 0.9423076923076923
Recall 2: 0.9636363636363636
Recall 3: 0.9607843137254902
Recall 4: 0.9824561403508771
Recall 5: 0.8245614035087719
Recall 6: 0.9791666666666666
Recall 7: 0.9607843137254902
Recall 8: 0.9454545454545454
Recall 9: 0.9433962264150944
