In [89]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression

In [79]:
# Load scikit-learn's bundled digits dataset, then show the shape of the
# feature matrix followed by the names of its feature columns.
digitsDS = load_digits()
print(digitsDS.data.shape, digitsDS.feature_names, sep="\n")

(1797, 64)
['pixel_0_0', 'pixel_0_1', 'pixel_0_2', 'pixel_0_3', 'pixel_0_4', 'pixel_0_5', 'pixel_0_6', 'pixel_0_7', 'pixel_1_0', 'pixel_1_1', 'pixel_1_2', 'pixel_1_3', 'pixel_1_4', 'pixel_1_5', 'pixel_1_6', 'pixel_1_7', 'pixel_2_0', 'pixel_2_1', 'pixel_2_2', 'pixel_2_3', 'pixel_2_4', 'pixel_2_5', 'pixel_2_6', 'pixel_2_7', 'pixel_3_0', 'pixel_3_1', 'pixel_3_2', 'pixel_3_3', 'pixel_3_4', 'pixel_3_5', 'pixel_3_6', 'pixel_3_7', 'pixel_4_0', 'pixel_4_1', 'pixel_4_2', 'pixel_4_3', 'pixel_4_4', 'pixel_4_5', 'pixel_4_6', 'pixel_4_7', 'pixel_5_0', 'pixel_5_1', 'pixel_5_2', 'pixel_5_3', 'pixel_5_4', 'pixel_5_5', 'pixel_5_6', 'pixel_5_7', 'pixel_6_0', 'pixel_6_1', 'pixel_6_2', 'pixel_6_3', 'pixel_6_4', 'pixel_6_5', 'pixel_6_6', 'pixel_6_7', 'pixel_7_0', 'pixel_7_1', 'pixel_7_2', 'pixel_7_3', 'pixel_7_4', 'pixel_7_5', 'pixel_7_6', 'pixel_7_7']


In [80]:
# Tabular view of the dataset: one column per feature, with the class
# labels appended as a trailing 'target' column.
df = pd.DataFrame(
    data=digitsDS.data,
    columns=digitsDS.feature_names,
).assign(target=digitsDS.target)
df

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0,9
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0,0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0,8
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0,9


In [81]:
# Hold out 40% of the samples for evaluation; the fixed random_state keeps
# the shuffle (and therefore the split) identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    digitsDS.data,
    digitsDS.target,
    test_size=0.4,
    random_state=0,
)
# Sanity check: shapes of the four resulting arrays, in the order unpacked.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(1078, 64) (719, 64) (1078,) (719,)


In [82]:
# k-nearest neighbours classifier using 3 neighbours with uniform weights,
# fitted on the training split (fit() returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=3, weights='uniform').fit(X_train, y_train)

# Predicted labels for the held-out samples.
knn_pred = knn.predict(X_test)

# Test-set metrics; precision, recall and F1 are averaged over the classes
# with average="weighted".
print("Accuracy: \n", metrics.accuracy_score(y_true=y_test, y_pred=knn_pred))
print("Precision: \n", metrics.precision_score(y_true=y_test, y_pred=knn_pred, average="weighted"))
print("Recall: \n", metrics.recall_score(y_true=y_test, y_pred=knn_pred, average="weighted"))
print("F1 Score: \n", metrics.f1_score(y_true=y_test, y_pred=knn_pred, average="weighted"))


Accuracy: 
 0.9847009735744089
Precision: 
 0.985077118872961
Recall: 
 0.9847009735744089
F1 Score: 
 0.9846544130723759


In [83]:
# Radius-neighbours classifier with a neighbourhood radius of 35,
# fitted on the training split (fit() returns the estimator itself).
rnn = RadiusNeighborsClassifier(radius=35).fit(X_train, y_train)

# Predicted labels for the held-out samples.
rnn_pred = rnn.predict(X_test)

# Test-set metrics; precision, recall and F1 are averaged over the classes
# with average="weighted".
print("Accuracy: \n", metrics.accuracy_score(y_true=y_test, y_pred=rnn_pred))
print("Precision: \n", metrics.precision_score(y_true=y_test, y_pred=rnn_pred, average="weighted"))
print("Recall: \n", metrics.recall_score(y_true=y_test, y_pred=rnn_pred, average="weighted"))
print("F1 Score: \n", metrics.f1_score(y_true=y_test, y_pred=rnn_pred, average="weighted"))

Accuracy: 
 0.9360222531293463
Precision: 
 0.9392185579634502
Recall: 
 0.9360222531293463
F1 Score: 
 0.9353859101709487


In [84]:
# Logistic regression.
# max_iter is set to 4000 — presumably so the solver converges on the
# unscaled pixel values; NOTE(review): confirm a lower value is insufficient.
clf_lr = LogisticRegression(max_iter=4000)

# Fit on the training split ONLY, so the metrics below are measured on
# samples the model has never seen. (Fitting on the full dataset first is
# pointless: with the default warm_start=False every fit() call starts from
# scratch and discards the previous solution.)
clf_lr.fit(X_train, y_train)

# Predicted labels for the held-out samples.
lr_pred = clf_lr.predict(X_test)

# Test-set metrics; precision, recall and F1 are averaged over the classes
# with average="weighted".
print("Accuracy: \n", metrics.accuracy_score(y_test, lr_pred))
print("Precision: \n", metrics.precision_score(y_test, lr_pred, average="weighted"))
print("Recall: \n", metrics.recall_score(y_test, lr_pred, average="weighted"))
print("F1 Score: \n", metrics.f1_score(y_test, lr_pred, average="weighted"))

Accuracy: 
 0.9582753824756607
Precision: 
 0.9590172133048501
Recall: 
 0.9582753824756607
F1 Score: 
 0.9583999579547536


In [85]:
# Support vector classifier with a linear kernel, fitted on the training
# split (fit() returns the estimator itself).
clf_svm = SVC(kernel='linear').fit(X_train, y_train)

# Predicted labels for the held-out samples.
svm_pred = clf_svm.predict(X_test)

# Test-set metrics; precision, recall and F1 are averaged over the classes
# with average="weighted".
print("Accuracy: \n", metrics.accuracy_score(y_true=y_test, y_pred=svm_pred))
print("Precision: \n", metrics.precision_score(y_true=y_test, y_pred=svm_pred, average="weighted"))
print("Recall: \n", metrics.recall_score(y_true=y_test, y_pred=svm_pred, average="weighted"))
print("F1 Score: \n", metrics.f1_score(y_true=y_test, y_pred=svm_pred, average="weighted"))

Accuracy: 
 0.9749652294853964
Precision: 
 0.9751589401502595
Recall: 
 0.9749652294853964
F1 Score: 
 0.9749433392708488


In [86]:
# Decision tree.
# random_state is pinned (same seed as the train/test split) because tree
# construction involves randomness; without it the metrics below can change
# from one run to the next.
clf_dt = DecisionTreeClassifier(random_state=0)
clf_dt.fit(X_train, y_train)

# Predicted labels for the held-out samples.
dt_pred = clf_dt.predict(X_test)

# Test-set metrics; precision, recall and F1 are averaged over the classes
# with average="weighted".
print("Accuracy: \n", metrics.accuracy_score(y_test, dt_pred))
print("Precision: \n", metrics.precision_score(y_test, dt_pred, average="weighted"))
print("Recall: \n", metrics.recall_score(y_test, dt_pred, average="weighted"))
print("F1 Score: \n", metrics.f1_score(y_test, dt_pred, average="weighted"))

Accuracy: 
 0.8428372739916551
Precision: 
 0.8424748088633742
Recall: 
 0.8428372739916551
F1 Score: 
 0.8411153277008628


In [87]:
# Random forest of 50 trees.
# random_state is pinned (same seed as the train/test split) because the
# forest is built with random sampling; without it the metrics below change
# from one run to the next.
clf_rf = RandomForestClassifier(n_estimators=50, random_state=0)
clf_rf.fit(X_train, y_train)

# Predicted labels for the held-out samples.
rf_pred = clf_rf.predict(X_test)

# Test-set metrics; precision, recall and F1 are averaged over the classes
# with average="weighted".
print("Accuracy: \n", metrics.accuracy_score(y_test, rf_pred))
print("Precision: \n", metrics.precision_score(y_test, rf_pred, average="weighted"))
print("Recall: \n", metrics.recall_score(y_test, rf_pred, average="weighted"))
print("F1 Score: \n", metrics.f1_score(y_test, rf_pred, average="weighted"))

Accuracy: 
 0.9666203059805285
Precision: 
 0.9677530140067825
Recall: 
 0.9666203059805285
F1 Score: 
 0.966665199275995


In [88]:
# Support vector classifier with an RBF (non-linear) kernel, fitted on the
# training split (fit() returns the estimator itself).
clf_svc = SVC(kernel='rbf').fit(X_train, y_train)

# Predicted labels for the held-out samples.
# NOTE: this rebinds svm_pred, replacing the linear-kernel predictions.
svm_pred = clf_svc.predict(X_test)

# Test-set metrics; precision, recall and F1 are averaged over the classes
# with average="weighted".
print("Accuracy: \n", metrics.accuracy_score(y_true=y_test, y_pred=svm_pred))
print("Precision: \n", metrics.precision_score(y_true=y_test, y_pred=svm_pred, average="weighted"))
print("Recall: \n", metrics.recall_score(y_true=y_test, y_pred=svm_pred, average="weighted"))
print("F1 Score: \n", metrics.f1_score(y_true=y_test, y_pred=svm_pred, average="weighted"))

Accuracy: 
 0.9888734353268428
Precision: 
 0.9890042703800846
Recall: 
 0.9888734353268428
F1 Score: 
 0.9888751997014


In [90]:
# The RBF (non-linear) SVM was judged the best model, so score it with
# 5-fold cross-validation over the complete dataset.
cv_scores = cross_val_score(
    estimator=clf_svc,
    X=digitsDS.data,
    y=digitsDS.target,
    scoring='accuracy',
    cv=5,
)

# Mean accuracy across the folds.
print("Average Accuracy: \n", cv_scores.mean())


Average Accuracy: 
 0.9632838130609718
