In [None]:
# Load the digits dataset that ships with scikit-learn.
from sklearn.datasets import load_digits

digits = load_digits()

# Uncomment to read the dataset's full description.
#print(digits.DESCR)

# A notebook cell only echoes its LAST expression, so the inspections below
# are printed explicitly; as bare mid-cell expressions they showed nothing.

# Every 100th target label.
print(digits.target[::100])

# Dimensions of the sample array and of the target array.
print(digits.data.shape)
print(digits.target.shape)

# Entry 13 of digits.images.
print(digits.images[13])

# Entry 13 of digits.data -- presumably the flattened form of the image
# above (the original note calls it the "preprocessed data"); confirm
# against the dataset description.
print(digits.data[13])
# Draw the first 24 digit images in a 4x6 grid, each titled with its label.
import matplotlib.pyplot as plt

figure, axes = plt.subplots(nrows=4, ncols=6, figsize=(6, 4))

# Unpack straight into `ax` so the `axes` array returned by subplots is not
# overwritten by a single Axes object on the first iteration.
# zip stops at the shortest input, i.e. after the 24 subplots.
for ax, image, target in zip(axes.ravel(), digits.images, digits.target):
    ax.imshow(image, cmap=plt.cm.gray_r)
    ax.set_xticks([])  # no tick marks on either axis
    ax.set_yticks([])
    ax.set_title(target)
plt.tight_layout()

# Split the data for training.
# train_test_split (from sklearn.model_selection) shuffles the data and splits
# the samples in the data array and the targets in the target array into
# training and testing sets; random_state=11 seeds that shuffle.
from sklearn.model_selection import train_test_split

# By convention, X represents the sample values and y represents the target
# values.
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, random_state=11)

# Training and testing set sizes. Printed explicitly: a bare expression in the
# middle of a notebook cell is evaluated but never displayed.
print(X_train.shape)
print(X_test.shape)

# Create the model.
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
# Train the model: fit receives the training samples and their targets.
knn.fit(X=X_train, y=y_train)

# Predict the digit class of every test sample.
predicted = knn.predict(X=X_test)
expected = y_test

# Predicted vs. expected labels for the first 20 test samples. Printed
# explicitly because only a cell's last expression is echoed automatically.
print(predicted[:20])
print(expected[:20])

# Collect every (predicted, expected) pair where the prediction was wrong,
# across the entire test set.
wrong = [(p, e) for (p, e) in zip(predicted, expected) if p != e]

# Last expression of the cell, so the notebook displays it.
wrong


In [None]:
# Measure the accuracy of the estimator with its score method and with a
# confusion matrix.
# Estimator method score, formatted as a percentage.
print(f'{knn.score(X_test,y_test):.2%}')

# Confusion matrix: shows the correct and incorrect predicted values for each
# class.
from sklearn.metrics import confusion_matrix

confusion = confusion_matrix(y_true=expected, y_pred=predicted)
# Printed explicitly: as a bare mid-cell expression it was never displayed.
print(confusion)

# Per-class report, labelled with the class names converted to strings.
from sklearn.metrics import classification_report
names = [str(digit) for digit in digits.target_names]
print(classification_report(expected, predicted, target_names=names))

# Visualize the confusion matrix as an annotated heatmap.
import pandas as pd
import seaborn as sns

# Label rows and columns with the dataset's own class names instead of a
# hard-coded range(10).
class_labels = list(digits.target_names)
confusion_df = pd.DataFrame(confusion, index=class_labels, columns=class_labels)

axes = sns.heatmap(confusion_df, annot=True, cmap='nipy_spectral_r')
# confusion_matrix puts the true labels on the rows and the predictions on
# the columns; say so on the figure so it can be read on its own.
axes.set_xlabel('Predicted digit')
axes.set_ylabel('Expected digit');  # trailing ';' hides the Text repr



In [None]:
# K-fold cross-validation: all of the data is used for both training and
# testing, because the model is repeatedly trained and then tested on a
# portion of the dataset it has not seen, which gives a better sense of how
# it will predict.
from sklearn.model_selection import KFold, cross_val_score

# Ten shuffled folds with a fixed seed.
kfold = KFold(shuffle=True, n_splits=10, random_state=11)

scores = cross_val_score(knn, digits.data, digits.target, cv=kfold)

# The ten per-fold accuracy scores ...
print(scores)
# ... then their mean and standard deviation, one per line.
print(
    f'Mean Accuracy:{scores.mean():.2%}',
    f'Standard Deviation:{scores.std():.2%}',
    sep='\n',
)

In [None]:
# Run several models through the same cross-validation to find the best one.
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# A fresh default KNeighborsClassifier is used rather than the global `knn`:
# that name is rebound elsewhere in the notebook, so reusing it would make
# this comparison depend on which cells ran before.
estimators = {
    'KNeighborsClassifier': KNeighborsClassifier(),
    'SVC': SVC(gamma='scale'),
    'GaussianNB': GaussianNB()
}

# One splitter shared by every model. With an integer random_state the
# shuffled folds are the same on every split call, so it need not be rebuilt
# per iteration.
kfold = KFold(n_splits=10, random_state=11, shuffle=True)

# Execute the models and report each one's mean accuracy and spread.
for estimator_name, estimator_object in estimators.items():
    scores = cross_val_score(estimator=estimator_object, X=digits.data, y=digits.target, cv=kfold)
    print(f'{estimator_name:>20}: ' +
          f'Mean Accuracy= {scores.mean():.2%}; ' +
          f'Standard Deviation= {scores.std():.2%}')

# Hyperparameter tuning: cross-validate k-nearest-neighbors for every odd k
# from 1 to 19 and print each k's mean accuracy and standard deviation.

# Same folds for every k; built once because it does not depend on k.
kfold = KFold(n_splits=10, random_state=11, shuffle=True)

for k in range(1, 20, 2):
    # Deliberately NOT named `knn`: rebinding that name would replace the
    # fitted classifier with an unfitted one and break any later knn.score /
    # knn.predict call.
    candidate = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(estimator=candidate, X=digits.data, y=digits.target, cv=kfold)
    print(f'k={k:<2}; mean accuracy={scores.mean():.2%}; ' +
          f'standard deviation ={scores.std():.2%}')
