In [1]:
from sklearn.datasets import load_digits
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load scikit-learn's handwritten-digits dataset and wrap it in pandas objects:
# `data` holds the feature rows, `target` the matching class labels.
digits = load_digits()
data = pd.DataFrame(digits.data)
target = pd.Series(digits.target)

In [3]:
# Preview a handful of samples. Each row of `data` is reshaped to an 8x8 grid
# and drawn with an inverted gray colormap.
sample_rows = [0, 99, 199, 299, 999, 1099, 1199, 1299]

# Bind the figure to `fig` instead of `_`: in IPython `_` is the "last output"
# variable, and assigning to it by hand shadows that feature.
fig, axes = plt.subplots(2, 4)

# `axes.flat` walks the 2x4 grid row by row, so the first four samples fill the
# top row and the last four fill the bottom row.
for ax, row in zip(axes.flat, sample_rows):
    ax.imshow(data.iloc[row].values.reshape(8, 8), cmap='gray_r')

<matplotlib.image.AxesImage at 0x7f4137055ef0>

In [4]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold

In [5]:
def train(k, train_features, train_labels):
    """Fit and return a k-nearest-neighbors classifier.

    Parameters
    ----------
    k : int
        Number of neighbors (`n_neighbors`) for the classifier.
    train_features, train_labels
        Training rows and their labels, passed straight to `fit`.

    Returns
    -------
    KNeighborsClassifier
        The fitted estimator.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    return KNeighborsClassifier(n_neighbors=k).fit(train_features, train_labels)

def test(model, test_features, test_labels):
    predictions = model.predict(test_features)
    train_test_df = pd.DataFrame()
    train_test_df['correct_label'] = test_labels
    train_test_df['predicted_label'] = predictions
    accuracy = sum(train_test_df["predicted_label"] == train_test_df["correct_label"])/len(train_test_df)    
    return accuracy

def cross_validate(k, n_splits=4):
    """Score a k-NN classifier with unshuffled K-fold cross-validation.

    Reads the notebook-level `data` (features) and `target` (labels).

    Parameters
    ----------
    k : int
        Number of neighbors, forwarded to `train`.
    n_splits : int, default 4
        Number of consecutive folds.

    Returns
    -------
    list
        One accuracy per fold, in fold order.
    """
    # No random_state here: it only has an effect together with shuffle=True,
    # and scikit-learn 0.24+ raises ValueError when it is set while shuffle is
    # left at False. The folds are the same contiguous, unshuffled blocks.
    kf = KFold(n_splits=n_splits)

    fold_accuracies = []
    for train_index, test_index in kf.split(data):
        # KFold yields integer positions, so select by position (.iloc) rather
        # than by label (.loc); the two only coincide for a default RangeIndex.
        train_features, test_features = data.iloc[train_index], data.iloc[test_index]
        train_labels, test_labels = target.iloc[train_index], target.iloc[test_index]
        model = train(k, train_features, train_labels)
        fold_accuracies.append(test(model, test_features, test_labels))
    return fold_accuracies
        
# Baseline: per-fold accuracies for a 1-nearest-neighbor classifier, then
# their mean as the cell's displayed value.
knn_one_accuracies = cross_validate(1)
np.asarray(knn_one_accuracies).mean()

0.9677233358079684

In [6]:
# Sweep the neighbor count and record the mean cross-validated accuracy per k.
k_values = list(range(1, 10))
k_overall_accuracies = [np.mean(cross_validate(k)) for k in k_values]

# Explicit figure/axes, with axis labels so the plot reads on its own.
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title("Mean Accuracy vs. k")
ax.set_xlabel("k (number of neighbors)")
ax.set_ylabel("Mean cross-validation accuracy")
ax.plot(k_values, k_overall_accuracies)

[<matplotlib.lines.Line2D at 0x7f4133736c50>]

In [None]:
from sklearn.neural_network import MLPClassifier
def train_nn(neuron_size, train_features, train_labels, max_iter=1000, random_state=2):
    """Fit and return a multi-layer perceptron with logistic activations.

    Parameters
    ----------
    neuron_size : tuple of int
        Hidden-layer widths, passed as `hidden_layer_sizes`.
    train_features, train_labels
        Training rows and their labels, passed straight to `fit`.
    max_iter : int, default 1000
        Cap on optimizer iterations. Raised above the library default because
        the recorded run of this notebook ended with "Maximum iterations
        reached and the optimization hasn't converged yet".
    random_state : int or None, default 2
        Seed for the network's random initialisation, so repeated runs give
        the same accuracies. Pass None for an unseeded fit.

    Returns
    -------
    MLPClassifier
        The fitted estimator.
    """
    mlp = MLPClassifier(
        hidden_layer_sizes=neuron_size,
        activation='logistic',
        max_iter=max_iter,
        random_state=random_state,
    )
    mlp.fit(train_features, train_labels)
    return mlp

def test(model, test_features, test_labels):
    predictions = model.predict(test_features)
    train_test_df = pd.DataFrame()
    train_test_df['correct_label'] = test_labels
    train_test_df['predicted_label'] = predictions
    accuracy = sum(train_test_df["predicted_label"] == train_test_df["correct_label"])/len(train_test_df)    
    return accuracy

def cross_validate(k, n_splits=4):
    """Score an MLP configuration with unshuffled K-fold cross-validation.

    NOTE: this definition replaces the k-NN `cross_validate` declared earlier
    in the notebook. Once this cell has run, re-running the k-NN cells will
    train MLPs instead.

    Reads the notebook-level `data` (features) and `target` (labels).

    Parameters
    ----------
    k : tuple of int
        Hidden-layer sizes, forwarded to `train_nn` (despite the name, this is
        not a neighbor count).
    n_splits : int, default 4
        Number of consecutive folds.

    Returns
    -------
    list
        One accuracy per fold, in fold order.
    """
    # No random_state here: it only has an effect together with shuffle=True,
    # and scikit-learn 0.24+ raises ValueError when it is set while shuffle is
    # left at False. The folds are the same contiguous, unshuffled blocks.
    kf = KFold(n_splits=n_splits)

    fold_accuracies = []
    for train_index, test_index in kf.split(data):
        # KFold yields integer positions, so select by position (.iloc) rather
        # than by label (.loc); the two only coincide for a default RangeIndex.
        train_features, test_features = data.iloc[train_index], data.iloc[test_index]
        train_labels, test_labels = target.iloc[train_index], target.iloc[test_index]
        model = train_nn(k, train_features, train_labels)
        fold_accuracies.append(test(model, test_features, test_labels))
    return fold_accuracies

In [None]:
# One hidden layer: mean cross-validated accuracy for each layer width.
neurons = [
    (8,), (16,), (32,), (64,), (128,), (256,)
]
nn_one_layer_accuracies = [np.mean(cross_validate(n)) for n in neurons]

# Explicit figure/axes, with axis labels so the plot reads on its own.
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title("Mean Accuracy vs. Neurons In Single Hidden Layer")
ax.set_xlabel("Neurons in the hidden layer")
ax.set_ylabel("Mean cross-validation accuracy")

# The x position is the width of the first (here: only) hidden layer.
x = [layer_sizes[0] for layer_sizes in neurons]
ax.plot(x, nn_one_layer_accuracies)


Stochastic Optimizer: Maximum iterations reached and the optimization hasn't converged yet.



In [None]:
# Two hidden layers of equal width: mean cross-validated accuracy per width.
neurons = [
    (64, 64), (128, 128), (256, 256)
]
nn_two_layer_accuracies = [np.mean(cross_validate(n)) for n in neurons]

# Explicit figure/axes, with axis labels so the plot reads on its own.
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title("Mean Accuracy vs. Neurons In Two Hidden Layers")
ax.set_xlabel("Neurons per hidden layer")
ax.set_ylabel("Mean cross-validation accuracy")

# Both layers share one width, so the first entry stands for the whole tuple.
x = [layer_sizes[0] for layer_sizes in neurons]
ax.plot(x, nn_two_layer_accuracies)

In [None]:
def cross_validate_six(k):
    """Score an MLP configuration with unshuffled 6-fold cross-validation.

    Reads the notebook-level `data` (features) and `target` (labels).

    Parameters
    ----------
    k : tuple of int
        Hidden-layer sizes, forwarded to `train_nn` (despite the name, this is
        not a neighbor count).

    Returns
    -------
    list
        Six accuracies, one per fold, in fold order.
    """
    # No random_state here: it only has an effect together with shuffle=True,
    # and scikit-learn 0.24+ raises ValueError when it is set while shuffle is
    # left at False. The folds are the same contiguous, unshuffled blocks.
    kf = KFold(n_splits=6)

    fold_accuracies = []
    for train_index, test_index in kf.split(data):
        # KFold yields integer positions, so select by position (.iloc) rather
        # than by label (.loc); the two only coincide for a default RangeIndex.
        train_features, test_features = data.iloc[train_index], data.iloc[test_index]
        train_labels, test_labels = target.iloc[train_index], target.iloc[test_index]
        model = train_nn(k, train_features, train_labels)
        fold_accuracies.append(test(model, test_features, test_labels))
    return fold_accuracies

In [None]:
# Three hidden layers of equal width, scored with six folds instead of four.
neurons = [
    (10, 10, 10), (64, 64, 64), (128, 128, 128)
]
nn_three_layer_accuracies = [np.mean(cross_validate_six(n)) for n in neurons]

# Explicit figure/axes, with axis labels so the plot reads on its own.
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title("Mean Accuracy vs. Neurons In Three Hidden Layers")
ax.set_xlabel("Neurons per hidden layer")
ax.set_ylabel("Mean cross-validation accuracy")

# All layers share one width, so the first entry stands for the whole tuple.
x = [layer_sizes[0] for layer_sizes in neurons]
ax.plot(x, nn_three_layer_accuracies)