In [1]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml("mnist_784", version=1)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [2]:
import numpy as np
import matplotlib.pyplot as plt

X = mnist["data"]
y = mnist["target"].astype(np.uint8)

In [17]:
DIGIT_PX_COUNT = 784
DIGIT_COUNT = 10

# Create a Test set to set aside by splitting data into training and test sets
# Note that the MNIST dataset is already split into a training set of the first 60,000 images and a test set of the final 10,000 images
X_train = X[:60000]
y_train = y[:60000]

X_test = X[60000:]
y_test = y[60000:]

# But wait! If we want the output layer to contain probabilities of each digit, then each row of y must contain the "yes" or "no" of each digit
# also keep in mind that in python, True == 1 and False == 0
# Remember, with tensorflow, numpy array in, tensorflow tensor out
y_digit_ids = np.array([[(digit == y_i) for digit in range(0, DIGIT_COUNT)] for y_i in y])
y_train_digit_ids = y_digit_ids[:60000]
y_test_digit_ids = y_digit_ids[60000:]

In [18]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import BinaryCrossentropy

network = Sequential([
    Dense(units=DIGIT_PX_COUNT, activation='sigmoid'), # [0]: input layer
    Dense(units=20, activation='sigmoid'),             # [1]: hidden layer 1
    Dense(units=16, activation='sigmoid'),             # [2]: hidden layer 2
    Dense(units=DIGIT_COUNT, activation='sigmoid')     # [3]: output layer
])

network.compile(loss=BinaryCrossentropy())

# fit the model
network.fit(X_train, y_train_digit_ids)



<keras.callbacks.History at 0x1e58d045660>

In [19]:
def network_predict(digit):
    digit_probs = network.predict(tf.convert_to_tensor([digit], dtype=tf.float64))[0]
    
    index = 0
    maximum = digit_probs[0]
    for i in range(1, len(digit_probs)):
        prob = digit_probs[i]
        if prob > maximum:
            index = i
            maximum = prob
    
    print(digit_probs)
    print(index)
    return index

In [28]:
index = 7
print(y[index])
network_predict(X.to_numpy()[index])

3
[5.0077047e-02 3.6511976e-02 7.0329462e-03 6.5795869e-01 6.0629961e-04
 1.3208008e-01 4.1289948e-04 1.1321469e-02 1.2653554e-02 6.0194153e-03]
3


3

In [57]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=4, weights='distance')
knn.fit(X_train, y_train)

In [37]:
knn.predict([X.to_numpy()[index]])



array([3], dtype=uint8)

In [59]:
from sklearn.model_selection import cross_val_score

cross_val_score(knn, X_test, y_test, cv=3, scoring="accuracy")

array([0.91841632, 0.9429943 , 0.96189619])

In [82]:
# Exercise 1 complete. Now for exercise 2
from scipy.ndimage import shift

X_train_shifted, X_test_shifted = [], [] # contains all of the training set but with 4 copies containing a shifted version of each image
y_train_shifted, y_test_shifted = [], []
shifts = [[1, 0],
          [-1, 0],
          [0, -1],
          [0, 1]]
for X_train_i, y_train_i, X_test_i, y_test_i in zip(X_train.to_numpy(), y_train, X_test.to_numpy(), y_test):
    X_train_shifted.append(X_train_i)
    y_train_shifted.append(y_train_i)
    
    X_test_shifted.append(X_test_i)
    y_test_shifted.append(y_test_i)
    for i in range(len(shifts)):
        X_train_shifted.append(shift(X_train_i.reshape(28, 28), shifts[i], cval=0).reshape(784,))
        y_train_shifted.append(y_train_i)
        
        X_test_shifted.append(shift(X_test_i.reshape(28, 28), shifts[i], cval=0).reshape(784,))
        y_test_shifted.append(y_test_i)
    
    
knn.fit(X_train_shifted, y_train_shifted)

In [83]:
cross_val_score(knn, X_test_shifted, y_test_shifted, cv=3, scoring="accuracy")

array([0.91954161, 0.94564109, 0.96705868])