In [28]:
!pip install tensorflow



In [79]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.sparse as sparse

In [65]:
from sklearn.datasets import fetch_openml

In [66]:
mnist = fetch_openml('mnist_784', version = 1)

In [67]:
X, Y = mnist["data"]/255, mnist["target"]

x_train, x_test, y_train, y_test = X[:60000], X[60000:], Y[:60000].astype(int), Y[60000:].astype(int)

In [68]:
y_train[0]

5

In [70]:
x_train.shape

(60000, 784)

In [71]:
def augment_feature_vector(X):
    return np.hstack((np.ones([len(X), 1]), X))

# Implementing Softmax regression from scratch using numpy 

In [72]:
#defining the softmax function along with the parameters
def vectorized_compute_probabilities(X, theta):
    
    theta_XT = np.matmul(theta, np.transpose(X))
    #taking a columnwise max:
    c = np.amax(theta_XT, axis = 0)
    #elementwise exponentiation of theta_XT:
    exp_matrix = np.exp(theta_XT - c)
    #computing the normalization factors for each column of H:
    sum_vector = np.sum(exp_matrix, axis = 0)
    
    #broadcasting!
    return exp_matrix/sum_vector
    

In [73]:
def gradient_descent_iteration(X, Y, theta, alpha, lambda_factor):
    
    n = len(Y)
    k = theta.shape[0]
    data = [1]*n
    
    H = vectorized_compute_probabilities(X, theta)
    #more efficient way to implement large sparse arrays:
    M = sparse.coo_matrix((data, (Y, range(n))), shape=(k,n)).toarray()
    
    first_term = np.matmul(M-H, X)*(-1/n)
    second_term = lambda_factor * theta

    return theta - alpha * (first_term + second_term)

In [74]:
def predict(X, theta):

    X = augment_feature_vector(X)
    probabilities = vectorized_compute_probabilities(X, theta)
    return np.argmax(probabilities, axis = 0)

In [75]:
def compute_accuracy(X, Y, theta):
    predictions = predict(X, theta)
    return np.mean(predictions == Y)

In [76]:
def softmax_regression(X, Y, alpha, lambda_factor, k, num_iterations):   
    
    X = augment_feature_vector(X)
    theta = np.zeros([k, X.shape[1]])
    for i in range(num_iterations):
        theta = gradient_descent_iteration(X, Y, theta, alpha, lambda_factor)
    
    return theta

In [77]:
theta_final = softmax_regression(x_train, y_train, alpha = .3, lambda_factor = 1.0e-4, k = 10, num_iterations = 1000)

In [78]:
compute_accuracy(x_test, y_test, theta_final)

0.9193

# Implementing a CNN based model from scratch

In [95]:
import tensorflow as tf
from tensorflow.keras import layers,models
from tensorflow import keras

In [108]:
(X_train, y_train) , (X_test, y_test) = keras.datasets.mnist.load_data()

In [109]:
X_train=X_train/255
X_test=X_test/255

In [110]:
X_train.reshape(-1,28,28,1)

array([[[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]],


       [[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
 

In [111]:
X_test.reshape(-1,28,28,1)

array([[[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]],


       [[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
 

In [112]:
convolutional_neural_network = models.Sequential([
    layers.Conv2D(filters=25, kernel_size=(3, 3), activation='relu', input_shape=(28,28,1)), 
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

In [114]:
convolutional_neural_network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
convolutional_neural_network.fit(X_train, y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x2615da96340>

In [115]:
convolutional_neural_network.evaluate(X_test, y_test)



[0.06678522378206253, 0.9901000261306763]