In [2]:
# Create neural network from scratch using numpy

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
import scipy.io
import math



In [3]:
# Display plots inline and change default figure size
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'


In [4]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.W1 = np.random.randn(self.input_size, self.hidden_size) / np.sqrt(self.input_size)
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = np.random.randn(self.hidden_size, self.output_size) / np.sqrt(self.hidden_size)
        self.b2 = np.zeros((1, self.output_size))
        
    def forward(self, X):
        self.z = np.dot(X, self.W1) + self.b1
        self.h = np.tanh(self.z)
        self.z2 = np.dot(self.h, self.W2) + self.b2
        exp_scores = np.exp(self.z2)
        self.probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return self.probs
    
    def backward(self, X, y, learning_rate):
        delta3 = self.probs
        delta3[range(X.shape[0]), y] -= 1
        dW2 = (self.h.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(self.W2.T) * (1 - np.power(self.h, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        self.W1 += -learning_rate * dW1
        self.b1 += -learning_rate * db1
        self.W2 += -learning_rate * dW2
        self.b2 += -learning_rate * db2
        
    def calculate_loss(self, X, y):
        num_examples = X.shape[0]
        probs = self.forward(X)
        corect_logprobs = -np.log(probs[range(num_examples), y])
        data_loss = np.sum(corect_logprobs)
        return 1./num_examples * data_loss
    
    def predict(self, X):
        probs = self.forward(X)
        return np.argmax(probs, axis=1)
    
    def build_model(self, X, y, learning_rate, num_passes=20000, print_loss=False):
        np.random.seed(0)

In [6]:
# Import MNIST data
import keras.datasets.mnist as mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Reshape the training and test examples
X_train_flatten = X_train.reshape(X_train.shape[0], -1).T
X_test_flatten = X_test.reshape(X_test.shape[0], -1).T

# Normalize image vectors
X_train = X_train_flatten/255.
X_test = X_test_flatten/255.

# Convert training and test labels to one hot matrices
y_train = y_train.reshape(1, y_train.shape[0])
y_test = y_test.reshape(1, y_test.shape[0])

# One hot encoding
y_train = np.eye(10)[y_train.reshape(-1)].T
y_test = np.eye(10)[y_test.reshape(-1)].T


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [7]:
# Build a model with a 3-dimensional hidden layer
model = NeuralNetwork(input_size=784, hidden_size=100, output_size=10)
model.build_model(X_train, y_train, learning_rate=0.01, num_passes=20000, print_loss=True)


ValueError: shapes (784,60000) and (784,100) not aligned: 60000 (dim 1) != 784 (dim 0)