In [1]:
import numpy as np
import pandas as pd

from models.neural_networks import *

In [2]:
# Load the header-less MNIST training CSV. The code below treats column 0 as
# the digit label and the remaining columns as pixel intensities.
data_file = pd.read_csv("../data/cnn/mnist_train.csv", header=None)

N_CLASSES = 10       # width of the one-hot label vectors
N_SAMPLES = 5000     # size of the random training subset
SUBSET_SEED = 0      # fixed seed so Restart & Run All picks the same subset

# Vectorised conversion: one NumPy pass instead of a Python loop over rows.
raw_values = data_file.to_numpy()
labels = raw_values[:, 0].astype(int)

# Scale pixel values from [0, 255] down to [0, 1].
x_data = raw_values[:, 1:] / 255.

# One-hot encode the labels: row i gets a 1 in column labels[i].
y_data = np.zeros((len(labels), N_CLASSES))
y_data[np.arange(len(labels)), labels] = 1

# Draw a random subset with a local, seeded generator (the global NumPy RNG
# state is left untouched).
rng = np.random.default_rng(SUBSET_SEED)
idx = rng.permutation(len(x_data))[:N_SAMPLES]
x_data = x_data[idx]
y_data = y_data[idx]
print(x_data.shape)
print(y_data.shape)

# Reshape the flat pixel rows into 28x28 single-channel images.
x_data = x_data.reshape([len(x_data), 28, 28, 1])

(500, 784)
(500, 10)


In [4]:
## import your own implementation of softmax
from scipy.special import softmax

# Training hyper-parameters shared by the layers and the training call below.
LEARNING_RATE = 1e-1   # base step size handed to every trainable layer
LAMBDA = 1e-2          # value passed as each layer's lambda_regularization
EPOCHS = 50            # epoch count handed to cnn.learn

class Softmax(ActivationFunctionsForNN.BaseActivationFunctionForNN):
    """Softmax activation for the output layer, backed by ``scipy.special.softmax``."""

    def __init__(self):
        pass

    def forward(self, x):
        """Return the softmax of ``x`` taken along its last axis.

        ``scipy.special.softmax`` defaults to ``axis=None``, which normalises
        over the *entire* array; for a batch of shape ``(m, 10)`` that would
        make the whole batch sum to 1 instead of each sample's row. Passing
        ``axis=-1`` normalises every row independently and is identical to the
        default for 1-D input.
        """
        return softmax(x, axis=-1)

    def backward(self, x):
        """Return ``x`` unchanged.

        NOTE(review): presumably the loss gradient supplied by the framework
        already accounts for the softmax derivative -- confirm against the
        cost function used by ``NeuralNetworkModel``.
        """
        return x

# Assemble the network: conv -> mean pooling -> conv -> flatten -> two linear
# layers, the last one using the Softmax activation defined above.
cnn = NeuralNetworkModel()

# Keyword arguments common to both convolutional layers.
conv_settings = dict(padding=0, stride=1,
                     learning_rate=LEARNING_RATE, lambda_regularization=LAMBDA)

layer1 = ConvolutionalLayer(kernel_size=3, input_channels=1, output_channels=10,
                            **conv_settings)  # -> 26x26x10
layer2 = PoolingLayer(stride=2, use_mean_pooling=True)  # -> 13x13x10
layer3 = ConvolutionalLayer(kernel_size=5, input_channels=10, output_channels=16,
                            **conv_settings)  # -> 9x9x16
layer4 = FlattenLayer()  # size is automatically calculated -> mx1296
layer5 = LinearLayer(input_shape=1296, output_shape=100,
                     learning_rate=LEARNING_RATE,
                     lambda_regularization=LAMBDA)  # -> mx100
# The output layer is given a ten times smaller learning rate than the rest.
layer6 = LinearLayer(input_shape=100, output_shape=10,
                     learning_rate=LEARNING_RATE/10,
                     activation_function=Softmax(),
                     lambda_regularization=LAMBDA)  # -> mx10

# Register the layers in forward order.
for layer in (layer1, layer2, layer3, layer4, layer5, layer6):
    cnn.add_layer(layer)

import time

# Optional: resume from previously saved weights.
#cnn.loadWeights('cnn_weights.npz')

# Time the run with perf_counter(): it is monotonic and high-resolution,
# whereas time.time() follows the wall clock and can jump if the system clock
# is adjusted mid-run.
t1 = time.perf_counter()
print(f'Number of parameters: {cnn.get_number_of_parameters()}')
# NOTE(review): the trailing 64 is presumably the mini-batch size -- confirm
# against NeuralNetworkModel.learn.
costs = cnn.learn(x_data, y_data, EPOCHS, 64)
t2 = time.perf_counter()
print('time=', t2 - t1)

# Optional: persist the trained weights.
#cnn.saveWeights('cnn_weights.npz')

# Accuracy on the samples the network was trained on: a prediction counts as
# correct when the argmax of cnn.infer(sample) equals the argmax of the
# sample's one-hot label. Samples are passed to infer one at a time.
n_train = x_data.shape[0]
n_correct = sum(
    int(np.argmax(cnn.infer(sample)) == np.argmax(target))
    for sample, target in zip(x_data, y_data)
)

print('n_correct =', n_correct, 'rate =', n_correct/n_train)

import matplotlib.pyplot as plt

# Plot the cost values returned by cnn.learn, in the order they were recorded.
figure = plt.figure(figsize=(12, 10), dpi=100)
ax = figure.add_subplot(111)
ax.plot(costs)
ax.set_xlabel('iteration')
ax.set_ylabel('cost')
ax.set_title('Training cost')
# Use pyplot.show() rather than Figure.show(): the latter does not manage an
# event loop and only emits a warning under non-GUI (e.g. inline) backends.
plt.show()

Number of parameters: 134826


 25%|██▌       | 2/8 [00:08<00:24,  4.12s/it]


KeyboardInterrupt: 