In [7]:
# Colab-only setup: the shell test succeeds only when the COLAB_GPU environment
# variable is non-empty, so the pip install is skipped everywhere else.
# scikit-learn is pinned to the 0.20.x series; torch and skorch are unpinned.
# NOTE(review): torch and skorch are not used by the cells visible here - confirm
# they are still needed.
! [ ! -z "$COLAB_GPU" ] && pip install torch scikit-learn==0.20.* skorch

Collecting scikit-learn==0.20.*
[?25l  Downloading https://files.pythonhosted.org/packages/96/5b/5da31a6572dc6b7b2846a7cfcbe2e060a0e6af0e1059a6516965e40371b7/scikit_learn-0.20.4-cp36-cp36m-manylinux1_x86_64.whl (5.4MB)
[K     |████████████████████████████████| 5.4MB 2.7MB/s 
[?25hCollecting skorch
[?25l  Downloading https://files.pythonhosted.org/packages/fb/1e/cc4e1f23cd1faab06672f309e0857294aaa80c5f84670f4d3d19b08ab10b/skorch-0.7.0-py3-none-any.whl (105kB)
[K     |████████████████████████████████| 112kB 33.1MB/s 
Installing collected packages: scikit-learn, skorch
  Found existing installation: scikit-learn 0.22.1
    Uninstalling scikit-learn-0.22.1:
      Successfully uninstalled scikit-learn-0.22.1
Successfully installed scikit-learn-0.20.4 skorch-0.7.0


In [0]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

In [0]:
# Fetch the 'mnist_784' dataset from OpenML. cache=False is passed so the
# download is not kept in scikit-learn's local cache (per the fetch_openml docs),
# which means every fresh run of this cell downloads the data again.
mnist = fetch_openml('mnist_784', cache=False)

In [0]:
# Cast the pixel data to 32-bit floats and the class labels to 64-bit integers.
X = mnist.data.astype(np.float32)
y = mnist.target.astype(np.int64)

In [0]:
# Hold out 25% of the samples as the test set; random_state=42 fixes the
# shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [27]:
# Show the shapes of the four split arrays (train X, train y, test X, test y).
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((52500, 784), (52500,), (17500, 784), (17500,))

In [0]:
# Turn every flattened 784-pixel row back into a 28x28 image.
#
# The number of samples is inferred (-1) instead of being hard-coded, so this
# cell keeps working if the split ratio or the dataset size changes, and the
# whole conversion is one vectorised reshape rather than a per-row Python loop.
# np.array(..., dtype=np.float64) always copies, so the results are float64
# arrays that do not share memory with X_train / X_test, exactly like the
# np.zeros buffers that were filled row by row before.
m_train = np.array(X_train, dtype=np.float64).reshape(-1, 28, 28)
m_test = np.array(X_test, dtype=np.float64).reshape(-1, 28, 28)
    



In [0]:
import numpy as np
from conv import Conv3x3
from maxpool import MaxPool2
from softmax import Softmax

# We only use the first NUM_EXAMPLES examples of each set in the interest of
# time. Feel free to change this if you want: it is the single place that
# controls how many training and test examples are used.
NUM_EXAMPLES = 1000

train_images = m_train[:NUM_EXAMPLES]
train_labels = y_train[:NUM_EXAMPLES]
test_images = m_test[:NUM_EXAMPLES]
test_labels = y_test[:NUM_EXAMPLES]

In [81]:
# Build the three layers once as module-level objects; forward() and train()
# below read them as globals, so re-running this cell replaces the layers.
# The classes come from the local conv / maxpool / softmax modules; the shape
# annotations on each line are the original author's.
# NOTE(review): re-running presumably resets any learned weights - confirm
# against the layer implementations.
conv = Conv3x3(8)                  # 28x28x1 -> 26x26x8
pool = MaxPool2()                  # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10) # 13x13x8 -> 10

def forward(image, label):
  '''
  Runs one image through conv -> pool -> softmax and scores the result.
  - image is a 2d numpy array
  - label is a digit, used to index the softmax output

  Returns a tuple (out, loss, acc):
  - out is whatever softmax.forward() returns
  - loss is the cross-entropy loss -ln(out[label])
  - acc is 1 if the largest entry of out sits at index label, else 0
  '''
  # Shift the pixels from [0, 255] to [-0.5, 0.5] before the first layer;
  # this is standard practice and makes the values easier to work with.
  normalized = (image / 255) - 0.5
  probs = softmax.forward(pool.forward(conv.forward(normalized)))

  # Cross-entropy uses the natural log of the score given to the true class.
  loss = -np.log(probs[label])
  acc = int(np.argmax(probs) == label)

  return probs, loss, acc

def train(im, label, lr=.005):
  '''
  Performs one training step (forward pass, then backprop) on a single example.
  Returns the cross-entropy loss and accuracy from the forward pass.
  - im is a 2d numpy array
  - label is a digit
  - lr is the learning rate handed to the softmax and conv layers
  '''
  out, loss, acc = forward(im, label)

  # Gradient of the loss -ln(out[label]) with respect to out: zero everywhere
  # except at the true class.
  grad = np.zeros(10)
  grad[label] = -1 / out[label]

  # Push the gradient back through the layers in reverse order. The pool
  # layer is not given a learning rate; the conv layer's return value is
  # not needed because it is the first layer.
  grad = softmax.backprop(grad, lr)
  grad = pool.backprop(grad)
  conv.backprop(grad, lr)

  return loss, acc

print('MNIST CNN initialized!')

# Train the CNN for 3 epochs
for epoch in range(3):
  print('--- Epoch %d ---' % (epoch + 1))

  # Shuffle the training data; images and labels share one permutation so
  # each image stays paired with its label.
  permutation = np.random.permutation(len(train_images))
  train_images = train_images[permutation]
  train_labels = train_labels[permutation]

  # Train!
  loss = 0
  num_correct = 0
  for i, (im, label) in enumerate(zip(train_images, train_labels)):
    l, acc = train(im, label)
    loss += l
    num_correct += acc

    # Report AFTER the step has been trained, so every window holds exactly
    # 100 steps (the loss divisor and the "%d%%" accuracy both rely on that)
    # and the last step of the epoch is included in the final report.
    if (i + 1) % 100 == 0:
      print(
        '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
        (i + 1, loss / 100, num_correct)
      )
      loss = 0
      num_correct = 0

# Evaluate on the held-out test examples: forward pass only, no weight updates.
print('\n--- Testing the CNN ---')
num_tests = len(test_images)
loss = 0
num_correct = 0
for im, label in zip(test_images, test_labels):
  # forward() returns (out, loss, acc); only the last two are needed here.
  step_loss, hit = forward(im, label)[1:]
  loss += step_loss
  num_correct += hit

# Mean loss and the fraction of correct predictions over the test examples.
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)

MNIST CNN initialized!
--- Epoch 1 ---
[Step 100] Past 100 steps: Average Loss 2.247 | Accuracy: 14%
[Step 200] Past 100 steps: Average Loss 2.109 | Accuracy: 34%
[Step 300] Past 100 steps: Average Loss 1.605 | Accuracy: 54%
[Step 400] Past 100 steps: Average Loss 1.150 | Accuracy: 64%
[Step 500] Past 100 steps: Average Loss 1.063 | Accuracy: 64%
[Step 600] Past 100 steps: Average Loss 0.903 | Accuracy: 68%
[Step 700] Past 100 steps: Average Loss 0.838 | Accuracy: 68%
[Step 800] Past 100 steps: Average Loss 0.872 | Accuracy: 73%
[Step 900] Past 100 steps: Average Loss 0.623 | Accuracy: 81%
[Step 1000] Past 100 steps: Average Loss 0.623 | Accuracy: 80%
--- Epoch 2 ---
[Step 100] Past 100 steps: Average Loss 0.507 | Accuracy: 82%
[Step 200] Past 100 steps: Average Loss 0.533 | Accuracy: 83%
[Step 300] Past 100 steps: Average Loss 0.688 | Accuracy: 79%
[Step 400] Past 100 steps: Average Loss 0.605 | Accuracy: 81%
[Step 500] Past 100 steps: Average Loss 0.543 | Accuracy: 83%
[Step 600] Pas