# Mini Project 6

Exploring Neural Networks and Learning Google Colab

Idea: also train on randomly generated noise entries (labelled "NaN", i.e. not a digit) to see whether this (1) improves the results and (2) improves the intuitive 'picture' given by the visualized weights.

In [0]:
# imports and set-up for session

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

## Kaggle Setup and Load Files from API

Run this cell once per session; comment it out afterwards so that a full run does not re-download the data.

In [3]:
# Upload API key from local drive
from google.colab import files
files.upload()

# Kaggle API install
!pip install -q kaggle
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

# Load the data
!kaggle competitions download -c mnist-digit-classification-2
!mkdir mnist
!unzip mnist_train.csv.zip
!unzip mnist_test.csv.zip
!mv mnist_train.csv mnist 
!mv mnist_test.csv mnist 
!mv mnist_sample.csv mnist
!rm mnist_train.csv.zip
!rm mnist_test.csv.zip
!ls mnist

Saving kaggle.json to kaggle.json
Downloading mnist_sample.csv to /content
  0% 0.00/77.1k [00:00<?, ?B/s]
100% 77.1k/77.1k [00:00<00:00, 27.1MB/s]
Downloading mnist_test.csv.zip to /content
  0% 0.00/2.12M [00:00<?, ?B/s]
100% 2.12M/2.12M [00:00<00:00, 113MB/s]
Downloading mnist_train.csv.zip to /content
 78% 10.0M/12.8M [00:00<00:00, 19.5MB/s]
100% 12.8M/12.8M [00:00<00:00, 29.5MB/s]
Archive:  mnist_train.csv.zip
  inflating: mnist_train.csv         
Archive:  mnist_test.csv.zip
  inflating: mnist_test.csv          
mnist_sample.csv  mnist_test.csv  mnist_train.csv


## Load Training Data Set

The original (noise-free) training data is loaded first.

In [4]:
# Read the Kaggle training CSV, using its first column as the row index.
training = pd.read_csv('mnist/mnist_train.csv', index_col=0)

# The 'Category' column holds the labels; every other column is a feature.
train_y = training['Category'].values
train_X = training.drop(columns='Category').values
print(training.shape, train_X.shape, train_y.shape)

(60000, 785) (60000, 784) (60000,)


In [0]:
# Fit a full PCA on the training features and project them onto all components.
pca = PCA()
pca_X = pca.fit_transform(train_X)

# Count the components whose explained-variance ratio exceeds 1e-20 and keep
# only that many leading columns of the projection.
components = int(np.count_nonzero(pca.explained_variance_ratio_ > 1e-20))
pca_X = pca_X[:, :components]

Create a random, unrelated portion of the data: pure-noise images with category NaN ("not a digit", encoded as label 10).
The hope is that training with these will improve generalization by acting as a sort of regularization.

In [0]:
# Synthetic "not a digit" class: uniformly random pixel rows carrying the extra
# label 10, as many of them as the most frequent real digit class has samples.
noise_rng = np.random.RandomState(0)  # local, fixed seed: re-running the cell rebuilds the same set
num_samples = training['Category'].value_counts().max()
rand_X = noise_rng.randint(0, 256, (num_samples, train_X.shape[1]))
rand_y = np.full((num_samples,), 10) # label 10 means 'not a digit'

full_X = np.concatenate((train_X,rand_X))
full_y = np.concatenate((train_y,rand_y))

# Shuffle samples and labels with one shared permutation.
# (np.random.shuffle() works in place and returns None, so it cannot be used
# to obtain the index order; permutation() returns the shuffled indices.)
ordering = noise_rng.permutation(full_y.shape[0])
full_X = full_X[ordering]
full_y = full_y[ordering]

# Project the combined set with the PCA fitted on the real digits only.
pca_full_X = pca.transform(full_X)[:,:components]

Create one plain and one noise-augmented image set for the convolutional networks.

In [0]:
# Turn every flat row into a 28x28 single-channel image and divide by 255.0.
# The test set has to receive the identical treatment before prediction.
train_X_2D = train_X.reshape(-1, 28, 28, 1) / 255.0
full_X_2D = full_X.reshape(-1, 28, 28, 1) / 255.0

## Create Tensorflow Networks

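Define the factory functions for the models that will be trained and validated. Both the fully-connected and the convolutional network come in two variants: a basic one (10 output classes, no noise input) and a noised one (11 output classes, including the extra noise class).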

In [0]:
def make_model_basic():
  """Build and compile the fully-connected classifier for the 10 digit classes.

  Returns:
    A compiled `tf.keras.Sequential` model: seven ReLU `Dense` layers of
    width 1000, 640, 320, 160, 80, 40 and 20, followed by a 10-unit softmax
    layer. It is compiled with the Adadelta optimizer, sparse categorical
    cross-entropy loss and the accuracy metric.
  """
  hidden_widths = (1000, 640, 320, 160, 80, 40, 20)
  stack = [
    tf.keras.layers.Dense(width, activation=tf.nn.relu)
    for width in hidden_widths
  ]
  stack.append(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

  net = tf.keras.Sequential(stack)
  net.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adadelta(),  # alternative noted by the author: Adam(lr=0.0001)
  )
  return net

def make_model_2D_basic():
  """Build and compile the convolutional classifier for the 10 digit classes.

  Returns:
    A compiled `tf.keras.Sequential` model taking (28, 28, 1) inputs: two
    convolutional stages (2 x Conv2D, max-pooling, dropout) followed by a
    256-unit ReLU layer, dropout and a 10-unit softmax layer. It is compiled
    with RMSprop, sparse categorical cross-entropy loss and the accuracy metric.
  """
  layers = tf.keras.layers

  def conv(filters, size, **extra):
    # Every convolution shares 'Same' padding and a ReLU activation.
    return layers.Conv2D(filters=filters, kernel_size=size, padding='Same', activation='relu', **extra)

  stack = []
  # Stage 1: two 5x5 convolutions with 32 filters, 2x2 max-pooling, dropout 0.25.
  stack += [
    conv(32, (5,5), input_shape=(28,28,1)),
    conv(32, (5,5)),
    layers.MaxPool2D(pool_size=(2,2)),
    layers.Dropout(0.25),
  ]
  # Stage 2: two 3x3 convolutions with 64 filters, strided 2x2 max-pooling, dropout 0.25.
  stack += [
    conv(64, (3,3)),
    conv(64, (3,3)),
    layers.MaxPool2D(pool_size=(2,2), strides=(2,2)),
    layers.Dropout(0.25),
  ]
  # Classifier head.
  stack += [
    layers.Flatten(),
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dropout(0.5),
    layers.Dense(10, activation=tf.nn.softmax),
  ]

  net = tf.keras.Sequential(stack)
  net.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    # alternative noted by the author: Adam(lr=0.0001)
    optimizer=tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0),
  )
  return net

def make_model_noised():
  """Build and compile the fully-connected classifier with 11 output classes.

  Returns:
    A compiled `tf.keras.Sequential` model: seven ReLU `Dense` layers of
    width 1000, 704, 352, 176, 88, 44 and 22, followed by an 11-unit softmax
    layer. It is compiled with Adam (lr=0.0001), sparse categorical
    cross-entropy loss and the accuracy metric.
  """
  hidden_widths = (1000, 704, 352, 176, 88, 44, 22)
  stack = [
    tf.keras.layers.Dense(width, activation=tf.nn.relu)
    for width in hidden_widths
  ]
  stack.append(tf.keras.layers.Dense(11, activation=tf.nn.softmax))

  net = tf.keras.Sequential(stack)
  net.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(lr=0.0001),
  )
  return net

def make_model_2D_noised():
  """Build and compile the convolutional classifier with 11 output classes.

  Returns:
    A compiled `tf.keras.Sequential` model taking (28, 28, 1) inputs: two
    convolutional stages (2 x Conv2D, max-pooling, dropout) followed by two
    256-unit ReLU layers, dropout and an 11-unit softmax layer. It is compiled
    with RMSprop, sparse categorical cross-entropy loss and the accuracy metric.
  """
  layers = tf.keras.layers

  def conv(filters, size, **extra):
    # Every convolution shares 'Same' padding and a ReLU activation.
    return layers.Conv2D(filters=filters, kernel_size=size, padding='Same', activation='relu', **extra)

  stack = []
  # Stage 1: two 5x5 convolutions with 32 filters, 2x2 max-pooling, dropout 0.25.
  stack += [
    conv(32, (5,5), input_shape=(28,28,1)),
    conv(32, (5,5)),
    layers.MaxPool2D(pool_size=(2,2)),
    layers.Dropout(0.25),
  ]
  # Stage 2: two 3x3 convolutions with 64 filters, strided 2x2 max-pooling, dropout 0.25.
  stack += [
    conv(64, (3,3)),
    conv(64, (3,3)),
    layers.MaxPool2D(pool_size=(2,2), strides=(2,2)),
    layers.Dropout(0.25),
  ]
  # Classifier head: one more 256-unit layer than the basic CNN, 11 outputs.
  stack += [
    layers.Flatten(),
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dropout(0.5),
    layers.Dense(11, activation=tf.nn.softmax),
  ]

  net = tf.keras.Sequential(stack)
  net.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    # alternative noted by the author: Adam(lr=0.0001)
    optimizer=tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0),
  )
  return net

In [0]:
# helpers for fitting

# Learning-rate annealer: watches 'val_acc' and, after 3 epochs without
# improvement, multiplies the learning rate by 0.5 (never going below 1e-05).
lr_annealing = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
)

# data generator
def make_datagen(data):
    """Create the augmenting image generator used for CNN training.

    Args:
        data: Not used by this function; no statistics are fitted on it here.

    Returns:
        A `tf.keras.preprocessing.image.ImageDataGenerator` with centering,
        std-normalization, ZCA whitening and flips switched off, and with
        rotation (10), zoom (0.1) and width/height shifts (0.1) enabled.
    """
    switched_off = dict.fromkeys(
        (
            'featurewise_center',
            'samplewise_center',
            'featurewise_std_normalization',
            'samplewise_std_normalization',
            'zca_whitening',
            'horizontal_flip',
            'vertical_flip',
        ),
        False,
    )
    augmentations = {
        'rotation_range': 10,
        'zoom_range': 0.1,
        'width_shift_range': 0.1,
        'height_shift_range': 0.1,
    }
    return tf.keras.preprocessing.image.ImageDataGenerator(**switched_off, **augmentations)

## Cross-Validate the Networks

Cross-validation function

In [0]:
def classify_cross_val_score(
  estimator,
  X,
  y,
  cv=4,
  scoring=None,
  fit_params={},
  convert=lambda x:x
):
  """Average hold-out accuracy of freshly built models over `cv` stratified shuffle splits.

  Args:
    estimator: Zero-argument callable returning a new model that exposes
      `fit(X, y, **fit_params)` and `predict(X)`.
    X: Samples, indexable by the index arrays the splitter yields.
    y: Labels aligned with `X`.
    cv: Number of splits; also the divisor used for averaging.
    scoring: Accepted but not used; accuracy is always the metric.
    fit_params: Extra keyword arguments forwarded to `fit`.
    convert: Maps the raw `predict` output to labels comparable with `y`
      (identity by default).

  Returns:
    The sum of the per-split accuracies, each divided by `cv`.
    Every per-split accuracy is also printed as soon as it is computed.
  """
  def fold_accuracy(fit_idx, holdout_idx):
    # A new model per split, so nothing learned on one split leaks into the next.
    fitted = estimator()
    fitted.fit(X[fit_idx], y[fit_idx], **fit_params)
    accuracy = accuracy_score(y[holdout_idx], convert(fitted.predict(X[holdout_idx])))
    print(accuracy)
    return accuracy

  splitter = StratifiedShuffleSplit(cv)
  return sum(
    fold_accuracy(fit_idx, holdout_idx) / cv
    for fit_idx, holdout_idx in splitter.split(X, y)
  )

def classify_cross_val_score_generator(
  estimator,
  X,
  y,
  datagen,
  cv=4,
  scoring=None,
  fit_params={},
  convert=lambda x:x,
  batch_size=86
):
  """Average hold-out accuracy over `cv` stratified shuffle splits, training through a data generator.

  Args:
    estimator: Zero-argument callable returning a new model that exposes
      `fit_generator(...)` and `predict(X)`.
    X: Samples, indexable by the index arrays the splitter yields.
    y: Labels aligned with `X`.
    datagen: Object exposing `fit(data)` and `flow(X, y, batch_size=...)`;
      it is re-fitted on the training part of every split.
    cv: Number of splits; also the divisor used for averaging.
    scoring: Accepted but not used; accuracy is always the metric.
    fit_params: Extra keyword arguments forwarded to `fit_generator`.
    convert: Maps the raw `predict` output to labels comparable with `y`
      (identity by default).
    batch_size: Batch size handed to `datagen.flow`; also determines
      `steps_per_epoch` (training samples // batch_size).

  Returns:
    The sum of the per-split accuracies, each divided by `cv`.
    Every per-split accuracy is also printed as soon as it is computed.
  """
  def fold_accuracy(fit_idx, holdout_idx):
    # A new model per split; the held-out part doubles as validation data
    # during training and as the scoring set afterwards.
    net = estimator()
    datagen.fit(X[fit_idx])
    batches = datagen.flow(X[fit_idx], y[fit_idx], batch_size=batch_size)
    net.fit_generator(
        batches,
        steps_per_epoch=fit_idx.shape[0] // batch_size,
        validation_data=(X[holdout_idx], y[holdout_idx]),
        **fit_params
    )
    accuracy = accuracy_score(y[holdout_idx], convert(net.predict(X[holdout_idx])))
    print(accuracy)
    return accuracy

  splitter = StratifiedShuffleSplit(cv)
  return sum(
    fold_accuracy(fit_idx, holdout_idx) / cv
    for fit_idx, holdout_idx in splitter.split(X, y)
  )

basic fully-connected

In [0]:
# Scores noted per epoch count (presumably from earlier runs of this cell):
# 5:  0.95879
# 10: 0.99254
# 20: 0.97529
for e in [20]:
  score_basic = classify_cross_val_score(
      estimator=make_model_basic,
      X=train_X,  # alternative input: pca_X
      y=train_y,
      cv=4,
      fit_params=dict(epochs=e),
      convert=lambda probs: np.argmax(probs, axis=1),  # softmax rows -> predicted label
  )
  print(score_basic)

noised fully-connected

In [0]:
# Cross-validate the 11-class fully-connected model on the digits-plus-noise set.
for e in [10]:
  score_noised = classify_cross_val_score(
      estimator=make_model_noised,
      X=full_X,  # alternative input: pca_full_X
      y=full_y,
      cv=4,
      fit_params=dict(epochs=e),
      convert=lambda probs: np.argmax(probs, axis=1),  # softmax rows -> predicted label
  )
  print(score_noised)

basic CNN

In [0]:
# Cross-validate the 10-class CNN on the plain images, with augmentation and
# learning-rate annealing during training.
for e in [20]:
  score_basic = classify_cross_val_score_generator(
      estimator=make_model_2D_basic,
      X=train_X_2D,
      y=train_y,
      datagen=make_datagen(train_X_2D),
      cv=10,
      fit_params=dict(epochs=e, callbacks=[lr_annealing]),
      convert=lambda probs: np.argmax(probs, axis=1),  # softmax rows -> predicted label
  )
  print(score_basic)

NaN-added CNN

In [17]:
# Cross-validate the 11-class CNN on the digits-plus-noise images, with
# augmentation and learning-rate annealing during training.
for e in [20]:
  # Kept in score_noised so that it does not overwrite the basic CNN's score_basic.
  score_noised = classify_cross_val_score_generator(
      make_model_2D_noised,
      full_X_2D,
      full_y,
      make_datagen(full_X_2D),  # generator built for the same data it will augment
      cv=10,
      fit_params={
          'epochs':e,
          'callbacks':[lr_annealing],
      },
      convert = lambda x:np.argmax(x, axis=1),  # softmax rows -> predicted label
      batch_size = 128
  )
  print(score_noised)

Epoch 1/20
 11/469 [..............................] - ETA: 8:45 - loss: 2.3397 - acc: 0.1449

KeyboardInterrupt: ignored

## Final Classification

Train, predict, and save

The CNN with NaN entries performed best in cross-validation; train with that set-up.
Since noise (NaN) entries are known not to be in the actual test set, the noise class is excluded from the predictions.

In [18]:
# Submission template (indexed by its first column); its 'Category' column is
# filled in once the predictions are available.
test_pred = pd.read_csv('mnist/mnist_sample.csv', index_col=0)

# Test features as a plain array, with the first CSV column used as the index.
test_X = pd.read_csv('mnist/mnist_test.csv', index_col=0).values
print(test_X.shape, test_pred.shape)

(10000, 784) (10000, 1)


In [21]:
# Train the final model: the 11-class CNN on the digits-plus-noise images.
model = make_model_2D_noised()
datagen = make_datagen(full_X_2D)
datagen.fit(full_X_2D)

# No validation data is passed to this fit, so the annealer watches the
# training accuracy ('acc'). It gets its own name so the 'val_acc' based
# lr_annealing used by the cross-validation cells is not overwritten.
final_lr_annealing = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='acc', patience=3, factor=0.5, min_lr=0.00001)

# Feed the same noise-augmented set the generator was prepared on; training on
# train_X_2D / train_y would never show the model a single sample of class 10.
model.fit_generator(
    datagen.flow(full_X_2D, full_y, batch_size=128),
    epochs=30,
    callbacks=[final_lr_annealing],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fb91653fe10>

In [37]:
# Prepare the test rows like the training images (28x28x1, divided by 255),
# predict, keep only the first 10 output columns (digits 0-9) and take the
# highest-scoring one per row.
pred = np.array(
    model.predict(test_X.reshape(-1,28,28,1)/255)
)[:,:10].argmax(axis=1)

# Fill in the submission template, show the first 100 rows and write the CSV.
test_pred['Category'] = pred
print(test_pred.head(100))
test_pred.to_csv('mnist_submission.csv', index=True)

     Category
Id           
1           7
2           2
3           1
4           0
5           4
6           1
7           4
8           9
9           5
10          9
11          0
12          6
13          9
14          0
15          1
16          5
17          9
18          7
19          3
20          4
21          9
22          6
23          6
24          5
25          4
26          0
27          7
28          4
29          0
30          1
..        ...
71          7
72          0
73          2
74          9
75          1
76          7
77          3
78          2
79          9
80          7
81          7
82          6
83          2
84          7
85          8
86          4
87          7
88          3
89          6
90          1
91          3
92          6
93          9
94          3
95          1
96          4
97          1
98          7
99          6
100         9

[100 rows x 1 columns]


In [42]:
# List the working directory (mnist_submission.csv should be in it), then hand
# the prediction file to google.colab's `files.download` so it can be submitted.
!ls
files.download('mnist_submission.csv')

mnist  mnist_submission.csv  sample_data
