In [4]:
# imports
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

In [5]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [6]:
os.environ['TF_ENABLE_ONEDNN_OPTS']

'0'

In [5]:
# load data
data = pd.read_csv('train.csv')
X = data.drop('label', axis=1)
X = tf.convert_to_tensor(X)

y = data.label
y = tf.convert_to_tensor(y)

# split data
X_train, X_valid = X[:30000], X[30000:]
y_train, y_valid = y[:30000], y[30000:]

# reshape data
X_train = tf.reshape(X_train, [30000, 28, 28, 1])
X_valid = tf.reshape(X_valid, [12000, 28, 28, 1])

In [14]:
# iteration 1: basic model
def build_model1(X_train, X_valid, y_train, y_valid):
    model = keras.Sequential([
        layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[28, 28, 1]),
        layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
        layers.Flatten(),
        layers.Dense(units=64, activation='relu'),
        layers.Dense(units=10, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    history = model.fit(x=X_train, y=y_train, epochs=1, validation_data=(X_valid, y_valid), verbose=1)
    history_frame = pd.DataFrame(history.history)
    return model, history_frame


In [15]:
model1, history = build_model1(X_train, X_valid, y_train, y_valid)
history

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 19ms/step - accuracy: 0.8424 - loss: 2.5198 - val_accuracy: 0.9703 - val_loss: 0.1100


Unnamed: 0,accuracy,loss,val_accuracy,val_loss
0,0.927733,0.573757,0.970333,0.110041


In [32]:
# iteration 2: basic model with max pooling - improved training time greatly and accuracy slightly
def build_model2(X_train, X_valid, y_train, y_valid):
    model = keras.Sequential([
        layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[28, 28, 1]),
        layers.MaxPooling2D(padding='same'),
        layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
        layers.MaxPooling2D(padding='same'),
        layers.Flatten(),
        layers.Dense(units=64, activation='relu'),
        layers.Dense(units=10, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    history = model.fit(x=X_train, y=y_train, epochs=1, validation_data=(X_valid, y_valid), verbose=1)
    history_frame = pd.DataFrame(history.history)
    return model, history_frame


In [33]:
model2, history = build_model2(X_train, X_valid, y_train, y_valid) 
history

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.8321 - loss: 1.8075 - val_accuracy: 0.9741 - val_loss: 0.0872


Unnamed: 0,accuracy,loss,val_accuracy,val_loss
0,0.9242,0.464541,0.974083,0.087192


In [6]:
# iteration 3: builds on iteration 2 with batch normalization and dropout - improved accuracy (prevents overfitting)
def build_model3(X_train, X_valid, y_train, y_valid):
    model = keras.Sequential([
        layers.BatchNormalization(input_shape=[28, 28, 1]),
        layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
        layers.MaxPooling2D(padding='same'),
        layers.Dropout(0.2),
        layers.BatchNormalization(),
        layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
        layers.MaxPooling2D(padding='same'),
        layers.Dropout(0.2),
        layers.BatchNormalization(),
        layers.Flatten(),
        layers.Dense(units=64, activation='relu'),
        layers.Dropout(0.2),
        layers.BatchNormalization(),
        layers.Dense(units=10, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    history = model.fit(x=X_train, y=y_train, epochs=10, validation_data=(X_valid, y_valid), verbose=1, callbacks=[callback])
    history_frame = pd.DataFrame(history.history)
    return model, history_frame

In [7]:
model3, history = build_model3(X_train, X_valid, y_train, y_valid) 
history

Epoch 1/10


  super().__init__(**kwargs)


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - accuracy: 0.8525 - loss: 0.4938 - val_accuracy: 0.9817 - val_loss: 0.0638
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.9722 - loss: 0.0996 - val_accuracy: 0.9859 - val_loss: 0.0480
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.9785 - loss: 0.0731 - val_accuracy: 0.9882 - val_loss: 0.0381
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.9840 - loss: 0.0541 - val_accuracy: 0.9846 - val_loss: 0.0517
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - accuracy: 0.9844 - loss: 0.0508 - val_accuracy: 0.9893 - val_loss: 0.0349
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.9869 - loss: 0.0396 - val_accuracy: 0.9864 - val_loss: 0.0424
Epoch 7/10
[1m938/938[0m [32m━━━━━━━

Unnamed: 0,accuracy,loss,val_accuracy,val_loss
0,0.926433,0.253991,0.981667,0.063841
1,0.973067,0.094766,0.985917,0.047967
2,0.9774,0.073989,0.988167,0.038116
3,0.982067,0.059311,0.984583,0.051669
4,0.9844,0.051222,0.989333,0.034918
5,0.985867,0.042212,0.986417,0.042385
6,0.9877,0.038878,0.990417,0.031448
7,0.989,0.03458,0.989833,0.031405
8,0.988,0.035628,0.98975,0.030682
9,0.989267,0.03278,0.989333,0.032013


In [8]:
# iteration 4: build on iteration 3 with data augmentation
def build_model4(X_train, X_valid, y_train, y_valid):
    data_aug = keras.Sequential([
        layers.Rescaling(scale=1./255),
        layers.RandomZoom(height_factor=(-0.2, 0.2), width_factor=(-0.2, 0.2)),
        layers.RandomRotation(factor=(-0.1, 0.1)),
        layers.RandomContrast(factor=(0.1, 0.2))
    ])

    model = keras.Sequential([
        data_aug,
        layers.BatchNormalization(input_shape=[28, 28, 1]),
        layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
        layers.MaxPooling2D(padding='same'),
        layers.Dropout(0.2),
        layers.BatchNormalization(),
        layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
        layers.MaxPooling2D(padding='same'),
        layers.Dropout(0.2),
        layers.BatchNormalization(),
        layers.Flatten(),
        layers.Dense(units=64, activation='relu'),
        layers.Dropout(0.2),
        layers.BatchNormalization(),
        layers.Dense(units=10, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    history = model.fit(x=X_train, y=y_train, epochs=15, validation_data=(X_valid, y_valid), verbose=1, callbacks=[callback])
    history_frame = pd.DataFrame(history.history)
    return model, history_frame

In [9]:
model4, history = build_model4(X_train, X_valid, y_train, y_valid) 
history

Epoch 1/15


  super().__init__(**kwargs)


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.7707 - loss: 0.7239 - val_accuracy: 0.9731 - val_loss: 0.0856
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - accuracy: 0.9346 - loss: 0.2200 - val_accuracy: 0.9794 - val_loss: 0.0647
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - accuracy: 0.9499 - loss: 0.1582 - val_accuracy: 0.9830 - val_loss: 0.0558
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9582 - loss: 0.1393 - val_accuracy: 0.9846 - val_loss: 0.0512
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - accuracy: 0.9613 - loss: 0.1260 - val_accuracy: 0.9881 - val_loss: 0.0408
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - accuracy: 0.9641 - loss: 0.1168 - val_accuracy: 0.9853 - val_loss: 0.0482
Epoch 7/15
[1m938/938[0m [32m

Unnamed: 0,accuracy,loss,val_accuracy,val_loss
0,0.868567,0.423209,0.973083,0.085576
1,0.9384,0.201969,0.979417,0.064698
2,0.951667,0.156078,0.983,0.055835
3,0.958467,0.135344,0.984583,0.051234
4,0.9624,0.123491,0.988083,0.040761
5,0.965867,0.114414,0.985333,0.048173
6,0.967667,0.107736,0.987,0.042616
7,0.9688,0.099723,0.987417,0.041313


In [11]:
# iteration 5: pretrained base with trainable head using the augmentation from iteration 4

def convert_to_float(image, label):
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return image, label

def create_dataset(X_train, y_train, X_valid, y_valid):
    data_aug = keras.Sequential([
        layers.Rescaling(scale=1./255),
        layers.RandomZoom(height_factor=(-0.2, 0.2), width_factor=(-0.2, 0.2)),
        layers.RandomRotation(factor=(-0.1, 0.1)),
        layers.RandomContrast(factor=(0.1, 0.2))
    ])
    # pass data through data augmentation
    X_train = data_aug(X_train)
    X_valid = data_aug(X_valid)
    # convert to float
    X_train = tf.image.convert_image_dtype(X_train, dtype=tf.float32)
    X_valid = tf.image.convert_image_dtype(X_valid, dtype=tf.float32)

    # create dataset and reshape data
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    valid_ds = tf.data.Dataset.from_tensor_slices((X_valid, y_valid))
    train_ds = train_ds.map(preprocess_image)
    valid_ds = valid_ds.map(preprocess_image)

    return train_ds, valid_ds

# resize and colourize data for pretrained model
def preprocess_image(images, labels):
    images = tf.image.resize(images, (224, 224))
    images = tf.image.grayscale_to_rgb(images)
    return images, labels

def build_model5(train_ds, valid_ds):
    # add pretrained base model
    base = tf.keras.applications.VGG16(include_top=False, weights='imagenet')
    base.trainable = False
    
    model = keras.Sequential([
        base,
        layers.Flatten(),
        layers.Dense(units=64, activation='relu'),
        layers.Dropout(0.2),
        layers.BatchNormalization(),
        layers.Dense(units=10, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    history = model.fit(
        train_ds,
        validation_data=valid_ds,
        epochs=1,
        verbose=0,
    )
    history_frame = pd.DataFrame(history.history)
    return model, history_frame


In [12]:
train_ds, valid_ds = create_dataset(X_train, y_train, X_valid, y_valid)
train_ds

<_MapDataset element_spec=(TensorSpec(shape=(224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [None]:
# ==============CAREFUL: building this model uses a lot of memory (20gb+) and can take hours if using a CPU to train=================

# model5, history = build_model5(train_ds, valid_ds) 
# history

In [29]:
# testing
test_data = pd.read_csv('test.csv')
X_test = tf.reshape(test_data, [test_data.shape[0], 28, 28, 1])

def make_preds(model, X_test):
    preds = model.predict(X_test)
    preds = [tf.argmax(pred).numpy() for pred in preds]
    label = [i for i in range(1, len(preds)+1)]
    return pd.concat([pd.Series(label), pd.Series(preds)], axis=1, keys=['ImageId', 'Label'])


In [30]:
submission1 = make_preds(model1, X_test)
submission1.to_csv('submission1.csv', index=False)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step


In [34]:
submission2 = make_preds(model2, X_test)
submission2.to_csv('submission2.csv', index=False)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step


In [37]:
submission3 = make_preds(model3, X_test)
submission3.to_csv('submission3.csv', index=False)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step


In [40]:
submission4 = make_preds(model4, X_test)
submission4.to_csv('submission4.csv', index=False)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
