In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Training hyperparameters and input geometry, gathered in one place.
IMG_SHAPE = 28            # side length in pixels; images are reshaped to IMG_SHAPE x IMG_SHAPE x 1
BATCH_SIZE = 64           # samples per batch produced by the augmenting generator
EPOCHS = 30               # number of passes over the training generator
LR = 1e-3                 # learning rate

In [None]:
# Parse the training CSV: the header row is skipped, the first column of each
# row is the integer class label, and the remaining columns are pixel values
# that are appended to one flat list (reshaped into images later).
labels, train = [], []
with open('data/train.csv') as train_file:
    rows = csv.reader(train_file)
    next(rows)  # discard the header line
    for record in rows:
        labels.append(int(record[0]))
        train.extend(int(px) for px in record[1:])

In [None]:
# Turn the label list into an (N, 1) column vector.
labels = np.array(labels).reshape(-1, 1)

In [None]:
# Convert the flat pixel list to float32, scale it into [0, 1] by dividing by
# 255, and reshape into (N, IMG_SHAPE, IMG_SHAPE, 1) single-channel images.
train = (
    np.array(train).astype(np.float32) / 255.
).reshape(-1, IMG_SHAPE, IMG_SHAPE, 1)

In [None]:
# Sanity check: render training image 5 (channel axis dropped) next to its label.
plt.imshow(train[5].squeeze()), labels[5]

In [None]:
# Augmentation pipeline: small random rotations, zooms and horizontal/vertical
# shifts applied to the training images.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

In [None]:
# Fit the generator on the training images.
# NOTE(review): per the Keras docs, fit() only computes the statistics used by
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled on this generator, so this call is likely a no-op.
# Confirm before removing.
datagen.fit(x=train)

In [None]:
# Iterator yielding augmented (image, label) batches of BATCH_SIZE samples.
traingen = datagen.flow(x=train, y=labels, batch_size=BATCH_SIZE)

In [None]:
# CNN classifier. Two convolutional stages (32 filters, then 64 filters), each
# made of two 3x3 same-padded ReLU convolutions followed by batch
# normalization, 2x2 max pooling and 40% dropout. The flattened feature map
# then goes through a 128-unit ReLU layer and a 10-way softmax output.
model = keras.models.Sequential([
    # Stage 1: 32 filters
    Conv2D(32, (3,3), padding='same', activation='relu', kernel_initializer='he_uniform'),
    Conv2D(32, (3,3), padding='same', activation='relu', kernel_initializer='he_uniform'),
    BatchNormalization(),
    MaxPool2D(2, 2),
    Dropout(0.4),
    # Stage 2: 64 filters. Both convolutions use the same width; a filter
    # count in the thousands here would make the Flatten -> Dense(128) layer
    # below hold tens of millions of weights.
    Conv2D(64, (3,3), padding='same', activation='relu', kernel_initializer='he_uniform'),
    Conv2D(64, (3,3), padding='same', activation='relu', kernel_initializer='he_uniform'),
    BatchNormalization(),
    MaxPool2D(2, 2),
    Dropout(0.4),
    # Classification head
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_uniform'),
    Dense(10, activation='softmax')
])

In [None]:
# Alternative optimizer (currently unused):
# optim = keras.optimizers.SGD(learning_rate=LR, momentum=0.9)

In [None]:
# Compile with Adam driven by the LR constant so the configured learning rate
# actually takes effect (the string shortcut 'adam' ignores LR and always uses
# the optimizer's default). Labels are integer class ids, hence the sparse
# loss and accuracy metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LR),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [None]:
# Train on the augmented batch generator for EPOCHS epochs. No validation data
# is supplied, so `history` only records training metrics.
history = model.fit(x=traingen, epochs=EPOCHS)

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
model.save(filepath='mnist_cnn1.h5')

In [None]:
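# NOTE: the 'val_*' history keys used below only exist when validation data is
# passed to model.fit(); the fit call above supplies none, so this cell would
# raise KeyError if uncommented as-is.
# acc = history.history['sparse_categorical_accuracy']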
# val_acc = history.history['val_sparse_categorical_accuracy']

# loss=history.history['loss']
# val_loss=history.history['val_loss']

# epochs_range = range(EPOCHS)

# plt.figure(figsize=(8, 8))
# plt.subplot(1, 2, 1)
# plt.plot(epochs_range, acc, label='Training Accuracy')
# plt.plot(epochs_range, val_acc, label='Validation Accuracy')
# plt.legend(loc='lower right')
# plt.title('Training and Validation Accuracy')

# plt.subplot(1, 2, 2)
# plt.plot(epochs_range, loss, label='Training Loss')
# plt.plot(epochs_range, val_loss, label='Validation Loss')
# plt.legend(loc='upper right')
# plt.title('Training and Validation Loss')
# plt.show()

In [None]:
# Parse the test CSV: the header row is skipped and every column of every row
# is a pixel value, appended to one flat list (reshaped into images later).
test = []
with open('data/test.csv') as test_file:
    rows = csv.reader(test_file)
    next(rows)  # discard the header line
    for record in rows:
        test.extend(int(px) for px in record)

In [None]:
# Same preprocessing as the training images: float32, scaled into [0, 1],
# reshaped to (N, IMG_SHAPE, IMG_SHAPE, 1).
test = (
    np.array(test).astype(np.float32) / 255.
).reshape(-1, IMG_SHAPE, IMG_SHAPE, 1)

In [None]:
# Run the network on the test images and keep, for each image, the index of
# the highest softmax score as the predicted digit.
class_scores = model.predict(test)
pred = class_scores.argmax(axis=1)

In [None]:
import pandas as pd

In [None]:
# Wrap the predicted class ids in a Series named after the submission column.
pred = pd.Series(data=pred, name='Label')

In [None]:
# Build the submission table: 1-based ImageId next to the predicted Label.
# The id range is derived from the number of predictions so the two columns
# always line up; a hard-coded row count would make pd.concat pad the shorter
# column with NaN whenever the test set size differs.
image_ids = pd.Series(range(1, len(pred) + 1), name='ImageId')
submission = pd.concat([image_ids, pred], axis=1)

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv(path_or_buf='sub.csv', index=False)