In [None]:
import numpy as np 
import pandas as pd

from keras.applications import vgg16
from keras.layers import Input, MaxPooling2D, Conv2D, Flatten, Dense, Conv2DTranspose, Reshape, Lambda, Activation, BatchNormalization, LeakyReLU, Dropout, ZeroPadding2D, UpSampling2D
from keras.models import Model, Sequential, load_model
from keras import backend as K
from keras import optimizers
from keras import utils
from keras.initializers import RandomNormal
from keras.preprocessing.image import ImageDataGenerator

import cv2
import os
from PIL import Image
import json
import os
import time
from tqdm import tqdm
import matplotlib.pyplot as plt


# Print the full path of every file found under the Kaggle input directory,
# so the dataset locations used below can be checked at a glance.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

In [None]:
# All competition files live in one input directory; keep the path in one place.
DATA_DIR = "/kaggle/input/mais-202-fall-2020-kaggle-competition"

# Images are stored as .npy arrays; labels come from the `Label` column of the CSV.
train_x = np.load(f"{DATA_DIR}/train_x.npy")
test_x = np.load(f"{DATA_DIR}/test_x.npy")
train_y = pd.read_csv(f"{DATA_DIR}/train_y.csv")["Label"].to_numpy()

def show_image(arr):
    """Display a single 128x128 image with matplotlib.

    Parameters
    ----------
    arr : array-like
        Pixel data containing exactly 128*128 values (any shape that can be
        reshaped to (128, 128)). Values may be normalised to [0, 1] or already
        on the 0-255 scale.

    Notes
    -----
    The previous version always multiplied by 255 before casting to uint8,
    which wraps around for data that is already on the 0-255 scale (the rest
    of this notebook casts the same arrays straight to uint8 and thresholds
    them at 220). Scaling is now applied only when the data is in [0, 1].
    """
    two_d = np.reshape(arr, (128, 128))
    # Only rescale when pixels are normalised; 0-255 data is displayed as is.
    if two_d.max() <= 1:
        two_d = two_d * 255
    # Clip before the cast so out-of-range values saturate instead of wrapping.
    two_d = np.clip(two_d, 0, 255).astype(np.uint8)
    plt.imshow(two_d, interpolation='nearest')
    plt.show()

# Sanity check: render the first training image, then report its shape,
# its label and the sizes of the image / label arrays (they should match).
show_image(train_x[0])
print(
    f'Image shape: {train_x[0].shape}',
    f'Label: {train_y[0]}',
    f'X train size: {len(train_x)}',
    f'Y train size: {len(train_y)}',
    sep='\n',
)

In [None]:
# Manual inspection: render test images with indices 109..179 one after
# another, printing each index right after its figure so it can be matched.
# NOTE: `i` is a notebook-level variable and stays bound to 179 afterwards.
for i in range(109, 180):
    show_image(test_x[i])
    print(f'image {i}')

In [None]:
# Training configuration shared by the cells below.
num_classes = 10               # width of the one-hot labels / softmax output
input_shape = (128, 128, 1)    # height, width, channels of one input image
batch_size = 30                # samples per gradient step
epochs = 30                    # full passes over the training data

In [None]:
# Visual check of the denoise + threshold pipeline on one training image:
# original on the left, binarised result on the right.
threshold = 220
i = 2
test = train_x[i]
# Positional signature is (src, dst, h, templateWindowSize, searchWindowSize).
# The old call passed 90 in the `dst` slot, which looked like a filter
# strength but was not one; the effective strength has always been h=25.
denoised = cv2.fastNlMeansDenoising(np.uint8(test), None, 25, 7, 21)
# Pixels strictly above `threshold` become 255, everything else 0.
ret, thresh = cv2.threshold(denoised, threshold, 255, cv2.THRESH_BINARY)

plt.subplot(1, 2, 1)
plt.imshow(test, 'gray')
plt.xticks([])
plt.yticks([])
plt.subplot(1, 2, 2)
plt.imshow(thresh, 'gray')
plt.xticks([])
plt.yticks([])

plt.show()

In [None]:
def preprocess(img, threshold=220):
    """Denoise an image and binarise it.

    Parameters
    ----------
    img : array-like
        2-D grayscale image; it is cast to uint8 before denoising.
    threshold : int, optional
        Pixels strictly above this value become 255, all others 0.
        Defaults to 220, the value that was previously hard-coded.

    Returns
    -------
    The thresholded image returned by ``cv2.threshold`` (values 0 or 255).
    """
    # Positional signature is (src, dst, h, templateWindowSize, searchWindowSize).
    # The old call passed 90 in the `dst` slot, which looked like a filter
    # strength but was not one; the effective strength has always been h=25.
    denoised = cv2.fastNlMeansDenoising(np.uint8(img), None, 25, 7, 21)
    _, thresh = cv2.threshold(denoised, threshold, 255, cv2.THRESH_BINARY)
    return thresh

In [None]:
# Denoise and binarise every image. `train_x` / `test_x` are rebound to lists
# of processed images, so re-running this cell would process them twice;
# reload the raw arrays first if it has to be run again.
train_x = [preprocess(img) for img in tqdm(train_x)]
test_x = [preprocess(img) for img in tqdm(test_x)]
# Bug fix: this cell used to read `x_train` / `x_test` before they were ever
# defined (NameError on a fresh kernel). Build them from the processed lists
# and append a trailing channel axis.
x_train = np.expand_dims(np.asarray(train_x), -1)
x_test = np.expand_dims(np.asarray(test_x), -1)
# One-hot encode the integer labels.
y_train = utils.to_categorical(train_y, num_classes)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

In [None]:
# Augmentation settings: random zoom, rotation, shifts and shear, with border
# pixels filled from the nearest valid pixel. 10% of the samples are set
# aside for the 'validation' subset.
train_datagen = ImageDataGenerator(
    validation_split=0.1,
    rotation_range=30,
    zoom_range=0.3,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

In [None]:
# flow() requires a rank-4 array (samples, height, width, channels). After
# preprocessing, `train_x` is a list of 2-D images, so stack it and add the
# channel axis instead of passing the list directly (which raises ValueError).
# NOTE(review): pixels are not rescaled here and these generators are not
# passed to model.fit below; add a matching /255 rescale before using them.
augment_x = np.expand_dims(np.asarray(train_x), -1)
train_generator = train_datagen.flow(augment_x, y_train, batch_size=batch_size, subset='training')
validation_generator = train_datagen.flow(augment_x, y_train, batch_size=batch_size, subset='validation')

In [None]:
# Stack the image lists into arrays, divide by 255 and append a trailing
# channel axis so each sample matches the model's input_shape.
x_train = np.expand_dims(np.asarray(train_x) / 255, axis=-1)
x_test = np.expand_dims(np.asarray(test_x) / 255, axis=-1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# One-hot encode the integer labels for the categorical cross-entropy loss.
y_train = utils.to_categorical(train_y, num_classes)

In [None]:
# Optional: resume from the model saved by a previous session.
# Guarded so that Restart & Run All does not abort when no saved model exists.
# Note that the model-definition cell further down rebinds `model` regardless.
if os.path.exists('2nd_model'):
    model = load_model('2nd_model')
else:
    print("No saved model found at '2nd_model'; skipping resume.")

In [None]:
# Attempt at transfer learning: VGG16 as a frozen feature extractor.
# ImageNet weights only load for 3-channel inputs, so the single-channel
# `input_shape` cannot be passed through; keep height/width and use 3 channels.
# Grayscale images must be repeated along the channel axis before being fed
# to this model (e.g. np.repeat(x, 3, axis=-1)).
vgg_input_shape = tuple(input_shape[:2]) + (3,)
vgg = vgg16.VGG16(include_top=False, weights='imagenet',
                  input_shape=vgg_input_shape)
output = vgg.layers[-1].output
# `keras` itself is never imported as a name in this notebook; use the
# Flatten layer that is imported from keras.layers.
output = Flatten()(output)
vgg_model = Model(vgg.input, output)

# Freeze the model and each of its layers so the pretrained weights stay fixed.
vgg_model.trainable = False
for layer in vgg_model.layers:
    layer.trainable = False

In [None]:
# Baseline CNN: four Conv2D + MaxPooling2D stages (16, 64, 128, 128 filters),
# followed by two 512-unit dense layers with 30% dropout and a softmax head
# with one output per class.
model = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu',
           input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels; Adam with a small
# learning rate, tracking accuracy during training.
adam = optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam,
              loss='CategoricalCrossentropy',
              metrics=['accuracy'])

model.summary()

In [None]:
# Train on the in-memory arrays; Keras holds out 10% of the samples for
# validation and reports per-epoch progress (verbose=1).
history = model.fit(
    x_train,
    y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [None]:
model.save('2nd_model')
!zip -r 2nd_model.zip 2nd_model
rec_model = load_model('2nd_model')
results = rec_model.predict(test_x)

In [None]:
# `results` holds one row of softmax probabilities per test image; a DataFrame
# column must be 1-D, so reduce each row to the index of its most likely class.
predicted_labels = np.argmax(results, axis=1)
dict_results = {'Id': np.arange(len(predicted_labels)), 'Label': predicted_labels}
df_results = pd.DataFrame(data=dict_results)
df_results

In [None]:
# Write the submission as results.csv inside a zip archive named results.zip,
# without the DataFrame index column.
compression_opts = {
    'method': 'zip',
    'archive_name': 'results.csv',
}
df_results.to_csv('results.zip', compression=compression_opts, index=False)