<a href="https://colab.research.google.com/github/NglQ/KaggleChallenges/blob/main/digitRecognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q kaggle

In [None]:
from google.colab import files

# Ask the user to upload the Kaggle API token (kaggle.json).
# upload() returns the uploaded files together with their contents, so the
# result is bound to a name rather than left as the cell's last expression:
# echoing it would store the API key in the notebook's saved output.
uploaded = files.upload()

# Show only the names of what was received, never the contents.
print(sorted(uploaded))

In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the "digit-recognizer" competition data with the Kaggle CLI.
# The next cell expects the result as digit-recognizer.zip in the working
# directory.
!kaggle competitions download -c digit-recognizer

In [None]:
!unzip digit-recognizer.zip
!rm digit-recognizer.zip

In [None]:
import os

# Folders for the PNG export: ./train/<digit>/ has one sub-folder per class
# label (0-9) and ./test holds the unlabeled images.
# exist_ok=True keeps the cell re-runnable; os.mkdir raises FileExistsError
# as soon as a folder is already present.
os.makedirs('./test', exist_ok=True)

for digit in range(10):
  # makedirs also creates the parent ./train folder on first use.
  os.makedirs(f'./train/{digit}', exist_ok=True)

In [None]:
import pandas as pd
import numpy as np
from PIL import Image

# train.csv: a 'label' column plus one column per pixel; test.csv: pixels only.
dataset = pd.read_csv('train.csv')
trainset = dataset.drop(columns='label')
labels = dataset['label']

testset = pd.read_csv('test.csv')

# Per-column mean of the training pixels; the same vector is subtracted from
# both the training rows and the test rows.
means = trainset.mean()


def save_padded_digit(row, pixel_means, out_path):
  """Write one flattened 28x28 pixel row as a 32x32 grayscale PNG.

  Args:
    row: the 784 pixel values of one image.
    pixel_means: per-pixel means subtracted from `row` before drawing.
    out_path: destination file; its parent folder must already exist.

  The mean-subtracted, floored values are drawn into a 28x28 'L' image that
  is pasted at offset (2, 2) onto a new 32x32 'L' canvas, leaving a 2-pixel
  border on every side.
  """
  # NOTE(review): pixels darker than the mean become negative here, outside
  # the range of an 8-bit 'L' image; how Pillow stores them is not visible
  # from this code -- confirm the result is what the model should see.
  centered = np.floor(np.array(row, dtype=np.uint8) - pixel_means)
  digit = Image.new('L', (28, 28))
  digit.putdata(centered)
  canvas = Image.new('L', (32, 32))
  canvas.paste(digit, (2, 2))
  canvas.save(out_path)


# NOTE(review): the output folders are created relative to the working
# directory (./train, ./test) but written through absolute /content paths, so
# the working directory is assumed to be /content.

# Training images go into the folder named after their label.
for i, row in trainset.iterrows():
  save_padded_digit(row, means, f'/content/train/{labels[i]}/image_{i}.png')

# Test images are identified only by their row index.
for i, row in testset.iterrows():
  save_padded_digit(row, means, f'/content/test/image_{i}.png')

# One summary line instead of one printed line per exported image.
print(f'exported {len(trainset)} training and {len(testset)} test images')


In [None]:
import tensorflow as tf
import tensorflow.keras.preprocessing as tf_preproc
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import InceptionResNetV2, EfficientNetB0, VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import datetime, os

train_dir = '/content/train'
checkpoint_path = "/content/drive/MyDrive/models/digitRecognizer.h5"

# The best model is checkpointed to Google Drive, so Drive has to be mounted
# before training starts. Without the mount the checkpoint would land in a
# plain local folder at the same path, which is lost with the runtime and
# leaves the mount point non-empty for a later drive.mount().
if not os.path.isdir('/content/drive/MyDrive'):
  from google.colab import drive
  drive.mount('/content/drive')
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# One generator serves both subsets: pixel values are multiplied by 1/255,
# a small random zoom is applied, and 20% of the images are set aside for
# validation.
trainGenerator = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.05,
    fill_mode="nearest",
    validation_split=0.2)

# Settings shared by the training and validation iterators.
flow_args = dict(target_size=(32, 32), batch_size=70)
train_ds = trainGenerator.flow_from_directory(train_dir, subset='training', **flow_args)
val_ds = trainGenerator.flow_from_directory(train_dir, subset='validation', **flow_args)

# VGG16 convolutional base (no classifier head) on 32x32x3 inputs, followed by
# a flatten and a 10-way softmax, one output per digit folder.
inc = VGG16(input_shape=(32, 32, 3), include_top=False)
model = tf.keras.Sequential([
    inc,
    layers.Flatten(),
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# TensorBoard logs go to logs/<timestamp> so every run gets its own folder.
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
# Stop once val_accuracy has not improved for 25 epochs and roll back to the
# best weights seen.
cBack = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=25, mode='max', restore_best_weights=True)
# Keep only the model with the best val_accuracy on Drive.
checkPointCb = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor='val_accuracy', save_best_only=True)

model.fit(train_ds, epochs=100, callbacks=[tensorboard_callback, cBack, checkPointCb], validation_data=val_ds)


In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image

# Load the best checkpoint written by the training cell. No recompile is
# needed: the model is only used for predict().
model = keras.models.load_model('/content/drive/MyDrive/models/digitRecognizer.h5')

imgAcc = []
idxs = []
# `filenames` rather than `files`, so the google.colab `files` module imported
# at the top of the notebook is not overwritten.
for root, _dirs, filenames in os.walk('/content/test'):
  for f in filenames:
    img = image.load_img(os.path.join(root, f))
    # The training generator multiplies pixels by 1/255; the network must see
    # the same value range at prediction time.
    imgAcc.append(image.img_to_array(img) / 255.0)
    # Files are named image_<row>.png; +1 turns the 0-based row index into the
    # ImageId (presumably 1-based in the submission format -- confirm).
    idxs.append(int(f.split('_')[1].split('.')[0]) + 1)

if not imgAcc:
  raise FileNotFoundError('no test images found under /content/test')

print(max(idxs), len(imgAcc))

# One batch of shape (n_images, height, width, channels).
imgsToPredict = np.stack(imgAcc)
results = model.predict(imgsToPredict)

# The predicted digit is the class with the highest softmax probability.
# Comparing probabilities with exactly 1.0 would silently label every less
# confident prediction as 0.
idxsRes = np.argmax(results, axis=1)

filepath = Path('outDigitRecognizer.csv')
# os.walk yields files in arbitrary order; sort so the CSV is ordered by id.
outDf = pd.DataFrame({'ImageId': idxs, 'Label': idxsRes}).sort_values('ImageId')
outDf.to_csv(filepath, index=False, header=True)



In [None]:
# Open TensorBoard inline on the "logs" folder, the parent of the per-run
# log directories created by the training cell's TensorBoard callback.
%reload_ext tensorboard
%tensorboard --logdir logs

In [None]:
# Mount Google Drive at /content/drive. The model file used by the training
# and prediction cells (/content/drive/MyDrive/models/digitRecognizer.h5)
# lives on this mount, so Drive must be mounted before those cells run.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!rm -r train test