## Imports

In [None]:
!pip install wget
!pip install patool
import wget
import patoolib
import os
import cv2
import numpy as np
import math
import matplotlib.pyplot as plt

import keras
from keras.utils import Sequence, to_categorical
from keras import layers
from keras.models import Sequential

import imgaug.augmenters as iaa

url = "https://s.ics.upjs.sk/~shorvat/files/data/mnist.gz"
wget.download(url, "mnist.gz")
patoolib.extract_archive("mnist.gz", outdir="/content")
!tar -xvf '/content/trainingSet.tar' -C '/content/'

## Preprocessing

In [5]:
# Root of the extracted training set; the loop below reads sub-directories "0".."9".
data_dir = "/content/trainingSet/"
expected_results = []  # digit label of every image, aligned with `paths`
paths = []             # path of every image
less_than_five = []    # 1 when the image's digit is < 5, otherwise 0

# Walk through every digit sub-directory (0, 1, 2, ..., 9).
for i in range(10):
  # Build the name of the directory for this digit.
  number_dir = data_dir + str(i)

  # Names of the entries in $number_dir. os.listdir() returns them in
  # arbitrary order, so sort to keep the sample order reproducible across runs.
  path_to_elements = sorted(os.listdir(number_dir))

  # One digit label per image in this directory.
  expected_results += [i] * len(path_to_elements)

  # One binary flag per image telling whether the digit is less than 5.
  less_than_five += [1 if i < 5 else 0] * len(path_to_elements)

  # Paths to the images themselves.
  paths += [number_dir + "/" + j for j in path_to_elements]

print("Array with expected predictions is ready")
print("Array with binary values meaning if a number is less than 5 is ready")
print("Array with paths is ready")

Array with expected predictions is ready
Array with binary values meaning if a number is less than 5 is ready
Array with paths is ready


### Generator for two characteristics (digit and less-than-five)

In [6]:
class Generator(Sequence):
    """Batch generator that reads grayscale digit images from disk.

    Every batch is a pair ``(images, [digit_labels, less_than_five_labels])``
    in which both label arrays are one-hot encoded.

    Args:
        paths: Image file paths.
        expected_results: Digit label for each path (one-hot encoded to 10 classes).
        less_than_five: Binary flag for each path (one-hot encoded to 2 classes).
        batch_size: Number of samples per batch.
        aug: Optional augmenter called as ``aug(images=batch)``; ``None``
            disables augmentation.
    """

    def __init__(self, paths, expected_results, less_than_five, batch_size, aug = None):
        # Newer Keras releases expect the base-class constructor to be called.
        super().__init__()
        self.paths, self.expected_results, self.less_than_five = paths, expected_results, less_than_five
        self.batch_size = batch_size
        self.aug = aug

    def __len__(self):
        """Return the number of batches; the last one may be shorter."""
        return math.ceil(len(self.paths) / self.batch_size)

    @staticmethod
    def _load_image(path):
        """Read one image as grayscale and return it shaped (28, 28, 1).

        Raises:
            ValueError: If OpenCV cannot read the file (``cv2.imread`` returns
                ``None`` in that case instead of raising).
        """
        image = cv2.imread(path, 0)  # flag 0 = load as single-channel grayscale
        if image is None:
            raise ValueError(f"Could not read image: {path}")
        return image.reshape((28, 28, 1))

    def __getitem__(self, idx):
        """Return batch number ``idx`` as ``(images, [digit_one_hot, less_than_five_one_hot])``."""
        start = idx * self.batch_size
        stop = start + self.batch_size

        batch_x = [self._load_image(path) for path in self.paths[start:stop]]

        # Augmentation is optional and is applied to the raw images only.
        if self.aug is not None:
            batch_x = self.aug(images = batch_x)

        batch_y = to_categorical(self.expected_results[start:stop], num_classes=10)
        batch_y2 = to_categorical(self.less_than_five[start:stop], num_classes=2)

        return np.array(batch_x), [np.array(batch_y), np.array(batch_y2)]

# Gaussian-blur augmentation (sigma range 0 to 3.0), used for training batches only.
seq = iaa.Sequential([iaa.GaussianBlur(sigma=(0, 3.0))])

BATCH_SIZE = 10
VALIDATION_FRACTION = 0.2
SPLIT_SEED = 42

# `paths` is ordered digit by digit, so consecutive slices would produce
# single-class batches, and validating on the very samples used for training
# says nothing about generalization. Shuffle the sample indices once with a
# fixed seed and hold out a disjoint validation part.
shuffled = np.random.default_rng(SPLIT_SEED).permutation(len(paths))
n_val = int(len(paths) * VALIDATION_FRACTION)
val_idx, train_idx = shuffled[:n_val], shuffled[n_val:]

train_gen = Generator(
    [paths[k] for k in train_idx],
    [expected_results[k] for k in train_idx],
    [less_than_five[k] for k in train_idx],
    BATCH_SIZE,
    aug=seq
)
# Held-out samples, no augmentation.
test_gen = Generator(
    [paths[k] for k in val_idx],
    [expected_results[k] for k in val_idx],
    [less_than_five[k] for k in val_idx],
    BATCH_SIZE
)

## Model

In [None]:
# Shared feature extractor over a 28x28 single-channel input:
# one 3x3 convolution, 2x2 max pooling, then flatten.
inp = layers.Input((28, 28, 1))
conv1 = layers.Conv2D(16, (3,3), activation="relu")(inp)
maxp = layers.MaxPool2D(2,2)(conv1)
flt = layers.Flatten()(maxp)

# Two softmax heads on the same flattened features:
# "digit" has 10 classes, "less_than_five" has 2 classes.
fc = layers.Dense(10, "softmax", name="digit")(flt)
fc2 = layers.Dense(2, "softmax", name="less_than_five")(flt)

model = keras.Model(inputs=[inp], outputs = [fc, fc2])

# The single loss is applied to both outputs. Metrics are keyed by output name:
# newer Keras versions reject a one-element metrics list for a two-output model,
# while the per-output dict is accepted by old and new versions alike.
model.compile(
    optimizer="adam",
    loss=keras.losses.categorical_crossentropy,
    metrics={"digit": ["accuracy"], "less_than_five": ["accuracy"]},
)
history = model.fit(train_gen, epochs=2, validation_data=test_gen)

## Visualization

In [None]:
# Draw the training and validation loss curves per epoch on a single axes.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve], label=curve)
ax.set_xlabel('Epoch')
ax.set_ylabel('Value')
ax.legend()
plt.show()