In [1]:
"""
1. Build a neural network (at least 3 convolutional layers);
2. Do model selection (optimizing hyperparameters or testing different
architectures, performing validation by splitting the train set);
3. Train your network over the full training set;
4. Use the network to predict the examples in the test set;
5. Place the labels in a file, in the same order as you read the test
examples and in the same format of the labels in the training set
"""

'\n1. Build a neural network (at least 3 convolutional layers);\n2. Do model selection (optimizing hyperparameters or testing different\narchitectures, performing validation by splitting the train set);\n3. Train your network over the full training set;\n4. Use the network to predict the examples in the test set;\n5. Place the labels in a file, in the same order as you read the test\nexamples and in the same format of the labels in the training set\n'

In [2]:
# mount the drive dir
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
# import and init tensorflow
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

tf.random.set_seed(0)

TensorFlow 2.x selected.


In [0]:
# read train dataset and labels
test_data = pd.read_csv('drive/My Drive/ocr/test-data.csv')
test_target = pd.read_csv('drive/My Drive/ocr/test-target.csv')
train_data = pd.read_csv('drive/My Drive/ocr/train-data.csv')
train_target = pd.read_csv('drive/My Drive/ocr/train-target.csv')

In [0]:
# random split in train_data and train_target in 80% train, 20% validate
train_data_sample = train_data.sample(frac=0.8, random_state=1)
validate_data_sample = train_data.drop(train_data_sample.index)

train_target_sample = train_target.sample(frac=0.8, random_state=1)
validate_target_sample = train_target.drop(train_target_sample.index)

In [0]:
# reformat dataframe
# data
x_train = train_data_sample.values 
x_validate = validate_data_sample.values
x_test = test_data.values
x_train = x_train.reshape(len(train_data_sample), 16, 8)
x_validate = x_validate.reshape(len(validate_data_sample), 16, 8)
x_test = x_test.reshape(len(test_data), 16, 8)
x_train = x_train[..., tf.newaxis]
x_validate = x_validate[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# targets
one_hot_encode_train = pd.get_dummies(train_target_sample.iloc[:,0])
y_train = one_hot_encode_train.values
one_hot_encode_validate = pd.get_dummies(validate_target_sample.iloc[:,0])
y_validate = one_hot_encode_validate.values
one_hot_encode_test = pd.get_dummies(test_target.iloc[:,0])
y_test = one_hot_encode_test.values

In [7]:
# sanity check
print(x_train.shape)
print(x_validate.shape)
print(x_test.shape)

print(y_train.shape)
print(y_validate.shape)
print(y_test.shape)

(33376, 16, 8, 1)
(8344, 16, 8, 1)
(10430, 16, 8, 1)
(33376, 26)
(8344, 26)
(10430, 26)


In [0]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Softmax, Flatten

In [0]:
class MnistPerceptron(Model): # inherit from Model
  def __init__(self):
    super().__init__() # initialize Model
    self.flatten = Flatten() # used to flatten pixels 
    self.W = tf.Variable(tf.zeros([128, 26]))# weights
    self.b = tf.Variable(tf.zeros([26]))    # bias
    self.softmax = Softmax()
    

  def call(self, x,training=False): 
    # the  training argument is unused in this model, we will need it later 
    x = self.flatten(x) # flatten images   

    x = tf.dtypes.cast(x, tf.float32)
    #self.W = tf.constant(self.W, dtype=tf.int64)
    
    multiplied = tf.matmul(x, self.W) # matmul, output shape : (batch, 10)
    # we can equivalently do:
    #multiplied = tf.transpose(tf.linalg.matmul(tf.float64(tf.transpose(self.W)), tf.float64(tf.transpose(flat))))

    fwded = multiplied + self.b # broadcast self.b to (batch, 10) and add   

    prob = self.softmax(fwded) # softmax              
    return prob

# Create an instance of the model
perceptron = MnistPerceptron()

In [0]:
### TRAIN ###

In [0]:
# loss function
perceptron_loss = tf.keras.losses.CategoricalCrossentropy()

train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(100)
validate_ds = tf.data.Dataset.from_tensor_slices((x_validate, y_validate)).batch(50)  
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(50)

# choose an optimizer for the training
perceptron_optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

In [0]:
train_loss_metric = tf.keras.metrics.Mean()
train_accuracy_metric = tf.keras.metrics.CategoricalAccuracy()

test_loss_metric = tf.keras.metrics.Mean()
test_accuracy_metric = tf.keras.metrics.CategoricalAccuracy()

In [0]:
def train_step(images, labels, model, loss_fn, optimizer):
  with tf.GradientTape() as tape: # all the operations within this scope will be recorded in tape
    predictions = model(images, training=True)
    loss = loss_fn(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss_metric(loss)
  train_accuracy_metric(labels, predictions)

In [0]:
from datetime import datetime

def train_loop(epochs, train_ds, model, loss_fn, optimizer):
  for epoch in range(epochs):
      # reset the metrics for the next epoch
    train_loss_metric.reset_states()
    train_accuracy_metric.reset_states()

    start = datetime.now() # save start time 
    for images, labels in train_ds:
      train_step(images, labels, model, loss_fn, optimizer)

    template = 'Epoch {}, Time {}, Loss: {}, Accuracy: {}'
    print(template.format(epoch+1,
                          datetime.now() - start,
                          train_loss_metric.result(),
                          train_accuracy_metric.result()*100))

In [15]:
EPOCHS = 10
train_loop(EPOCHS, train_ds, perceptron, perceptron_loss, perceptron_optimizer)

Epoch 1, Time 0:00:04.516575, Loss: 2.8663129806518555, Accuracy: 32.873924255371094
Epoch 2, Time 0:00:02.761206, Loss: 2.4397799968719482, Accuracy: 39.41455078125
Epoch 3, Time 0:00:02.804536, Loss: 2.19722056388855, Accuracy: 42.923057556152344
Epoch 4, Time 0:00:02.825475, Loss: 2.028562068939209, Accuracy: 47.1506462097168
Epoch 5, Time 0:00:02.877635, Loss: 1.9030202627182007, Accuracy: 50.593238830566406
Epoch 6, Time 0:00:02.790541, Loss: 1.8053250312805176, Accuracy: 53.10103225708008
Epoch 7, Time 0:00:02.829558, Loss: 1.727095603942871, Accuracy: 55.28523254394531
Epoch 8, Time 0:00:02.793644, Loss: 1.662144422531128, Accuracy: 56.79230499267578
Epoch 9, Time 0:00:02.790353, Loss: 1.607544183731079, Accuracy: 58.04170227050781
Epoch 10, Time 0:00:02.800229, Loss: 1.5609805583953857, Accuracy: 59.24616622924805


In [0]:
### evaluation ###

In [0]:
def test_step(images, labels, model, loss_fn):
  predictions = model(images, training=False)
  t_loss = loss_fn(labels, predictions)

  test_loss_metric(t_loss)
  test_accuracy_metric(labels, predictions)
  return predictions

In [0]:
def test_loop(test_ds, model, loss_fn):
  predictions = []

  # reset the metrics for the next epoch
  test_loss_metric.reset_states()
  test_accuracy_metric.reset_states()
 
  for test_images, test_labels in test_ds:
    y = test_step(test_images, test_labels, model, loss_fn)
    predictions.append(y)

  template = 'Test Loss: {}, Test Accuracy: {}'
  print(template.format(test_loss_metric.result(),
                        test_accuracy_metric.result()*100))
  return ([predictions, test_accuracy_metric.result()*100])

In [19]:
tmp = test_loop(validate_ds, perceptron, perceptron_loss)

Test Loss: 1.5452115535736084, Test Accuracy: 59.86337661743164


In [0]:
### deep architectures ###

In [0]:
class MnistConvolutional(Model):
  def __init__(self, in_channels, out_channels, size):
    super().__init__() # setup the model basic functionalities (mandatory)
    initial = tf.random.truncated_normal([size, size, in_channels, out_channels], stddev=0.1)
    self.filters = tf.Variable(initial) # create weights for the filters

  def call(self, x):
    x = tf.dtypes.cast(x, tf.float32)
    res = tf.nn.conv2d(x, self.filters, 1, padding="SAME")
    return res

class MnistFullyConnected(Model):
  def __init__(self, input_shape, output_shape):
    super().__init__() # initialize the model
    self.W = tf.Variable(tf.random.truncated_normal([input_shape, output_shape], stddev=0.1)) # declare weights 
    self.b = tf.Variable(tf.constant(0.1, shape=[1, output_shape]))  # declare biases

  def call(self, x):
    res = tf.matmul(x, self.W) + self.b 
    #print ("weight: ", self.W)
    #print ("bias: ", self.b)
    return res

In [0]:
from tensorflow.keras.layers import MaxPool2D, Dropout

class MnistDeepModel(Model):
  def __init__(self):
    super().__init__()                          # 8,16,1   
    
    self.conv1 = MnistConvolutional(1, 16, 5)   # 8,16,16
    self.pool1 = MaxPool2D([2,2])               # 4,8,16
    self.conv2 = MnistConvolutional(16, 32, 5)  # 4,8,32
    self.pool2 = MaxPool2D([2,2])               # 2,4,32
    self.conv3 = MnistConvolutional(32, 48, 5)  # 2,4,48
    self.pool3 = MaxPool2D([2,2])               # 1,2,48

    self.flatten = Flatten()                  
    self.fc1 = MnistFullyConnected(1*2*48, 1024) 
    self.dropout = Dropout(0.5)               
    self.fc2 = MnistFullyConnected(1024, 26)  
    self.softmax = Softmax()                  

  def call(self, x, training=False):
    
    #print("conv1")
    x = tf.nn.relu(self.conv1(x))
    x = self.pool1(x)

    #print("conv2")
    x = tf.nn.relu(self.conv2(x))
    x = self.pool2(x)

    #print("conv3")
    x = tf.nn.relu(self.conv3(x))
    x = self.pool3(x)

    x = self.flatten(x)
    x = tf.nn.relu(self.fc1(x))

    x = self.dropout(x, training=training) # behavior of dropout changes between train and test
    
    x = self.fc2(x)
    prob = self.softmax(x)
    
    return prob

# Create an instance of the model
network = MnistDeepModel()

In [0]:
network_loss = tf.keras.losses.CategoricalCrossentropy()
network_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [24]:
EPOCHS = 10
train_loop(EPOCHS, train_ds,  network, network_loss, network_optimizer)

Epoch 1, Time 0:00:09.418029, Loss: 2.465547561645508, Accuracy: 31.35486602783203
Epoch 2, Time 0:00:04.780949, Loss: 1.3273367881774902, Accuracy: 61.562198638916016
Epoch 3, Time 0:00:04.812601, Loss: 0.9714465141296387, Accuracy: 71.75814819335938
Epoch 4, Time 0:00:04.871568, Loss: 0.8025233745574951, Accuracy: 76.23741912841797
Epoch 5, Time 0:00:04.829216, Loss: 0.6999712586402893, Accuracy: 79.44031524658203
Epoch 6, Time 0:00:04.790445, Loss: 0.6294887065887451, Accuracy: 81.2949447631836
Epoch 7, Time 0:00:04.872173, Loss: 0.5797750949859619, Accuracy: 82.73609924316406
Epoch 8, Time 0:00:04.821142, Loss: 0.5424246191978455, Accuracy: 83.65292358398438
Epoch 9, Time 0:00:04.811452, Loss: 0.512635350227356, Accuracy: 84.56076049804688
Epoch 10, Time 0:00:04.828199, Loss: 0.4837630093097687, Accuracy: 85.23789978027344


In [25]:
# loop between 1e-2, 1e-3, 1e-4 and find the best through the validate_sample
EPOCHS = 10
best_accuracy = 0
best_learning_rate = 0
learning_rates = [1e-2, 1e-3, 1e-4]
for learning_rate in learning_rates:
  network = MnistDeepModel()
  network_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  train_loop(EPOCHS, train_ds,  network, network_loss, network_optimizer)
  tmp = test_loop(validate_ds, network, network_loss)
  accuracy = tmp[1]
  if (accuracy > best_accuracy):
    best_accuracy = accuracy
    best_learning_rate = learning_rate

Epoch 1, Time 0:00:05.060786, Loss: 0.8836331963539124, Accuracy: 74.14309692382812
Epoch 2, Time 0:00:04.947687, Loss: 0.513607382774353, Accuracy: 84.84239959716797
Epoch 3, Time 0:00:04.910434, Loss: 0.45549508929252625, Accuracy: 86.5472183227539
Epoch 4, Time 0:00:04.962711, Loss: 0.46380290389060974, Accuracy: 86.56519317626953
Epoch 5, Time 0:00:04.896364, Loss: 0.4494195878505707, Accuracy: 87.03559875488281
Epoch 6, Time 0:00:04.906170, Loss: 0.44908809661865234, Accuracy: 87.0655517578125
Epoch 7, Time 0:00:04.831342, Loss: 0.4475567638874054, Accuracy: 87.35318756103516
Epoch 8, Time 0:00:04.877182, Loss: 0.43075424432754517, Accuracy: 87.80860137939453
Epoch 9, Time 0:00:04.780779, Loss: 0.46129941940307617, Accuracy: 87.40711975097656
Epoch 10, Time 0:00:04.872363, Loss: 0.4576101303100586, Accuracy: 87.29925537109375
Test Loss: 0.44554996490478516, Test Accuracy: 87.69175720214844
Epoch 1, Time 0:00:05.183386, Loss: 1.1076533794403076, Accuracy: 67.46764373779297
Epoch 2,

In [0]:
# take the whole training set
x_train = train_data_sample.values
x_train = x_train.reshape(len(train_data_sample), 16, 8)
x_train = x_train[..., tf.newaxis]
one_hot_encode_train = pd.get_dummies(train_target_sample.iloc[:,0])
y_train = one_hot_encode_train.values
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(100)

In [27]:
# train with the best_learning_rate
network = MnistDeepModel()
network_optimizer = tf.keras.optimizers.Adam(learning_rate=best_learning_rate)
train_loop(EPOCHS, train_ds,  network, network_loss, network_optimizer)

Epoch 1, Time 0:00:04.938634, Loss: 1.1317442655563354, Accuracy: 67.10211181640625
Epoch 2, Time 0:00:04.902868, Loss: 0.5084572434425354, Accuracy: 84.50083923339844
Epoch 3, Time 0:00:04.865430, Loss: 0.39394670724868774, Accuracy: 87.55992126464844
Epoch 4, Time 0:00:04.947098, Loss: 0.3345670998096466, Accuracy: 89.30668640136719
Epoch 5, Time 0:00:04.996534, Loss: 0.28956833481788635, Accuracy: 90.48717498779297
Epoch 6, Time 0:00:04.936914, Loss: 0.26194432377815247, Accuracy: 91.30812072753906
Epoch 7, Time 0:00:04.907091, Loss: 0.24254223704338074, Accuracy: 91.9463119506836
Epoch 8, Time 0:00:04.870573, Loss: 0.2191823571920395, Accuracy: 92.6174545288086
Epoch 9, Time 0:00:04.902683, Loss: 0.20302428305149078, Accuracy: 93.06986999511719
Epoch 10, Time 0:00:04.931508, Loss: 0.18772661685943604, Accuracy: 93.4264144897461


In [28]:
# test with the best_learning_rate
y_pred = test_loop(test_ds, network, network_loss)
y_pred = y_pred[0]

Test Loss: 0.31525084376335144, Test Accuracy: 90.2684555053711


In [0]:
# reconvert in label the predictions
predictions_labeled = ['g'] # list of predictions, init with the first one
converter = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
for i in range(0, len(y_pred)):
  for j in range(0, len(y_pred[i])):
    prediction = np.argmax(y_pred[i][j,:])
    predictions_labeled.append(converter[prediction])

In [0]:
# export the predictions
data = pd.DataFrame(predictions_labeled)
data.to_csv("data.csv", index=False, header=False)