# Download and create dataset for training

### NIST Dataset (Special Database 19). Download link: https://s3.amazonaws.com/nist-srd/SD19/by_class.zip

In [4]:
import os
import json
import config

In [5]:
# Load the JSON file that lists the images selected for training.
# The loaded object is later indexed by digit string ("0"-"9"); each entry is
# iterated as a collection of image path strings.
json_file_path = "../training_dataset_NSIT.json"
# The context manager closes the file handle as soon as parsing is finished
# (the previous bare open() left it open for the lifetime of the kernel).
with open(json_file_path) as f:
    NSIT_data = json.load(f)

In [6]:
# Root folder of the extracted by_class archive, expected under the user's Downloads directory
dataset_path = f"{os.path.expanduser('~')}/Downloads/by_class/"

In [7]:
# Map every digit class ("0"-"9") to the by_class sub-folder whose images are used for training.
# Folder names run from "30/hsf_0/" to "39/hsf_0/"
# (presumably the hex ASCII code of the digit - confirm against the dataset layout).
digit_folder = {str(digit): f"3{digit}/hsf_0/" for digit in range(10)}

In [None]:
# Create one folder per digit class (0-9) under ~/Downloads/dataset and fill it
# with the selected images: converted to grayscale, resized to 28x28,
# thresholded and re-coloured so that bright pixels become (174, 181, 174).
from PIL import Image, ImageOps  # Pillow; both names are used below but were never imported

output_root = os.path.expanduser('~')+"/Downloads/dataset/"
for digit, folder in digit_folder.items():
    class_dir = output_root+digit
    # makedirs also creates ~/Downloads/dataset itself and tolerates re-running the cell
    os.makedirs(class_dir, exist_ok=True)
    for image in NSIT_data[digit]:
        # Keeps the text before the first '/' of the listed entry.
        # NOTE(review): the original extension (if any) stays in the name, so the
        # output may end in e.g. ".png.jpg" - confirm this is intended.
        image_name = image.split('/')[0]
        # Open via a context manager so the source file is released right away
        with Image.open(dataset_path+folder+image) as source:
            im = source.convert('L')
        im = im.resize((28,28))
        # Binarise: pixels darker than 128 become 0, everything else 255
        bw = im.point(lambda x: 0 if x<128 else 255)
        # Map black -> (0, 0, 0) and white -> (174, 181, 174)
        result = ImageOps.colorize(bw, (0,0,0), (174, 181, 174))
        result.save(class_dir+"/"+image_name+".jpg")
print('Created classes 0 - 9')

In [None]:
# Create the background class (10): 3000 identical 28x28 RGB images filled with
# (174, 181, 174), the same colour the digit images use for their bright pixels.
from PIL import Image  # Pillow; `Image` is used below but was never imported

background_dir = os.path.expanduser('~')+'/Downloads/dataset/10'
# makedirs creates missing parent folders and tolerates re-running the cell
os.makedirs(background_dir, exist_ok=True)
# Every file has the same content, so the image is built once outside the loop;
# it is created at 28x28 already, which makes a further resize unnecessary.
new = Image.new(mode='RGB', size = (28,28), color = (174, 181, 174))
for i in range(3000):
    new.save(background_dir+'/'+str(i)+'.jpg')
print('Created class 10')

### Existing dataset. Download link: https://drive.google.com/file/d/1bTjKBzN-QsCnrQxznRj-b3Szt8_mLKdL/view?usp=share_link

In [None]:
from zipfile import ZipFile
import os

# Unpack ~/Downloads/printed_digits_v1.zip into ~/Downloads/printed_digits_v1
extract_dir = os.path.expanduser('~')+'/Downloads/printed_digits_v1'
# exist_ok lets the cell be re-run without failing on the already-created folder
os.makedirs(extract_dir, exist_ok=True)
with ZipFile(extract_dir+'.zip', 'r') as zipObj:
    # Extract all the contents of the zip file into the target directory
    zipObj.extractall(path=extract_dir)
print('Extraction of dataset complete!')

### NOTE: Combine both of the datasets above (the generated `~/Downloads/dataset` classes and the extracted `~/Downloads/printed_digits_v1`) and place the entire training dataset in ../data/raw

# Training of the model starts here...

In [None]:
# Run the entry point `main` imported from the `train` module.
# All parameters for training are set in config.py; change them there as required
# before executing this cell.
from train import main
main()

# Prediction using .h5 model

In [None]:
from predict import pred_using_h5_digit, pred_using_tflite_model

In [None]:
# Load the pre-trained Keras (.h5) model that the next cell evaluates
import tensorflow as tf  # `tf` is used below but was never imported in this notebook

model = tf.keras.models.load_model('../models/pre-trained_model/trained_resnet_model_v2_10.h5')

In [None]:
# To get the accuracy on test data and the number of misclassifications.
# NOTE: the glob pattern below only matches entries in the `0` sub-folder of ../data/test.
# NOTE(review): `result` presumably carries the misclassification details - confirm against predict.py.
path = '../data/test/0/*'
result, accuracy = pred_using_h5_digit(model, path)
print("Accuracy on test dataset using .h5 model>>>>",accuracy)

### Conversion of .h5 model to .tflite model

In [None]:
# Convert the Keras .h5 model to a TensorFlow Lite model and write it to disk.
# The input and output paths come from config.py (H5_MODEL_PATH and
# TF_LITE_SAVE_PATH); change them there as required.
import tensorflow as tf  # `tf` is used below but was never imported in this notebook

h5_model_path = config.H5_MODEL_PATH
model = tf.keras.models.load_model(h5_model_path)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
# Write through a context manager so the file is flushed and closed deterministically
with open(config.TF_LITE_SAVE_PATH, "wb") as tflite_file:
    tflite_file.write(tflite_model)

# Prediction using .tflite model

In [None]:
# Path to the converted .tflite model.
# NOTE: this rebinds `model` to a path string; earlier cells bind the same name to a loaded Keras model.
model = '../models/tflite_model/trained_resnet_model_v2_10.tflite'

In [None]:
# To get the accuracy on test data and the number of misclassifications.
# NOTE: the glob pattern below only matches entries in the `0` sub-folder of ../data/test.
# NOTE(review): `result` presumably carries the misclassification details - confirm against predict.py.
path = '../data/test/0/*'
result, accuracy = pred_using_tflite_model(model, path)
print("Accuracy on test dataset using .tflite model", accuracy)