# Download and create dataset for training

### NIST Dataset. Download link: https://s3.amazonaws.com/nist-srd/SD19/by_class.zip

In [5]:
import sys
import os
import json
sys.path.append(os.path.realpath('../src'))
import config
import base64
from io import BytesIO
from PIL import Image, ImageOps

In [7]:
# Load the sample manifest for the 1st version (0.1), iteration 3.0 training.
# The file holds one JSON document per line; each line becomes one list entry.
# Reading inside a `with` block closes the file handle as soon as parsing ends.
with open('../training_samples_v01_iter3_0.json', 'r', encoding='utf-8') as samples_file:
    NIST_data_iter3_0 = [json.loads(line) for line in samples_file]

In [8]:
# Load the sample manifest for the 2nd version (0.2), iteration 3.1 training.
# The file holds one JSON document per line; each line becomes one list entry.
# Reading inside a `with` block closes the file handle as soon as parsing ends.
with open('../training_samples_v02_iter3_1.json', 'r', encoding='utf-8') as samples_file:
    NIST_data_iter3_1 = [json.loads(line) for line in samples_file]

## To form the training dataset for the 1st version (0.1) of iteration 3.0 training

In [None]:
# Root folder of the extracted NIST "by_class" archive inside the user's
# Downloads directory. The trailing slash matters: later cells build image
# paths by plain string concatenation onto this value.
dataset_path = f"{os.path.expanduser('~')}/Downloads/by_class/"

In [None]:
# Build one folder per digit class under ~/Downloads/NIST_dataset_iter3_0.
# Every listed NIST image is converted to grayscale, resized to 28x28,
# thresholded to pure black/white and then tinted so that black stays
# (0, 0, 0) and white becomes the gray (174, 181, 174).
# Reads `dataset_path` (set in an earlier cell) and `NIST_data_iter3_0`.
path = os.path.expanduser('~')+"/Downloads/"
output_root = path+'NIST_dataset_iter3_0/'
# makedirs(exist_ok=True) also creates missing parents and is safe on re-runs.
os.makedirs(output_root, exist_ok=True)
for digit, folder in NIST_data_iter3_0[2]['NIST_class_mapping'].items():
    digit_dir = output_root+digit
    os.makedirs(digit_dir, exist_ok=True)
    for image in NIST_data_iter3_0[3]["NIST_data"][digit]:
        # Name the output after the image file itself. Taking the first path
        # component instead would give every image of a sub-folder the same
        # output name, so they would overwrite each other.
        image_stem = os.path.basename(image).split('.')[0]
        # Close the source file as soon as the pixel data has been converted;
        # otherwise one handle per image stays open for the whole loop.
        with Image.open(dataset_path+folder+image) as source:
            im = source.convert('L')
        im = im.resize((28,28))
        # Binarize: values below 128 become black, everything else white.
        bw = im.point(lambda x: 0 if x<128 else 255)
        result = ImageOps.colorize(bw, (0,0,0), (174, 181, 174))
        result.save(digit_dir+"/"+image_stem+".jpg")
print('Created classes 0 - 9 at Downloads/NIST_dataset_iter3_0')

In [None]:
# Create the background class (10): 3000 identical 28x28 RGB images filled
# with the gray (174, 181, 174), saved as 0.jpg ... 2999.jpg.
path = os.path.expanduser('~')+"/Downloads/"
background_dir = path+'NIST_dataset_iter3_0/10/'
# makedirs also creates NIST_dataset_iter3_0 itself when this cell is run
# before the digit-class cell, and does not fail when the folder exists.
os.makedirs(background_dir, exist_ok=True)
# The image never changes, so build it once and save it under each name.
background = Image.new(mode='RGB', size=(28,28), color=(174, 181, 174))
for i in range(3000):
    background.save(background_dir+str(i)+'.jpg')
print('Created background class 10 at Downloads/NIST_dataset_iter3_0')

## To form the training dataset for the 2nd version (0.2) of iteration 3.1 training

In [None]:
# Create the production dataset under ~/Downloads/production_dataset_iter3_1.
# `production_data` maps a folder name to a list of base64-encoded images;
# each image is decoded, resized to 28x28 and written as
# <folder>/<folder>_<index>.png.
path = os.path.expanduser('~')+"/Downloads/"
production_root = path+'production_dataset_iter3_1/'
# makedirs(exist_ok=True) also creates missing parents and is safe on re-runs.
os.makedirs(production_root, exist_ok=True)
for folder, encoded_images in NIST_data_iter3_1[2]['production_data'].items():
    class_dir = production_root+folder
    os.makedirs(class_dir, exist_ok=True)
    for i, b64_string in enumerate(encoded_images):
        # resize() returns a new, fully loaded image, so the decoded source
        # can be released as soon as the `with` block ends.
        with Image.open(BytesIO(base64.b64decode(b64_string))) as source:
            im = source.resize((28,28))
        im.save(class_dir+'/'+folder+'_'+str(i)+".png",'PNG')

print('Production dataset created at Downloads/production_dataset_iter3_1')

In [None]:
# Create the NIST misclassifications dataset for iteration 3.1.
# The output folder ~/Downloads/NIST_dataset_iter3_1 (one sub-folder per
# digit) is created automatically. Images are only converted to grayscale
# and resized to 28x28; no thresholding or tinting is applied here.
# Reads `dataset_path` (set in an earlier cell) and `NIST_data_iter3_1`.
path = os.path.expanduser('~')+"/Downloads/"
output_root = path+'NIST_dataset_iter3_1/'
# makedirs(exist_ok=True) also creates missing parents and is safe on re-runs.
os.makedirs(output_root, exist_ok=True)
for digit, folder in NIST_data_iter3_1[3]['NIST_class_mapping'].items():
    digit_dir = output_root+digit
    os.makedirs(digit_dir, exist_ok=True)
    for image in NIST_data_iter3_1[4]["NIST_data"][digit]:
        # Name the output after the image file itself. Taking the first path
        # component instead would give every image of a sub-folder the same
        # output name, so they would overwrite each other.
        image_stem = os.path.basename(image).split('.')[0]
        # Close the source file as soon as the pixel data has been converted;
        # otherwise one handle per image stays open for the whole loop.
        with Image.open(dataset_path+folder+image) as source:
            im = source.convert('L')
        im = im.resize((28,28))
        im.save(digit_dir+"/"+image_stem+".jpg")
print('NIST dataset created at Downloads/NIST_dataset_iter3_1')

### Existing dataset. Download link: https://drive.google.com/file/d/1bTjKBzN-QsCnrQxznRj-b3Szt8_mLKdL/view?usp=share_link

In [None]:
from zipfile import ZipFile
import os

# Unpack ~/Downloads/printed_digits_v1.zip into ~/Downloads/printed_digits_v1.
extract_dir = os.path.expanduser('~')+'/Downloads/printed_digits_v1'
# exist_ok=True lets the cell be re-run without raising FileExistsError.
os.makedirs(extract_dir, exist_ok=True)
with ZipFile(extract_dir+'.zip', 'r') as zipObj:
    # Extract all the contents of the zip file into the target folder
    zipObj.extractall(path=extract_dir)
print('Extraction of dataset complete!')

### NOTE: Combine the existing dataset with either the iteration 3.0 training dataset or the iteration 3.1 training dataset, and place the combined training dataset in ../data/raw

# Training of the model starts here...

In [None]:
# Run the training entry point of the project's `train` module.
# All parameters for training are set in config.py. Please change it as per requirements
# NOTE(review): `train` is resolved through the '../src' entry appended to
# sys.path in the first cell — presumably ../src/train.py; confirm.
from train import main
main()

# Prediction using .h5 model

In [None]:
from predict import pred_using_h5_digit, pred_using_tflite_model

In [None]:
# Load the trained Keras (.h5) model from its path under ../models.
# TensorFlow is imported here because no earlier cell of this notebook
# imports it; without this line `tf` raises a NameError.
import tensorflow as tf

model = tf.keras.models.load_model('../models/pre-trained_model/trained_resnet_model_v2_10.h5')

In [None]:
# To get the accuracy on test data and the number of misclassifications
# using the loaded .h5 model.
# NOTE(review): the pattern below only covers ../data/test/0 — presumably the
# digit-0 class folder — so the printed accuracy is for that folder alone;
# change the path to evaluate other classes.
path = '../data/test/0/*'
# `result` presumably carries the misclassification details and `accuracy`
# the score — verify against predict.pred_using_h5_digit.
result, accuracy = pred_using_h5_digit(model, path)
print("Accuracy on test dataset using .h5 model>>>>",accuracy)

### Conversion of .h5 model to .tflite model

In [None]:
# Convert the Keras .h5 model to a TensorFlow Lite flatbuffer.
# Input and output paths are set in config.py (H5_MODEL_PATH and
# TF_LITE_SAVE_PATH). Please change them as per requirements.
# TensorFlow is imported here because no earlier cell of this notebook
# imports it; without this line `tf` raises a NameError.
import tensorflow as tf

h5_model_path = config.H5_MODEL_PATH
model = tf.keras.models.load_model(h5_model_path)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
# Write through a context manager so the file is flushed and closed
# immediately instead of whenever the handle gets garbage-collected.
with open(config.TF_LITE_SAVE_PATH, "wb") as tflite_file:
    tflite_file.write(tflite_model)

# Prediction using .tflite model

In [None]:
# Path of the .tflite model used for prediction. Note that `model` is a plain
# path string here and replaces the Keras model object bound to the same name
# by the earlier cells; the test data path is set in the next cell.
model = '../models/tflite_model/trained_resnet_model_v2_10.tflite'

In [None]:
# To get the accuracy on test data and the number of misclassifications
# using the .tflite model (`model` is the model file path here).
# NOTE(review): the pattern below only covers ../data/test/0 — presumably the
# digit-0 class folder — so the printed accuracy is for that folder alone;
# change the path to evaluate other classes.
path = '../data/test/0/*'
# `result` presumably carries the misclassification details and `accuracy`
# the score — verify against predict.pred_using_tflite_model.
result, accuracy = pred_using_tflite_model(model, path)
print("Accuracy on test dataset using .tflite model", accuracy)