# Main notebook #
The notebook below contains data corresponding to the neural network, as well as an interactive test of the neural network.

In [None]:
from matplotlib import pyplot as plot
import matplotlib as mpl
import numpy as np
from PIL import Image
import sqlite3
import keras 
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten 
from keras.layers import Conv2D, MaxPooling2D 
from keras import backend as K 
from keras.utils import plot_model
from keras.optimizers import Adam
from ipywidgets import ColorPicker, HBox, link, Button, Output, Dropdown, BoundedIntText, Output, FileUpload
from ipycanvas import Canvas as RoughCanvas, hold_canvas
from io import BytesIO
import lzma
import os.path

def decompress_db():
    """Unpack the LZMA-compressed character database.

    Reads ``./data/char.db.lz`` in full, decompresses it in memory and writes
    the result to ``./data/char.db``, replacing any existing file there.
    """
    with open('./data/char.db.lz', 'rb') as archive:
        raw_db = lzma.decompress(archive.read())
    with open('./data/char.db', 'wb') as db_file:
        db_file.write(raw_db)

# Unpack the database only when the decompressed file is not already present.
if not os.path.exists('./data/char.db'):
    decompress_db()

# Category labels in database order: a label's position is its integer `cat` id.
_CATEGORIES = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'


def _load_data(is_test, cat=None, count=-1):
    """Fetch images and one-hot labels from ``./data/char.db``.

    Shared implementation behind `load_train_data` and `load_test_data`.

    Args:
        is_test: Truthy to read rows with ``is_test=1``, falsy for ``is_test=0``.
        cat: Optional single-character category label (one of the 36
            characters ``0-9A-Z``). ``None`` selects every category.
        count: Maximum number of rows to fetch; a negative value means no
            limit. The query has no ORDER BY, so which rows a limit keeps is
            left to SQLite.

    Returns:
        ``(out_x, out_y)`` where ``out_x`` is a float32 array of shape
        ``(n, 28, 28, 1)`` scaled to ``[0, 1]`` and ``out_y`` is a float64
        one-hot array of shape ``(n, 36)``. A row whose stored category is
        outside ``0..35`` gets an all-zero label row.

    Raises:
        KeyError: If ``cat`` is not one of the 36 known labels.
    """
    query = "SELECT data, cat FROM characters WHERE is_test=?"
    params = [1 if is_test else 0]
    if cat is not None:
        # Dict lookup (not str.index) so an unknown label still raises KeyError.
        cat_table = {label: idx for idx, label in enumerate(_CATEGORIES)}
        query += " AND cat=?"
        params.append(cat_table[cat])
    if count >= 0:
        query += " LIMIT ?"
        params.append(count)

    conn = sqlite3.connect("./data/char.db")
    try:
        rows = conn.execute(query, params).fetchall()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    if not rows:
        # Keep the documented shapes for an empty selection so downstream
        # code sees consistent dimensions.
        return (np.zeros((0, 28, 28, 1), dtype='float32'),
                np.zeros((0, len(_CATEGORIES))))

    # Each `data` value is expected to be a 784-byte blob (28x28 pixels);
    # decode them all in one pass instead of one Python int at a time.
    pixels = np.frombuffer(b"".join(row[0] for row in rows), dtype=np.uint8)
    out_x = pixels.reshape((len(rows), 28, 28, 1)).astype('float32') / 255

    labels = np.array([row[1] for row in rows])
    out_y = (labels[:, None] == np.arange(len(_CATEGORIES))).astype('float64')
    return out_x, out_y


def load_train_data(cat=None, count=-1):
    """Load training images (``is_test=0``) and their one-hot labels.

    Args:
        cat: Optional category label (``'0'``-``'9'``, ``'A'``-``'Z'``);
            ``None`` loads all categories.
        count: Maximum number of rows; negative means unlimited.

    Returns:
        ``(out_x, out_y)`` with shapes ``(n, 28, 28, 1)`` and ``(n, 36)``.
    """
    return _load_data(False, cat, count)


def load_test_data(cat=None, count=-1):
    """Load testing images (``is_test=1``) and their one-hot labels.

    Args:
        cat: Optional category label (``'0'``-``'9'``, ``'A'``-``'Z'``);
            ``None`` loads all categories.
        count: Maximum number of rows; negative means unlimited.

    Returns:
        ``(out_x, out_y)`` with shapes ``(n, 28, 28, 1)`` and ``(n, 36)``.
    """
    return _load_data(True, cat, count)

## Data preparation

The following graphs show the distribution of data elements among categories. The images are separated into two sets: Training and Testing. Testing images account for about 20% of the total number of images for each category.

In [None]:
def show_input_data_count():
    """Plot a stacked bar chart of training/testing image counts per category.

    Per-category counts are cached in a ``char_count`` table inside
    ``./data/char.db``. The table is created if missing and filled from the
    ``characters`` table when it is empty; afterwards the cached values are
    read back and drawn as stacked bars (training at the bottom, testing on
    top).

    Raises:
        IndexError: If ``char_count`` has no row for one of the 36 categories.
    """
    categories = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    conn = sqlite3.connect("./data/char.db")
    try:
        cur = conn.cursor()
        cur.execute("""
        CREATE TABLE IF NOT EXISTS char_count(
            countid INTEGER PRIMARY KEY AUTOINCREMENT,
            catid INTEGER,
            train_count INTEGER,
            test_count INTEGER,
            FOREIGN KEY(catid) REFERENCES category(catid)
        )
        """)
        cur.execute("""SELECT COUNT(*) FROM char_count""")
        if cur.fetchone()[0] == 0:
            # Empty cache: count the images once and store the result.
            for catid in range(len(categories)):
                cur.execute("""
                INSERT INTO char_count(catid, train_count, test_count) VALUES(?,
                    (SELECT COUNT(*) FROM characters WHERE cat=? AND is_test=0),
                    (SELECT COUNT(*) FROM characters WHERE cat=? AND is_test=1))
                """, (catid, catid, catid))
            conn.commit()

        data_cats = {}
        for catid, label in enumerate(categories):
            cur.execute("""
            SELECT train_count, test_count, catid FROM char_count WHERE catid=?
            """, (catid,))
            data_cats[label] = cur.fetchall()[0]
    finally:
        # Release the database handle even if a query fails.
        conn.close()

    labels = list(data_cats)
    notest = [row[0] for row in data_cats.values()]
    yestest = [row[1] for row in data_cats.values()]
    fig, ax = plot.subplots()
    ax.bar(labels, notest, label='Training')
    # Stack the testing counts on top of the training counts.
    ax.bar(labels, yestest, bottom=notest, label='Testing')
    ax.set_xlabel("Category")
    ax.set_ylabel("Image Count")
    ax.legend()
    display(ax)
# Render the category distribution chart when this cell runs.
show_input_data_count()

The cell below allows you to see the training data in a visual form. 

In [None]:
# Category selector: one entry per recognised character.
cat_dropdown = Dropdown(options=list('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
                        value='0',
                        description='Category:',
                        disabled=False)
# The "Show" handler loads at most 1000 images per category, so the largest
# index that can ever be valid is 999.
idx_bound_int = BoundedIntText(value=0, min=0, max=999, description='Index: ', disabled=False)
test_set_display_out = Output()
with test_set_display_out:
    # Blank 28x28 placeholder shown until the first image is requested.
    display(Image.fromarray(np.zeros((28, 28), np.uint8)).convert("RGB"))
def button_show_click(self: Button):
    """Show the selected test image in the output area.

    Loads up to 1000 test images of the category chosen in ``cat_dropdown``
    and displays the one at the index entered in ``idx_bound_int``. If the
    index is beyond the number of images loaded, a message is printed instead.

    Args:
        self: The button that fired the click (unused).
    """
    c1_imgs, _ = load_test_data(cat_dropdown.value, 1000)
    idx = idx_bound_int.value
    with test_set_display_out:
        test_set_display_out.clear_output()
        if idx >= len(c1_imgs):
            print(f"Index {idx} is out of range: "
                  f"{len(c1_imgs)} image(s) loaded for '{cat_dropdown.value}'.")
            return
        # Pixels were scaled to [0, 1] on load; restore 0-255 as *unsigned*
        # bytes (int8 cannot hold values above 127) and round rather than
        # truncate so float error cannot darken a pixel by one step.
        pixels = np.rint(255 * c1_imgs[idx].reshape(28, 28)).astype(np.uint8)
        display(Image.fromarray(pixels).convert("RGB"))
# Hook the "Show" button up to its handler, then lay the controls out in order:
# category, index, button, image output.
btn_show = Button(description='Show', disabled=False)
btn_show.on_click(button_show_click)
for _widget in (cat_dropdown, idx_bound_int, btn_show, test_set_display_out):
    display(_widget)

## Model configuration ##

The cell below defines the function which will either retrieve the model from a file, or compile the model as well as set its initial state. 

In [None]:
def train(model, train_x, train_y, test_x, test_y):
    """Train `model` for one epoch and record its scores.

    Steps, in order:
      1. Fit for a single epoch (batch size 128), validating on the test set.
      2. Evaluate on the whole test set and append the result to ``scores``.
      3. Evaluate each of the 36 categories separately and append the
         per-category results to ``cat_scores``.
      4. Increment ``epoch_count``, save the model to ``./model`` and write
         the statistics to ``./model_stats.json``.

    Args:
        model: The Keras model to train (must provide fit/evaluate/save).
        train_x, train_y: Training images and one-hot labels.
        test_x, test_y: Testing images and one-hot labels.
    """
    import json
    import sys

    # NOTE(review): the singular `score` is what is declared global here, so
    # the latest overall score is also published as a module-level name;
    # `scores` is only mutated (append), which needs no declaration.
    global score, cat_scores, epoch_count

    model.fit(
        train_x,
        train_y,
        batch_size=128,
        epochs=1,
        verbose=1,
        validation_data=(test_x, test_y),
    )

    score = model.evaluate(test_x, test_y, verbose=1)
    scores.append(score)

    def test_nn(model: keras.Model):
        """Evaluate `model` on each category's test images; return {label: score}."""
        per_category = {}
        emit = sys.stdout.write
        for label in '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ':
            emit(f"Testing {label}...")
            images, targets = load_test_data(label)
            per_category[label] = model.evaluate(images, targets, verbose=0)
            # Carriage return so the next progress message overwrites this one.
            emit("\r")
        emit("\nDone.")
        return per_category

    cat_scores.append(test_nn(model))
    epoch_count += 1

    model.save("./model")
    stats = {'epoch_count': epoch_count, 'scores': scores, 'cat_scores': cat_scores}
    with open('./model_stats.json', 'w') as stats_file:
        json.dump(stats, stats_file)

def init_model(from_file = False):
    """Return a model, either restored from disk or freshly built.

    Also (re)initialises the module-level ``scores``, ``cat_scores`` and
    ``epoch_count``.

    Args:
        from_file: When true, load the model from ``./model`` and the
            statistics from ``./model_stats.json``. Otherwise build and
            compile a new network and reset the statistics to empty.

    Returns:
        The loaded or newly compiled Keras model.
    """
    global scores, cat_scores, epoch_count

    if from_file:
        model = load_model('./model')
        import json
        with open('./model_stats.json', 'r') as stats_file:
            stats = json.load(stats_file)
        epoch_count = stats['epoch_count']
        scores = stats['scores']
        cat_scores = stats['cat_scores']
        return model

    # Four 3x3 convolutions (32 -> 64 -> 128 -> 256 filters) with 2x2 max
    # pooling after the last three, then a 96-unit dense layer and a 36-way
    # softmax. Dropout (0.25 before Flatten, 0.50 before the output layer)
    # is currently disabled.
    model = Sequential()
    for layer in (
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(256, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(96, activation='relu'),
        Dense(36, activation='softmax'),
    ):
        model.add(layer)

    scores = []
    cat_scores = []
    epoch_count = 0

    # No optimizer is passed, so Keras' default is used.
    model.compile(loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
    return model

The cell below loads the model. Optionally, it will train one epoch (one pass of the training data) for the neural network. It also records the overall scores and the categorical scores of the neural network.

In [None]:
def train_main():
    """Build a brand-new model and train it for one epoch on the full data set."""
    global model
    model = init_model(from_file=False)
    # Training data is loaded first, then testing data, as positional
    # (train_x, train_y, test_x, test_y) arguments.
    train(model, *load_train_data(), *load_test_data())
    
def train_preload_main():
    """Restore the saved model and train it for one further epoch."""
    global model
    model = init_model(from_file=True)
    # Training data is loaded first, then testing data, as positional
    # (train_x, train_y, test_x, test_y) arguments.
    train(model, *load_train_data(), *load_test_data())

def test_main():
    """Restore the saved model and its statistics without training."""
    global model
    model = init_model(from_file=True)

# Loads the saved model without training. To train one more epoch instead,
# replace the line below with 'train_preload_main()'.
test_main()

## Neural network data ##

In the figure below, we can see that the accuracy value of the neural network increases as the number of epochs increases. The increase in accuracy is approximately logarithmic.

In [None]:
# One row, two axes: accuracy on the left, loss on the right.
fig, axs = plot.subplots(1, 2)
fig.set_figwidth(12)

# Each entry of `scores` is read as (loss, accuracy).
loss = [entry[0] for entry in scores]
acc = [entry[1] for entry in scores]

# Epochs are numbered from 1 on the x axis.
axs[0].plot(list(range(1, len(acc) + 1)), acc)
axs[0].set(xlabel="Epochs", ylabel="Accuracy")

axs[1].plot(list(range(1, len(loss) + 1)), loss)
axs[1].set(xlabel="Epochs", ylabel="Loss")

display(axs[0])
display(axs[1])

Below is a list of accuracy ratings for each category of character. An accuracy rating of 1.00 means a 100% accuracy on 
the test set.

In [None]:
%matplotlib inline
# Epoch selector (1-based). With no recorded epochs the option list is empty,
# so no value may be preselected.
epoch_dropdown = Dropdown(options=list(range(1, epoch_count + 1)),
                          value=epoch_count if epoch_count else None,
                          description='Epoch number:',
                          disabled=False)
graph_display_out = Output()


def _draw_category_accuracy():
    """Redraw the per-category accuracy bar chart for the selected epoch."""
    with graph_display_out:
        graph_display_out.clear_output()
        if not epoch_dropdown.value:
            print("No trained epochs to display yet.")
            return
        # cat_scores is 0-based; the dropdown shows 1-based epoch numbers.
        epoch_scores = cat_scores[epoch_dropdown.value - 1]
        fig, ax = plot.subplots()
        # Each per-category score is read as (loss, accuracy); plot accuracy.
        ax.bar(list(epoch_scores), [s[1] for s in epoch_scores.values()])
        ax.set_ybound(0.50, 1.0)
        ax.set_xlabel("Category")
        ax.set_ylabel("Accuracy")
        # pyplot.show() takes no figure argument; it shows the open figures.
        plot.show()


# Initial render for the most recent epoch.
_draw_category_accuracy()


def button_gshow_click(self: Button):
    """Redraw the chart for the epoch currently selected in the dropdown.

    Args:
        self: The button that fired the click (unused).
    """
    _draw_category_accuracy()


btn_gshow = Button(description='Show', disabled=False)
btn_gshow.on_click(button_gshow_click)
display(epoch_dropdown)
display(btn_gshow)
display(graph_display_out)

As we can see, the categories "0", "5", and "I" have the lowest accuracy ratings. All of these categories are close in shape to other categories: 
* "0" is very close to "O"; 
* "5" is somewhat close to "S";
* "I" is close to "1", although it does have some distinct features which make it a bit easier to distinguish.

## Test network ##

In the box below, draw a letter or number. To the right, the network's top five predictions for the drawn character will be listed, each with the network's confidence in that prediction.

In [None]:
# On-screen pixels per cell of the 28x28 drawing grid.
canvas_scl = 4
canvas = RoughCanvas(
    width=28 * canvas_scl,
    height=28 * canvas_scl,
    sync_image_data=True,
)
# Scale drawing commands so one drawing unit corresponds to one grid cell.
canvas.scale(canvas_scl)

# Stroke state shared by the mouse handlers.
drawing, position, shape = False, None, []
# Latest captured canvas pixels; None until something has been drawn/uploaded.
canvas_data = None

# Output area for the prediction list.
pred_out = Output()

def on_click_predict(this: Button):
    """Classify the current canvas image and print the top five predictions.

    Does nothing when no image has been captured yet (``canvas_data`` is
    ``None``). Otherwise the image is converted to greyscale, resized to
    28x28, scaled to ``[0, 1]`` and passed through ``model``; the five
    highest-scoring categories are printed to ``pred_out``, best first.

    Args:
        this: The button that fired the click; callers may pass ``None``.
    """
    if canvas_data is None:
        return
    pred_out.clear_output(wait=True)
    with pred_out:
        glyph = Image.fromarray(canvas_data).convert('L').resize((28, 28))
        # Convert the greyscale image straight to an array instead of walking
        # its bytes one Python int at a time.
        input_data = np.asarray(glyph).astype('float32').reshape((1, 28, 28, 1)) / 255
        output = model(input_data, training=False)
        # argsort is ascending, so reverse it and keep the first five.
        for i in np.argsort(output, axis=1)[0][::-1][:5]:
            print(f'{"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i]}: {output[0][i] * 100:.02f}% confident')

def on_mouse_down(x, y):
    """Begin a new stroke at the pressed point.

    Args:
        x, y: Pointer coordinates as reported by the canvas; they are divided
            by ``canvas_scl`` to get drawing-grid coordinates.
    """
    global drawing, position, shape

    drawing = True
    start = (x / canvas_scl, y / canvas_scl)
    position = start
    shape = [start]


def on_mouse_move(x, y):
    """Extend the current stroke while the mouse button is held.

    Paints a 1x1 (grid-unit) square at the previous stroke position, moves
    the stroke position to the pointer, and refreshes ``canvas_data``.
    Ignored when no stroke is in progress.

    Args:
        x, y: Pointer coordinates as reported by the canvas; they are divided
            by ``canvas_scl`` to get drawing-grid coordinates.
    """
    global position, canvas_data

    if not drawing:
        return

    with hold_canvas():
        canvas.fill_rect(position[0], position[1], 1, 1)
        position = (x / canvas_scl, y / canvas_scl)

    # Capture the same full-canvas region that on_mouse_up captures, rather
    # than a 28x28 corner, so canvas_data always has the same extent.
    canvas_data = canvas.get_image_data(0, 0, 28*canvas_scl, 28*canvas_scl)

    shape.append(position)


def on_mouse_up(x, y):
    """Finish the current stroke and run a prediction on the canvas.

    Paints the final stroke point, captures the full canvas into
    ``canvas_data``, triggers ``on_click_predict`` and clears the recorded
    stroke. A release that does not end a stroke is ignored.

    Args:
        x, y: Pointer coordinates as reported by the canvas (unused; the last
            recorded stroke position is painted instead).
    """
    global drawing, shape, canvas_data

    if not drawing:
        # Release without a matching press on the canvas: there is no stroke
        # to finish, and `position` may be None or stale.
        return
    drawing = False

    with hold_canvas():
        canvas.fill_rect(position[0], position[1], 1, 1)

    canvas_data = canvas.get_image_data(0, 0, 28*canvas_scl, 28*canvas_scl)

    on_click_predict(None)
    shape = []


# Route the canvas mouse events to the stroke handlers.
canvas.on_mouse_down(on_mouse_down)
canvas.on_mouse_move(on_mouse_move)
canvas.on_mouse_up(on_mouse_up)

# Paint the background black, then leave white as the active fill and stroke
# colour so that subsequent drawing is white-on-black.
canvas.stroke_style = "#000000"
with hold_canvas():
    canvas.fill_style = "#000000"
    canvas.fill_rect(0, 0, 28*canvas_scl, 28*canvas_scl)
    canvas.fill_style = "#ffffff"
canvas.stroke_style = "#ffffff"

# Upload widget restricted to a single image file.
f_upload = FileUpload(accept='image/*', multiple=False)

def on_click_reset(this: Button):
    """Blank the canvas and discard the captured image and predictions.

    Args:
        this: The button that fired the click (unused).
    """
    global canvas_data

    side = 28 * canvas_scl
    canvas.stroke_style = "#000000"
    with hold_canvas():
        # Fill with black, then restore white as the drawing colour.
        canvas.fill_style = "#000000"
        canvas.fill_rect(0, 0, side, side)
        canvas.fill_style = "#ffffff"
    canvas.stroke_style = "#ffffff"

    canvas_data = None
    pred_out.clear_output()

def on_file_upload(change: dict):
    """Handle an uploaded image: show it on the canvas and classify it.

    Registered as an observer of the upload widget's value. The first
    uploaded file is opened, resized to 28x28 and converted to RGB; the
    pixels are drawn onto the canvas, stored in ``canvas_data`` and passed to
    ``on_click_predict``. The widget's upload state is then cleared.

    Args:
        change: The change notification from the widget (unused; the widget
            is read directly).
    """
    global f_upload
    global canvas
    global canvas_data
    # Nothing uploaded (also the case after the state is cleared below).
    if len(f_upload.data) == 0:
        return
    # NOTE(review): `f_upload.data` is assumed to hold the raw bytes of each
    # uploaded file - confirm against the installed ipywidgets version.
    imin = Image.open(BytesIO(f_upload.data[0])).resize((28, 28)).convert("RGB")
    x_data = np.array(imin)
    with hold_canvas():
        canvas.put_image_data(x_data)
    # Use the uploaded pixels directly instead of reading them back from the
    # canvas (previous approach: canvas.get_image_data over the full canvas).
    canvas_data = x_data
    on_click_predict(None)
    # Clear the widget's stored upload and reset its counter.
    # NOTE(review): `_counter` is a private attribute - verify it still exists
    # when upgrading ipywidgets.
    f_upload.value.clear()
    f_upload._counter = 0
    

# Run on_file_upload whenever the upload widget's value changes.
f_upload.observe(on_file_upload, 'value')
# A manual "Predict" button is disabled: prediction runs automatically on
# mouse release and after an upload.
# b_predict = Button(description="Predict", disabled=False)
# b_predict.on_click(on_click_predict)
b_reset = Button(description="Reset", disabled=False)
b_reset.on_click(on_click_reset)
# Controls row first; the canvas/prediction row is the cell's final expression.
display(HBox((b_reset,f_upload)))
HBox((canvas, pred_out))