In [None]:
# One-off environment setup. Set `install` to True to install the pinned
# packages and download the EMNIST archive into ./datasets; leave it False
# for normal runs so that "Run All" does not reinstall anything.
install = False
if install:
    # nengo_dl is installed first; nengo and tensorflow are then force-reinstalled
    # at the pinned versions below.
    %pip install nengo_dl
    %pip install --force-reinstall -v nengo==3.2 
    %pip install --force-reinstall -v tensorflow==2.11
    # Download and unpack the EMNIST archive, then decompress only the
    # "balanced" training images/labels files that the data-loading cell reads.
    !(mkdir datasets; \
    cd datasets; \
    wget https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip; \
    unzip gzip.zip; cd gzip; gunzip emnist-balanced-train-images-idx3-ubyte.gz; \
    gunzip emnist-balanced-train-labels-idx1-ubyte.gz)

In [None]:
import numpy as np
import tensorflow as tf
import os
import nengo_dl
import matplotlib.pyplot as plt

In [None]:
import struct
from string import ascii_uppercase
from segmentation import segmentoutletters

def read_images(file_path):
    """Load an image file consisting of a 16-byte header followed by uint8 pixels.

    The header holds four big-endian unsigned 32-bit integers: a leading
    value that is ignored, the number of images, the row count and the
    column count. Everything after the header is read as ``uint8``.

    Parameters
    ----------
    file_path : str or path-like
        Location of the uncompressed image file.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(num_images, rows, cols)``.

    Raises
    ------
    ValueError
        If the payload size does not match the dimensions in the header.
    """
    header_format = ">IIII"
    with open(file_path, 'rb') as stream:
        header = stream.read(struct.calcsize(header_format))
        _magic, count, height, width = struct.unpack(header_format, header)
        # Everything after the header is raw pixel data, one byte per pixel.
        pixels = np.fromfile(stream, dtype=np.uint8)
    return pixels.reshape(count, height, width)

def read_labels(file_path):
    """Load a label file consisting of an 8-byte header followed by uint8 labels.

    The header holds two big-endian unsigned 32-bit integers: a leading
    value that is not interpreted here, and the number of labels. The rest
    of the file is read as one ``uint8`` label per byte.

    Parameters
    ----------
    file_path : str or path-like
        Location of the uncompressed label file.

    Returns
    -------
    numpy.ndarray
        1-D ``uint8`` array with one entry per label.

    Raises
    ------
    ValueError
        If the number of label bytes differs from the count declared in the
        header (truncated, corrupted or wrong file).
    """
    with open(file_path, 'rb') as f:
        _magic, num_labels = struct.unpack(">II", f.read(8))
        labels = np.fromfile(f, dtype=np.uint8)
    # The header count was previously read and ignored; check it so that a
    # damaged file fails here instead of silently misaligning with the images.
    if labels.size != num_labels:
        raise ValueError(
            f"{file_path}: header declares {num_labels} labels "
            f"but the file contains {labels.size}"
        )
    return labels

def read_validation_data(n_steps=30):
    """Build the validation set from the images in ``Dane_przyciete``.

    Every file whose name (without extension) has an entry in the built-in
    ground-truth table is passed to ``segmentoutletters``; the returned
    character images are stacked and paired with one class id per character
    of the ground-truth string.

    Parameters
    ----------
    n_steps : int, optional
        Number of times each label is repeated along axis 1.

    Returns
    -------
    tuple of numpy.ndarray
        ``(validation_images, validation_labels)``. The images are the
        row-stacked first return values of ``segmentoutletters`` (their exact
        shape is decided by that helper); the labels have shape
        ``(num_characters, n_steps, 1)``.

    Raises
    ------
    FileNotFoundError
        If the directory is missing or contains no file with a known label.
    """
    # Ground-truth text for each validation image, keyed by file name stem.
    labels = {
            "(BARCODE)0003": "AT_02001/2",
            "(BARCODE)0007": "AT_02001/6",
            "(BARCODE)0015": "AT_02002/4",
            "BG_0005": "BG_01001/5",
            "BG_0012": "BG_01002/2",
            "BG_0014": "BG_01002/4",
        }

    # Character -> class id: digits 0-9, then A-Z as 10-35.
    # NOTE(review): "/" and "_" are both mapped to class 1 - confirm this is intended.
    a = list(range(36)) + [1, 1]
    b = list(map(str, range(10))) + list(ascii_uppercase) + ["/", "_"]
    convert_classes = dict(zip(b, a))

    data_dir = "Dane_przyciete"
    image_chunks = []
    label_chunks = []
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for name in sorted(os.listdir(data_dir)):
        stem = os.path.splitext(name)[0]
        if stem not in labels:
            # Skip stray entries (hidden files, checkpoints, unlabelled images)
            # instead of failing with a KeyError.
            continue
        segmentation_output, _ = segmentoutletters(name)
        segmentation_labels = np.array([convert_classes[char] for char in labels[stem]])
        image_chunks.append(segmentation_output)
        label_chunks.append(segmentation_labels.reshape((-1, 1)))

    if not image_chunks:
        raise FileNotFoundError(f"No labelled validation images found in {data_dir!r}")

    # Stack once at the end rather than re-allocating on every iteration.
    validation_images = np.vstack(image_chunks)
    validation_labels = np.vstack(label_chunks)

    # Repeat each label n_steps times along a new middle axis.
    validation_labels = np.tile(validation_labels[:, :, None], (1, n_steps, 1))
    return validation_images, validation_labels

def preprocess(images, labels, n_steps=1, portion=1):
    """Filter, transpose, flatten and time-expand an image/label set.

    Only samples with a class id in ``0..35`` are kept (the original note
    describes the discarded classes as lowercase letters). Within each class
    every ``portion``-th sample is taken, and the output is ordered class by
    class.

    Parameters
    ----------
    images : numpy.ndarray
        Image stack indexed as ``(sample, axis1, axis2)``.
    labels : numpy.ndarray
        Class id for each sample.
    n_steps : int, optional
        Number of repetitions along the inserted axis 1.
    portion : int, optional
        Stride used when sub-sampling each class.

    Returns
    -------
    tuple of numpy.ndarray
        Images of shape ``(kept, n_steps, pixels)`` and labels of shape
        ``(kept, n_steps, 1)``.
    """
    # Sample positions to keep, grouped by class id in ascending order.
    keep = [
        sample
        for class_id in range(36)
        for sample in np.where(labels == class_id)[0][::portion]
    ]

    # Swap the two image axes, then flatten every image into one row.
    picked_images = np.swapaxes(images[keep, ...], 1, 2)
    picked_labels = labels[keep]
    flat_images = picked_images.reshape((picked_images.shape[0], -1))

    # Insert an axis at position 1 and repeat along it n_steps times.
    timed_images = np.tile(flat_images[:, np.newaxis, :], (1, n_steps, 1))
    timed_labels = np.tile(picked_labels[:, np.newaxis, np.newaxis], (1, n_steps, 1))
    return timed_images, timed_labels

In [None]:

# Load the raw EMNIST "balanced" training split from the unpacked archive.
train_images = read_images('datasets/gzip/emnist-balanced-train-images-idx3-ubyte')
train_labels = read_labels('datasets/gzip/emnist-balanced-train-labels-idx1-ubyte')

# Number of timesteps the validation labels are repeated over.
n_steps = 30
# Training data uses a single timestep and keeps every sample (portion=1).
train_images, train_labels = preprocess(train_images, train_labels, n_steps=1, portion=1)
# Pass n_steps instead of repeating the literal so the two cannot drift apart.
validation_images, validation_labels = read_validation_data(n_steps)
# Repeat the validation set three times so it can be evaluated with the
# simulator's minibatch_size (presumably 3x the sample count is a multiple
# of it - confirm if either number changes).
validation_images = np.tile(validation_images, (3, 1, 1))
validation_labels = np.tile(validation_labels, (3, 1, 1))

In [None]:
# Preview the first three training samples, each titled with its label.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for sample_idx, ax in enumerate(axes):
    # Timestep 0 holds the flattened image; restore it to 28x28 for display.
    ax.imshow(train_images[sample_idx, 0, :].reshape((28, 28)), cmap="gray")
    ax.axis("off")
    ax.set_title(str(train_labels[sample_idx, 0, :]))

In [None]:
# Preview the first three validation samples, each titled with its label.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for sample_idx, ax in enumerate(axes):
    # Timestep 0 is reshaped to 28x28 for display.
    ax.imshow(validation_images[sample_idx, 0, :].reshape((28, 28)), cmap="gray")
    ax.axis("off")
    ax.set_title(str(validation_labels[sample_idx, 0, :]))

In [None]:
# Build the network. `net` comes from the project-local `network` module and
# returns the model plus two output handles: `out_p` is used for training and
# `out_p_filt` for evaluation in the cells below (presumably the raw and the
# filtered output probe, judging by the names - confirm in network.py).
from network import net
model, out_p, out_p_filt = net()

In [None]:
def classification_accuracy(y_true, y_pred):
    """Sparse categorical accuracy computed on the last entry of axis 1 only.

    Both inputs are sliced with ``[:, -1]`` before being compared, so only
    the final position along axis 1 (presumably the last timestep) counts.

    Parameters
    ----------
    y_true : tensor
        Integer class labels.
    y_pred : tensor
        Per-class outputs of the network.

    Returns
    -------
    tensor
        Result of ``tf.metrics.sparse_categorical_accuracy`` for the slices.
    """
    final_targets = y_true[:, -1]
    final_outputs = y_pred[:, -1]
    return tf.metrics.sparse_categorical_accuracy(final_targets, final_outputs)
# Number of samples the simulator processes per minibatch; the validation set
# is tiled in the data-loading cell so that it can be evaluated with this size.
minibatch_size = 180
sim = nengo_dl.Simulator(model, minibatch_size=minibatch_size)
# Configure training: RMSprop with learning rate 0.001 and a sparse categorical
# cross-entropy loss attached to `out_p` (from_logits=True, i.e. the loss is
# told to treat the outputs as logits). classification_accuracy is passed as
# the metric.
sim.compile(
        optimizer=tf.optimizers.RMSprop(0.001),
        loss={out_p: tf.losses.SparseCategoricalCrossentropy(from_logits=True)},
        metrics=classification_accuracy
    )

In [None]:
# Training loop: one fit() epoch, then an evaluation on the validation set,
# then a checkpoint, repeated up to max_epochs times.
max_epochs = 30
patience = 5
# Early stopping is only considered once `epoch` exceeds this threshold.
# NOTE(review): 100 is larger than max_epochs, so early stopping is effectively
# disabled; lower it (e.g. to `patience`) to enable it.
early_stopping_min_epoch = 100
history = {'loss' : [], 'val_loss' : [], 'accuracy' : [], "val_accuracy" : []}
RUN_ID = "dropout_123456" # change when continuing training (so saved weights are not overwritten)
prams_dir = "wagi_epoki/"
# save_params does not create the target directory; make sure it exists.
os.makedirs(prams_dir, exist_ok=True)
# sim.load_params(f'wagi_epoki/params_epoch_1_dropout_0')
for epoch in range(max_epochs):
    print(f"Epoch: {epoch}/{max_epochs}")
    stats = sim.fit(train_images, {out_p : train_labels}, epochs=1)
    print("Validation results:")
    val_stats = sim.evaluate(validation_images, {out_p_filt : validation_labels})
    val_loss = val_stats['loss']
    # Stop when the validation loss is worse than it was `patience` epochs ago.
    # The `epoch >= patience` guard keeps the lookup valid if the threshold is lowered.
    if epoch > early_stopping_min_epoch and epoch >= patience:
        if history['val_loss'][-patience] < val_loss:
            # Consider every recorded epoch (the previous range(epoch-1)
            # skipped the most recent entry).
            best_epoch = int(np.argmin(history['val_loss']))
            print("Training stopped due to overfitting")
            print(f"Best validation_results at epoch {best_epoch}")
            break
    sim.save_params(prams_dir + f"params_epoch_{epoch}_{RUN_ID}")
    # fit() ran a single epoch, so its history lists hold one value each;
    # store that scalar so all four history series have the same structure.
    history['loss'].append(stats.history['loss'][-1])
    history['val_loss'].append(val_loss)
    history['accuracy'].append(stats.history['out_p_classification_accuracy'][-1])
    # NOTE(review): evaluation targets out_p_filt but the metric is read under
    # the 'out_p_...' key - confirm this matches the probe labels in network.py.
    history['val_accuracy'].append(val_stats['out_p_classification_accuracy'])

In [None]:
# Plot training vs. validation loss (left) and accuracy (right) per epoch.
# Ticks come from the number of recorded epochs rather than the leaked loop
# variable `epoch`, which dropped the last tick after a full run.
n_recorded = len(history['val_loss'])
epoch_ticks = np.arange(n_recorded)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# np.ravel flattens entries stored as one-element lists as well as scalars.
loss_ax.plot(np.ravel(history['loss']), color='red', marker='.')
loss_ax.plot(np.ravel(history['val_loss']), color='blue', marker='.')
loss_ax.set_xticks(epoch_ticks)
loss_ax.set_xlabel("Epoch")
loss_ax.legend(["Trainning loss", "Validation loss"])

acc_ax.plot(np.ravel(history['accuracy']), color='red', marker='.')
acc_ax.plot(np.ravel(history['val_accuracy']), color='blue', marker='.')
acc_ax.set_xticks(epoch_ticks)
acc_ax.set_xlabel("Epoch")
acc_ax.legend(["Trainning accuracy", "Validation accuracy"])

In [None]:
# Close the simulator once training and evaluation are finished; later cells
# cannot use `sim` after this point.
sim.close()