In [29]:
import numpy as np
import tensorflowjs as tfjs

from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam
import builtins
# Rebind the notebook-level name `open` to the builtin implementation.
# NOTE(review): presumably a guard against `open` being shadowed in the
# kernel namespace — the reason is not visible here; confirm it is still needed.
open = builtins.open


In [30]:
# Binary file read as one uint8 label per sample.
label_path = "../website/prisma/seeding/labels.bin"
# Binary file read as consecutive 28*28-byte uint8 images.
digits_path = "../website/prisma/seeding/digits.bin"
# NOTE(review): not referenced by any visible cell — the export cell writes to
# "./models/tfjs_model" instead; confirm which location is intended.
model_path = "new_model"

In [31]:
def split_data(
    x: np.ndarray,
    y: np.ndarray,
    train_fraction: float = 0.8,
) -> tuple[tuple[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray]]:
    """Split samples and labels sequentially into a train part and a test part.

    The split is positional (no shuffling): the leading ``train_fraction`` of
    the samples becomes the train part and the remainder the test part.

    Args:
        x: Samples, indexed along the first axis.
        y: Labels aligned with ``x`` along the first axis.
        train_fraction: Share of samples placed in the train part, in
            ``[0, 1]``. Defaults to 0.8.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length, or ``train_fraction``
            is outside ``[0, 1]``.
    """
    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length, got {len(x)} and {len(y)}"
        )
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            f"train_fraction must be within [0, 1], got {train_fraction}"
        )

    # Truncate towards zero so the train part never exceeds the requested share.
    cut = int(len(y) * train_fraction)
    return (x[:cut], y[:cut]), (x[cut:], y[cut:])


# Load the label file as a flat uint8 array (one entry per sample).
labels: np.ndarray
with open(label_path, "rb") as label_file:
    labels = np.frombuffer(label_file.read(), dtype=np.uint8)

# Load the digit file as a flat uint8 buffer of consecutive 28x28 images.
digits: np.ndarray
size = 28*28
with open(digits_path, "rb") as digit_file:
    digit_buffer = np.frombuffer(digit_file.read(), dtype=np.uint8)

# One image is expected per label; any bytes beyond that are ignored.
num_images = len(labels)
# Slice out exactly `num_images` images and reshape them in one step. The
# float64 cast yields a writable copy with the dtype np.empty() defaults to.
digits = (
    digit_buffer[:num_images * size]
    .reshape((num_images, 28, 28))
    .astype(np.float64)
)


# Report how many samples each digit class has before rebalancing.
for digit in range(10):
    print(digit, int(np.count_nonzero(labels == digit)))

# Undersample the zero class: keep only 1/11 of the zeroes (in the recorded run
# that is 4415 // 11 = 401, in line with the ~370 samples of every other digit).
num_zeroes = int(np.count_nonzero(labels == 0))
target_zeroes = num_zeroes // 11

# Choose which zeroes to drop with a fixed seed so the cell is reproducible.
# A random choice (rather than "the first N") keeps the surviving zeroes spread
# over the whole dataset, so the sequential split below does not leave the
# validation slice without any zero samples.
zero_positions = np.flatnonzero(labels == 0)
rng = np.random.default_rng(0)
dropped_zero_positions = rng.permutation(zero_positions)[target_zeroes:]

keep_mask = np.ones(len(labels), dtype=bool)
keep_mask[dropped_zero_positions] = False

# Boolean indexing preserves the original sample order and the label dtype.
digits_temp = digits[keep_mask]
labels_temp = labels[keep_mask]
assert len(labels_temp) == len(labels) - (num_zeroes - target_zeroes)
assert int(np.count_nonzero(labels_temp == 0)) == target_zeroes

# Split the rebalanced data into train and test
(x_train, y_train), (x_test, y_test) = split_data(digits_temp, labels_temp)

# Scale pixel values from [0, 255] to [0, 1].
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Add the trailing channel axis expected by the Conv2D input (28, 28, 1).
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

0 4415
1 315
2 362
3 386
4 372
5 363
6 367
7 374
8 371
9 370


In [32]:
# Augmentation settings, spelled out in full. All centering, normalisation,
# whitening and flipping options are disabled; only small random rotations,
# zooms and shifts are applied.
augmentation_options = dict(
    # --- dataset-/sample-level normalisation (all off) ---
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    # --- geometric jitter ---
    rotation_range=10,                    # random rotation range, in degrees
    zoom_range=0.1,                       # random zoom
    width_shift_range=0.1,                # horizontal shift (fraction of total width)
    height_shift_range=0.1,               # vertical shift (fraction of total height)
    # --- flips (off) ---
    horizontal_flip=False,                # randomly flip images horizontally
    vertical_flip=False,                  # randomly flip images vertically
)
datagen = ImageDataGenerator(**augmentation_options)

In [33]:
# Fit the generator's data-dependent statistics on the training images.
# NOTE(review): per the Keras docs this is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled; all three are False where `datagen` is created, so this call looks
# redundant — confirm before removing.
datagen.fit(x_train)

In [34]:
# One-hot encode both label vectors over the 10 digit classes.
y_train, y_test = [
    to_categorical(label_vector, num_classes=10)
    for label_vector in (y_train, y_test)
]

In [35]:
# CNN classifier: two convolutional stages (32 then 64 filters), each made of
# two 3x3 "same" convolutions with batch normalisation, a 2x2 max-pool and
# dropout, followed by a two-layer dense head and a 10-way softmax.
model = Sequential([
    # Stage 1: 32 filters, 28x28 -> 14x14 after pooling.
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', strides=1,
           padding='same', data_format='channels_last', input_shape=(28,28,1)),
    BatchNormalization(),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', strides=1,
           padding='same', data_format='channels_last'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),
    Dropout(0.25),

    # Stage 2: 64 filters, spatial size halved again by the pooling layer.
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', strides=1,
           padding='same', data_format='channels_last'),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(3, 3), strides=1, padding='same',
           activation='relu', data_format='channels_last'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), padding='valid', strides=2),
    Dropout(0.25),

    # Dense head.
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),
    Dense(1024, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # One output unit per digit class.
    Dense(10, activation='softmax'),
])

In [36]:
# Adam optimiser with its hyperparameters written out explicitly.
optimizer = Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
)

# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)
model.summary()



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 batch_normalization_6 (Bat  (None, 28, 28, 32)        128       
 chNormalization)                                                
                                                                 
 conv2d_5 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 batch_normalization_7 (Bat  (None, 28, 28, 32)        128       
 chNormalization)                                                
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 32)        0         
 g2D)                                                            
                                                      

In [37]:
# Training hyperparameters consumed by the model.fit cell.
batch_size = 64  # samples per augmented mini-batch
epochs = 20      # number of passes requested from model.fit

In [38]:
# Augmented mini-batches are drawn from the generator; validation runs on the
# hold-out arrays directly, without augmentation.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)

# Floor division: a trailing partial batch is not counted towards an epoch.
batches_per_epoch = x_train.shape[0] // batch_size

model.fit(
    train_batches,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=1,
    steps_per_epoch=batches_per_epoch,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x311951f10>

In [39]:
# Export the trained Keras model in TensorFlow.js format.
# NOTE(review): the output directory is hard-coded here while `model_path`
# ("new_model") from the configuration cell is unused — confirm which
# destination is intended.
tfjs.converters.save_keras_model(model, "./models/tfjs_model")