# Critical Bug Found

There is a bug where gradients aren't applied the same way when using custom fit function. This could be causing the main struggles with MNIST

In [2]:
# Import Tensorflow and Keras layers
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import Model

import math
import numpy as np
import random
import sys

from utils import idx_load

___

## Dataset

In [4]:
# Training images
training_images = idx_load("datasets/mnist/train-images.idx3-ubyte")
training_images.shape

(60000, 28, 28)

In [5]:
# Training labels
training_labels = idx_load("datasets/mnist/train-labels.idx1-ubyte")
training_labels.shape

(60000,)

In [6]:
# Normalize the datasets
training_images = training_images.reshape(len(training_images), 28*28) / 255.0

___

## Bugged Model

This model is a very minimal implementation of the basic training pattern. This reproduces a critical bug that is preventing the NTask model from properly learning MNIST data

In [41]:
class BugModel(Model):
    
    def train_step(self, data):

        x, y = data
        
        # Forward pass and apply gradients
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.loss(y, y_pred)
            gradients = tape.gradient(loss, self.trainable_variables)
            self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
            
        # Metrics
        for metric in self.metrics:
            metric.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}
    
    
    def fit(self, x_train, y_train, epochs=1, verbose=0, batch_size=32, use_real_fit=True):
        if use_real_fit:
            super(BugModel, self).fit(x_train, y_train, epochs=epochs, verbose=verbose, batch_size=batch_size)
        else:
            for epoch in range(epochs):
                for batch in range(0, len(y_train), batch_size):
                    x = x_train[batch:batch+batch_size]
                    y = y_train[batch:batch+batch_size]
                    self.train_step((x, y))

___

### The Model

In [51]:
def test(x_train, y_train, epochs=1, batch_size=32, verbose=1, use_real_fit=True, seed=5):
    # Set the random seed for all used libraries
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
    # Create the model
    inp = Input((28*28,))
    x = Dense(128, activation="relu")(inp)
    x = Dense(1, activation="sigmoid")(x)
    model = BugModel(inputs=inp, outputs=x)
    
    # Compile the model
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.SGD(1e-4)
    )
    
    # Train the model
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, use_real_fit=use_real_fit, verbose=verbose)
    
    # Calculate and display the accuracy
    result = (np.round(model(x_train)).astype(int).flatten() == y_train).sum()
    print(f"{result}/{len(y_train)}; Accuracy: {100*result/len(y_train):.2f}%")

___

### The Task

In [52]:
# MNIST number is even
x_train = training_images
y_train = np.array([int(i % 2 == 0) for i in training_labels])

In [53]:
# Verify on the first 10 the dataset seems correct...
print(training_labels[:10])
print(y_train[:10])

[5 0 4 1 9 2 1 3 1 4]
[0 1 1 0 0 1 0 0 0 1]


In [58]:
test(x_train, y_train, epochs=10, batch_size=64, verbose=0, use_real_fit=True)

48394/60000; Accuracy: 80.66%


In [57]:
test(x_train, y_train, epochs=10, batch_size=64, verbose=0, use_real_fit=False)

30606/60000; Accuracy: 51.01%
