## Imports

In [4]:
import numpy as np
from PIL import Image
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend to prevent GUI issues.
import matplotlib.pyplot as plt
import time
import os
from tqdm import tqdm


## Image Processing and Dataset Creation 

In [17]:
class BorderDataset:
    """
    Loads and processes the border image, converting it into a dataset
    of normalized coordinates and binary labels.

    Every pixel is labelled by the reference colour it is closest to
    (Euclidean distance in RGB space): 1 when it is nearer to
    ``BELGIUM_PURPLE``, 0 otherwise.

    Attributes set by ``__init__``:
        image_path:  path the image was loaded from (or created at).
        binary_mask: 2-D integer array of 0/1 labels, indexed ``[y, x]``.
        height, width: shape of ``binary_mask``.
        pixels: list of ``((x_norm, y_norm), label)`` tuples in row-major order.
    """
    NETHERLANDS_ORANGE = np.array([255, 165, 0])
    BELGIUM_PURPLE = np.array([138, 43, 226])

    def __init__(self, image_path: str):
        """
        Initializes the dataset by loading and processing the image.

        If ``image_path`` does not exist, a dummy image is generated and
        written to that path instead (see ``_create_dummy_image``).
        """
        self.image_path = image_path
        self.binary_mask = self._create_binary_mask()
        self.height, self.width = self.binary_mask.shape
        self.pixels = self._prepare_pixels()
        print(f"Dataset created from '{self.image_path}' with {len(self.pixels)} points.")

    def _create_binary_mask(self) -> np.ndarray:
        """
        Opens the image and converts it to a 0-1 binary mask.

        Returns:
            Integer array of shape (height, width); 1 where the pixel is
            strictly closer to ``BELGIUM_PURPLE`` than to
            ``NETHERLANDS_ORANGE``, else 0.
        """
        try:
            # convert() returns an independent, fully loaded copy, so the
            # underlying file can be closed as soon as the block exits.
            with Image.open(self.image_path) as source:
                img = source.convert('RGB')
        except FileNotFoundError:
            print(f"Warning: Image file not found. Creating a dummy image at '{self.image_path}'.")
            img = self._create_dummy_image()

        # Work in float so the colour differences can never wrap around in
        # an unsigned 8-bit dtype.
        img_array = np.asarray(img, dtype=float)
        dist_to_orange = np.linalg.norm(img_array - self.NETHERLANDS_ORANGE, axis=2)
        dist_to_purple = np.linalg.norm(img_array - self.BELGIUM_PURPLE, axis=2)
        mask = (dist_to_purple < dist_to_orange).astype(int)
        return mask

    def _create_dummy_image(self) -> "Image.Image":
        """
        Creates and saves a fallback 50x50 dummy image.

        The image is orange with two purple rectangles. It is written to
        ``self.image_path`` (creating the parent directory when needed) and
        also returned.
        """
        dummy_array = np.zeros((50, 50, 3), dtype=np.uint8)
        dummy_array[:, :] = self.NETHERLANDS_ORANGE
        dummy_array[10:25, 10:40] = self.BELGIUM_PURPLE
        dummy_array[30:45, 15:35] = self.BELGIUM_PURPLE
        img = Image.fromarray(dummy_array)

        directory = os.path.dirname(self.image_path)
        if directory:
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(directory, exist_ok=True)

        img.save(self.image_path)
        return img

    def _prepare_pixels(self) -> list:
        """
        Generates a list of ((x, y), label) tuples for each pixel.

        Coordinates are scaled to [0, 1] by dividing the pixel index by
        ``size - 1``. A dimension of size 1 has no extent, so its single
        coordinate is mapped to 0.0 instead of dividing by zero.
        """
        x_span = max(self.width - 1, 1)
        y_span = max(self.height - 1, 1)
        return [
            ((x / x_span, y / y_span), self.binary_mask[y, x])
            for y in range(self.height)
            for x in range(self.width)
        ]

    def get_shuffled_data(self) -> list:
        """
        Returns a randomly shuffled copy of the dataset.

        ``self.pixels`` itself is left in its original order; the shuffle
        uses NumPy's global random state.
        """
        shuffled_pixels = self.pixels.copy()
        np.random.shuffle(shuffled_pixels)
        return shuffled_pixels

class ReLU:
    """Rectified Linear Unit: keeps positive inputs and zeroes the rest."""

    def forward(self, x):
        """Return the element-wise maximum of zero and ``x``."""
        return np.maximum(0, x)

    def backward(self, x):
        """Return the derivative at ``x``: 1.0 where ``x > 0``, else 0.0."""
        is_positive = x > 0
        return is_positive.astype(float)

class Sigmoid:
    """Logistic sigmoid activation."""

    def forward(self, x):
        """Return 1 / (1 + exp(-x)), with ``x`` clipped to [-500, 500] first."""
        # Clipping keeps np.exp from overflowing on extreme inputs.
        bounded = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-bounded))

    def backward(self, x):
        """Return the derivative at ``x``, i.e. s * (1 - s) with s = forward(x)."""
        activated = self.forward(x)
        return activated * (1 - activated)

class Linear:
    """Fully-connected layer followed by an activation, with accumulated gradients."""

    def __init__(self, input_width, output_width, activation):
        """
        Build a layer mapping ``input_width`` features to ``output_width``.

        ``activation`` is a class (not an instance); it is instantiated here
        and must provide ``forward`` and ``backward``.
        """
        # Standard-normal draws scaled by sqrt(2 / fan_in).
        draws = np.random.randn(input_width, output_width)
        self.weights = draws * np.sqrt(2. / input_width)
        self.biases = np.zeros((1, output_width))
        self.activation = activation()
        # Cached by forward() so backward() can reuse them.
        self.input_data = None
        self.z = None
        # Gradients accumulate across backward() calls until they are reset.
        self.grad_weights = np.zeros_like(self.weights)
        self.grad_biases = np.zeros_like(self.biases)

    def forward(self, x):
        """Compute activation(x . W + b), remembering ``x`` and the pre-activation."""
        self.input_data = x
        pre_activation = np.dot(x, self.weights) + self.biases
        self.z = pre_activation
        return self.activation.forward(pre_activation)

    def backward(self, grad_output):
        """
        Back-propagate ``grad_output`` through the layer.

        Adds this pass's weight and bias gradients to the running totals and
        returns the gradient with respect to the layer input.
        """
        local_grad = self.activation.backward(self.z)
        delta = grad_output * local_grad
        self.grad_weights += self.input_data.T.dot(delta)
        self.grad_biases += delta.sum(axis=0, keepdims=True)
        return delta.dot(self.weights.T)

class BCE:
    """Binary cross-entropy loss and its gradient with respect to the predictions."""

    def loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy of ``y_pred`` against ``y_true``."""
        eps = 1e-12
        # Keep predictions strictly inside (0, 1) so both logs stay finite.
        p = np.clip(y_pred, eps, 1 - eps)
        log_likelihood = y_true * np.log(p) + (1 - y_true) * np.log(1 - p)
        return -np.mean(log_likelihood)

    def backward(self, y_true, y_pred):
        """Return d(mean BCE)/d(y_pred), element-wise, using clipped predictions."""
        eps = 1e-12
        p = np.clip(y_pred, eps, 1 - eps)
        residual = p - y_true
        variance = p * (1 - p)
        # Dividing by the element count matches the mean taken in loss().
        return residual / variance / y_true.size

class Model:
    """
    The main neural network model class.

    Wraps an ordered list of layers and a loss function, and provides the
    forward/backward passes, a clipped gradient-descent update, and
    saving/loading of parameters.
    """
    def __init__(self, layers, loss_function='bce'):
        """
        Args:
            layers: ordered list of layer objects; each must expose
                ``forward``, ``backward``, ``weights``, ``biases``,
                ``grad_weights`` and ``grad_biases``.
            loss_function: accepted for interface compatibility only.
                Binary cross-entropy is the sole implemented loss and is
                always used, whatever value is passed here.
        """
        self.layers = layers
        self.loss_fn = BCE()

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, grad_loss):
        """Propagate ``grad_loss`` through the layers in reverse order."""
        for layer in reversed(self.layers):
            grad_loss = layer.backward(grad_loss)

    def train(self, x, y):
        """
        Run one forward and backward pass on the batch ``(x, y)``.

        Gradients are accumulated in the layers; parameters are not changed
        until ``update`` is called. Returns the batch loss.
        """
        y_pred = self.forward(x)
        loss = self.loss_fn.loss(y, y_pred)
        self.backward(self.loss_fn.backward(y, y_pred))
        return loss

    def predict(self, x):
        """Return the model output for ``x`` (same as ``forward``)."""
        return self.forward(x)

    def zero_grad(self):
        """Reset every layer's accumulated gradients to zero, in place."""
        for layer in self.layers:
            layer.grad_weights.fill(0)
            layer.grad_biases.fill(0)

    def update(self, learning_rate, grad_clip_value=1.0):
        """
        Apply one gradient-descent step and clear the gradients.

        Each gradient entry is clipped element-wise to
        ``[-grad_clip_value, grad_clip_value]`` before being applied.
        """
        for layer in self.layers:
            np.clip(layer.grad_weights, -grad_clip_value, grad_clip_value, out=layer.grad_weights)
            np.clip(layer.grad_biases, -grad_clip_value, grad_clip_value, out=layer.grad_biases)
            layer.weights -= learning_rate * layer.grad_weights
            layer.biases -= learning_rate * layer.grad_biases
        self.zero_grad()

    def save_to(self, path):
        """Save all weights and biases to ``path`` as ``w_<i>`` / ``b_<i>`` arrays."""
        params = {f'w_{i}': l.weights for i, l in enumerate(self.layers)}
        params.update({f'b_{i}': l.biases for i, l in enumerate(self.layers)})
        np.savez(path, **params)
        print(f"Model saved to {path}")

    def load_from(self, path):
        """
        Load weights and biases previously written by ``save_to``.

        All layers are validated before any of them is modified, so a
        failed load leaves the model exactly as it was.

        Raises:
            ValueError: if a stored array's shape differs from the
                corresponding layer's current shape.
        """
        # The context manager closes the archive's file handle once the
        # arrays have been read into memory.
        with np.load(path) as data:
            loaded = []
            for i, layer in enumerate(self.layers):
                weights, biases = data[f'w_{i}'], data[f'b_{i}']
                if layer.weights.shape != weights.shape or layer.biases.shape != biases.shape:
                    raise ValueError(f"Architecture mismatch in layer {i}.")
                loaded.append((weights, biases))

        # Commit only after every layer passed validation.
        for layer, (weights, biases) in zip(self.layers, loaded):
            layer.weights, layer.biases = weights, biases
        print(f"Model loaded from {path}")

def positional_encoding(coords, num_frequencies):
    """
    Lifts coordinates into a higher-dimensional sine/cosine feature space.

    For each column of ``coords`` the output holds ``num_frequencies`` sine
    features followed by ``num_frequencies`` cosine features, evaluated at
    ``2 * pi * value * 2**k`` for k = 0 .. num_frequencies - 1. Column
    blocks appear in the same order as the input columns, so the result has
    ``coords.shape[1] * 2 * num_frequencies`` columns.
    """
    exponents = np.linspace(0, num_frequencies-1, num_frequencies)
    frequencies = 2**exponents

    feature_blocks = []
    for column in coords.T:
        # Shape (n_points, num_frequencies): one angle per point/frequency pair.
        angles = 2 * np.pi * column[:, None] * frequencies
        feature_blocks.append(np.sin(angles))
        feature_blocks.append(np.cos(angles))
    return np.concatenate(feature_blocks, axis=1)

def plot_final_summary(history, model, dataset, run_folder, username, num_frequencies):
    """
    Builds one summary figure for a finished run and saves it as a PNG.

    Top row: loss curve, accuracy curve, and a text panel with the final
    statistics. Bottom row: ground-truth mask, thresholded model prediction,
    and their absolute difference. The figure is written to
    ``<run_folder>/final_summary.png``.
    """
    print("Generating final summary plot...")
    plt.close('all')

    rows, cols = dataset.height, dataset.width

    # Evaluate the model on a regular grid covering [0, 1] x [0, 1], one
    # sample per pixel of the dataset mask.
    mesh_x, mesh_y = np.meshgrid(np.linspace(0, 1, cols), np.linspace(0, 1, rows))
    pixel_coords = np.column_stack([mesh_x.ravel(), mesh_y.ravel()])
    encoded_coords = positional_encoding(pixel_coords, num_frequencies)

    probabilities = model.predict(encoded_coords)
    prediction_map = (probabilities > 0.5).astype(int).reshape((rows, cols))
    ground_truth_map = dataset.binary_mask
    error_map = np.abs(ground_truth_map - prediction_map)

    fig = plt.figure(figsize=(20, 12))
    gs = fig.add_gridspec(2, 3, height_ratios=[1, 1.2])
    fig.suptitle(f'Training and Prediction Summary - {username}', fontsize=20)

    # --- Top row: training curves ---
    curve_panels = (
        ('loss', "Average Loss vs. Epochs", "Loss"),
        ('accuracy', "Accuracy vs. Epochs", "Accuracy"),
    )
    for column, (key, title, y_label) in enumerate(curve_panels):
        ax_curve = fig.add_subplot(gs[0, column])
        ax_curve.plot(history[key])
        ax_curve.set_title(title)
        ax_curve.set_xlabel("Epoch")
        ax_curve.set_ylabel(y_label)
        ax_curve.grid(True)

    # --- Top row: final statistics as text ---
    ax_stats = fig.add_subplot(gs[0, 2])
    stats_lines = [
        f"Final Accuracy: {history['accuracy'][-1]:.4f}",
        f"Final Loss: {history['loss'][-1]:.4f}",
        f"Total Epochs: {len(history['loss'])}",
    ]
    ax_stats.text(0.5, 0.5, "\n\n".join(stats_lines), ha='center', va='center', fontsize=14)
    ax_stats.axis('off')
    ax_stats.set_title("Final Stats")

    # --- Bottom row: maps ---
    image_panels = (
        (ground_truth_map, 'viridis', "Ground Truth"),
        (prediction_map, 'viridis', "Model Prediction"),
        (error_map, 'hot', "Error Map"),
    )
    for column, (image, colormap, title) in enumerate(image_panels):
        ax_image = fig.add_subplot(gs[1, column])
        ax_image.imshow(image, cmap=colormap, interpolation='nearest')
        ax_image.set_title(title)
        ax_image.axis('off')

    # Leave room at the top for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.96])

    plot_path = os.path.join(run_folder, "final_summary.png")
    plt.savefig(plot_path)
    print(f"Plot saved to file: {plot_path}")

    plt.show()
    plt.close(fig)


def training_procedure(model, dataset, epochs, batch_size, initial_learning_rate, patience, username, num_frequencies, is_silent=False):
    """
    Runs the mini-batch training loop shared by all tasks.

    ``dataset`` is either a ready-made list of ``((x, y), label)`` items or
    an object offering ``get_shuffled_data()``. Inputs are positionally
    encoded once, then the model is trained for up to ``epochs`` epochs with
    the learning rate halved every 100 epochs. Training stops early when the
    epoch loss has improved by less than 0.5% relative to the loss
    ``patience`` epochs earlier.

    A timestamped folder under ``runs/`` is always created. Unless
    ``is_silent`` is set, progress is printed, the final model is saved
    there, and (for non-list datasets) the summary plot is produced.

    Returns:
        The ``{'loss': [...], 'accuracy': [...]}`` history, or ``None`` if
        the epoch loss became NaN.
    """
    verbose = not is_silent

    if isinstance(dataset, list):
        data = dataset
    else:
        data = dataset.get_shuffled_data()

    X_raw = np.array([sample[0] for sample in data])
    Y = np.array([sample[1] for sample in data]).reshape(-1, 1)
    X_encoded = positional_encoding(X_raw, num_frequencies)
    n_samples = len(data)

    history = {'loss': [], 'accuracy': []}

    run_folder = "runs/" + time.strftime("%Y%m%d-%H%M%S")
    os.makedirs(run_folder, exist_ok=True)
    if verbose:
        print(f"Saving results to {run_folder}")

    learning_rate = initial_learning_rate
    for epoch in range(epochs):
        # Step decay: halve the learning rate at epochs 100, 200, ...
        if epoch > 0 and epoch % 100 == 0:
            learning_rate /= 2
            if verbose:
                print(f"\nLearning rate decayed to {learning_rate}\n")

        epoch_loss = 0
        # Fresh sample order every epoch.
        order = np.random.permutation(n_samples)
        X_shuffled = X_encoded[order]
        Y_shuffled = Y[order]

        batch_starts = range(0, n_samples, batch_size)
        if verbose:
            batch_starts = tqdm(batch_starts, desc=f"Epoch {epoch+1}/{epochs} (LR: {learning_rate})")

        for start in batch_starts:
            stop = start + batch_size
            batch_X = X_shuffled[start:stop]
            batch_Y = Y_shuffled[start:stop]
            loss = model.train(batch_X, batch_Y)
            model.update(learning_rate)
            # Weight by batch size so a short final batch is averaged correctly.
            epoch_loss += loss * batch_X.shape[0]
            if verbose:
                batch_starts.set_postfix(loss=f"{loss:.4f}")

        avg_epoch_loss = epoch_loss / n_samples

        if np.isnan(avg_epoch_loss):
            print("\nCRITICAL ERROR: Loss has become NaN. Training cannot continue.")
            return None

        # Accuracy is measured on the full training set after the epoch.
        predictions = (model.predict(X_encoded) > 0.5).astype(int)
        accuracy = np.mean(predictions == Y)

        history['loss'].append(avg_epoch_loss)
        history['accuracy'].append(accuracy)

        epoch_summary = f"Epoch {epoch+1}/{epochs} | Avg Loss: {avg_epoch_loss:.4f} | Accuracy: {accuracy:.4f}"
        on_report_epoch = (epoch + 1) % 5 == 0
        if verbose and (on_report_epoch or epoch == epochs - 1):
            print(epoch_summary)

        # Early stopping: require at least a 0.5% relative improvement over
        # the loss recorded `patience` epochs ago.
        if epoch >= patience and history['loss'][-1] >= (1.0 - 0.005) * history['loss'][-1 - patience]:
            if verbose:
                print(f"Early stopping triggered at epoch {epoch+1}.")
                if not on_report_epoch:
                    print(f"Final {epoch_summary}")
            break

    if verbose:
        print("\nTraining complete. Saving model...")
        model.save_to(os.path.join(run_folder, "final_model.npz"))
        print("Model saved. Now preparing final summary plot...")
        if not isinstance(dataset, list):
            plot_final_summary(history, model, dataset, run_folder, username, num_frequencies)

    return history

# Runs as soon as the cell is executed. Everything assigned below
# (border_dataset, model, history, ...) lives in the notebook's global scope
# and is therefore available to later cells.

print("Executing Section 1.1: Dataset Creation")
image_file = "Dataset/border.png"
border_dataset = BorderDataset(image_file)

print("\nExecuting Section 1.2: Main Model Training")

num_frequencies = 10
# Two coordinates, each encoded as a sine and a cosine per frequency.
input_dim = 2 * 2 * num_frequencies

# Two hidden ReLU layers of width 256 and a single sigmoid output unit.
model = Model([
    Linear(fan_in, fan_out, activation)
    for fan_in, fan_out, activation in (
        (input_dim, 256, ReLU),
        (256, 256, ReLU),
        (256, 1, Sigmoid),
    )
])

print("Model created. Starting training on border map...")
# Assigning the return value keeps the notebook from echoing the history dict.
history = training_procedure(
    model=model,
    dataset=border_dataset,
    num_frequencies=num_frequencies,
    epochs=250,
    batch_size=128,
    initial_learning_rate=0.01,
    patience=100,
    username="sudershan.sarraf",
    is_silent=False,
)



Executing Section 1.1: Dataset Creation
Dataset created from 'Dataset/border.png' with 2500 points.

Executing Section 1.2: Main Model Training
Model created. Starting training on border map...
Saving results to runs/20251013-175009


Epoch 1/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 119.61it/s, loss=0.4729]
Epoch 2/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 248.47it/s, loss=0.4900]
Epoch 3/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 128.60it/s, loss=0.4701]
Epoch 4/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 224.35it/s, loss=0.4680]
Epoch 5/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 194.72it/s, loss=0.4201]


Epoch 5/250 | Avg Loss: 0.4446 | Accuracy: 0.8004


Epoch 6/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 103.74it/s, loss=0.3226]
Epoch 7/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 198.21it/s, loss=0.4635]
Epoch 8/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 171.52it/s, loss=0.4122]
Epoch 9/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 212.93it/s, loss=0.3992]
Epoch 10/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 215.67it/s, loss=0.3172]


Epoch 10/250 | Avg Loss: 0.3987 | Accuracy: 0.8164


Epoch 11/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 164.68it/s, loss=0.4421]
Epoch 12/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 241.48it/s, loss=0.3959]
Epoch 13/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 182.83it/s, loss=0.4175]
Epoch 14/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 212.02it/s, loss=0.4112]
Epoch 15/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 210.04it/s, loss=0.3997]


Epoch 15/250 | Avg Loss: 0.3712 | Accuracy: 0.8288


Epoch 16/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 188.05it/s, loss=0.3238]
Epoch 17/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 120.87it/s, loss=0.3705]
Epoch 18/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 100.09it/s, loss=0.4412]
Epoch 19/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 64.95it/s, loss=0.3283]
Epoch 20/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 38.99it/s, loss=0.2899]


Epoch 20/250 | Avg Loss: 0.3533 | Accuracy: 0.8400


Epoch 21/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 231.91it/s, loss=0.3651]
Epoch 22/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 168.44it/s, loss=0.3134]
Epoch 23/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 218.67it/s, loss=0.3493]
Epoch 24/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 256.36it/s, loss=0.3758]
Epoch 25/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 236.48it/s, loss=0.3432]


Epoch 25/250 | Avg Loss: 0.3402 | Accuracy: 0.8488


Epoch 26/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 218.99it/s, loss=0.2717]
Epoch 27/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 221.56it/s, loss=0.3948]
Epoch 28/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 334.89it/s, loss=0.2400]
Epoch 29/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 206.74it/s, loss=0.3337]
Epoch 30/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 228.57it/s, loss=0.3313]


Epoch 30/250 | Avg Loss: 0.3295 | Accuracy: 0.8556


Epoch 31/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 204.68it/s, loss=0.2635]
Epoch 32/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 228.98it/s, loss=0.3332]
Epoch 33/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 288.96it/s, loss=0.3000]
Epoch 34/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 210.38it/s, loss=0.3447]
Epoch 35/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 256.21it/s, loss=0.3867]


Epoch 35/250 | Avg Loss: 0.3204 | Accuracy: 0.8588


Epoch 36/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 170.29it/s, loss=0.4026]
Epoch 37/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 239.88it/s, loss=0.2874]
Epoch 38/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 247.46it/s, loss=0.4362]
Epoch 39/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 201.70it/s, loss=0.4223]
Epoch 40/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 237.08it/s, loss=0.3239]


Epoch 40/250 | Avg Loss: 0.3128 | Accuracy: 0.8696


Epoch 41/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 274.53it/s, loss=0.2458]
Epoch 42/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 207.28it/s, loss=0.3532]
Epoch 43/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 264.13it/s, loss=0.2889]
Epoch 44/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 234.96it/s, loss=0.2479]
Epoch 45/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 247.72it/s, loss=0.3198]


Epoch 45/250 | Avg Loss: 0.3055 | Accuracy: 0.8752


Epoch 46/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 217.09it/s, loss=0.3133]
Epoch 47/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 243.23it/s, loss=0.2848]
Epoch 48/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 270.69it/s, loss=0.2364]
Epoch 49/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 223.40it/s, loss=0.2468]
Epoch 50/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 239.41it/s, loss=0.2803]


Epoch 50/250 | Avg Loss: 0.2992 | Accuracy: 0.8728


Epoch 51/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 230.99it/s, loss=0.2816]
Epoch 52/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 261.83it/s, loss=0.1871]
Epoch 53/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 245.76it/s, loss=0.2066]
Epoch 54/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 156.67it/s, loss=0.2955]
Epoch 55/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 261.07it/s, loss=0.3253]


Epoch 55/250 | Avg Loss: 0.2924 | Accuracy: 0.8796


Epoch 56/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 205.09it/s, loss=0.3034]
Epoch 57/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 226.56it/s, loss=0.2602]
Epoch 58/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 196.69it/s, loss=0.3574]
Epoch 59/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 166.44it/s, loss=0.2842]
Epoch 60/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 251.37it/s, loss=0.3792]


Epoch 60/250 | Avg Loss: 0.2863 | Accuracy: 0.8828


Epoch 61/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 234.02it/s, loss=0.1837]
Epoch 62/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 230.32it/s, loss=0.1896]
Epoch 63/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 262.36it/s, loss=0.2632]
Epoch 64/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 216.23it/s, loss=0.2794]
Epoch 65/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 257.41it/s, loss=0.2460]


Epoch 65/250 | Avg Loss: 0.2802 | Accuracy: 0.8876


Epoch 66/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 209.08it/s, loss=0.2203]
Epoch 67/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 206.79it/s, loss=0.2772]
Epoch 68/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 216.64it/s, loss=0.2815]
Epoch 69/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 212.53it/s, loss=0.2594]
Epoch 70/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 251.12it/s, loss=0.2898]


Epoch 70/250 | Avg Loss: 0.2743 | Accuracy: 0.8896


Epoch 71/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 133.91it/s, loss=0.1842]
Epoch 72/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 243.77it/s, loss=0.2874]
Epoch 73/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 247.43it/s, loss=0.2471]
Epoch 74/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 233.06it/s, loss=0.2427]
Epoch 75/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 241.08it/s, loss=0.3260]


Epoch 75/250 | Avg Loss: 0.2685 | Accuracy: 0.8956


Epoch 76/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 202.07it/s, loss=0.2845]
Epoch 77/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 220.27it/s, loss=0.3482]
Epoch 78/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 211.35it/s, loss=0.2103]
Epoch 79/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 178.53it/s, loss=0.2982]
Epoch 80/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 211.77it/s, loss=0.2859]


Epoch 80/250 | Avg Loss: 0.2629 | Accuracy: 0.8956


Epoch 81/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 212.55it/s, loss=0.2836]
Epoch 82/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 223.53it/s, loss=0.2286]
Epoch 83/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 224.51it/s, loss=0.2837]
Epoch 84/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 279.39it/s, loss=0.3053]
Epoch 85/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 233.85it/s, loss=0.2849]


Epoch 85/250 | Avg Loss: 0.2573 | Accuracy: 0.8996


Epoch 86/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 212.30it/s, loss=0.2878]
Epoch 87/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 233.47it/s, loss=0.2008]
Epoch 88/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 210.18it/s, loss=0.2962]
Epoch 89/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 158.12it/s, loss=0.2645]
Epoch 90/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 214.61it/s, loss=0.2825]


Epoch 90/250 | Avg Loss: 0.2520 | Accuracy: 0.9024


Epoch 91/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 184.35it/s, loss=0.2498]
Epoch 92/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 224.34it/s, loss=0.2163]
Epoch 93/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 244.83it/s, loss=0.3193]
Epoch 94/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 216.67it/s, loss=0.2842]
Epoch 95/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 189.60it/s, loss=0.2761]


Epoch 95/250 | Avg Loss: 0.2463 | Accuracy: 0.9052


Epoch 96/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 196.83it/s, loss=0.2860]
Epoch 97/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 210.70it/s, loss=0.2048]
Epoch 98/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 163.99it/s, loss=0.2593]
Epoch 99/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 224.96it/s, loss=0.1926]
Epoch 100/250 (LR: 0.01): 100%|██████████| 20/20 [00:00<00:00, 239.38it/s, loss=0.1725]


Epoch 100/250 | Avg Loss: 0.2413 | Accuracy: 0.9088

Learning rate decayed to 0.005



Epoch 101/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 195.39it/s, loss=0.1869]
Epoch 102/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 210.74it/s, loss=0.1936]
Epoch 103/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 226.33it/s, loss=0.2099]
Epoch 104/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 211.26it/s, loss=0.2357]
Epoch 105/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 119.63it/s, loss=0.2198]


Epoch 105/250 | Avg Loss: 0.2377 | Accuracy: 0.9100


Epoch 106/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 192.64it/s, loss=0.2682]
Epoch 107/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 223.04it/s, loss=0.1456]
Epoch 108/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 244.07it/s, loss=0.2448]
Epoch 109/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 191.66it/s, loss=0.1311]
Epoch 110/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 219.73it/s, loss=0.2568]


Epoch 110/250 | Avg Loss: 0.2352 | Accuracy: 0.9120


Epoch 111/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 205.93it/s, loss=0.2906]
Epoch 112/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 204.70it/s, loss=0.2939]
Epoch 113/250 (LR: 0.005): 100%|██████████| 20/20 [-1:59:58<00:00, -8.36it/s, loss=0.1444]
Epoch 114/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 202.89it/s, loss=0.2713]
Epoch 115/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 204.90it/s, loss=0.2248]


Epoch 115/250 | Avg Loss: 0.2328 | Accuracy: 0.9140


Epoch 116/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 181.57it/s, loss=0.2248]
Epoch 117/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 210.28it/s, loss=0.2150]
Epoch 118/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 226.32it/s, loss=0.2847]
Epoch 119/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 184.63it/s, loss=0.2716]
Epoch 120/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 257.31it/s, loss=0.1931]


Epoch 120/250 | Avg Loss: 0.2300 | Accuracy: 0.9156


Epoch 121/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 258.09it/s, loss=0.2659]
Epoch 122/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 145.12it/s, loss=0.2231]
Epoch 123/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 237.18it/s, loss=0.2764]
Epoch 124/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 190.44it/s, loss=0.2858]
Epoch 125/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 178.34it/s, loss=0.2098]


Epoch 125/250 | Avg Loss: 0.2275 | Accuracy: 0.9192


Epoch 126/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 185.37it/s, loss=0.2497]
Epoch 127/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 231.05it/s, loss=0.2617]
Epoch 128/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 203.26it/s, loss=0.1911]
Epoch 129/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 183.13it/s, loss=0.2231]
Epoch 130/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 239.10it/s, loss=0.2310]


Epoch 130/250 | Avg Loss: 0.2251 | Accuracy: 0.9168


Epoch 131/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 205.39it/s, loss=0.2063]
Epoch 132/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 273.76it/s, loss=0.2278]
Epoch 133/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 234.25it/s, loss=0.2033]
Epoch 134/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 206.26it/s, loss=0.2438]
Epoch 135/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 186.09it/s, loss=0.1893]


Epoch 135/250 | Avg Loss: 0.2227 | Accuracy: 0.9196


Epoch 136/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 223.24it/s, loss=0.1938]
Epoch 137/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 105.28it/s, loss=0.1485]
Epoch 138/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 251.93it/s, loss=0.1524]
Epoch 139/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 153.67it/s, loss=0.1543]
Epoch 140/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 238.26it/s, loss=0.3078]


Epoch 140/250 | Avg Loss: 0.2202 | Accuracy: 0.9208


Epoch 141/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 225.62it/s, loss=0.1767]
Epoch 142/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 226.10it/s, loss=0.1872]
Epoch 143/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 114.65it/s, loss=0.1526]
Epoch 144/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 194.49it/s, loss=0.2140]
Epoch 145/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 170.12it/s, loss=0.1628]


Epoch 145/250 | Avg Loss: 0.2177 | Accuracy: 0.9224


Epoch 146/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 209.48it/s, loss=0.2047]
Epoch 147/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 227.95it/s, loss=0.1885]
Epoch 148/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 215.12it/s, loss=0.1350]
Epoch 149/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 201.26it/s, loss=0.3558]
Epoch 150/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 207.96it/s, loss=0.2993]


Epoch 150/250 | Avg Loss: 0.2153 | Accuracy: 0.9240


Epoch 151/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 161.68it/s, loss=0.2749]
Epoch 152/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 188.11it/s, loss=0.1387]
Epoch 153/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 223.82it/s, loss=0.2113]
Epoch 154/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 143.99it/s, loss=0.2573]
Epoch 155/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 201.37it/s, loss=0.2127]


Epoch 155/250 | Avg Loss: 0.2130 | Accuracy: 0.9248


Epoch 156/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 189.75it/s, loss=0.2023]
Epoch 157/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 211.43it/s, loss=0.2018]
Epoch 158/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 239.09it/s, loss=0.1258]
Epoch 159/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 211.16it/s, loss=0.2696]
Epoch 160/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 216.45it/s, loss=0.2022]


Epoch 160/250 | Avg Loss: 0.2105 | Accuracy: 0.9236


Epoch 161/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 187.65it/s, loss=0.2814]
Epoch 162/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 255.06it/s, loss=0.1662]
Epoch 163/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 141.37it/s, loss=0.2603]
Epoch 164/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 194.19it/s, loss=0.3104]
Epoch 165/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 225.97it/s, loss=0.1852]


Epoch 165/250 | Avg Loss: 0.2083 | Accuracy: 0.9272


Epoch 166/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 216.54it/s, loss=0.1592]
Epoch 167/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 227.21it/s, loss=0.2184]
Epoch 168/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 203.51it/s, loss=0.2036]
Epoch 169/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 180.86it/s, loss=0.1858]
Epoch 170/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 195.62it/s, loss=0.1385]


Epoch 170/250 | Avg Loss: 0.2059 | Accuracy: 0.9264


Epoch 171/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 151.78it/s, loss=0.2645]
Epoch 172/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 243.60it/s, loss=0.2440]
Epoch 173/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 221.52it/s, loss=0.1759]
Epoch 174/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 221.99it/s, loss=0.1757]
Epoch 175/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 227.84it/s, loss=0.2384]


Epoch 175/250 | Avg Loss: 0.2036 | Accuracy: 0.9288


Epoch 176/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 197.38it/s, loss=0.1698]
Epoch 177/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 200.23it/s, loss=0.2094]
Epoch 178/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 233.85it/s, loss=0.1821]
Epoch 179/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 221.79it/s, loss=0.2915]
Epoch 180/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 232.14it/s, loss=0.2561]


Epoch 180/250 | Avg Loss: 0.2012 | Accuracy: 0.9300


Epoch 181/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 203.29it/s, loss=0.2985]
Epoch 182/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 205.13it/s, loss=0.1814]
Epoch 183/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 264.16it/s, loss=0.3397]
Epoch 184/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 206.01it/s, loss=0.1473]
Epoch 185/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 241.53it/s, loss=0.2204]


Epoch 185/250 | Avg Loss: 0.1991 | Accuracy: 0.9312


Epoch 186/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 258.96it/s, loss=0.1224]
Epoch 187/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 233.04it/s, loss=0.1878]
Epoch 188/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 141.11it/s, loss=0.2847]
Epoch 189/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 228.40it/s, loss=0.1270]
Epoch 190/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 170.29it/s, loss=0.1567]


Epoch 190/250 | Avg Loss: 0.1968 | Accuracy: 0.9320


Epoch 191/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 175.41it/s, loss=0.2000]
Epoch 192/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 224.37it/s, loss=0.1669]
Epoch 193/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 209.53it/s, loss=0.1722]
Epoch 194/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 256.72it/s, loss=0.1942]
Epoch 195/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 242.80it/s, loss=0.2048]


Epoch 195/250 | Avg Loss: 0.1947 | Accuracy: 0.9332


Epoch 196/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 182.34it/s, loss=0.1582]
Epoch 197/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 264.20it/s, loss=0.2320]
Epoch 198/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 243.15it/s, loss=0.1405]
Epoch 199/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 177.65it/s, loss=0.1670]
Epoch 200/250 (LR: 0.005): 100%|██████████| 20/20 [00:00<00:00, 229.92it/s, loss=0.3136]


Epoch 200/250 | Avg Loss: 0.1923 | Accuracy: 0.9340

Learning rate decayed to 0.0025



Epoch 201/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 200.75it/s, loss=0.2903]
Epoch 202/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 223.84it/s, loss=0.1879]
Epoch 203/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 219.83it/s, loss=0.1801]
Epoch 204/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 157.05it/s, loss=0.1720]
Epoch 205/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 241.67it/s, loss=0.2927]


Epoch 205/250 | Avg Loss: 0.1908 | Accuracy: 0.9344


Epoch 206/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 233.61it/s, loss=0.1291]
Epoch 207/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 208.10it/s, loss=0.1958]
Epoch 208/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 226.70it/s, loss=0.2002]
Epoch 209/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 188.20it/s, loss=0.1870]
Epoch 210/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 204.06it/s, loss=0.1743]


Epoch 210/250 | Avg Loss: 0.1897 | Accuracy: 0.9356


Epoch 211/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 179.24it/s, loss=0.1852]
Epoch 212/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 247.62it/s, loss=0.2082]
Epoch 213/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 210.17it/s, loss=0.1704]
Epoch 214/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 229.45it/s, loss=0.2048]
Epoch 215/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 201.17it/s, loss=0.2182]


Epoch 215/250 | Avg Loss: 0.1886 | Accuracy: 0.9368


Epoch 216/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 220.85it/s, loss=0.1888]
Epoch 217/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 249.99it/s, loss=0.2592]
Epoch 218/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 211.96it/s, loss=0.1460]
Epoch 219/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 235.52it/s, loss=0.1752]
Epoch 220/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 239.25it/s, loss=0.1863]


Epoch 220/250 | Avg Loss: 0.1875 | Accuracy: 0.9376


Epoch 221/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 171.37it/s, loss=0.1546]
Epoch 222/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 253.03it/s, loss=0.2340]
Epoch 223/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 240.41it/s, loss=0.2122]
Epoch 224/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 241.48it/s, loss=0.2142]
Epoch 225/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 242.29it/s, loss=0.1896]


Epoch 225/250 | Avg Loss: 0.1864 | Accuracy: 0.9372


Epoch 226/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 198.53it/s, loss=0.1876]
Epoch 227/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 232.05it/s, loss=0.1891]
Epoch 228/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 233.44it/s, loss=0.1892]
Epoch 229/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 211.08it/s, loss=0.1259]
Epoch 230/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 217.24it/s, loss=0.2236]


Epoch 230/250 | Avg Loss: 0.1854 | Accuracy: 0.9384


Epoch 231/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 191.54it/s, loss=0.2166]
Epoch 232/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 285.32it/s, loss=0.1842]
Epoch 233/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 243.86it/s, loss=0.2001]
Epoch 234/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 256.07it/s, loss=0.2016]
Epoch 235/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 204.35it/s, loss=0.1692]


Epoch 235/250 | Avg Loss: 0.1844 | Accuracy: 0.9392


Epoch 236/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 191.72it/s, loss=0.1833]
Epoch 237/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 236.73it/s, loss=0.1398]
Epoch 238/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 202.06it/s, loss=0.2237]
Epoch 239/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 195.38it/s, loss=0.1567]
Epoch 240/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 232.30it/s, loss=0.2034]


Epoch 240/250 | Avg Loss: 0.1834 | Accuracy: 0.9392


Epoch 241/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 197.14it/s, loss=0.1911]
Epoch 242/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 270.70it/s, loss=0.2402]
Epoch 243/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 216.05it/s, loss=0.1597]
Epoch 244/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 225.89it/s, loss=0.1068]
Epoch 245/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 198.73it/s, loss=0.2199]


Epoch 245/250 | Avg Loss: 0.1822 | Accuracy: 0.9404


Epoch 246/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 202.86it/s, loss=0.1411]
Epoch 247/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 212.06it/s, loss=0.2452]
Epoch 248/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 174.33it/s, loss=0.2307]
Epoch 249/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 230.40it/s, loss=0.1682]
Epoch 250/250 (LR: 0.0025): 100%|██████████| 20/20 [00:00<00:00, 202.07it/s, loss=0.1481]


Epoch 250/250 | Avg Loss: 0.1812 | Accuracy: 0.9416

Training complete. Saving model...
Model saved to runs/20251013-175009/final_model.npz
Model saved. Now preparing final summary plot...
Generating final summary plot...
Plot saved to file: runs/20251013-175009/final_summary.png


  plt.show()


In [19]:
import numpy as np
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import time
import os
from tqdm import tqdm

# This script is fully self-contained and can be run in a new cell.
# It includes all necessary class and function definitions.

class BorderDataset:
    """
    Loads and processes the border image, converting it into a dataset
    of normalized coordinates and binary labels.

    Every pixel becomes one sample ``((x, y), label)``: ``x`` and ``y`` are the
    pixel's column/row scaled to [0, 1], and ``label`` is 1 when the pixel
    colour is closer to BELGIUM_PURPLE than to NETHERLANDS_ORANGE, else 0.
    """
    NETHERLANDS_ORANGE = np.array([255, 165, 0])
    BELGIUM_PURPLE = np.array([138, 43, 226])

    def __init__(self, image_path: str):
        """
        Initializes the dataset by loading and processing the image.

        Args:
            image_path: Path of the image to load. If the file does not
                exist, a dummy image is generated and saved at this path.
        """
        self.image_path = image_path
        self.binary_mask = self._create_binary_mask()
        self.height, self.width = self.binary_mask.shape
        self.pixels = self._prepare_pixels()
        print(f"Dataset created from '{self.image_path}' with {len(self.pixels)} points.")

    def _create_binary_mask(self) -> np.ndarray:
        """
        Opens the image and converts it to a 0-1 binary mask.

        Returns:
            A (height, width) integer array holding 1 where the pixel is
            nearer (Euclidean RGB distance) to purple than to orange.
        """
        try:
            img = Image.open(self.image_path).convert('RGB')
        except FileNotFoundError:
            print(f"Warning: Image file not found. Creating a dummy image at '{self.image_path}'.")
            img = self._create_dummy_image()

        img_array = np.array(img)
        # Subtracting the int64 colour constants promotes the uint8 pixels, so
        # the differences cannot wrap around.
        dist_to_orange = np.linalg.norm(img_array - self.NETHERLANDS_ORANGE, axis=2)
        dist_to_purple = np.linalg.norm(img_array - self.BELGIUM_PURPLE, axis=2)
        return (dist_to_purple < dist_to_orange).astype(int)

    def _create_dummy_image(self) -> "Image.Image":
        """
        Creates and saves a fallback 50x50 dummy image.

        The image is orange with two purple rectangles. It is written to
        ``self.image_path`` (creating the parent directory if needed) and
        also returned.
        """
        dummy_array = np.zeros((50, 50, 3), dtype=np.uint8)
        dummy_array[:, :] = self.NETHERLANDS_ORANGE
        dummy_array[10:25, 10:40] = self.BELGIUM_PURPLE
        dummy_array[30:45, 15:35] = self.BELGIUM_PURPLE
        img = Image.fromarray(dummy_array)

        directory = os.path.dirname(self.image_path)
        if directory:
            # exist_ok avoids the check-then-create race of a separate exists() test.
            os.makedirs(directory, exist_ok=True)

        img.save(self.image_path)
        return img

    def _prepare_pixels(self) -> list:
        """
        Flattens the mask into a row-major list of ``((x, y), label)`` samples.

        Coordinates are scaled so the first row/column maps to 0.0 and the
        last to 1.0.
        """
        # A dimension that is a single pixel wide has no extent; clamp the
        # divisor so that lone row/column maps to 0.0 instead of dividing by zero.
        x_span = max(self.width - 1, 1)
        y_span = max(self.height - 1, 1)

        pixel_data = []
        for y in range(self.height):
            normalized_y = y / y_span
            for x in range(self.width):
                pixel_data.append(((x / x_span, normalized_y), self.binary_mask[y, x]))
        return pixel_data

    def get_shuffled_data(self) -> list:
        """
        Returns a randomly shuffled copy of the samples.

        The stored ``self.pixels`` list keeps its original order.
        """
        shuffled_pixels = self.pixels.copy()
        np.random.shuffle(shuffled_pixels)
        return shuffled_pixels

class ReLU:
    """Rectified Linear Unit activation: passes positives, zeroes the rest."""

    def forward(self, x):
        """Return max(0, x) element-wise."""
        return np.maximum(0, x)

    def backward(self, x):
        """Return the element-wise derivative at x: 1.0 where x > 0, else 0.0."""
        is_active = np.greater(x, 0)
        return is_active.astype(float)

class Sigmoid:
    """Logistic sigmoid activation."""

    def forward(self, x):
        """Return 1 / (1 + exp(-x)) element-wise."""
        # Inputs are clamped to [-500, 500] before exponentiating so np.exp
        # cannot overflow.
        bounded = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-bounded))

    def backward(self, x):
        """Return the element-wise derivative s(x) * (1 - s(x))."""
        activation = self.forward(x)
        return activation * (1 - activation)

class Linear:
    """Dense layer: an affine map followed by an element-wise activation.

    Gradients are accumulated into ``grad_weights`` / ``grad_biases`` on every
    ``backward`` call; they are only reset when something zeroes them.
    """

    def __init__(self, input_width, output_width, activation):
        """
        Args:
            input_width: Number of input features.
            output_width: Number of output units.
            activation: Activation class (not instance); it is instantiated
                here and must expose ``forward`` and ``backward``.
        """
        # Weights are standard-normal draws scaled by sqrt(2 / input_width).
        init_scale = np.sqrt(2. / input_width)
        self.weights = np.random.randn(input_width, output_width) * init_scale
        self.biases = np.zeros((1, output_width))
        self.activation = activation()

        # Cached by forward() for use in backward().
        self.input_data = None
        self.z = None

        self.grad_weights = np.zeros_like(self.weights)
        self.grad_biases = np.zeros_like(self.biases)

    def forward(self, x):
        """Compute activation(x · W + b), caching x and the pre-activation."""
        self.input_data = x
        pre_activation = np.dot(x, self.weights) + self.biases
        self.z = pre_activation
        return self.activation.forward(pre_activation)

    def backward(self, grad_output):
        """Accumulate parameter gradients and return the gradient w.r.t. the input."""
        local_slope = self.activation.backward(self.z)
        delta = grad_output * local_slope

        self.grad_weights += np.dot(self.input_data.T, delta)
        self.grad_biases += delta.sum(axis=0, keepdims=True)

        grad_input = np.dot(delta, self.weights.T)
        return grad_input

class BCE:
    """Binary cross-entropy loss, averaged over all elements."""

    def loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy between y_true and y_pred."""
        # Keep probabilities strictly inside (0, 1) so the logs stay finite.
        prob = np.clip(y_pred, 1e-12, 1 - 1e-12)
        log_likelihood = y_true * np.log(prob) + (1 - y_true) * np.log(1 - prob)
        return -np.mean(log_likelihood)

    def backward(self, y_true, y_pred):
        """Return the gradient of the mean loss with respect to y_pred."""
        prob = np.clip(y_pred, 1e-12, 1 - 1e-12)
        residual = prob - y_true
        spread = prob * (1 - prob)
        # Dividing by y_true.size matches the mean taken in loss().
        return residual / spread / y_true.size

class Model:
    """Sequential network: an ordered stack of layers plus a loss function."""

    def __init__(self, layers, loss_function='bce'):
        """
        Args:
            layers: Ordered list of layer objects; each is used through
                ``forward``/``backward`` and its weight/gradient arrays.
            loss_function: Accepted for interface purposes but not consulted;
                binary cross-entropy is always used.
        """
        self.loss_fn = BCE()
        self.layers = layers

    def forward(self, x):
        """Feed x through every layer in order and return the final output."""
        signal = x
        for layer in self.layers:
            signal = layer.forward(signal)
        return signal

    def backward(self, grad_loss):
        """Propagate the loss gradient from the last layer back to the first."""
        upstream = grad_loss
        for layer in self.layers[::-1]:
            upstream = layer.backward(upstream)

    def train(self, x, y):
        """Run one forward/backward pass on (x, y) and return the loss.

        Parameters are not modified here; call ``update`` to apply the
        accumulated gradients.
        """
        prediction = self.forward(x)
        batch_loss = self.loss_fn.loss(y, prediction)
        loss_gradient = self.loss_fn.backward(y, prediction)
        self.backward(loss_gradient)
        return batch_loss

    def predict(self, x):
        """Return the network output for x (same as ``forward``)."""
        return self.forward(x)

    def zero_grad(self):
        """Reset every layer's accumulated gradients to zero in place."""
        for layer in self.layers:
            layer.grad_weights[...] = 0
            layer.grad_biases[...] = 0

    def update(self, learning_rate, grad_clip_value=1.0):
        """Apply one gradient-descent step, then clear the gradients.

        Each gradient entry is clipped element-wise to
        [-grad_clip_value, grad_clip_value] before being applied.
        """
        for layer in self.layers:
            parameter_pairs = (
                (layer.weights, layer.grad_weights),
                (layer.biases, layer.grad_biases),
            )
            for params, grads in parameter_pairs:
                # Both operations write into the existing arrays.
                np.clip(grads, -grad_clip_value, grad_clip_value, out=grads)
                np.subtract(params, learning_rate * grads, out=params)
        self.zero_grad()

def positional_encoding(coords, num_frequencies):
    """Lift coordinates into sine/cosine features at power-of-two frequencies.

    For each coordinate column, the output holds ``num_frequencies`` sine
    features followed by ``num_frequencies`` cosine features, evaluated at
    frequencies 2**0 ... 2**(num_frequencies - 1); column groups follow the
    order of the columns in ``coords``.
    """
    bands = 2**np.linspace(0, num_frequencies-1, num_frequencies)
    feature_groups = []
    for axis_values in coords.T:
        sin_part = np.sin(2 * np.pi * axis_values[:, None] * bands)
        cos_part = np.cos(2 * np.pi * axis_values[:, None] * bands)
        feature_groups.append(sin_part)
        feature_groups.append(cos_part)
    return np.hstack(feature_groups)

def training_procedure(model, dataset, epochs, batch_size, initial_learning_rate, patience, username, num_frequencies, is_silent=False):
    """Mini-batch training loop on positionally-encoded coordinates.

    Args:
        model: Object exposing ``train``, ``update`` and ``predict``.
        dataset: Either a list of ``(coords, label)`` samples, or an object
            whose ``get_shuffled_data()`` returns such a list.
        epochs: Maximum number of epochs.
        batch_size: Samples per mini-batch.
        initial_learning_rate: Step size passed to ``model.update`` for every
            batch (it is not changed during training here).
        patience: Look-back window, in epochs, for early stopping.
        username: Not used inside this function.
        num_frequencies: Frequency count forwarded to ``positional_encoding``.
        is_silent: When True, the early-stopping message is suppressed.

    Returns:
        ``{'loss': [...], 'accuracy': [...]}`` with one entry per completed
        epoch, or None if the average epoch loss becomes NaN.
    """
    samples = dataset if isinstance(dataset, list) else dataset.get_shuffled_data()
    coords = np.array([sample[0] for sample in samples])
    targets = np.array([sample[1] for sample in samples]).reshape(-1, 1)

    features = positional_encoding(coords, num_frequencies)
    n_samples = len(samples)

    loss_log, accuracy_log = [], []
    history = {'loss': loss_log, 'accuracy': accuracy_log}

    for epoch in range(epochs):
        order = np.random.permutation(n_samples)
        epoch_features, epoch_targets = features[order], targets[order]

        # Sum of per-batch mean losses weighted by batch size, so the epoch
        # average is exact even when the last batch is short.
        weighted_loss = 0
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            batch_X = epoch_features[start:stop]
            batch_Y = epoch_targets[start:stop]
            batch_loss = model.train(batch_X, batch_Y)
            model.update(initial_learning_rate)
            weighted_loss += batch_loss * batch_X.shape[0]

        avg_epoch_loss = weighted_loss / n_samples

        if np.isnan(avg_epoch_loss):
            print("\nCRITICAL ERROR: Loss has become NaN.")
            return None

        # Accuracy is measured on the full (unshuffled) data after the epoch.
        hard_predictions = (model.predict(features) > 0.5).astype(int)
        accuracy = np.mean(hard_predictions == targets)

        loss_log.append(avg_epoch_loss)
        accuracy_log.append(accuracy)

        if epoch < patience:
            continue

        # Stop unless the loss improved by more than 0.5% relative to the
        # value recorded `patience` epochs earlier.
        reference_loss = loss_log[-1 - patience]
        if loss_log[-1] >= (1.0 - 0.005) * reference_loss:
            if not is_silent:
                print(f"Early stopping triggered at epoch {epoch+1}.")
            break
    return history

def xor_training_procedure(model, dataset, epochs, batch_size, learning_rate, patience):
    """Mini-batch training loop on raw inputs (no positional encoding).

    Args:
        model: Object exposing ``train``, ``update`` and ``predict``.
        dataset: List of ``(inputs, label)`` samples.
        epochs: Maximum number of epochs.
        batch_size: Samples per mini-batch.
        learning_rate: Step size passed to ``model.update``.
        patience: Not used inside this function.

    Returns:
        ``{'loss': [...], 'accuracy': [...]}`` with one entry per completed
        epoch, or None if the average epoch loss becomes NaN.
    """
    inputs = np.array([sample[0] for sample in dataset])
    targets = np.array([sample[1] for sample in dataset]).reshape(-1, 1)
    n_samples = len(dataset)

    loss_log, accuracy_log = [], []
    history = {'loss': loss_log, 'accuracy': accuracy_log}

    for epoch in range(epochs):
        order = np.random.permutation(n_samples)
        epoch_inputs, epoch_targets = inputs[order], targets[order]

        weighted_loss = 0
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            batch_X = epoch_inputs[start:stop]
            batch_Y = epoch_targets[start:stop]
            batch_loss = model.train(batch_X, batch_Y)
            model.update(learning_rate)
            weighted_loss += batch_loss * batch_X.shape[0]

        avg_epoch_loss = weighted_loss / n_samples

        if np.isnan(avg_epoch_loss):
            return None

        hard_predictions = (model.predict(inputs) > 0.5).astype(int)
        accuracy = np.mean(hard_predictions == targets)

        loss_log.append(avg_epoch_loss)
        accuracy_log.append(accuracy)

        # Stop on perfect accuracy, but only after epoch index 100.
        solved = accuracy == 1.0
        if solved and epoch > 100:
            break

    return history

# Sanity Check: XOR Test
def execute_xor_test():
    """Sanity check: train a small MLP on the four XOR points and report the result.

    Prints PASSED when the final recorded accuracy is exactly 1.0, FAILED
    otherwise. Nothing is reported if training returns no history.
    """
    print("\nExecuting Sanity Check: The XOR Problem")
    xor_data = [((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 0)]

    # XOR is fed as 2 raw inputs (no positional encoding), hence input width 2.
    hidden_layer = Linear(2, 8, ReLU)
    output_layer = Linear(8, 1, Sigmoid)
    model = Model([hidden_layer, output_layer])

    print("Training model on XOR data...")
    # The dedicated XOR loop skips the positional-encoding step.
    history = xor_training_procedure(
        model=model,
        dataset=xor_data,
        epochs=3000,
        batch_size=4,
        learning_rate=0.1,
        patience=200,
    )

    if not history:
        return

    final_accuracy = history['accuracy'][-1]
    print(f"XOR test final accuracy: {final_accuracy:.4f}")
    verdict = (
        "XOR Test PASSED: Model achieved 100% accuracy."
        if final_accuracy == 1.0
        else "XOR Test FAILED: Model did not achieve 100% accuracy."
    )
    print(verdict)

# Sanity Check: Gradient Check
def execute_gradient_check():
    """Sanity check: compare backprop weight gradients with central differences.

    A tiny two-layer model is evaluated on one encoded sample. For each layer
    the relative error ``|a - n| / (|a| + |n|)`` between the analytical and
    numerical weight gradients is printed; the check passes when the sum of
    those errors is below 1e-6. Bias gradients are not checked.
    """
    print("\nExecuting Sanity Check: Gradient Check")

    num_frequencies = 4
    input_dim = 4 * num_frequencies
    x = np.array([[0.5, 0.5]])
    y = np.array([[1]])
    x_encoded = positional_encoding(x, num_frequencies)

    model = Model([Linear(input_dim, 2, ReLU), Linear(2, 1, Sigmoid)])

    # One forward/backward pass fills each layer's grad_weights; copy them
    # out before clearing so the numerical pass starts from a clean state.
    model.train(x_encoded, y)
    analytical_grads = [np.copy(layer.grad_weights) for layer in model.layers]
    model.zero_grad()

    epsilon = 1e-5
    numerical_grads = [np.zeros_like(layer.weights) for layer in model.layers]

    for layer, numeric in zip(model.layers, numerical_grads):
        for i, j in np.ndindex(*layer.weights.shape):
            saved_weight = layer.weights[i, j]

            layer.weights[i, j] = saved_weight + epsilon
            loss_plus = model.loss_fn.loss(y, model.forward(x_encoded))

            layer.weights[i, j] = saved_weight - epsilon
            loss_minus = model.loss_fn.loss(y, model.forward(x_encoded))

            # Restore the weight before perturbing the next one.
            layer.weights[i, j] = saved_weight

            numeric[i, j] = (loss_plus - loss_minus) / (2 * epsilon)

    total_relative_error = 0
    print("Comparing analytical and numerical gradients...")
    for l_idx, (analytic, numeric) in enumerate(zip(analytical_grads, numerical_grads)):
        difference_norm = np.linalg.norm(analytic - numeric)
        scale = np.linalg.norm(analytic) + np.linalg.norm(numeric)
        # When both gradients are (near) zero the error is reported as 0.
        if scale > 1e-8:
            relative_error = difference_norm / scale
        else:
            relative_error = 0
        total_relative_error += relative_error
        print(f"Layer {l_idx} Relative Error: {relative_error}")

    passed = total_relative_error < 1e-6
    print("Gradient Check PASSED." if passed else "Gradient Check FAILED.")

# Map Prediction and Analysis
def _train_architecture(dataset, depth, width, run_name, num_frequencies):
    """Train one ReLU MLP with `depth` hidden layers of `width` units; return its final accuracy (NaN on failure)."""
    # positional_encoding emits a sine and a cosine block per frequency for
    # each of the 2 coordinates, hence 4 * num_frequencies input features.
    input_dim = 4 * num_frequencies
    layers = [Linear(input_dim, width, ReLU)]
    layers.extend(Linear(width, width, ReLU) for _ in range(depth - 1))
    layers.append(Linear(width, 1, Sigmoid))

    history = training_procedure(
        Model(layers), dataset, epochs=150, batch_size=128,
        initial_learning_rate=0.01, patience=20, username=run_name,
        num_frequencies=num_frequencies, is_silent=True,
    )
    # training_procedure returns None when the loss turns NaN. Report NaN so
    # the caller still gets exactly one value per configuration.
    return history['accuracy'][-1] if history else float('nan')


def execute_analysis(dataset):
    """Trains models with varying architectures and plots the results.

    Two sweeps are run: depth in [2, 3, 4, 5] at width 128, and width in
    [32, 64, 128, 256] at depth 3. The final accuracies are plotted side by
    side and saved to 'runs/architecture_analysis.png'.

    A run whose training fails is recorded as NaN, so the accuracy lists
    always match the x-axis lists in length and the plot can be drawn.

    Args:
        dataset: Passed unchanged to ``training_procedure`` for every run.
    """
    print("\nExecuting Map Prediction and Analysis")
    num_frequencies = 10

    print("\nAnalyzing effect of network depth...")
    depths = [2, 3, 4, 5]
    width = 128
    depth_accuracies = []
    for depth in depths:
        print(f"Training model with depth {depth} and width {width}...")
        final_accuracy = _train_architecture(dataset, depth, width, f"Depth_{depth}", num_frequencies)
        depth_accuracies.append(final_accuracy)
        if np.isnan(final_accuracy):
            print(f"Training failed for depth {depth}; recording NaN.")
        else:
            print(f"Final accuracy for depth {depth}: {final_accuracy:.4f}")

    print("\nAnalyzing effect of network width...")
    widths = [32, 64, 128, 256]
    depth = 3
    width_accuracies = []
    for width in widths:
        print(f"Training model with depth {depth} and width {width}...")
        final_accuracy = _train_architecture(dataset, depth, width, f"Width_{width}", num_frequencies)
        width_accuracies.append(final_accuracy)
        if np.isnan(final_accuracy):
            print(f"Training failed for width {width}; recording NaN.")
        else:
            print(f"Final accuracy for width {width}: {final_accuracy:.4f}")

    plt.close('all')
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    fig.suptitle('Architecture Analysis')
    ax1.plot(depths, depth_accuracies, marker='o')
    ax1.set_title("Accuracy vs. Depth (Width=128)")
    ax1.set_xlabel("Number of Hidden Layers")
    ax1.set_ylabel("Final Accuracy")
    ax1.grid(True)

    ax2.plot(widths, width_accuracies, marker='o')
    ax2.set_title("Accuracy vs. Width (Depth=3)")
    ax2.set_xlabel("Neurons per Hidden Layer")
    ax2.set_ylabel("Final Accuracy")
    ax2.grid(True)

    analysis_plot_path = 'runs/architecture_analysis.png'
    os.makedirs('runs', exist_ok=True)
    plt.savefig(analysis_plot_path)
    print(f"Analysis plot saved to {analysis_plot_path}")
    plt.show()

# Final Challenge
def execute_challenge(dataset):
    """Trains the two candidate models for the final-challenge goals.

    Goal 1 uses a compact 64-unit network; goal 2 uses a wider 256-unit
    network with a larger batch size and learning rate. Both are trained via
    ``training_procedure``; the returned histories are not inspected here.
    """
    print("\nExecuting Final Challenge")
    num_frequencies = 10
    input_dim = 4 * num_frequencies

    print("\nGoal 1: Minimize model size for >91% accuracy.")
    small_layers = [
        Linear(input_dim, 64, ReLU),
        Linear(64, 64, ReLU),
        Linear(64, 1, Sigmoid),
    ]
    small_model = Model(small_layers)
    training_procedure(
        small_model,
        dataset,
        epochs=250,
        batch_size=128,
        initial_learning_rate=0.01,
        patience=30,
        username="SmallModel_Challenge",
        num_frequencies=num_frequencies,
    )

    print("\nGoal 2: Minimize training samples for >91% accuracy.")
    fast_layers = [
        Linear(input_dim, 256, ReLU),
        Linear(256, 256, ReLU),
        Linear(256, 1, Sigmoid),
    ]
    fast_model = Model(fast_layers)
    training_procedure(
        fast_model,
        dataset,
        epochs=100,
        batch_size=256,
        initial_learning_rate=0.02,
        patience=20,
        username="FastConverge_Challenge",
        num_frequencies=num_frequencies,
    )

def main_additions():
    """Entry point for the additional Q1 sections.

    The three flags below select what runs: the sanity checks (XOR test and
    gradient check), the architecture analysis, and the final challenge.
    """
    RUN_SANITY_CHECKS = True
    RUN_ANALYSIS = False
    RUN_CHALLENGE = False

    if RUN_SANITY_CHECKS:
        execute_xor_test()
        execute_gradient_check()

    # The dataset is only built when a section that consumes it is enabled.
    needs_dataset = RUN_ANALYSIS or RUN_CHALLENGE
    if not needs_dataset:
        return

    print("Creating border_dataset object for analysis/challenge...")
    border_dataset = BorderDataset(image_path="Dataset/border.png")

    if RUN_ANALYSIS:
        execute_analysis(border_dataset)
    if RUN_CHALLENGE:
        execute_challenge(border_dataset)

# Cell entry point: runs the sections that are enabled by the flags inside
# main_additions().
main_additions()




Executing Sanity Check: The XOR Problem
Training model on XOR data...
XOR test final accuracy: 1.0000
XOR Test PASSED: Model achieved 100% accuracy.

Executing Sanity Check: Gradient Check
Comparing analytical and numerical gradients...
Layer 0 Relative Error: 2.0770530088677715e-11
Layer 1 Relative Error: 1.1733377223073913e-11
Gradient Check PASSED.
