In [None]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import os
import datetime
import wandb
from tqdm import tqdm
import random
import time

class Identity:
    """Pass-through activation: the output equals the pre-activation."""

    def forward(self, z):
        """Return ``z`` unchanged."""
        return z

    def backward(self, z):
        """Return the identity's derivative: an array of ones shaped like ``z``."""
        return np.full(z.shape, 1.0)

class Sigmoid:
    """Logistic sigmoid activation."""

    def forward(self, z):
        """Return ``1 / (1 + exp(-z))`` with ``z`` clipped to [-500, 500]."""
        # Clipping keeps np.exp inside the finite float range.
        clipped = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-clipped))

    def backward(self, z):
        """Return the derivative ``s * (1 - s)``, where ``s = forward(z)``."""
        activated = self.forward(z)
        complement = 1 - activated
        return activated * complement

class Tanh:
    """Hyperbolic tangent activation."""

    def forward(self, z):
        """Return ``tanh(z)`` element-wise."""
        return np.tanh(z)

    def backward(self, z):
        """Return the derivative ``1 - tanh(z)**2`` element-wise."""
        activated = self.forward(z)
        return 1 - activated ** 2

class ReLU:
    """Rectified linear unit activation."""

    def forward(self, z):
        """Return ``max(0, z)`` element-wise."""
        rectified = np.maximum(0, z)
        return rectified

    def backward(self, z):
        """Return 1.0 where ``z`` is strictly positive and 0.0 elsewhere."""
        positive_mask = z > 0
        return positive_mask.astype(float)

class MSE:
    """Mean squared error loss."""

    def forward(self, y_true, y_pred):
        """Return the mean of the squared residuals ``y_pred - y_true``."""
        residual = y_pred - y_true
        return np.mean(residual ** 2)

    def backward(self, y_true, y_pred):
        """Return the loss gradient w.r.t. ``y_pred``: ``2 * residual / y_true.size``."""
        residual = y_pred - y_true
        element_count = y_true.size
        return 2 * residual / element_count

class BCE:
    """Binary cross-entropy loss with clipped predictions."""

    # Predictions are clamped to [_EPSILON, 1 - _EPSILON] before any log/division.
    _EPSILON = 1e-5

    def _clamp(self, y_pred):
        """Return ``y_pred`` clipped away from exactly 0 and 1."""
        return np.clip(y_pred, self._EPSILON, 1 - self._EPSILON)

    def forward(self, y_true, y_pred):
        """Return the mean binary cross-entropy between ``y_true`` and ``y_pred``."""
        prob = self._clamp(y_pred)
        log_likelihood = y_true * np.log(prob) + (1 - y_true) * np.log(1 - prob)
        return -np.mean(log_likelihood)

    def backward(self, y_true, y_pred):
        """Return the loss gradient w.r.t. ``y_pred``, scaled by ``1 / y_true.size``."""
        prob = self._clamp(y_pred)
        negative_term = (1 - y_true) / (1 - prob)
        positive_term = y_true / prob
        return (negative_term - positive_term) / y_true.size


class Linear:
    """Fully connected layer followed by an element-wise activation.

    Gradients are *accumulated* into ``grad_weights_cumulative`` and
    ``grad_biases_cumulative`` on every ``backward`` call; nothing in this
    class resets or applies them.
    """

    def __init__(self, input_width, output_width, activation):
        """Create the layer.

        Args:
            input_width: Number of input features.
            output_width: Number of output units.
            activation: Zero-argument callable (typically a class) returning an
                object with ``forward(z)`` and ``backward(z)`` methods.
        """
        # Uniform init in [-bound, bound] with bound = sqrt(6 / (fan_in + fan_out)).
        bound = np.sqrt(6 / (input_width + output_width))
        weight_shape = (input_width, output_width)
        self.weights = np.random.uniform(-bound, bound, weight_shape)
        self.biases = np.zeros((1, output_width))
        self.activation = activation()

        # Caches filled by forward() and consumed by backward().
        self.input = None
        self.z = None
        self.a = None

        self.grad_weights_cumulative = np.zeros_like(self.weights)
        self.grad_biases_cumulative = np.zeros_like(self.biases)

    def forward(self, input_data):
        """Compute ``activation(input_data . weights + biases)`` and cache intermediates."""
        pre_activation = np.dot(input_data, self.weights) + self.biases
        self.input = input_data
        self.z = pre_activation
        self.a = self.activation.forward(pre_activation)
        return self.a

    def backward(self, upstream_gradient):
        """Accumulate parameter gradients and return the gradient w.r.t. the input.

        Uses the ``input`` and ``z`` cached by the most recent ``forward`` call.
        """
        local_slope = self.activation.backward(self.z)
        d_z = upstream_gradient * local_slope

        weight_grad = np.dot(self.input.T, d_z)
        bias_grad = np.sum(d_z, axis=0, keepdims=True)
        self.grad_weights_cumulative += weight_grad
        self.grad_biases_cumulative += bias_grad

        return np.dot(d_z, self.weights.T)


class Model:
    """Sequential stack of layers trained with accumulated gradients.

    Each layer must expose ``forward``/``backward`` plus the attributes
    ``weights``, ``biases``, ``grad_weights_cumulative`` and
    ``grad_biases_cumulative``.

    ``backward`` only *adds* to the layers' gradient accumulators and
    ``update`` only applies them; neither resets them. Callers must invoke
    ``zero_grad`` between optimisation steps.
    """

    def __init__(self, layers, loss_function):
        """Store the layers and pick the loss.

        Args:
            layers: Ordered list of layer objects.
            loss_function: Loss name. ``'bce'`` (case-insensitive) selects
                ``BCE``; any other string falls back to ``MSE``.
        """
        self.layers = layers
        self.loss_fn = BCE() if loss_function.lower() == 'bce' else MSE()

    def forward(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, y_true, y_pred):
        """Back-propagate the loss gradient through the layers in reverse order."""
        grad = self.loss_fn.backward(y_true, y_pred)
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

    def train(self, x, y):
        """Do one forward/backward pass (no parameter update).

        Returns:
            Tuple ``(loss, y_pred)``.
        """
        y_pred = self.forward(x)
        loss = self.loss_fn.forward(y, y_pred)
        self.backward(y, y_pred)
        return loss, y_pred

    def calculate_loss(self, x, y):
        """Return the loss for ``(x, y)`` without back-propagating."""
        y_pred = self.forward(x)
        return self.loss_fn.forward(y, y_pred)

    def zero_grad(self):
        """Reset every layer's accumulated gradients to zero in place."""
        for layer in self.layers:
            layer.grad_weights_cumulative.fill(0)
            layer.grad_biases_cumulative.fill(0)

    def update(self, learning_rate):
        """Apply one gradient-descent step using the accumulated gradients.

        The accumulators are left untouched; call ``zero_grad`` afterwards.
        """
        for layer in self.layers:
            layer.weights -= learning_rate * layer.grad_weights_cumulative
            layer.biases -= learning_rate * layer.grad_biases_cumulative

    def clip_gradient(self, threshold):
        """Rescale all accumulated gradients so their global L2 norm is at most ``threshold``.

        Gradients are left unchanged when the global norm does not exceed
        ``threshold`` or when the model has no layers.
        """
        # Weight gradients are 2-D and bias gradients are (1, n); flatten both
        # so np.concatenate receives arrays of the same dimensionality.
        flat_grads = [
            grad.ravel()
            for layer in self.layers
            for grad in (layer.grad_weights_cumulative, layer.grad_biases_cumulative)
        ]
        if not flat_grads:
            return

        norm = np.linalg.norm(np.concatenate(flat_grads))
        if norm > threshold:
            clip_factor = threshold / norm
            for layer in self.layers:
                layer.grad_weights_cumulative *= clip_factor
                layer.grad_biases_cumulative *= clip_factor

    def predict(self, x):
        """Return the network output for ``x`` (alias of ``forward``)."""
        return self.forward(x)

    def count_parameters(self):
        """Return the total number of weight and bias entries across all layers."""
        return sum(layer.weights.size + layer.biases.size for layer in self.layers)

class ImageBorderDataset:
    """Per-pixel binary classification dataset built from an RGB image.

    ``pixels`` is a list of ``((x_norm, y_norm), label)`` samples in row-major
    order, where the coordinates are scaled to [0, 1] and ``label`` is 1 when
    the pixel satisfies ``R < 150 and G < 50 and B > 100`` and 0 otherwise.
    """

    def __init__(self, image_path):
        """Load ``image_path`` and populate ``pixels``, ``width`` and ``height``."""
        self.pixels, self.width, self.height = self._load_and_process_image(image_path)

    def _load_and_process_image(self, image_path):
        """Return ``(samples, width, height)`` for the image at ``image_path``.

        If the file does not exist, a synthetic 50x50 two-tone image is used
        instead (top half ``[255, 165, 0]``, bottom half ``[138, 43, 226]``).
        """
        try:
            # The context manager releases the file handle once the RGB copy exists.
            with Image.open(image_path) as source:
                image = source.convert('RGB')
        except FileNotFoundError:
            fallback = np.zeros((50, 50, 3), dtype=np.uint8)
            fallback[:25, :, :] = [255, 165, 0]
            fallback[25:, :, :] = [138, 43, 226]
            image = Image.fromarray(fallback)

        img_array = np.array(image)
        height, width, _ = img_array.shape

        # Classify every pixel at once instead of testing them one by one.
        red = img_array[..., 0]
        green = img_array[..., 1]
        blue = img_array[..., 2]
        labels = ((red < 150) & (green < 50) & (blue > 100)).astype(int).tolist()

        # A one-pixel-wide/tall image would otherwise divide by zero; its only
        # coordinate along that axis is mapped to 0.0.
        x_scale = max(width - 1, 1)
        y_scale = max(height - 1, 1)

        data = [
            ((x / x_scale, y / y_scale), labels[y][x])
            for y in range(height)
            for x in range(width)
        ]
        return data, width, height

    def get_shuffled_data(self):
        """Return a randomly shuffled copy of ``pixels``; the original order is kept."""
        shuffled = list(self.pixels)
        random.shuffle(shuffled)
        return shuffled


def evaluate(model, dataset):
    """Return the model's classification accuracy over every pixel in ``dataset``.

    Args:
        model: Object whose ``predict(coords)`` returns one score per sample;
            scores above 0.5 are treated as class 1.
        dataset: Object whose ``pixels`` attribute is a sequence of
            ``(coords, label)`` samples.

    Returns:
        Tuple ``(accuracy_percent, None)``. The second element is always
        ``None``; it is kept so existing callers can keep unpacking two values.
    """
    coords = np.array([sample[0] for sample in dataset.pixels])
    true_labels = np.array([sample[1] for sample in dataset.pixels])

    # Threshold the scores at 0.5 and flatten to match the 1-D label array.
    pred_labels = (model.predict(coords) > 0.5).astype(int).flatten()

    accuracy = np.mean(pred_labels == true_labels) * 100
    return accuracy, None


def train_model(model, dataset, config):
    """Train ``model`` one sample at a time with gradient accumulation.

    Gradients from ``config['grad_accumulation_steps']`` consecutive samples
    are accumulated, applied with ``model.update`` and then cleared with
    ``model.zero_grad`` so each step only uses the gradients of its own batch.
    Samples left over at the end of an epoch form a final, smaller batch.

    Args:
        model: Object providing ``train``, ``update``, ``zero_grad`` and
            whatever ``evaluate`` needs (``predict``).
        dataset: Object providing ``get_shuffled_data()`` (a list of
            ``(coords, label)`` samples) and what ``evaluate`` needs.
        config: Dict with keys ``'name'``, ``'epochs'``, ``'learning_rate'``
            and ``'grad_accumulation_steps'``. If ``'stop_at_target'`` is
            truthy, ``'target_accuracy'`` is also required and training stops
            as soon as that accuracy (in percent) is reached.

    Returns:
        Dict with ``'sample_seen'`` (total samples processed) and
        ``'accuracy'`` (accuracy in percent after the last completed epoch).
    """
    run_name = config['name']
    learning_rate = config['learning_rate']
    accumulation_steps = config['grad_accumulation_steps']
    pbar = tqdm(range(config['epochs']), desc=f"Training {run_name}", leave=True)

    samples_seen = 0
    final_accuracy = 0

    # Start from clean accumulators in case the model was used before.
    model.zero_grad()

    for epoch in pbar:
        data = dataset.get_shuffled_data()
        epoch_loss = 0.0

        for step, (coords, label) in enumerate(data, start=1):
            loss, _ = model.train(np.array([coords]), np.array([[label]]))
            epoch_loss += loss
            if step % accumulation_steps == 0:
                model.update(learning_rate)
                # Without this reset every update would re-apply all earlier gradients.
                model.zero_grad()

        # Flush the trailing partial batch, if any.
        if len(data) % accumulation_steps:
            model.update(learning_rate)
            model.zero_grad()

        samples_seen += len(data)

        mean_loss = epoch_loss / len(data) if data else 0.0
        current_accuracy, _ = evaluate(model, dataset)
        pbar.set_description(f"Epoch {epoch+1} / {config['epochs']} | Loss : {mean_loss:.4f} | Accuracy : {current_accuracy:.2f}%")

        final_accuracy = current_accuracy
        if config.get('stop_at_target', False) and current_accuracy >= config['target_accuracy']:
            print(f"Reached target accuracy of {config['target_accuracy']} at epoch {epoch+1}")
            break

    return {'sample_seen': samples_seen, 'accuracy': final_accuracy}



if __name__ == "__main__":
    # Build the per-pixel dataset; the class falls back to a synthetic image
    # when the file is missing.
    border_dataset = ImageBorderDataset('/content/border.png')
    TARGET_ACCURACY = 91.0

    # ------------------------------------------------------------------
    # Goal 1: reach the target accuracy with a fixed 4x48 Tanh network.
    # ------------------------------------------------------------------
    print(f"1.5 Goal 1: Minimize Model Size to reach {TARGET_ACCURACY}% Accuracy")

    min_param_layers = [Linear(2, 48, Tanh)] + \
                       [Linear(48, 48, Tanh) for _ in range(3)] + \
                       [Linear(48, 1, Sigmoid)]

    min_param_model = Model(layers=min_param_layers, loss_function='bce')

    # NOTE(review): 'patience' and 'relative_loss_threshold' are not read by
    # any training code visible in this file.
    config_goal1 = {
        'name': "Goal1_Final_4x48_Tanh",
        'epochs': 150,
        'learning_rate': 0.01,
        'grad_accumulation_steps': 64,
        'patience': 10,
        'relative_loss_threshold': 0.01,
        'target_accuracy': TARGET_ACCURACY,
        'stop_at_target': True,
    }

    print("Testing Corrected Architecture: 4 hidden layers, width 48 with Tanh.")
    print(f"Total Parameters: {min_param_model.count_parameters()}")

    result_goal1 = train_model(min_param_model, border_dataset, config_goal1)

    print("\n--- Goal 1 Summary ---")
    print(f"Architecture Parameters: {min_param_model.count_parameters()}")
    print(f"Final Accuracy: {result_goal1['accuracy']:.2f}%")
    if result_goal1['accuracy'] >= TARGET_ACCURACY:
        print("Success: Target accuracy achieved.")
    else:
        print("Failure: Target accuracy not met.")

    # ------------------------------------------------------------------
    # Goal 2: same architecture, trained with gradient clipping, counting
    # how many samples are consumed before the target is reached.
    # ------------------------------------------------------------------
    print(f" 1.5 Goal 2: Minimize Training Samples to reach {TARGET_ACCURACY}% Accuracy")

    min_samples_model = Model(layers=[Linear(2, 48, Tanh)] + \
                                     [Linear(48, 48, Tanh) for _ in range(3)] + \
                                     [Linear(48, 1, Sigmoid)],
                              loss_function='bce')

    config_goal2 = {
        'name': "Goal2_Final_GradClip",
        'epochs': 60,
        'learning_rate': 0.02,
        'grad_accumulation_steps': 128,
        'patience': 10,
        'relative_loss_threshold': 0.01,
        'target_accuracy': TARGET_ACCURACY,
        'grad_clip_threshold': 1.0,
    }

    print(f"Testing with LR={config_goal2['learning_rate']} and Gradient Clipping (Threshold={config_goal2['grad_clip_threshold']})")

    pbar = tqdm(range(config_goal2['epochs']), desc=f"Training {config_goal2['name']}", leave=True)
    samples_seen = 0
    final_accuracy = 0

    accumulation_steps = config_goal2['grad_accumulation_steps']
    clip_threshold = config_goal2['grad_clip_threshold']
    learning_rate = config_goal2['learning_rate']

    # Start from clean accumulators.
    min_samples_model.zero_grad()

    for epoch in pbar:
        data, epoch_loss = border_dataset.get_shuffled_data(), 0.0
        for step, (coords, label) in enumerate(data, start=1):
            loss, _ = min_samples_model.train(np.array([coords]), np.array([[label]]))
            epoch_loss += loss
            if step % accumulation_steps == 0:
                min_samples_model.clip_gradient(threshold=clip_threshold)
                min_samples_model.update(learning_rate)
                # Reset so the next batch does not re-apply these gradients.
                min_samples_model.zero_grad()

        # Flush the trailing partial batch, if any.
        if len(data) % accumulation_steps:
            min_samples_model.clip_gradient(threshold=clip_threshold)
            min_samples_model.update(learning_rate)
            min_samples_model.zero_grad()
        samples_seen += len(data)

        current_accuracy, _ = evaluate(min_samples_model, border_dataset)
        pbar.set_description(f"Loss: {epoch_loss/len(data):.4f} | Acc: {current_accuracy:.2f}%")

        final_accuracy = current_accuracy
        if current_accuracy >= TARGET_ACCURACY:
            print(f"\n Target {TARGET_ACCURACY}% accuracy reached!")
            break

    print("\n--- Goal 2 Summary ---")
    print(f"Final Accuracy: {final_accuracy:.2f}%")
    print(f"Minimum Samples to Converge: {samples_seen}")
    if final_accuracy >= TARGET_ACCURACY:
        print("Success: Target accuracy achieved with minimal samples.")
    else:
        print("Failure: Target accuracy not met within the given epochs.")

In [None]:

1.5: FINAL CHALLENGE (Final & Robust Attempt)

--- Goal 1: Minimize Model Size to reach 91.0% ---
Testing Final Architecture: 4 hidden layers, width 48 with Tanh. Parameters: 7249
Epoch 121/150 | Loss: 0.2506 | Acc: 91.04%:  80%|████████  | 120/150 [01:20<00:20,  1.50it/s]
Target 91.0% accuracy reached!

--- Goal 1 Summary ---
Architecture Parameters: 7249
Final Accuracy: 91.04%
Success: Target accuracy achieved.

--- Goal 2: Minimize Samples to reach 91.0% ---
Testing with LR=0.02 and Gradient Clipping (Threshold=1.0)
Loss: 0.3486 | Acc: 83.24%: 100%|██████████| 60/60 [00:39<00:00,  1.54it/s]
--- Goal 2 Summary ---
Final Accuracy: 83.24%
Minimum Samples to Converge: 150000
Failure: Target accuracy not met within the given epochs.