In [None]:
# imports
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import re
from torchvision.models import EfficientNet_B4_Weights

# 1. Data Preparation

In [None]:
def extract_number(f):
    """Return the first run of digits found in ``f`` as an int.

    Intended as a sort key for file names: a name that contains no digits
    yields ``float('inf')`` and therefore sorts after every numbered name.
    """
    digits = re.search(r'\d+', f)
    if digits is None:
        return float('inf')
    return int(digits.group())

class TestDataset(Dataset):
    """Unlabelled image directory that yields ``(image, file_name)`` pairs.

    Every entry returned by ``os.listdir(root_dir)`` is treated as an image.
    Entries are ordered by the first number in their name (see
    ``extract_number``); names without digits sort last.
    """

    def __init__(self, root_dir, transform=None):
        """
        root_dir: directory that holds the test images
        transform: optional callable applied to each loaded RGB image
        """
        self.root_dir = root_dir
        self.transform = transform
        self.image_files = sorted(os.listdir(root_dir), key=extract_number)

    def __len__(self):
        """Number of directory entries found in ``root_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, file_name)`` for ``idx``.

        If the file cannot be loaded, a message is printed and the next entry
        (wrapping around at the end) is returned instead, together with *that*
        entry's file name. At most ``len(self)`` entries are tried.

        Raises:
            IndexError: ``idx`` is outside the list of files.
            RuntimeError: none of the files could be loaded.
        """
        total = len(self)
        candidate = idx

        # Bounded retry instead of unbounded recursion: a directory in which no
        # file is readable now fails with a clear error rather than a
        # RecursionError.
        for _ in range(max(total, 1)):
            img_name = self.image_files[candidate]
            img_path = os.path.join(self.root_dir, img_name)

            try:
                # The context manager closes the underlying file as soon as the
                # RGB copy has been made.
                with Image.open(img_path) as raw_image:
                    image = raw_image.convert('RGB')
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C and SystemExit
                # still propagate.
                print(f"Error loading {img_name}, skipping...")
                candidate = (candidate + 1) % total
                continue

            if self.transform:
                image = self.transform(image)

            return image, img_name

        raise RuntimeError(f"No loadable image found in {self.root_dir}")


# 2. Data Transforms

In [None]:
# Training pipeline: resize to 256x256, take a random 224x224 crop, apply light
# augmentation (flip, rotation, colour jitter), then convert to a tensor and
# normalise each channel. The mean/std values match the commonly used ImageNet
# statistics.
train_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.RandomCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
    ),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Evaluation pipeline: deterministic resize straight to 224x224 (no
# augmentation), followed by the same tensor conversion and per-channel
# normalisation values as the training pipeline.
test_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# 3. Model Setup

In [None]:
def create_model(num_classes=200):
    """Build an EfficientNet-B4 with frozen loaded weights and a new head.

    num_classes: width of the final linear layer
    Returns: the model; only the replacement classifier created here has
    parameters with ``requires_grad=True``.
    """
    backbone = models.efficientnet_b4(weights=EfficientNet_B4_Weights.DEFAULT)

    # Freeze everything that was loaded. The head is created afterwards, so its
    # parameters keep the default requires_grad=True.
    for weight in backbone.parameters():
        weight.requires_grad = False

    # Input width of the head is taken from the second module of the existing
    # classifier.
    in_features = backbone.classifier[1].in_features
    head_layers = [
        nn.Dropout(0.5),
        nn.Linear(in_features, 512),
        nn.ReLU(),
        nn.Linear(512, num_classes),
    ]
    backbone.classifier = nn.Sequential(*head_layers)
    return backbone

# 4. Training Loop

In [None]:
def train_model(epochs=1, train_root='../data/train', batch_size=32, lr=1e-3, num_workers=4):
    """Fine-tune the classifier head of ``create_model()`` on an image folder.

    epochs: number of passes over the training set
    train_root: directory laid out for ``ImageFolder`` (one sub-folder per class)
    batch_size: mini-batch size of the training loader
    lr: initial AdamW learning rate (multiplied by 0.1 every 5 epochs)
    num_workers: worker processes used by the training loader
    Returns: the trained model, left on the device it was trained on.

    NOTE: a later cell of this notebook defines another function that is also
    called ``train_model`` (the NumPy network). Once that cell has run, this
    definition is shadowed, so the cells must be executed top to bottom.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model().to(device)

    # Only the training loader is needed here; the test loader is built by the
    # caller right before generating predictions.
    train_dataset = ImageFolder(root=train_root, transform=train_transform)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )

    criterion = nn.CrossEntropyLoss()
    # The backbone is frozen in create_model(); hand the optimizer only the
    # parameters that can actually receive gradients.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(trainable_params, lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # The scheduler is stepped once per epoch, after that epoch's optimizer
        # steps.
        scheduler.step()
        print(f"Epoch {epoch+1} Loss: {running_loss/len(train_loader):.4f}")

    return model

# 5. Submission Generation

In [None]:
def generate_submission(model, test_loader, device,
                        train_root='../data/train', output_path='../src/data.csv'):
    """Predict a class for every test image and write the result as a CSV.

    model: trained classifier, expected to already live on ``device``
    test_loader: loader yielding ``(image_batch, file_names)`` pairs
    device: device the image batches are moved to before the forward pass
    train_root: training image folder; its class sub-folders provide the
        index -> class-name mapping (same directory the model was trained on)
    output_path: destination of the CSV file
    Returns: the DataFrame that was written (columns ``ID`` and ``label``),
    sorted by the first number in each file name.
    """
    model.eval()
    predictions = []
    filenames = []

    with torch.no_grad():
        for images, names in tqdm(test_loader, desc="Generating Predictions"):
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            # Plain Python ints, so they can be used directly as dict keys.
            predictions.extend(preds.cpu().tolist())
            filenames.extend(names)

    # Map predicted indices back to the class (folder) names.
    class_to_idx = ImageFolder(root=train_root).class_to_idx
    idx_to_class = {v: k for k, v in class_to_idx.items()}

    df = pd.DataFrame({
        'ID': filenames,
        'label': [idx_to_class[p] for p in predictions]
    })

    # Order rows by the numeric part of names like "image1.jpg"; names without
    # digits sort last instead of raising.
    df['num'] = df['ID'].apply(extract_number)
    df = df.sort_values('num').drop('num', axis=1)
    df.to_csv(output_path, index=False)
    return df

# Main Execution of data generation

In [None]:
if __name__ == "__main__":
    # Train first, then predict on the test folder and write the CSV.
    # `train_model` here must be the PyTorch version defined above; a later
    # cell defines a different function with the same name.
    trained_model = train_model()
    test_loader = DataLoader(
        dataset=TestDataset(root_dir='../data/test', transform=test_transform),
        batch_size=32,
        shuffle=False,
        num_workers=4,
    )
    generate_submission(
        model=trained_model,
        test_loader=test_loader,
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    )

# Activation functions and their derivatives

In [None]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), evaluated element-wise.

    The input is not clipped before exponentiation.
    """
    exp_neg = np.exp(-z)
    return 1 / (1 + exp_neg)

def deriv_sigmoid(Z):
    """Return ``Z * (1 - Z)`` element-wise.

    This equals the sigmoid derivative only when ``Z`` already holds sigmoid
    outputs (activations), not pre-activation values.
    """
    complement = 1 - Z
    return Z * complement

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    floor = 0
    return np.maximum(floor, Z)

def relu_derivative(Z):
    """Boolean mask that is True where ``Z`` is strictly positive.

    Multiplying by this mask passes gradients through positive entries only;
    the value at exactly 0 is False.
    """
    positive_mask = Z > 0
    return positive_mask

def softmax(Z):
    """Column-wise softmax: every column of the result sums to 1.

    The column maximum is subtracted before exponentiating so that large
    scores do not overflow.
    """
    column_max = np.max(Z, axis=0, keepdims=True)
    exp_shifted = np.exp(Z - column_max)
    column_total = np.sum(exp_shifted, axis=0, keepdims=True)
    return exp_shifted / column_total

def compute_loss(AL, Y):
    """
    Mean cross-entropy over the m samples (columns).
    AL: predicted probabilities (softmax output), shape (classes, m)
    Y: one-hot true labels, same shape as AL
    A small constant (1e-9) is added inside the log to avoid log(0).
    """
    sample_count = Y.shape[1]
    log_probs = np.log(AL + 1e-9)
    weighted_total = np.sum(Y * log_probs)
    return -weighted_total / sample_count

# Load the image dataset from a CSV file

In [None]:
def load_bird_dataset(csv_path, image_size=224):
    """Load labelled, flattened RGB images from a header-less CSV file.

    Each row holds the label in the first column, followed by the red plane,
    the green plane and the blue plane, each flattened to
    ``image_size * image_size`` values.

    csv_path: path of the CSV file
    image_size: side length of the square images (default 224)
    Returns: ``(X, y)`` where X has shape
    (num_samples, image_size, image_size, 3) and y holds the first column.

    Raises:
        ValueError: ``image_size`` is not positive, or the number of pixel
            columns is not ``3 * image_size * image_size``.
    """
    if image_size <= 0:
        raise ValueError(f"image_size must be positive, got {image_size}")

    df = pd.read_csv(csv_path, header=None)
    y = df.iloc[:, 0].values
    pixel_data = df.iloc[:, 1:].values
    num_samples = pixel_data.shape[0]
    channel_size = image_size * image_size

    # Check the layout up front so a mismatch produces a readable message
    # instead of a bare reshape error.
    expected_columns = 3 * channel_size
    if pixel_data.shape[1] != expected_columns:
        raise ValueError(
            f"expected {expected_columns} pixel columns for {image_size}x{image_size} "
            f"RGB images, found {pixel_data.shape[1]}"
        )

    # Slice the three colour planes in R, G, B order.
    planes = [
        pixel_data[:, start:start + channel_size].reshape(num_samples, image_size, image_size)
        for start in (0, channel_size, 2 * channel_size)
    ]

    # Channels-last layout: X[i, row, col] == [R, G, B].
    X = np.stack(planes, axis=-1)

    return X, y

# Neurons in layers stored in an array

In [None]:
def get_layer_sizes(n, point1, point2):
    """
    Returns n + 1 layer sizes (layer indices 0..n) taken from the straight
    line through point1 and point2, rounded to the nearest integer.
    point1 and point2 are (layer_index, neuron_count) pairs; their layer
    indices must differ.
    """
    (x1, y1), (x2, y2) = point1, point2
    slope = (y2 - y1) / (x2 - x1)
    intercept = y1 - slope * x1

    # Index 0 is the input layer, index n the output layer.
    return [int(round(slope * index + intercept)) for index in range(n + 1)]

# Init Parameters

In [None]:
def init_parameters(n, point1, point2, seed=42):
    """
    Creates the weights and biases for the sizes given by get_layer_sizes.
    Seeds NumPy's global random generator with `seed` first.
    Returns a dictionary with W1..Wn (random normal scaled by
    sqrt(2 / fan_in)) and b1..bn (zero column vectors).
    """
    np.random.seed(seed)
    sizes = get_layer_sizes(n, point1, point2)
    parameters = {}

    # Pair every layer with the one before it: (fan_in, fan_out).
    for layer, (fan_in, fan_out) in enumerate(zip(sizes, sizes[1:]), start=1):
        scale = np.sqrt(2. / fan_in)
        parameters[f"W{layer}"] = np.random.randn(fan_out, fan_in) * scale
        parameters[f"b{layer}"] = np.zeros((fan_out, 1))

    return parameters

# Forward Propagation

In [None]:
def forward_propagation(x, parameters):
    """
    x: input with one column per sample; stored in the cache as "A0"
    parameters: dictionary of W1..WL and b1..bL
    Applies ReLU after every layer except the last, which uses softmax.
    Returns the softmax output and a cache holding Z{l} and A{l} per layer.
    """
    num_layers = len(parameters) // 2  # one W and one b per layer
    cache = {"A0": x}
    activation = x

    # Hidden layers: affine step followed by ReLU.
    for layer in range(1, num_layers):
        weights = parameters[f"W{layer}"]
        bias = parameters[f"b{layer}"]
        pre_activation = weights @ activation + bias
        activation = relu(pre_activation)
        cache[f"Z{layer}"] = pre_activation
        cache[f"A{layer}"] = activation

    # Output layer: affine step followed by softmax.
    output_scores = parameters[f"W{num_layers}"] @ activation + parameters[f"b{num_layers}"]
    probabilities = softmax(output_scores)
    cache[f"Z{num_layers}"] = output_scores
    cache[f"A{num_layers}"] = probabilities

    return probabilities, cache

# Backward Propagation

In [None]:
def backward_propagation(parameters, cache, X, Y):
    """
    parameters: dictionary of W1..WL and b1..bL
    cache: Z{l} / A{l} values produced by forward_propagation
    X: input batch; only its column count (number of samples) is used
    Y: one-hot labels with the same shape as the network output
    Returns a dictionary with dW{l} and db{l} for every layer.
    """
    grads = {}
    inv_m = 1 / X.shape[1]
    num_layers = len(parameters) // 2

    # Softmax with cross-entropy: the output error is (prediction - target).
    output_error = cache[f"A{num_layers}"] - Y
    grads[f"dW{num_layers}"] = inv_m * output_error @ cache[f"A{num_layers-1}"].T
    grads[f"db{num_layers}"] = inv_m * np.sum(output_error, axis=1, keepdims=True)

    upstream = parameters[f"W{num_layers}"].T @ output_error

    # Walk back through the ReLU layers, from the last hidden layer to the first.
    for layer in range(num_layers - 1, 0, -1):
        layer_error = upstream * relu_derivative(cache[f"Z{layer}"])
        grads[f"dW{layer}"] = inv_m * layer_error @ cache[f"A{layer-1}"].T
        grads[f"db{layer}"] = inv_m * np.sum(layer_error, axis=1, keepdims=True)
        # Nothing sits below layer 1, so the error is not propagated further.
        if layer > 1:
            upstream = parameters[f"W{layer}"].T @ layer_error

    return grads

# Update Parameters

In [None]:
def update_parameters(parameters, grads, learning_rate):
    """
    One gradient-descent step: subtracts learning_rate * gradient from every
    W{l} and b{l}. The arrays are updated in place and the same dictionary
    is returned.
    """
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            parameters[f"{prefix}{layer}"] -= learning_rate * grads[f"d{prefix}{layer}"]
    return parameters

# Train the model

In [None]:
def train_model(X, Y, n, point1, point2, epochs=100, learning_rate=0.01, print_loss=True):
    """
    Full-batch gradient descent for the NumPy network.
    X: input data, one column per sample (150528, m)
    Y: one-hot labels (200, m)
    n, point1, point2: forwarded to init_parameters to size the layers
    Returns the trained parameter dictionary.

    NOTE: this reuses the name of the PyTorch `train_model` defined in an
    earlier cell and replaces it once this cell has run.
    """
    parameters = init_parameters(n, point1, point2)

    for epoch in range(epochs):
        # Forward pass and loss on the whole batch.
        AL, cache = forward_propagation(X, parameters)
        loss = compute_loss(AL, Y)

        # Backward pass followed by one gradient step.
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)

        # The reported loss is the one measured before this epoch's update.
        if not print_loss:
            continue
        if epoch % 10 == 0:
            print(f"Epoch {epoch}: Loss = {loss:.4f}")

    return parameters

# Testing and predicting

In [None]:
def make_predictions(X, parameters):
    """
    X: input image(s), one column per sample (150528, m)
    Returns: index of the highest-probability class for every column, shape (m,)
    """
    probabilities = forward_propagation(X, parameters)[0]
    return np.argmax(probabilities, axis=0)

In [None]:
def test_prediction(index, X, Y, parameters):
    """
    Prints the predicted and true class of one sample and displays its image.
    index: column of the sample in X / Y
    X: shape (150528, m)
    Y: shape (200, m), one-hot labels
    parameters: learned parameters
    """
    # Slice instead of indexing so the sample stays a column vector.
    current_image = X[:, index:index+1]
    label = np.argmax(Y[:, index])
    prediction = make_predictions(current_image, parameters)[0]

    print("Prediction:", prediction)
    print("Label:", label)

    # The column is interpreted as a 224x224 RGB image and cast to uint8 for
    # display.
    pixels = current_image.reshape(224, 224, 3).astype(np.uint8)
    plt.imshow(pixels)
    plt.title(f"Predicted: {prediction}, Label: {label}")
    plt.axis('off')
    plt.show()

# Accuracy vs. number of layers

In [None]:
def compute_accuracy(predictions, labels):
    """
    predictions: predicted class indices, shape (m,)
    labels: one-hot labels, shape (classes, m)
    Returns: fraction of samples whose prediction equals the label's argmax.
    """
    true_labels = np.argmax(labels, axis=0)
    matches = predictions == true_labels
    return np.mean(matches)

def plot_accuracy_vs_layers(X, Y, layer_range, point1, point2, epochs=50, learning_rate=0.01):
    """
    Trains one network per layer count and plots the resulting accuracy.
    X: input data (150528, m)
    Y: one-hot labels (200, m)
    layer_range: iterable of layer counts to try (e.g., range(2, 9))
    point1, point2: forwarded to train_model to size the layers
    epochs, learning_rate: training settings used for every network

    The accuracy is measured on the same X / Y the network was trained on,
    so the curve shows training accuracy.
    """
    # Materialise once: the counts are needed for both training and the
    # x-axis, and a one-shot iterable (e.g. a generator) would be exhausted
    # after the first pass.
    layer_counts = list(layer_range)
    accuracies = []

    for n_layers in layer_counts:
        print(f"Training with {n_layers} layers...")
        parameters = train_model(X, Y, n_layers, point1, point2, epochs=epochs, learning_rate=learning_rate, print_loss=False)
        preds = make_predictions(X, parameters)
        acc = compute_accuracy(preds, Y)
        print(f"Accuracy for {n_layers} layers: {acc:.4f}")
        accuracies.append(acc)

    plt.figure(figsize=(8, 5))
    plt.plot(layer_counts, accuracies, marker='o')
    plt.title("Accuracy vs Number of Layers")
    plt.xlabel("Number of Layers")
    plt.ylabel("Accuracy")
    plt.grid(True)
    plt.show()

# Main function for final model