# `Setup`

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import datetime as dt
from math import ceil
import gc

# Free any garbage left over from a previous run of this cell
gc.collect()
# Colab path: mount Drive and fetch the repo. Anything that fails in here
# (including the google.colab import outside Colab) drops into the local branch.
try:
    # Mounting Colab Drive if possible
    from google.colab import drive
    drive.mount('/content/drive')

    # Cloning repo for colab
    # NOTE(review): when the cwd already contains 'aml_itu' this still runs
    # `%cd aml_itu/` relative to it, and `git checkout -b` creates a new branch
    # named sparse_filtering while the clone branch below is sparse_test - confirm intent.
    if 'aml_itu' in os.getcwd():
        %cd aml_itu/
        !git pull https://github.com/RasKrebs/aml_itu
        !git checkout -b sparse_filtering
    else:
        !git clone -b sparse_test https://github.com/RasKrebs/aml_itu
        %cd aml_itu/
    # Flag read later by the config cell
    os.environ["COLAB"] = "True"

# NOTE(review): bare except also swallows errors from drive.mount / the magics,
# not only a missing google.colab package - consider narrowing it.
except:
    # Changing directory into aml_itu
    # (rebuilds the path up to the first 'aml_itu/' component of the absolute cwd)
    if os.getcwd().split('/')[-1] != 'aml_itu': os.chdir(os.path.abspath('.').split('aml_itu/')[0]+'aml_itu')
    !git pull origin main --ff-only
    os.environ["COLAB"] = "False"

# Utils Import
from utils.helpers import *
from utils.StatefarmPytorchDataset import StateFarmDataset


# Torch
import torch
from torch import nn
import torchvision
torchvision.disable_beta_transforms_warning()
import torchvision.transforms as T
from torchvision.transforms import v2
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

# Install torchinfo, import if it's available
try:
  import torchinfo
except:
  !pip install torchinfo
  import torchinfo

from torchinfo import summary


# Printing current working directory
print(os.getcwd())

# Setting up device: prefer CUDA, then Apple MPS, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print('MPS device found.')
else:
    # `device` must always be bound: later cells call `.to(device)` unconditionally,
    # which raised NameError on machines without an accelerator.
    device = torch.device("cpu")
    print("No GPU available, using CPU instead")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
fatal: destination path 'aml_itu' already exists and is not an empty directory.
/content/aml_itu


### `Config`

In [None]:
# Folder / file prefix used by save_model for checkpoints
MODEL_NAME = 'TinyVGG_best'

# Loading the config file. The setup cell stores the string "True" or "False"
# in the COLAB environment variable; compare the string instead of eval()-ing
# it so that no environment content is ever executed as code. A missing
# variable is treated as "not on Colab".
config = load_config(os.environ.get("COLAB", "False") == "True")


# Training Images
train_img = config['dataset']['images']['train']

# Outputting config
config

In [None]:
def save_model(model, model_name, epoch):
    """Save the model's ``state_dict`` to a timestamped ``.pt`` file.

    The file is written to ``<config['outputs']['path']>/<model_name>/`` and is
    named ``<model_name>_<YYYYmmdd_HHMMSS>_epoch_<epoch+1>.pt``.

    Args:
        model: Object exposing ``state_dict()``.
        model_name: Used for both the sub-directory and the file-name prefix.
        epoch: Zero-based epoch index; written one-based into the file name.
    """
    # Directory collecting every checkpoint of this model
    out_dir = os.path.join(config['outputs']['path'], model_name)

    # exist_ok=True creates the directory only when it is missing, replacing
    # the separate exists() check (and its check-then-create race).
    os.makedirs(out_dir, exist_ok=True)

    # Model name, with timestamp and one-based epoch
    timestamp = dt.datetime.now().strftime('%Y%m%d_%H%M%S')
    file = f'{model_name}_{timestamp}_epoch_{epoch+1}'

    # Save model
    torch.save(model.state_dict(), os.path.join(out_dir, f'{file}.pt'))

## Sparse Filtering

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch
import gc
gc.collect()
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
from tqdm import tqdm
import time

# Sparse Filter Class
class SparseFilter(nn.Module):
    """Sparse-filtering objective built on one learnable projection matrix.

    ``forward`` returns the scalar objective (the sum of the doubly
    L2-normalised soft-absolute activations), not transformed features, so the
    returned value can be minimised directly as a loss.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Learnable (input_dim, output_dim) matrix, standard normal scaled by 0.5
        self.weights = nn.Parameter(0.5 * torch.randn(input_dim, output_dim))
        # Small constant keeping the square roots away from zero
        self.epsilon = 1e-8

    def soft_abs(self, value):
        """Smooth absolute value: ``sqrt(value**2 + epsilon)``."""
        return (value.pow(2) + self.epsilon).sqrt()

    def forward(self, x):
        """Return the summed, twice-normalised activations of ``x @ weights``.

        Assumes ``x`` is 2-D with ``input_dim`` columns - TODO confirm with caller.
        """
        activations = self.soft_abs(x @ self.weights)

        # First normalisation: divide by the L2 norm taken over dimension 0
        norm_over_dim0 = (activations.pow(2).sum(dim=0) + self.epsilon).sqrt()
        scaled = activations / norm_over_dim0

        # Second normalisation: divide by the L2 norm taken over dimension 1
        norm_over_dim1 = (scaled.pow(2).sum(dim=1, keepdim=True) + self.epsilon).sqrt()
        return (scaled / norm_over_dim1).sum()


# Preprocessing for sparse filtering: resize to 126x168, convert to one
# grayscale channel, normalise, then flatten each image into a single vector.
pretransform = v2.Compose([
      v2.ToPILImage(),
      v2.Resize((126, 168), antialias=True),
      v2.Grayscale(num_output_channels=1),
      v2.ToTensor(),
      v2.Normalize(mean=[0.485], std=[0.229]),
      # Flatten is the last step. The former trailing v2.ToTensor() received an
      # already flattened tensor, so it had nothing left to convert and was removed.
      v2.Lambda(lambda x: torch.flatten(x)),
    ])

from torch.utils.data import Subset
import torch
from torch.utils.data import DataLoader
import random
from torch.utils.data import Subset
import torch
from torch.utils.data import DataLoader
import random

# Load the training split; every image is passed through `pretransform`
train_data = StateFarmDataset(config,
                              transform=pretransform,
                              split='train',
                              target_transform=None)

# Generate random indices for the subset
subset_size = 10000
indices = torch.randperm(len(train_data)).tolist()
subset_indices = indices[:subset_size]

# Create a subset
train_subset = Subset(train_data, subset_indices)

# Create a DataLoader for the subset. The loader previously wrapped the full
# `train_data`, which left `train_subset` unused and ignored `subset_size`.
train_subset_loader = DataLoader(train_subset, batch_size=32, num_workers=4, shuffle=True)


In [None]:
def train_step(model, dataloader, optimizer, device, accumulation_steps=4):
    """Run one epoch in which the model output itself is the loss.

    Gradients are accumulated over ``accumulation_steps`` batches before the
    optimizer is stepped; a final step is taken on the last batch so that no
    accumulated gradient is dropped.

    Args:
        model: Callable returning a scalar loss tensor for a batch of inputs.
        dataloader: Sized iterable yielding ``(inputs, labels)`` pairs; the
            labels are ignored.
        optimizer: Optimizer holding the model's parameters.
        device: Device the inputs are moved to.
        accumulation_steps: Number of batches per optimizer step.

    Returns:
        Mean of the un-scaled per-batch losses over the epoch.
    """
    running_loss = 0

    # Start the epoch from clean gradients
    optimizer.zero_grad()

    for step, batch in enumerate(dataloader, start=1):
        # Second element (labels) is unused by this objective
        imgs, _ = batch
        loss = model(imgs.to(device))

        # Scale before backward so the accumulated gradient is an average
        (loss / accumulation_steps).backward()

        # Step when the accumulation window is full or on the very last batch
        window_full = step % accumulation_steps == 0
        if window_full or step == len(dataloader):
            optimizer.step()
            optimizer.zero_grad()

        # Track the un-scaled loss for reporting
        running_loss += loss.item()

    return running_loss / len(dataloader)

In [None]:
def train_sparse_filter(model, train_dataloader, optimizer, epochs, device):
    """Train ``model`` for ``epochs`` epochs and return the loss history.

    After each epoch a checkpoint is written through ``save_model`` under
    ``MODEL_NAME``, and every fifth epoch the history is plotted. Only
    ``train_loss`` is ever filled; the other history lists stay empty.
    """
    # Same keys as the supervised history so visualize_training can be reused
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(epochs):
        print(f'\nEpoch {epoch+1} of {epochs} started...')

        # Training mode, then one full pass over the data
        model.train(True)
        train_loss = train_step(model, train_dataloader, optimizer, device)

        print(f"Epoch {epoch+1} of {epochs} - Train loss: {train_loss:.5f}")

        # Checkpoint after every epoch
        save_model(model, MODEL_NAME, epoch)

        # Record the epoch's loss
        history['train_loss'].append(train_loss)

        # Plot on epochs 5, 10, 15, ...
        is_fifth_epoch = (epoch + 1) % 5 == 0
        if is_fifth_epoch:
            visualize_training(history, epochs)

    return history

In [None]:
def visualize_training(history, num_epochs=50):
    """Plot loss (left) and accuracy (right) curves stored in ``history``.

    Args:
        history: Mapping with the keys ``train_loss``, ``val_loss``,
            ``train_acc`` and ``val_acc``, each a sequence of per-epoch values.
        num_epochs: Upper limit of the x-axis on both panels.
    """
    # Generate Figure
    fig, axs = plt.subplots(1, 2, figsize=(12, 5))

    # One entry per panel: axis, y-label and the (history key, legend label) curves
    panels = (
        (axs[0], 'Cross Entropy Loss', (('train_loss', 'Train Loss'), ('val_loss', 'Validation Loss'))),
        (axs[1], 'Accuracy', (('train_acc', 'Train Accuracy'), ('val_acc', 'Validation Accuracy'))),
    )

    for ax, y_label, curves in panels:
        for key, legend in curves:
            # x is simply the epoch index of each stored value
            sns.lineplot(y=history[key], x=list(range(len(history[key]))), ax=ax, label=legend)
        ax.set_ylabel(y_label)
        ax.set_xlabel('Epochs')
        ax.set_xlim(0, num_epochs)

    # Show plot
    plt.show()

In [None]:
# Define and train the sparse filter model
# 126*168 equals the number of values in one image after `pretransform`
# (resize to 126x168, one grayscale channel, flattened).
input_dim = 126*168
output_dim = 126*168  # Desired output dimension (square weight matrix)
sparse_filter_model = SparseFilter(input_dim, output_dim).to(device)  # Move model to the selected device
learning_rate = 0.01
optimizer = optim.Adam(sparse_filter_model.parameters(), lr=learning_rate)
epochs = 50



# Runs the full training loop; `results` holds the returned history dict
results = train_sparse_filter(model=sparse_filter_model,
                train_dataloader=train_subset_loader,
                optimizer=optimizer,
                epochs=epochs,
                device=device)

In [None]:
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

# Assuming StateFarmDataset and SparseFilter are already defined
# Assuming config and target_transform are already set

# Load the dataset without transformations
#train_data = StateFarmDataset(config,
 #                             transform=None,  # No image transformations
 #                             split='train',
 #                             target_transform=None,
 #                             apply_sparse_filtering=False)


# Prepare data for sparse filtering (flatten the data)
# Note: Ensure that your dataset __getitem__ method returns the raw data without transformations
#flattened_data = []
#for data, _ in train_loader:
#    data = data.float()  # Convert data to float
#    data_flattened = data.reshape(-1).to(device)  # Move data to GPU
#    flattened_data.append(data_flattened)

# NOTE(review): these overwrite the 126*168 values used to build the sparse
# filter above; nothing later in the visible notebook reads them - confirm
# whether this leftover cell can be removed.
input_dim = 168*224 #flattened_data.shape[1] # [15646, 921600]
output_dim = 168*224  # Set the desired output dimension
##sparse_filter_model = train_sparse_filter(train_loader, input_dim, output_dim)

In [None]:
#optimizer = optim.Adam(sparse_filter_model.parameters(), lr=0.01, weight_decay=1e-5)  # L2 regularization
#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
#for epoch in range(50):
#    for data, _ in train_loader:
#        data = data.float()  # Convert data to float
#        data_flattened = data.reshape(-1).to(device)  # Move data to GPU
#
#        optimizer.zero_grad()
#        loss = sparse_filter_model(data_flattened)  # Forward pass on the batch
#        loss.backward()  # Compute gradients
#        optimizer.step()  # Update weights
#    scheduler.step()

#    print(f'Epoch {epoch}, Loss: {loss.item()}')

In [None]:
# Apply Sparse Filtering Transformation to Data
#with torch.no_grad():
#transformed_X = torch.matmul(X_flattened, model.weights).detach()

# Now you can use transformed_X as input to your TinyVGGish model or other models

# Note: Adjust input_dim, output_dim, and num_epochs according to your needs.

In [None]:
# Save the model weights. The target folder is created first so that
# torch.save does not fail when it does not exist yet.
weights_dir = './outputs/SparseFilterWeights'
os.makedirs(weights_dir, exist_ok=True)
torch.save(sparse_filter_model.state_dict(), os.path.join(weights_dir, 'sparse_filter_model_try2.pth'))

In [None]:

# Extra copy of the weights on Google Drive. The setup cell mounts Drive at
# /content/drive, so the previous relative './drive/MyDrive' neither pointed at
# the mount nor named a file. Only attempted on Colab, where the mount exists.
# NOTE(review): the file name mirrors the local copy - adjust if another name is wanted.
if os.environ.get("COLAB") == "True":
    torch.save(sparse_filter_model.state_dict(),
               '/content/drive/MyDrive/sparse_filter_model_try2.pth')

## `TinyVGG`

In [None]:
# IMG Transformations
# 'train' applies a random resized crop (90-100% of the image area) to 168x224;
# 'val+test' only resizes to 168x224. Both convert to float32 with scaling.
# NOTE(review): the 'val+test' pipeline uses the v1 `T.Resize` inside a v2
# Compose while everything else is v2 - confirm the mix is intentional.
augmentations = {
    'train': v2.Compose([
    # v2.RandomRotation(degrees=30),
    v2.RandomResizedCrop((168, 224), antialias=True, scale=(0.9, 1)),
    #v2.RandomHorizontalFlip(p=0.5),
    v2.ToDtype(torch.float32, scale=True)]),
    'val+test': v2.Compose([
    T.Resize((168, 224), antialias=True),
    v2.ToDtype(torch.float32, scale=True)])}

# Target Transformations (removes every 'c' from the label string and wraps
# the remaining integer in a tensor, e.g. 'c3' -> tensor(3))
target_transform = T.Lambda(lambda y: torch.tensor(int(y.replace('c', ''))))

In [None]:
# Creating the training dataset (random-crop augmentation)
train_data = StateFarmDataset(config,
                              transform=augmentations['train'],
                              split='train',
                              target_transform=target_transform,
                              apply_sparse_filtering=True)

print(f'Lenght of train data: {len(train_data)}')

# Creating the validation dataset (resize only)
val_data = StateFarmDataset(config,
                            transform=augmentations['val+test'],
                            split='val',
                            target_transform=target_transform,
                            apply_sparse_filtering=True)

print(f'Lenght of val data: {len(val_data)}')

# Creating the test dataset (resize only)
test_data = StateFarmDataset(config,
                            split='test',
                            transform=augmentations['val+test'],
                            target_transform=target_transform,
                            apply_sparse_filtering=True)

# This line reports the test split; it was previously labelled "val"
print(f'Lenght of test data: {len(test_data)}')

#### `Model`

In [None]:
# TinyVGG inspired
# First Convolution Blocks With BatchNorm, MaxPool and Dropout
class ConvBlock(nn.Module):
    """Two (Conv2d -> ReLU -> BatchNorm2d) stages, then MaxPool2d and Dropout.

    The keyword names ``in_channesl`` / ``out_channls`` are misspelled but are
    kept unchanged because callers pass them by keyword.
    """
    def __init__(self, in_channesl, out_channls, kernel_size = (3, 3), stride=1, pool_kernel = (2,2), dropout_rate = .2):
        super(ConvBlock, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(in_channels=in_channesl,
                              out_channels=out_channls,
                              kernel_size=kernel_size,
                              stride=stride),
            nn.ReLU(True),
            nn.BatchNorm2d(out_channls),
            nn.Conv2d(in_channels=out_channls,
                              out_channels=out_channls,
                              kernel_size=kernel_size,
                              stride=stride),
            nn.ReLU(True),
            nn.BatchNorm2d(out_channls),
            nn.MaxPool2d(kernel_size = pool_kernel),
            nn.Dropout(dropout_rate))

    def forward(self, x):
        return self.main(x)

# Added couple extra fully connected layers
class FullyConnected(nn.Module):
    """Linear -> ReLU -> BatchNorm1d -> Dropout.

    The keyword name ``out_featuers`` is misspelled but kept for callers.
    """
    def __init__(self, in_features, out_featuers, dropout_rate) -> None:
        super(FullyConnected, self).__init__()

        self.main = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=out_featuers),
            nn.ReLU(True),
            nn.BatchNorm1d(out_featuers),
            nn.Dropout(dropout_rate),)

    def forward(self, x):
        return self.main(x)


class TinyVGGish(torch.nn.Module):
    """TinyVGG Inspired Model with Added complexity and Regularization.

    Three ``ConvBlock``s (``filters``, ``2*filters``, ``4*filters`` channels),
    a flatten, and three ``FullyConnected`` layers (512, 128, ``num_classes``).

    The width of the first dense layer is derived from the convolutional stack
    and ``input_size`` instead of being hard-coded, so non-default ``filters``,
    ``kernel_size``, ``stride`` or ``pool_kernel_size`` no longer break the
    forward pass. With the defaults it evaluates to 52224, as before.

    Args:
        filters: Output channels of the first conv block.
        num_classes: Size of the final layer.
        kernel_size: Convolution kernel size used in every block.
        stride: Convolution stride used in every block.
        in_channels: Channels of the input images.
        pool_kernel_size: Max-pool kernel size used in every block.
        dense_dropout_rate: Dropout rate of the fully connected layers.
        conv_dropout_rate: Dropout rate of the conv blocks.
        input_size: ``(height, width)`` of the images the model will receive.
    """
    def __init__(self,
                 filters = 32,
                 num_classes = 10,
                 kernel_size = 3,
                 stride = 1,
                 in_channels = 3,
                 pool_kernel_size = 2,
                 dense_dropout_rate = .25,
                 conv_dropout_rate = .25,
                 input_size = (168, 224)):
        super(TinyVGGish, self).__init__()

        # Convolutional feature extractor
        conv_blocks = [
            ConvBlock(in_channesl=in_channels, out_channls=filters, kernel_size=kernel_size, stride=stride, pool_kernel=pool_kernel_size, dropout_rate=conv_dropout_rate),
            ConvBlock(in_channesl=filters, out_channls=filters*2, kernel_size=kernel_size, stride=stride, pool_kernel=pool_kernel_size, dropout_rate=conv_dropout_rate),
            ConvBlock(in_channesl=filters*2, out_channls=filters*4, kernel_size=kernel_size, stride=stride, pool_kernel=pool_kernel_size, dropout_rate=conv_dropout_rate),
        ]

        # Number of values per sample after the conv stack and flatten
        flat_features = self._flattened_size(conv_blocks, in_channels, input_size)

        # Same layer order (and therefore the same state_dict keys) as before.
        # NOTE(review): the last FullyConnected also applies ReLU, BatchNorm and
        # Dropout to the class scores - confirm this is intended for the logits.
        self.main = nn.Sequential(
            *conv_blocks,
            nn.Flatten(),
            FullyConnected(in_features=flat_features, out_featuers=512, dropout_rate=dense_dropout_rate),
            FullyConnected(in_features=512, out_featuers=128, dropout_rate=dense_dropout_rate),
            FullyConnected(in_features=128, out_featuers=num_classes, dropout_rate=dense_dropout_rate),
            )

    @staticmethod
    def _flattened_size(conv_blocks, in_channels, input_size):
        """Return the flattened feature count the conv stack yields for one image."""
        probe = nn.Sequential(*conv_blocks, nn.Flatten())
        # eval + no_grad: the dummy pass must not touch BatchNorm statistics
        probe.eval()
        with torch.no_grad():
            flat = probe(torch.zeros(1, in_channels, *input_size)).shape[1]
        # Restore the default training mode of the shared blocks
        probe.train()
        return flat

    def forward(self, x):
        # Pass the data through the convolutional blocks and the dense head
        x = self.main(x)
        return x

In [None]:
# Initialize the TinyVGGish model with 10 output classes
model = TinyVGGish(num_classes = 10)

In [None]:
# Batch size comes from the config; the epoch count is fixed here instead
batch_size = config['modeling_params']['batch_size']
epochs = 50 # config['modeling_params']['epochs']
seed = 42  # passed to torch.manual_seed in the training setup cell

# One loader per split
# NOTE(review): validation and test loaders are shuffled as well - confirm this is wanted.
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [None]:
# Show the batch size read from the config
print(batch_size)

In [None]:
# Model summary
# Pull one batch only to obtain the real input shape for torchinfo
x, y = next(iter(train_dataloader))

summary(model, input_size=x.shape)

#### `Training Methods`

In [None]:
def visualize_training(history, num_epochs=50):
    """Plot loss (left) and accuracy (right) curves stored in ``history``.

    Args:
        history: Mapping with the keys ``train_loss``, ``val_loss``,
            ``train_acc`` and ``val_acc``, each a sequence of per-epoch values.
        num_epochs: Upper limit of the x-axis on both panels.
    """
    # Generate Figure
    fig, axs = plt.subplots(1, 2, figsize=(12, 5))

    # One entry per panel: axis, y-label and the (history key, legend label) curves
    panels = (
        (axs[0], 'Cross Entropy Loss', (('train_loss', 'Train Loss'), ('val_loss', 'Validation Loss'))),
        (axs[1], 'Accuracy', (('train_acc', 'Train Accuracy'), ('val_acc', 'Validation Accuracy'))),
    )

    for ax, y_label, curves in panels:
        for key, legend in curves:
            # x is simply the epoch index of each stored value
            sns.lineplot(y=history[key], x=list(range(len(history[key]))), ax=ax, label=legend)
        ax.set_ylabel(y_label)
        ax.set_xlabel('Epochs')
        ax.set_xlim(0, num_epochs)

    # Show plot
    plt.show()

In [None]:
def train_step(model, dataloader, loss_fn, optimizer, device):
    """Run one supervised training epoch.

    Args:
        model: Module mapping a batch of inputs to class scores.
        dataloader: Sized iterable yielding ``(inputs, labels)`` pairs.
        loss_fn: Callable ``loss_fn(prediction, labels)`` returning a scalar tensor.
        optimizer: Optimizer holding the model's parameters.
        device: Device inputs and labels are moved to.

    Returns:
        Tuple ``(mean batch loss, mean batch accuracy)`` over the epoch.
    """
    loss_total = 0
    acc_total = 0

    for imgs, labels in dataloader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Fresh gradients for every batch
        optimizer.zero_grad()

        # Forward, loss, backward, update
        prediction = model(imgs)
        loss = loss_fn(prediction, labels)
        loss.backward()
        optimizer.step()

        # Accumulate batch loss and the fraction of correct top-1 predictions
        loss_total += loss.item()
        hits = prediction.argmax(1) == labels
        acc_total += hits.type(torch.float).mean().item()

    n_batches = len(dataloader)
    return loss_total / n_batches, acc_total / n_batches


In [None]:
def validation(model, dataloader, loss_fn, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    The caller is responsible for switching the model to eval mode.

    Returns:
        Tuple ``(mean batch loss, mean batch accuracy)``.
    """
    loss_total = 0
    acc_total = 0

    # No autograd graph is needed for evaluation
    with torch.no_grad():
        for imgs, labels in dataloader:
            imgs = imgs.to(device)
            labels = labels.to(device)

            # Forward pass and batch loss
            prediction = model(imgs)
            loss_total += loss_fn(prediction, labels).item()

            # Fraction of correct top-1 predictions in this batch
            hits = prediction.argmax(1) == labels
            acc_total += hits.type(torch.float).mean().item()

    n_batches = len(dataloader)
    return loss_total / n_batches, acc_total / n_batches

In [None]:
def save_model(model, model_name, epoch):
    """Save the model's ``state_dict`` to a timestamped ``.pt`` file.

    The file is written to ``<config['outputs']['path']>/<model_name>/`` and is
    named ``<model_name>_<YYYYmmdd_HHMMSS>_epoch_<epoch+1>.pt``.

    Args:
        model: Object exposing ``state_dict()``.
        model_name: Used for both the sub-directory and the file-name prefix.
        epoch: Zero-based epoch index; written one-based into the file name.
    """
    # Directory collecting every checkpoint of this model
    out_dir = os.path.join(config['outputs']['path'], model_name)

    # exist_ok=True creates the directory only when it is missing, replacing
    # the separate exists() check (and its check-then-create race).
    os.makedirs(out_dir, exist_ok=True)

    # Model name, with timestamp and one-based epoch
    timestamp = dt.datetime.now().strftime('%Y%m%d_%H%M%S')
    file = f'{model_name}_{timestamp}_epoch_{epoch+1}'

    # Save model
    torch.save(model.state_dict(), os.path.join(out_dir, f'{file}.pt'))

In [None]:
class EarlyStopper:
    """Early Stopping Class. Copied from https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch

    Tracks the lowest validation loss seen so far and counts the calls whose
    loss exceeds that minimum by more than ``min_delta``.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        """Update the tracker and return True once ``patience`` is exhausted."""
        # New best loss: remember it and restart the count
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            if self.counter != 0:
                print('Early Stopping Counter Reset')
            self.counter = 0
            return False

        # Within the tolerated margin of the best loss: nothing changes
        exceeded = validation_loss > (self.min_validation_loss + self.min_delta)
        if not exceeded:
            return False

        # Clearly worse than the best loss: count it
        self.counter += 1
        print(f'Early Stopping Counter {self.counter} of {self.patience}')
        return self.counter >= self.patience

In [None]:
def train(model, train_dataloader, validation_dataloader, optimizer, loss_fn, epochs, device, scheduler, early_stopping):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Each epoch runs ``train_step`` and ``validation``, steps the learning-rate
    scheduler, saves a checkpoint whenever the validation loss beats every
    earlier epoch, and plots the history every fifth epoch. Training stops
    early (after one more checkpoint and plot) when
    ``early_stopping.early_stop(val_loss)`` returns True.

    Args:
        model: Network to train.
        train_dataloader: Loader for the training split.
        validation_dataloader: Loader for the validation split.
        optimizer: Optimizer holding the model's parameters.
        loss_fn: Loss callable ``loss_fn(prediction, labels)``.
        epochs: Maximum number of epochs.
        device: Device batches are moved to.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        early_stopping: Object with an ``early_stop(val_loss) -> bool`` method.

    Returns:
        Dict with the lists ``train_loss``, ``train_acc``, ``val_loss`` and ``val_acc``.
    """
    # History
    history = dict(train_loss=[],
                   train_acc=[],
                   val_loss=[],
                   val_acc=[])

    # Loop through epochs
    for epoch in range(epochs):
        print(f'\nEpoch {epoch+1} of {epochs} started...')

        # Set model to train mode and do pass over data
        model.train(True)
        train_loss, train_acc = train_step(model, train_dataloader, loss_fn, optimizer, device)

        # Set model to eval and do pass over validation data
        model.eval()
        val_loss, val_acc = validation(model, validation_dataloader, loss_fn, device)

        print(f"Epoch {epoch+1} of {epochs} - Train loss: {train_loss:.5f} - Train acc: {train_acc:.5f} - Val loss: {val_loss:.5f} - Val acc: {val_acc:.5f}")

        # lr Scheduler step. Only ReduceLROnPlateau takes the monitored metric;
        # for other schedulers the positional argument of step() is the
        # (deprecated) epoch index, so passing the loss there would set a
        # wrong learning rate.
        if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(val_loss)
        else:
            scheduler.step()

        # Save model if val loss is lower than previous lowest
        # (the current epoch is not in history yet, so this compares against earlier epochs only)
        if val_loss < min(history['val_loss'], default=1e10):
            print(f"Saving model with new best val_loss: {val_loss:.5f}")

            # Save model
            save_model(model, MODEL_NAME, epoch)

        # Save train and val loss/acc
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # Visualize every 5th epoch
        if (epoch + 1) % 5 == 0:
            visualize_training(history, epochs)

        # Stop once the validation loss has failed to improve for too long
        if early_stopping.early_stop(val_loss):
            print(f"Epoch {epoch+1} of {epochs} - Early stopping")
            print('Saving final model, with loss: ', val_loss)
            save_model(model, MODEL_NAME, epoch)
            visualize_training(history, epoch+1)
            break

    return history

In [None]:
# Setting seed and general training setup
torch.manual_seed(seed)
epochs = 50
learning_rate = 0.01
momentum = .9
weight_decay = 1e-5
# The SGD call below needs this name; it used to be commented out (and
# misspelled), which raised NameError.
nesterov = True

# Creating Model Object
model = TinyVGGish(num_classes = 10, dense_dropout_rate=.5).to(device)

# Optimizer and scheduler
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum, nesterov=nesterov)
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# NOTE(review): the `verbose` argument was dropped - believed deprecated in
# newer PyTorch releases; read the rate via scheduler.get_last_lr() if needed.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max = epochs,
                                                       eta_min = 0)

# Visualizing scheduler effect on learning rate.
# The preview runs on a throwaway optimizer/scheduler pair: stepping the real
# scheduler 100 times here would change the learning rate training starts with.
preview_optimizer = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=learning_rate)
preview_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(preview_optimizer,
                                                               T_max = epochs,
                                                               eta_min = 0)
lrs = []
for _ in range(100):
    # Optimizer first, then scheduler - the order PyTorch expects
    preview_optimizer.step()
    preview_scheduler.step()
    lrs.append(preview_scheduler.get_last_lr()[0])

plt.plot(lrs)

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Early stopping
early_stopping = EarlyStopper(patience=7, min_delta=.03)

##### `Training`

In [None]:
# Run the training loop; `results` receives the history dict returned by
# train() (train/val loss and accuracy per epoch)
results = train(model=model,
                train_dataloader=train_dataloader,
                validation_dataloader=val_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=epochs,
                device=device,
                scheduler=scheduler,
                early_stopping=early_stopping)

### Testing on Test Images

In [None]:
import random
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image

# File names found in the test image folder
test_imgs = os.listdir(config['dataset']['images']['test'])
# Pick one at random. random.randint(0, len(test_imgs)) includes the upper
# bound, so it could produce an index one past the end and raise IndexError.
test_img = random.choice(test_imgs)

In [None]:
# Rebuild the test loader with a fixed batch size of 64 (shuffled, so the
# batch inspected below differs from run to run)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [None]:
# Predict a single batch from the test loader.
# eval() switches dropout / batch-norm to inference behaviour and no_grad()
# avoids building an autograd graph that is never used.
model.eval()
with torch.no_grad():
    # next(iter(...)) fetches exactly one batch (replaces the for/break loop)
    imgs, labels = next(iter(test_dataloader))
    imgs, labels = imgs.to(device), labels.to(device)

    # Forward pass
    prediction = model(imgs)

In [None]:
# Predicted class index for the first image of the batch
prediction.argmax(1)[0].item()

In [None]:
# Position inside the batch to inspect
# NOTE(review): raises IndexError if the batch holds fewer than 17 items.
index = 16
# Class ids are turned back into 'c<N>' keys to look up the readable name
print('Prediction:', config['dataset']['class_mapping']['c' + str(prediction.argmax(1)[index].item())])
print('True:', config['dataset']['class_mapping']['c' + str(labels[index].item())])

# Move the first dimension last for imshow (assumes channel-first images - TODO confirm)
plt.imshow(imgs[index].cpu().permute(1, 2, 0))