# Data

In [1]:
# Install gdown if not already installed
!pip install -U gdown

# Download the zip file using gdown
!gdown --id 1nzC-FjL5NtoUu-G2pkj9M8r7E79thK4R -O file.zip

# Unzip the file
!unzip file.zip -d extracted_folder


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: extracted_folder/Comys_Hackathon5/Task_B/train/Tim_Norbeck/distortion/Tim_Norbeck_0001_noisy.jpg  
  inflating: extracted_folder/Comys_Hackathon5/Task_B/train/Tim_Norbeck/distortion/Tim_Norbeck_0001_rainy.jpg  
  inflating: extracted_folder/Comys_Hackathon5/Task_B/train/Tim_Norbeck/distortion/Tim_Norbeck_0001_resized.jpg  
  inflating: extracted_folder/Comys_Hackathon5/Task_B/train/Tim_Norbeck/distortion/Tim_Norbeck_0001_sunny.jpg  
  inflating: extracted_folder/Comys_Hackathon5/Task_B/train/Tim_Norbeck/Tim_Norbeck_0001.jpg  
   creating: extracted_folder/Comys_Hackathon5/Task_B/train/Tina_Brown/
   creating: extracted_folder/Comys_Hackathon5/Task_B/train/Tina_Brown/distortion/
  inflating: extracted_folder/Comys_Hackathon5/Task_B/train/Tina_Brown/distortion/Tina_Brown_0001_blurred.jpg  
  inflating: extracted_folder/Comys_Hackathon5/Task_B/train/Tina_Brown/distortion/Tina_Brown_0001_foggy.jpg  
  inflating: 

# Imports

In [2]:
import os
import cv2
import random
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from itertools import combinations
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Model Architecture

In [3]:
class WindowAttention(nn.Module):
    """Multi-head scaled dot-product self-attention over a token sequence.

    Every token attends to every other token of the same sample; no masking
    or window partitioning is performed inside this module.

    Args:
        dim: Channel size ``C`` of the input (and output) tokens.
        heads: Number of attention heads. Must be positive and divide ``dim``.

    Raises:
        ValueError: If ``heads`` is not positive or ``dim`` is not divisible
            by ``heads`` (the per-head split in ``forward`` would otherwise
            fail later with an opaque shape error).
    """

    def __init__(self, dim, heads=4):
        super().__init__()
        if heads <= 0 or dim % heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by a positive number of heads ({heads})"
            )
        self.heads = heads
        # Standard 1/sqrt(d_head) scaling of the attention logits.
        self.scale = (dim // heads) ** -0.5
        # A single projection produces queries, keys and values in one matmul.
        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
        self.to_out = nn.Linear(dim, dim)

    def forward(self, x):
        """Apply self-attention.

        Args:
            x: Tensor of shape ``(B, N, C)`` with ``C == dim``.

        Returns:
            Tensor of shape ``(B, N, C)``.
        """
        B, N, C = x.shape
        head_dim = C // self.heads

        # Split the fused projection into q, k, v and move the head axis forward.
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = (
            t.view(B, N, self.heads, head_dim).transpose(1, 2)  # (B, heads, N, head_dim)
            for t in qkv
        )

        dots = torch.matmul(q, k.transpose(-2, -1)) * self.scale  # (B, heads, N, N)
        attn = dots.softmax(dim=-1)

        out = torch.matmul(attn, v)  # (B, heads, N, head_dim)
        # Merge the heads back into the channel dimension.
        out = out.transpose(1, 2).reshape(B, N, C)  # (B, N, C)
        return self.to_out(out)

In [4]:
class MSFF_WinAttn_MobileNet(nn.Module):
    """MobileNetV2 classifier with multi-scale feature fusion and attention.

    Feature maps are tapped at four depths of the MobileNetV2 feature
    extractor. Each map is projected to 256 channels with a 1x1 convolution
    and global-average-pooled to a single vector. The four vectors form a
    4-token sequence that goes through LayerNorm -> WindowAttention ->
    LayerNorm; the flattened result feeds one linear classifier.

    Args:
        num_classes: Number of output logits.
        pretrained: If True (the default, matching the original behaviour),
            initialise the backbone with torchvision's ImageNet weights,
            which may trigger a download. Pass False when a complete
            checkpoint is loaded into the model right afterwards.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()

        # torchvision >= 0.13 replaced the `pretrained` flag with `weights`;
        # fall back to the legacy flag on older installations.
        if hasattr(models, "MobileNet_V2_Weights"):
            weights = models.MobileNet_V2_Weights.IMAGENET1K_V1 if pretrained else None
            mobilenet = models.mobilenet_v2(weights=weights).features
        else:
            mobilenet = models.mobilenet_v2(pretrained=pretrained).features

        # Feature stages (output channel counts in the comments)
        self.stage1 = mobilenet[:4]    # 24-d
        self.stage2 = mobilenet[4:7]   # 32-d
        self.stage3 = mobilenet[7:14]  # 96-d
        self.stage4 = mobilenet[14:]   # 1280-d

        # Reduce channels to 256 for fusion
        self.reduce1 = nn.Conv2d(24, 256, 1)
        self.reduce2 = nn.Conv2d(32, 256, 1)
        self.reduce3 = nn.Conv2d(96, 256, 1)
        self.reduce4 = nn.Conv2d(1280, 256, 1)

        # Attention across the four scale tokens, with LayerNorm before and after
        self.win_attn = nn.Sequential(
            nn.LayerNorm(256),
            WindowAttention(dim=256, heads=4),
            nn.LayerNorm(256)
        )

        self.classifier = nn.Linear(256 * 4, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch fed to the MobileNetV2 feature extractor.

        Returns:
            Tensor of shape ``(B, num_classes)``.
        """
        # Feature extraction: each stage consumes the previous stage's output
        x1 = self.stage1(x)
        x2 = self.stage2(x1)
        x3 = self.stage3(x2)
        x4 = self.stage4(x3)

        # Reduce + GAP: one 256-d descriptor per scale
        x1 = F.adaptive_avg_pool2d(self.reduce1(x1), 1).flatten(1)
        x2 = F.adaptive_avg_pool2d(self.reduce2(x2), 1).flatten(1)
        x3 = F.adaptive_avg_pool2d(self.reduce3(x3), 1).flatten(1)
        x4 = F.adaptive_avg_pool2d(self.reduce4(x4), 1).flatten(1)

        # Stack multi-scale features as a 4-token sequence
        feats = torch.stack([x1, x2, x3, x4], dim=1)  # (B, 4, 256)

        # Let the scales attend to each other
        feats = self.win_attn(feats)

        # Flatten and classify
        out = feats.flatten(1)  # (B, 4*256)
        return self.classifier(out)

In [5]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the two-class network and move its parameters to the chosen device.
model = MSFF_WinAttn_MobileNet(num_classes=2)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 114MB/s] 


# Testing

In [6]:
def get_albumentations_test_transform():
    """Build the deterministic preprocessing pipeline used for evaluation.

    Returns:
        An ``A.Compose`` that resizes to 224x224, normalizes every channel
        with mean 0.5 and std 0.5, and finishes with ``ToTensorV2``.
    """
    steps = [
        A.Resize(224, 224),
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ToTensorV2(),
    ]
    return A.Compose(steps)

class AlbumentationsDataset(ImageFolder):
    """``ImageFolder`` that applies a keyword-style (Albumentations) transform.

    Args:
        root: Dataset directory handed to ``ImageFolder``.
        transform: Optional callable invoked as ``transform(image=array)``
            whose result is indexed with ``'image'``. When falsy, the image
            is returned as a NumPy array without further processing.
    """

    def __init__(self, root, transform=None):
        # The parent is built without a transform of its own; the pipeline
        # is applied in __getitem__ once the image is a NumPy array.
        super().__init__(root)
        self.albumentations_transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        sample, target = super().__getitem__(index)
        pixels = np.array(sample)
        if not self.albumentations_transform:
            return pixels, target
        augmented = self.albumentations_transform(image=pixels)
        return augmented['image'], target

def test_model(model, model_path, test_folder, device='cuda' if torch.cuda.is_available() else 'cpu', batch_size=32):
    """Load a checkpoint into ``model`` and report metrics on an image folder.

    Args:
        model: Network whose ``state_dict`` layout matches the checkpoint.
            It is modified in place (weights loaded, moved to ``device``,
            switched to eval mode).
        model_path: Path of the saved ``state_dict``.
        test_folder: Directory in ``ImageFolder`` layout (one sub-folder per class).
        device: Device used for inference.
        batch_size: Number of images per forward pass.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and
        F1 are macro-averaged.
    """
    # NOTE(review): torch.load unpickles the file - only use trusted checkpoints.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    test_dataset = AlbumentationsDataset(
        test_folder,
        transform=get_albumentations_test_transform(),
    )
    # Fixed order: shuffling is pointless for evaluation.
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    all_preds, all_labels = [], []

    # No gradients are needed for inference.
    with torch.no_grad():
        for images, labels in test_loader:
            logits = model(images.to(device))
            # Predicted class = index of the largest logit in each row.
            all_preds.extend(logits.argmax(dim=1).cpu().numpy())
            all_labels.extend(labels.numpy())

    macro_kwargs = dict(average='macro', zero_division=0)
    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds, **macro_kwargs)
    rec = recall_score(all_labels, all_preds, **macro_kwargs)
    f1 = f1_score(all_labels, all_preds, **macro_kwargs)

    print(f"\n Test Accuracy: {acc:.4f}")
    print(f" Precision:     {prec:.4f}")
    print(f" Recall:        {rec:.4f}")
    print(f" F1 Score:      {f1:.4f}")

    return acc, prec, rec, f1

In [7]:
# Evaluate the Task A checkpoint on the validation split.
# NOTE(review): no visible cell creates this checkpoint - presumably it is
# uploaded to /content by hand before running; confirm.
model_path = "/content/best_model_Task_A.pth"
test_folder = "/content/extracted_folder/Comys_Hackathon5/Task_A/val"

acc, prec, rec, f1 = test_model(model, model_path=model_path, test_folder=test_folder)


 Test Accuracy: 0.9479
 Precision:     0.9293
 Recall:        0.8949
 F1 Score:      0.9108


In [8]:
# Evaluate the same Task A checkpoint on the training split, for comparison
# with the validation numbers. The "Test Accuracy" label printed by
# test_model refers to this training split here.
model_path = "/content/best_model_Task_A.pth"
test_folder = "/content/extracted_folder/Comys_Hackathon5/Task_A/train"

acc, prec, rec, f1 = test_model(model, model_path=model_path, test_folder=test_folder)


 Test Accuracy: 0.9709
 Precision:     0.9383
 Recall:        0.9546
 F1 Score:      0.9462
