# DATA

# Imports

In [None]:
import os
from torchvision import datasets
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch
from torch import nn
from torch.optim import Adam
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import os
from torchvision.datasets import ImageFolder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



  check_for_updates()


# Data Loading

**Augmentations Applied**

- Resize to 224×224 pixels  
- Random horizontal flip (50% chance)  
- Random rotation (up to ±15°, 50% chance)  
- Apply motion blur (30% chance)  
- Adjust brightness and contrast randomly (50% chance)  
- Add fog effect (30% chance)  
- Add rain effect (30% chance)  
- Add shadow effect (30% chance)  
- Add sun flare effect (20% chance)  
- Add Gaussian noise (30% chance)  
- Apply CLAHE (Contrast Limited Adaptive Histogram Equalization, 20% chance)  
- Normalize image (mean: 0.5, std: 0.5 for each channel)  
- Convert image to tensor for model input


In [None]:
# Wrapper to apply Albumentations transforms
class AlbumentationsTransform:
    """Adapt an Albumentations-style pipeline to a single-argument callable.

    The wrapped pipeline is invoked with the keyword argument ``image=...``
    and is expected to return a mapping; only its ``'image'`` entry is
    handed back to the caller.
    """

    def __init__(self, transform):
        # Pipeline to delegate to; called as ``transform(image=array)``.
        self.transform = transform

    def __call__(self, img):
        """Convert ``img`` to a NumPy array, run the pipeline, return the image."""
        pixels = np.array(img)
        result = self.transform(image=pixels)
        return result['image']

# Locations of the two image-folder splits.
train_dir = '/kaggle/input/comys-hackathon5-2025/Comys_Hackathon5/Task_A/train'
val_dir = '/kaggle/input/comys-hackathon5-2025/Comys_Hackathon5/Task_A/val'

# Training pipeline: resize, a chain of randomly applied augmentations,
# then normalisation and conversion to a tensor. The order of the steps is
# significant and is kept exactly as listed.
train_pipeline = A.Compose([
    A.Resize(224, 224),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    A.MotionBlur(blur_limit=7, p=0.3),
    A.RandomBrightnessContrast(brightness_limit=0.4, contrast_limit=0.4, p=0.5),
    A.RandomFog(p=0.3),
    A.RandomRain(p=0.3),
    A.RandomShadow(p=0.3),
    A.RandomSunFlare(p=0.2),
    A.GaussNoise(p=0.3),
    A.CLAHE(p=0.2),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2(),
])
train_transform = AlbumentationsTransform(train_pipeline)

# Validation pipeline: deterministic preprocessing only (no augmentation).
val_pipeline = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2(),
])
val_transform = AlbumentationsTransform(val_pipeline)

# One dataset per split, each paired with its own transform.
train_data = datasets.ImageFolder(train_dir, transform=train_transform)
val_data = datasets.ImageFolder(val_dir, transform=val_transform)

# Only the training loader shuffles; both use batches of 32 and 2 workers.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, num_workers=2)


# Model Architecture

## WindowAttention


*   Window attention explicitly attends within local windows (e.g., 7×7 patches).

*   This allows the model to learn richer relationships among local features,
    such as:

    -   Part configurations (e.g., corners, edges)

    -   Short-range dependencies that convolutions might not fully capture.

*   In this notebook the module is applied to the four pooled multi-scale
    feature vectors, so the "window" is the set of four scale tokens.













In [None]:
class WindowAttention(nn.Module):
    """Multi-head scaled dot-product self-attention over a token sequence.

    Every token of the input attends to every token of the same sequence;
    no mask and no positional information is applied.

    Args:
        dim: Channel size ``C`` of each token. Must be divisible by ``heads``.
        heads: Number of attention heads.

    Raises:
        ValueError: If ``heads`` is not positive or does not divide ``dim``.
    """

    def __init__(self, dim, heads=4):
        super().__init__()
        # Fail at construction time instead of with an opaque ``view`` error
        # on the first forward pass.
        if heads <= 0 or dim % heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by a positive number of heads ({heads})"
            )
        self.heads = heads
        self.head_dim = dim // heads
        # Standard 1/sqrt(d_head) scaling of the attention logits.
        self.scale = self.head_dim ** -0.5
        # A single projection produces queries, keys and values at once.
        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
        self.to_out = nn.Linear(dim, dim)

    def forward(self, x):
        """Apply self-attention.

        Args:
            x: Tensor of shape ``(B, N, C)`` with ``C == dim``.

        Returns:
            Tensor of shape ``(B, N, C)``.
        """
        B, N, C = x.shape
        # Split the fused projection and move the head axis forward:
        # each of q, k, v becomes (B, heads, N, head_dim).
        q, k, v = (
            t.view(B, N, self.heads, self.head_dim).transpose(1, 2)
            for t in self.to_qkv(x).chunk(3, dim=-1)
        )

        scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale  # (B, heads, N, N)
        attn = scores.softmax(dim=-1)

        out = torch.matmul(attn, v)  # (B, heads, N, head_dim)
        # Merge the heads back into the channel axis.
        out = out.transpose(1, 2).reshape(B, N, C)
        return self.to_out(out)

## MSFF_WinAttn_MobileNet

Multi-Scale Feature Fusion MobileNetV2 with Window Attention.

  This model extracts multi-scale convolutional features from different
  stages of MobileNetV2, reduces their channels to a uniform size, and
  applies local windowed self-attention to model dependencies between scales.

# MSFF_WinAttn_MobileNet

## Overview
`MSFF_WinAttn_MobileNet` is a hybrid deep learning model that combines:

- **MobileNetV2 backbone** for efficient multi-scale feature extraction.
- **1×1 convolutions** to normalize the channels of each feature stage.
- **Windowed Multi-Head Self-Attention** to model dependencies across scales.
- **Lightweight classifier** for final prediction.

This design enables capturing both spatial and semantic information across multiple feature hierarchies while keeping the model computationally efficient.

---

## Key Components

### 1. Multi-Scale Feature Extraction
Features are extracted from 4 stages of MobileNetV2:
- **Stage 1:** Early low-level features (channels: 24)
- **Stage 2:** Mid-level features (channels: 32)
- **Stage 3:** High-level features (channels: 96)
- **Stage 4:** Final semantic features (channels: 1280)

These stages provide rich, complementary information about the input image.

---

### 2. Channel Reduction
Each stage output is projected to **256 channels** using 1×1 convolutions:

- `reduce1`: 24 → 256 channels
- `reduce2`: 32 → 256 channels
- `reduce3`: 96 → 256 channels
- `reduce4`: 1280 → 256 channels

This normalization simplifies subsequent attention and fusion.

---

### 3. Window Attention
After reduction and global average pooling, the 4 feature vectors are stacked to shape `(B, 4, 256)`.

A **windowed attention mechanism** is applied:
- `LayerNorm(256)`
- `WindowAttention`: Multi-head self-attention (4 heads)
- `LayerNorm(256)`

This allows the model to learn relationships **between different scales**.

---

### 4. Classifier
The attended features are flattened and passed through a linear layer:
- `nn.Linear(256 × 4, num_classes)`

This produces the final logits for classification.

In [None]:
class MSFF_WinAttn_MobileNet(nn.Module):
    """Multi-scale feature fusion on a MobileNetV2 backbone with self-attention.

    The MobileNetV2 feature extractor is cut into four consecutive stages.
    Each stage output is projected to 256 channels with a 1x1 convolution
    and globally average-pooled, giving four 256-d tokens per image. The
    tokens are normalised, mixed by ``WindowAttention``, normalised again,
    flattened and classified by a single linear layer.

    Args:
        num_classes: Number of output logits.
        pretrained: When True (default) the backbone starts from the
            ImageNet ``IMAGENET1K_V1`` weights, i.e. what the legacy
            ``pretrained=True`` flag loaded. Pass False to skip the download,
            e.g. when a full checkpoint is loaded right afterwards.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()

        # ``pretrained=`` is deprecated in recent torchvision in favour of the
        # ``weights=`` enum; fall back to the old flag where the enum is absent.
        weights_enum = getattr(models, "MobileNet_V2_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            mobilenet = models.mobilenet_v2(weights=weights).features
        else:
            mobilenet = models.mobilenet_v2(pretrained=pretrained).features

        # Feature stages (output channels of each slice in the comment).
        self.stage1 = mobilenet[:4]    # 24-d
        self.stage2 = mobilenet[4:7]   # 32-d
        self.stage3 = mobilenet[7:14]  # 96-d
        self.stage4 = mobilenet[14:]   # 1280-d

        # 1x1 convolutions bringing every stage to a common width of 256.
        self.reduce1 = nn.Conv2d(24, 256, 1)
        self.reduce2 = nn.Conv2d(32, 256, 1)
        self.reduce3 = nn.Conv2d(96, 256, 1)
        self.reduce4 = nn.Conv2d(1280, 256, 1)

        # Attention across the four scale tokens, with LayerNorm before and after.
        self.win_attn = nn.Sequential(
            nn.LayerNorm(256),
            WindowAttention(dim=256, heads=4),
            nn.LayerNorm(256)
        )

        self.classifier = nn.Linear(256 * 4, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(B, num_classes)`` for an image batch ``x``."""
        stages = (
            (self.stage1, self.reduce1),
            (self.stage2, self.reduce2),
            (self.stage3, self.reduce3),
            (self.stage4, self.reduce4),
        )

        tokens = []
        for stage, reduce in stages:
            # Each stage consumes the previous stage's (unreduced) output.
            x = stage(x)
            # Reduce to 256 channels, then global-average-pool to a (B, 256) vector.
            tokens.append(F.adaptive_avg_pool2d(reduce(x), 1).flatten(1))

        feats = torch.stack(tokens, dim=1)  # (B, 4, 256)
        feats = self.win_attn(feats)

        # Flatten the four attended tokens to (B, 4*256) and classify.
        return self.classifier(feats.flatten(1))


In [None]:


# Prefer the GPU when one is available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Two-class model, moved to the selected device before the optimiser is built.
model = MSFF_WinAttn_MobileNet(num_classes=2)
model = model.to(device)

# Cross-entropy loss, optimised with Adam at a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 109MB/s] 


# Training and Validation Loop

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; train if ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_batch_loss, accuracy)`` where the loss is the average of the
        per-batch loss values and accuracy is correct / seen samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    # Gradients are only tracked while training.
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)

    return loss_sum / len(loader), n_correct / n_seen


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=50, save_dir='./models'):
    """Train ``model``, validate after every epoch and checkpoint the best weights.

    Two checkpoints (state dicts) are kept in ``save_dir``: the epoch with the
    highest validation accuracy and the epoch with the lowest validation loss.
    The learning rate is cut by a factor of 10 when the validation loss has
    not improved for 3 epochs (``ReduceLROnPlateau``).

    Args:
        model: Network to train. Batches are moved to the device of its
            parameters, so the model must already be on the target device.
        train_loader: Iterable of ``(images, labels)`` batches supporting ``len``.
        val_loader: Iterable of ``(images, labels)`` batches supporting ``len``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        epochs: Number of epochs to run.
        save_dir: Directory for the checkpoints; created if missing.

    Returns:
        ``(train_losses, train_accuracies, val_losses, val_accuracies,
        best_acc_model_path, best_loss_model_path)`` with one list entry per epoch.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Follow the model's own device rather than relying on a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    # ``verbose`` is deprecated on LR schedulers; changes are reported below.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

    best_val_acc = 0.0
    best_val_loss = float('inf')
    best_acc_model_path = os.path.join(save_dir, 'best_model_val_acc.pth')
    best_loss_model_path = os.path.join(save_dir, 'best_model_val_loss.pth')

    for epoch in range(epochs):
        avg_train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        train_losses.append(avg_train_loss)
        train_accuracies.append(train_acc)

        avg_val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
        val_losses.append(avg_val_loss)
        val_accuracies.append(val_acc)

        lrs_before = [group['lr'] for group in optimizer.param_groups]
        scheduler.step(avg_val_loss)
        for idx, (old_lr, group) in enumerate(zip(lrs_before, optimizer.param_groups)):
            if group['lr'] != old_lr:
                print(f"Epoch {epoch+1}: reducing learning rate of group {idx} to {group['lr']:.4e}.")

        # Save best accuracy model. The first epoch always saves so that the
        # returned path exists even if validation accuracy stays at 0.
        if epoch == 0 or val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_acc_model_path)
            print(f"Saved best accuracy model at epoch {epoch+1} with Acc: {val_acc:.4f}")

        # Save best loss model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), best_loss_model_path)
            print(f"Saved best loss model at epoch {epoch+1} with Loss: {avg_val_loss:.4f}")

        print(f"Epoch [{epoch+1}/{epochs}], "
              f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f}, "
              f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}")

    print(f"\n Best Validation Accuracy: {best_val_acc:.4f}")
    print(f" Best Validation Loss: {best_val_loss:.4f}")
    print(f" Best Accuracy Model Saved at: {best_acc_model_path}")
    print(f" Best Loss Model Saved at: {best_loss_model_path}")

    return train_losses, train_accuracies, val_losses, val_accuracies, best_acc_model_path, best_loss_model_path


In [None]:
# Launch training for 150 epochs with the model, loaders, loss and optimiser
# defined in the earlier cells; checkpoints go to the default save directory.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=150,
)



Saved best accuracy model at epoch 1 with Acc: 0.9076
Saved best loss model at epoch 1 with Loss: 0.2200
Epoch [1/150], Train Loss: 0.4279, Train Acc: 0.8453, Val Loss: 0.2200, Val Acc: 0.9076
Epoch [2/150], Train Loss: 0.2624, Train Acc: 0.9003, Val Loss: 0.2419, Val Acc: 0.9076
Saved best accuracy model at epoch 3 with Acc: 0.9265
Saved best loss model at epoch 3 with Loss: 0.1849
Epoch [3/150], Train Loss: 0.2533, Train Acc: 0.9019, Val Loss: 0.1849, Val Acc: 0.9265
Epoch [4/150], Train Loss: 0.2328, Train Acc: 0.9112, Val Loss: 0.2274, Val Acc: 0.9123
Saved best accuracy model at epoch 5 with Acc: 0.9479
Saved best loss model at epoch 5 with Loss: 0.1732
Epoch [5/150], Train Loss: 0.2140, Train Acc: 0.9138, Val Loss: 0.1732, Val Acc: 0.9479
Epoch [6/150], Train Loss: 0.1952, Train Acc: 0.9200, Val Loss: 0.1783, Val Acc: 0.9336
Epoch [7/150], Train Loss: 0.1816, Train Acc: 0.9283, Val Loss: 0.2087, Val Acc: 0.9289
Epoch [8/150], Train Loss: 0.1843, Train Acc: 0.9252, Val Loss: 0.213

([0.4278855932052018,
  0.2623553396981271,
  0.2532828080116725,
  0.23279771733967985,
  0.21400359639378844,
  0.19522300106091578,
  0.18158807562755758,
  0.18428710066392773,
  0.17766953116191214,
  0.16422649051566593,
  0.13776733023954219,
  0.13760521391131839,
  0.1373230644089521,
  0.12673743307346202,
  0.13599954606568226,
  0.13571694264280015,
  0.12581864665033388,
  0.1300470737099159,
  0.1377530941342721,
  0.13994664020958494,
  0.1281251076181404,
  0.14338458888232708,
  0.13336603071723804,
  0.1463704417597075,
  0.13251168983148748,
  0.14438553978918028,
  0.12924033788139702,
  0.13736821014861592,
  0.14307107871062444,
  0.13039711233778079,
  0.14681664958107668,
  0.14019033911286807,
  0.13087697913412188,
  0.14754252551031893,
  0.12870498829078478,
  0.13934270356644374,
  0.15873195539366025,
  0.1208258787261658,
  0.12968715075708803,
  0.14121067592660425,
  0.14667488069685755,
  0.11889416641998486,
  0.14849366656825191,
  0.1258969302060174

# Test (Code for Test dataset with folder path)

In [None]:
def get_albumentations_test_transform():
    """Build the deterministic evaluation pipeline.

    The pipeline resizes to 224x224, normalises each channel with mean 0.5
    and std 0.5, and converts the result to a tensor. A new pipeline object
    is created on every call.
    """
    steps = [
        A.Resize(224, 224),
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ToTensorV2(),
    ]
    return A.Compose(steps)

class AlbumentationsDataset(ImageFolder):
    """``ImageFolder`` subclass that runs an Albumentations pipeline per sample.

    The parent class is initialised with ``root`` only; the pipeline is kept
    on this class and applied in ``__getitem__`` after the loaded image has
    been converted to a NumPy array.
    """

    def __init__(self, root, transform=None):
        super().__init__(root)
        # Called as ``transform(image=array)``; a falsy value disables it.
        self.albumentations_transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, transformed when a pipeline is set."""
        sample, target = super().__getitem__(index)
        pixels = np.array(sample)
        if not self.albumentations_transform:
            return pixels, target
        return self.albumentations_transform(image=pixels)['image'], target

def test_model(model, model_path, test_folder, device='cuda' if torch.cuda.is_available() else 'cpu', batch_size=32):
    """Load a checkpoint into ``model`` and report metrics on an image folder.

    Args:
        model: Network whose weights are overwritten by the checkpoint.
        model_path: Path of a state dict readable by ``torch.load``.
        test_folder: Root directory handed to ``AlbumentationsDataset``.
        device: Device used for the weights and for inference.
        batch_size: Batch size of the evaluation loader.

    Returns:
        ``(accuracy, precision, recall, f1)``; the last three are
        macro-averaged with ``zero_division=0``.
    """
    state = torch.load(model_path, map_location=device)
    model.load_state_dict(state)
    model.to(device)
    model.eval()

    # Same preprocessing for every image, visited in a fixed order.
    pipeline = get_albumentations_test_transform()
    dataset = AlbumentationsDataset(test_folder, transform=pipeline)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    predictions, targets = [], []
    with torch.no_grad():
        for images, labels in loader:
            logits = model(images.to(device))
            # Index of the largest logit is the predicted class.
            predictions.extend(logits.argmax(dim=1).cpu().numpy())
            targets.extend(labels.numpy())

    macro = dict(average='macro', zero_division=0)
    acc = accuracy_score(targets, predictions)
    prec = precision_score(targets, predictions, **macro)
    rec = recall_score(targets, predictions, **macro)
    f1 = f1_score(targets, predictions, **macro)

    report = (
        f"\n Test Accuracy: {acc:.4f}",
        f" Precision:     {prec:.4f}",
        f" Recall:        {rec:.4f}",
        f" F1 Score:      {f1:.4f}",
    )
    for line in report:
        print(line)

    return acc, prec, rec, f1


In [None]:
# Evaluate the best-validation-accuracy checkpoint on the Task_A/val folder.
model_path = "/kaggle/working/models/best_model_val_acc.pth"
test_folder = "/kaggle/input/comys-hackathon5-2025/Comys_Hackathon5/Task_A/val"

acc, prec, rec, f1 = test_model(model=model, model_path=model_path, test_folder=test_folder)



 Test Accuracy: 0.9479
 Precision:     0.9293
 Recall:        0.8949
 F1 Score:      0.9108


In [None]:
# Evaluate the best-validation-loss checkpoint.
# NOTE(review): test_folder points at the Task_A/train directory, so these
# numbers describe the training split; both paths are under /content rather
# than /kaggle — confirm this is intended.
model_path = "/content/best_model_val_loss.pth"
test_folder = "/content/extracted_folder/Comys_Hackathon5/Task_A/train"

acc, prec, rec, f1 = test_model(model=model, model_path=model_path, test_folder=test_folder)


 Test Accuracy: 0.9709
 Precision:     0.9383
 Recall:        0.9546
 F1 Score:      0.9462
