In [7]:
!pip install ptflops



In [10]:
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from ptflops import get_model_complexity_info
from sklearn.metrics import f1_score, confusion_matrix, roc_auc_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset, WeightedRandomSampler
from torchvision import models, transforms

# 1. Custom Letterbox Padding

In [11]:
class LetterboxResize(object):
    """Fit an image onto a fixed-size black canvas without distorting it.

    The image is shrunk, keeping its aspect ratio, until it fits inside
    ``output_size`` and is then pasted in the centre of a black RGB canvas.
    ``Image.thumbnail`` never enlarges, so an image that is already smaller
    than the canvas keeps its size and is simply centred.

    Args:
        output_size: Canvas size as a ``(width, height)`` pair in pixels, or a
            single int for a square canvas.
    """

    def __init__(self, output_size=(224, 224)):
        # Accept the common "single int means square" shorthand.
        if isinstance(output_size, int):
            output_size = (output_size, output_size)
        # Image.new() needs a real size pair; normalise lists to a tuple.
        self.output_size = tuple(output_size)

    def __call__(self, img):
        """Return a new letterboxed RGB image.

        Args:
            img: A PIL image. It is not modified.

        Returns:
            A new RGB PIL image of size ``self.output_size`` with ``img``
            centred on a black background.
        """
        # thumbnail() resizes in place, so work on a copy to keep the
        # caller's image intact.
        resized = img.copy()
        resized.thumbnail(self.output_size, Image.LANCZOS)

        # Create a black canvas
        canvas = Image.new("RGB", self.output_size, (0, 0, 0))

        # Paste centered
        canvas_w, canvas_h = self.output_size
        offset = ((canvas_w - resized.width) // 2, (canvas_h - resized.height) // 2)
        canvas.paste(resized, offset)
        return canvas

# Dataset and DataLoader with Split Logic

In [16]:
from torch.utils.data import Dataset, Subset
import pandas as pd

class PM25Dataset(Dataset):
    """Image dataset backed by a CSV index file.

    Rows are read positionally: column 0 is the image file name (joined onto
    ``img_dir``) and column 1 is the label.

    Args:
        csv_file: Path of the CSV file, loaded once with ``pd.read_csv``.
        img_dir: Directory the file names in column 0 are relative to.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Positional row index into the CSV.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image (after
            ``transform``, if one was given) and ``label`` is the raw value
            stored in column 1.
        """
        img_name = os.path.join(self.img_dir, self.data.iloc[idx, 0])
        # convert() returns an independent, fully loaded image, so the file
        # handle can be closed as soon as the with-block exits.
        with Image.open(img_name) as raw:
            image = raw.convert('RGB')
        label = self.data.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing pipeline for every image (the dataset is assumed to be
# described by a CSV with filenames and labels).
# NOTE: Ensure the 1,558 duplicates from Task 1 are removed from your CSV first.
transform = transforms.Compose(
    [
        # Pad to a fixed 224x224 canvas instead of stretching the image.
        LetterboxResize(output_size=(224, 224)),
        transforms.ToTensor(),
        # Presumably the ImageNet channel statistics used by pretrained
        # torchvision backbones -- confirm against the model being trained.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Example split setup: test fractions from 0.1 to 0.9 in steps of 0.1.
test_ratios = [tenth / 10 for tenth in range(1, 10)]

# The Training and Validation Function

In [17]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the mean per-batch loss.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one the model is put in eval mode and gradients are
    disabled.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, n_batches = 0.0, 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            loss = criterion(model(images), labels)
            if training:
                loss.backward()
                optimizer.step()
            loss_sum += loss.item()
            # Count batches here rather than calling len(loader), so loaders
            # without __len__ also work.
            n_batches += 1

    if n_batches == 0:
        raise ValueError("loader produced no batches; cannot compute a mean loss")
    return loss_sum / n_batches


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=50, device=None):
    """Train ``model`` and record the training and validation loss per epoch.

    Each epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. The logged value is the mean of
    the per-batch losses. The model is left in eval mode when this returns.
    Only losses are tracked; accuracy is not computed.

    Args:
        model: The ``nn.Module`` to train; it is updated in place.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches. How
            the validation split is built is up to the caller.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer bound to ``model``'s parameters.
        epochs: Number of epochs to run.
        device: Device batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function does not depend on a global ``device`` variable.

    Returns:
        ``(train_loss, val_loss)``: two lists with one float per epoch.

    Raises:
        ValueError: If either loader yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    train_loss, val_loss = [], []

    for epoch in range(epochs):
        train_loss.append(_run_epoch(model, train_loader, criterion, device, optimizer))
        val_loss.append(_run_epoch(model, val_loader, criterion, device))

        # Log metrics for learning curves
        print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss[-1]:.4f}, Val Loss: {val_loss[-1]:.4f}")

    return train_loss, val_loss