In [73]:
import os
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import read_image
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
import torchmetrics

In [74]:
# Directory holding the classifier CSV splits. Set the DATASET_CLASSIFIER_DIR
# environment variable to run the notebook on another machine; when it is
# unset the original location is used, so existing runs are unaffected.
DATA_DIR = os.environ.get(
    "DATASET_CLASSIFIER_DIR",
    "/Users/anshsarkar/NYU/Spring 2025/ECE-GY-9183-MLOPS/Project/ECE-GY-9183-Machine-Learning-Systems-Engg-Operations-Project/notebooks/data/dataset_classifier",
)
train_csv = os.path.join(DATA_DIR, "train.csv")
val_csv = os.path.join(DATA_DIR, "val.csv")

In [75]:
# from PIL import Image
# img = Image.open("../../data/aivshuman/train_data/c8b9aa66d43c43df87ba124a21c89579.jpg").convert('RGB')
# img.getbands()

In [76]:
# img.show()

In [77]:
class ImageDataset(Dataset):
    """Dataset of (image, label) pairs listed in a CSV file.

    The CSV is loaded with ``pd.read_csv`` and accessed positionally:
    column 0 holds the image path and column 1 holds the label.

    Args:
        csv_path: Path of the CSV file describing the samples.
        transform: Optional callable applied to the RGB PIL image before it
            is returned. ``None`` returns the PIL image unchanged.
    """

    def __init__(self, csv_path, transform=None):
        self.df = pd.read_csv(csv_path)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` when one was given) and ``label`` is a float tensor
            built from column 1 of the row.
        """
        img_path = self.df.iloc[idx, 0]
        # The context manager releases the file handle as soon as the pixels
        # are decoded; convert() returns a new image, so the result is still
        # usable after the source file has been closed.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        label = torch.tensor(self.df.iloc[idx, 1]).float()

        # Compare against None explicitly so a callable that happens to be
        # falsy (e.g. an empty container-like transform) is still applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [78]:
class ImageDataModule(pl.LightningDataModule):
    """LightningDataModule providing the training and validation loaders.

    Training images are resized to 256x256, randomly flipped horizontally,
    randomly rotated by up to 15 degrees, center-cropped to 224, converted to
    tensors and normalised. Validation images are resized to 224x224,
    converted to tensors and normalised with the same statistics.

    Args:
        train_csv: CSV file passed to ``ImageDataset`` for the training split.
        val_csv: CSV file passed to ``ImageDataset`` for the validation split.
        batch_size: Batch size used by both loaders.
        num_workers: Number of ``DataLoader`` worker processes used by both
            loaders. The default of 0 loads data in the main process.
    """

    # Per-channel statistics shared by both pipelines (the standard ImageNet
    # mean / std values).
    _NORM_MEAN = [0.485, 0.456, 0.406]
    _NORM_STD = [0.229, 0.224, 0.225]

    def __init__(self, train_csv, val_csv, batch_size=32, num_workers=0):
        super().__init__()
        self.train_csv = train_csv
        self.val_csv = val_csv
        self.batch_size = batch_size
        # NOTE(review): with num_workers > 0 the dataset is sent to worker
        # processes; confirm that works for classes defined inside the
        # notebook on your platform before raising this.
        self.num_workers = num_workers
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._NORM_MEAN, std=self._NORM_STD)
        ])
        self.aug_transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._NORM_MEAN, std=self._NORM_STD)
        ])

    def setup(self, stage=None):
        """Build both datasets; ``stage`` is accepted but not used."""
        # Augmentation is applied to the training split only.
        self.train_ds = ImageDataset(self.train_csv, self.aug_transform)
        self.val_ds = ImageDataset(self.val_csv, self.transform)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the validation loader (no shuffling)."""
        return DataLoader(
            self.val_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

In [79]:
class ImageClassifier(pl.LightningModule):
    """Binary image classifier: pretrained ResNet-50 with a sigmoid head.

    The ResNet ``fc`` layer is replaced by a small MLP ending in ``Sigmoid``,
    so ``forward`` returns probabilities of shape ``(batch, 1)`` that are
    trained with ``BCELoss``.

    Args:
        learning_rate: Learning rate of the classification head. When
            ``finetune`` is true the backbone is optimised with one tenth of
            this value.
        finetune: When false, all pretrained backbone parameters are frozen
            and only the head is optimised. When true, nothing is frozen and
            backbone and head are optimised with separate learning rates.
    """

    def __init__(self, learning_rate=1e-3, finetune=False):
        super().__init__()
        self.save_hyperparameters()
        self.backbone = resnet50(weights="IMAGENET1K_V2")
        if not finetune:
            # Freeze before the new head is attached so the head itself
            # keeps requires_grad=True.
            for param in self.backbone.parameters():
                param.requires_grad = False
        self.classifier = nn.Sequential(
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )
        # The head is reachable under two names (self.classifier and
        # self.backbone.fc); both refer to the very same parameters.
        self.backbone.fc = self.classifier
        self.criterion = nn.BCELoss()
        self.train_accuracy = torchmetrics.classification.BinaryAccuracy()
        self.val_accuracy = torchmetrics.classification.BinaryAccuracy()

    def forward(self, x):
        """Run the backbone (including the replaced head) on ``x``."""
        return self.backbone(x)

    def _shared_step(self, batch, accuracy):
        """Compute loss and accuracy for one batch with the given metric."""
        x, y = batch
        # Drop only the trailing feature dimension. A bare squeeze() would
        # also remove the batch dimension of a size-1 batch, leaving a 0-dim
        # prediction that no longer matches the (1,) target in BCELoss.
        y_hat = self(x).squeeze(-1)
        loss = self.criterion(y_hat, y)
        preds = (y_hat > 0.5).float()
        acc = accuracy(preds, y)
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` / ``train_acc`` and return the loss."""
        loss, acc = self._shared_step(batch, self.train_accuracy)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` / ``val_acc`` and return the loss."""
        loss, acc = self._shared_step(batch, self.val_accuracy)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer matching the ``finetune`` setting."""
        # Use different learning rates for backbone and head during fine-tuning
        if self.hparams.finetune:
            # backbone.parameters() also yields the head (it is backbone.fc).
            # Listing a parameter in two groups makes torch.optim raise
            # ValueError, so the head is filtered out of the backbone group.
            head_param_ids = {id(p) for p in self.classifier.parameters()}
            backbone_params = [
                p for p in self.backbone.parameters()
                if id(p) not in head_param_ids
            ]
            optimizer = torch.optim.Adam([
                {'params': backbone_params, 'lr': self.hparams.learning_rate / 10},
                {'params': self.classifier.parameters(), 'lr': self.hparams.learning_rate}
            ])
        else:
            optimizer = torch.optim.Adam(self.classifier.parameters(), lr=self.hparams.learning_rate)
        return optimizer

    def unfreeze_layers(self):
        """Mark the parameters of ResNet ``layer3`` and ``layer4`` trainable.

        This only changes anything for a model built with ``finetune=False``;
        with ``finetune=True`` no parameter was frozen in the first place.
        """
        # Unfreeze final ResNet blocks (layer3 and layer4)
        for name, param in self.backbone.named_parameters():
            if 'layer3' in name or 'layer4' in name:
                param.requires_grad = True

In [80]:
# Data module shared by both training phases (batches of 32 images).
datamodule = ImageDataModule(train_csv=train_csv, val_csv=val_csv, batch_size=32)

In [81]:
# Phase 1: Initial training (frozen backbone)

# Stop training when val_loss has not improved for 3 validation checks.
early_stopping = EarlyStopping(mode='min', monitor='val_loss', patience=3)

# Keep only the single checkpoint with the lowest val_loss.
checkpoint_callback = ModelCheckpoint(
    filename='phase1-checkpoint',
    monitor='val_loss',
    mode='min',
    save_top_k=1,
)

print("Starting Phase 1 - Training Classifier Head")
phase1_model = ImageClassifier(finetune=False, learning_rate=1e-3)

Starting Phase 1 - Training Classifier Head


In [82]:
# Train the head for at most 15 epochs on a single device; 'auto' leaves the
# accelerator choice to Lightning.
phase1_trainer = pl.Trainer(
    accelerator='auto',
    devices=1,
    max_epochs=15,
    callbacks=[checkpoint_callback, early_stopping],
)
phase1_trainer.fit(model=phase1_model, datamodule=datamodule)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name           | Type           | Params | Mode 
----------------------------------------------------------
0 | backbone       | ResNet         | 24.6 M | train
1 | classifier     | Sequential     | 1.0 M  | train
2 | criterion      | BCELoss        | 0      | train
3 | train_accuracy | BinaryAccuracy | 0      | train
4 | val_accuracy   | BinaryAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
23.5 M    Non-trainable params
24.6 M    Total params
98.231    Total estimated model params size (MB)
159       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/opt/miniconda3/envs/mlops/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/opt/miniconda3/envs/mlops/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
print("\nStarting Phase 2 - Fine-Tuning")

# Phase 2 continues from the best phase-1 weights. best_model_path is empty
# when phase 1 never saved a checkpoint (e.g. it was interrupted before the
# first validation epoch finished), so fail with a clear message instead of
# passing an empty path to the loader.
best_checkpoint_path = checkpoint_callback.best_model_path
if not best_checkpoint_path:
    raise RuntimeError(
        "No phase-1 checkpoint available: run Phase 1 until at least one "
        "validation epoch has completed before starting Phase 2."
    )

# The keyword arguments override the hyperparameters stored in the checkpoint.
best_model = ImageClassifier.load_from_checkpoint(
    best_checkpoint_path,
    learning_rate=1e-4,
    finetune=True,
)
best_model.unfreeze_layers()

# Keep only the single phase-2 checkpoint with the lowest val_loss.
phase2_checkpoint = ModelCheckpoint(
    monitor='val_loss',
    filename='phase2-checkpoint',
    save_top_k=1,
    mode='min',
)

In [None]:
# EarlyStopping keeps its best score and patience counter on the instance, so
# phase 2 gets its own callback instead of inheriting phase-1 state.
phase2_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    mode='min',
)

phase2_trainer = pl.Trainer(
    max_epochs=10,
    callbacks=[phase2_checkpoint, phase2_early_stopping],
    accelerator='auto',
    # One device on whatever accelerator 'auto' selects, as in phase 1.
    # The previous CUDA check produced devices=0 on non-CUDA machines
    # (MPS / CPU), which is not a valid Trainer setting.
    devices=1,
)
phase2_trainer.fit(best_model, datamodule=datamodule)