In [2]:
pip install --quiet -r https://raw.githubusercontent.com/JoaquinLCalvo/Explained-CNN-SSL/refs/heads/explainability/requirements.txt

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
#Standard libraries
import os
from copy import deepcopy
from tqdm.notebook import tqdm

#plotting
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

#PyTorch
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

#Torchvision
import torchvision
import torchvision.transforms.functional as FT
from torchvision import transforms
from torchvision.datasets import STL10

#PyTorch Lightning
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint


# Seed every RNG that Lightning knows about so runs are repeatable
pl.seed_everything(47)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Path configurations
DATASET_PATH = "data/"
# os.cpu_count() returns None when the CPU count cannot be determined; fall back
# to 0 so the value handed to DataLoader(num_workers=...) is always an int
# (0 means the data is loaded in the main process).
NUM_WORKERS = os.cpu_count() or 0

# Device configs: first CUDA device when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device:", device)
print("Number of workers:", NUM_WORKERS)

Seed set to 47


Device: cpu
Number of workers: 16


In [4]:
# Training schedule: number of epochs and mini-batch size
num_epochs, batch_size = 25, 256

# Optimizer settings (learning rate and weight decay)
lr, weight_decay = 1e-3, 1e-4

# Not referenced by the cells visible in this notebook section; kept as defined
temperature, hidden_dim = 0.07, 128

In [5]:
class ResNet(pl.LightningModule):
    """Supervised ResNet-18 classifier trained from randomly initialised weights.

    Args:
        num_classes: Number of output classes of the classification head.
        lr: Learning rate passed to AdamW.
        weight_decay: Weight decay passed to AdamW.
        max_epochs: Planned number of training epochs; used only to position
            the learning-rate decay milestones.
    """

    def __init__(self, num_classes, lr, weight_decay, max_epochs=100):
        super().__init__()
        # Makes the constructor arguments available as self.hparams.<name>
        self.save_hyperparameters()
        # weights=None is the current spelling of the deprecated pretrained=False:
        # the network starts from random initialisation.
        self.model = torchvision.models.resnet18(weights=None, num_classes=num_classes)

    def forward(self, imgs):
        """Return the class logits for a batch of images."""
        return self.model(imgs)

    def configure_optimizers(self):
        """Build AdamW plus a step schedule decaying the LR at 70% and 90% of training."""
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams.lr, weight_decay=self.hparams.weight_decay)
        # Each milestone multiplies the learning rate by gamma (0.1)
        milestones = [int(self.hparams.max_epochs * 0.7), int(self.hparams.max_epochs * 0.9)]
        lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)
        return [optimizer], [lr_scheduler]

    def _calculate_loss(self, batch, mode="train"):
        """Compute cross-entropy and accuracy for one batch and log both.

        Args:
            batch: An ``(imgs, labels)`` pair.
            mode: Prefix for the logged metric names (``"<mode>_loss"``, ``"<mode>_acc"``).

        Returns:
            The cross-entropy loss for the batch.
        """
        imgs, labels = batch
        preds = self.model(imgs)
        loss = F.cross_entropy(preds, labels)
        # Fraction of samples whose highest-scoring class equals the label
        acc = (preds.argmax(dim=-1) == labels).float().mean()

        self.log(mode + "_loss", loss)
        self.log(mode + "_acc", acc)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss; metrics are logged with the ``train`` prefix."""
        return self._calculate_loss(batch, mode="train")

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy with the ``val`` prefix."""
        self._calculate_loss(batch, mode="val")

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy with the ``test`` prefix."""
        self._calculate_loss(batch, mode="test")

In [6]:
# Augmentation pipeline taken over from the first model.

# Random augmentations, applied in exactly this order.
_random_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(size=96, scale=(0.8, 1.0)),
    transforms.RandomGrayscale(p=0.2),
    transforms.GaussianBlur(kernel_size=9, sigma=(0.1, 0.5)),
]
# Tensor conversion followed by normalisation with mean 0.5 and std 0.5.
_tensor_conversion = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
train_transforms = transforms.Compose(_random_augmentations + _tensor_conversion)

# Augmented view of the STL10 "train" split (downloaded into DATASET_PATH if missing).
train_img_aug_data = STL10(
    root=DATASET_PATH,
    split="train",
    download=True,
    transform=train_transforms,
)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to data/stl10_binary.tar.gz


  4%|▍         | 105M/2.64G [00:30<12:18, 3.43MB/s]  


KeyboardInterrupt: 

In [None]:
# Un-augmented pipeline: tensor conversion, then normalisation with mean 0.5 and std 0.5.
img_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Build the plain "train" and "test" splits (in that order) with identical settings.
train_img_data, test_img_data = [
    STL10(root=DATASET_PATH, split=split_name, download=True, transform=img_transforms)
    for split_name in ("train", "test")
]

In [None]:
def train_resnet(batch_size, max_epochs=100, **kwargs):
    """Train the supervised ResNet baseline and report its accuracy.

    The model is fitted on ``train_img_aug_data``; ``test_img_data`` is used
    both as the validation loader during fitting and for the final test score.

    Args:
        batch_size: Mini-batch size used by every data loader.
        max_epochs: Maximum number of training epochs for the trainer.
        **kwargs: Forwarded unchanged to the ``ResNet`` constructor.

    Returns:
        A ``(model, result)`` tuple where ``result`` maps ``"train"`` and
        ``"test"`` to the accuracy measured on the respective data.
    """
    trainer = pl.Trainer(
        accelerator="auto",
        devices=1,
        max_epochs=max_epochs,
        callbacks=[
            LearningRateMonitor("epoch"),
        ],
        check_val_every_n_epoch=2,
    )
    trainer.logger._default_hp_metric = None

    # Settings shared by every loader below
    loader_kwargs = dict(batch_size=batch_size, pin_memory=True, num_workers=NUM_WORKERS)

    # Data loaders
    train_loader = DataLoader(train_img_aug_data, shuffle=True, drop_last=True, **loader_kwargs)
    test_loader = DataLoader(test_img_data, shuffle=False, drop_last=False, **loader_kwargs)
    # Separate loader for scoring the training data: the training loader shuffles
    # and drops the last incomplete batch, which would leave samples out of the
    # reported accuracy. The dataset's own transforms are still applied.
    train_eval_loader = DataLoader(train_img_aug_data, shuffle=False, drop_last=False, **loader_kwargs)

    # Initialize model and train
    pl.seed_everything(47)  # To ensure reproducibility
    model = ResNet(**kwargs)
    trainer.fit(model, train_loader, test_loader)

    # Score the trained model on the training data and on the test split
    train_result = trainer.test(model, dataloaders=train_eval_loader, verbose=False)
    val_result = trainer.test(model, dataloaders=test_loader, verbose=False)
    result = {"train": train_result[0]["test_acc"], "test": val_result[0]["test_acc"]}

    return model, result


In [None]:
# Train the supervised baseline with the notebook-level hyperparameters.
# num_classes=10 presumably matches the number of STL10 labels; confirm if the dataset changes.
resnet_model, resnet_result = train_resnet(
    batch_size=batch_size,
    max_epochs=num_epochs,
    num_classes=10,
    lr=lr,
    weight_decay=weight_decay,
)

# Report both accuracies as percentages with two decimals.
print(f"Accuracy on training set: {100*resnet_result['train']:4.2f}%")
print(f"Accuracy on test set: {100*resnet_result['test']:4.2f}%")

In [None]:
## Save the model

# boto3 is imported in this cell because only the upload step needs it
import boto3

# Save the model weights (state_dict) to a local file
resnet_model_save_path = "resnet_baseline_model.pth"
torch.save(resnet_model.state_dict(), resnet_model_save_path)
print(f"Model saved to {resnet_model_save_path}")

# Upload to S3
s3 = boto3.client('s3')
bucket_name = "sagemaker-eu-west-1-704404838550"
# Derive the object key from the local file name so the baseline is stored under
# its own name. The previous hard-coded key "models/simclr_model_mixup.pth"
# named a different model and would have overwritten that object.
s3_key = f"models/{resnet_model_save_path}"

s3.upload_file(resnet_model_save_path, bucket_name, s3_key)
print(f"Model uploaded to s3://{bucket_name}/{s3_key}")