In [2]:
import os
import json
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import OneCycleLR

import torchvision.transforms as transforms
import torchvision.datasets as datasets

import albumentations as A
from albumentations.pytorch import ToTensorV2

import timm

import wandb
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, LearningRateFinder
from torchmetrics import Accuracy


## Consolidate data

In [3]:
def create_dataframe_from_folders(folder_list):
    """Index every ``.JPEG`` file found beneath the given folders.

    Each folder is walked recursively with ``os.walk``; the name of the
    directory that directly contains an image is recorded as its class.

    Args:
        folder_list: Iterable of root directory paths to scan.

    Returns:
        A ``pandas.DataFrame`` with the columns ``image_path`` and ``class``
        (in that order), one row per image found.
    """
    image_paths, class_names = [], []

    for folder in tqdm(folder_list):
        for current_dir, _subdirs, filenames in os.walk(folder):
            jpeg_names = [name for name in filenames if name.endswith('.JPEG')]
            image_paths.extend(os.path.join(current_dir, name) for name in jpeg_names)
            # Every image in this directory shares the directory name as class.
            class_names.extend([os.path.basename(current_dir)] * len(jpeg_names))

    return pd.DataFrame({'image_path': image_paths, 'class': class_names})

In [4]:
%%time
# Index the four training shards into one frame and the validation folder
# into another.
train_df = create_dataframe_from_folders([
    '/kaggle/input/imagenet100/train.X1',
    '/kaggle/input/imagenet100/train.X2',
    '/kaggle/input/imagenet100/train.X3',
    '/kaggle/input/imagenet100/train.X4',
])
val_df = create_dataframe_from_folders(['/kaggle/input/imagenet100/val.X'])

# Report the size of each split and the number of distinct training classes.
print('Number of Training Samples', len(train_df))
print('Number of unique clases: ', train_df['class'].nunique())
print()
print('Number of Validation Samples', len(val_df))

100%|██████████| 4/4 [01:33<00:00, 23.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.53s/it]

Number of Training Samples 130000
Number of unique clases:  100

Number of Validation Samples 5000
CPU times: user 950 ms, sys: 1.25 s, total: 2.2 s
Wall time: 1min 35s





In [5]:
# Preview the first rows of the training index (image path + class folder name).
train_df.head()

Unnamed: 0,image_path,class
0,/kaggle/input/imagenet100/train.X1/n01531178/n...,n01531178
1,/kaggle/input/imagenet100/train.X1/n01531178/n...,n01531178
2,/kaggle/input/imagenet100/train.X1/n01531178/n...,n01531178
3,/kaggle/input/imagenet100/train.X1/n01531178/n...,n01531178
4,/kaggle/input/imagenet100/train.X1/n01531178/n...,n01531178


In [6]:
# Preview the first rows of the validation index (image path + class folder name).
val_df.head()

Unnamed: 0,image_path,class
0,/kaggle/input/imagenet100/val.X/n01531178/ILSV...,n01531178
1,/kaggle/input/imagenet100/val.X/n01531178/ILSV...,n01531178
2,/kaggle/input/imagenet100/val.X/n01531178/ILSV...,n01531178
3,/kaggle/input/imagenet100/val.X/n01531178/ILSV...,n01531178
4,/kaggle/input/imagenet100/val.X/n01531178/ILSV...,n01531178


In [7]:
# Map each class (folder) name to an integer label and back.
# The names are sorted before numbering so the mapping is reproducible:
# `unique()` returns values in order of first appearance, which here depends
# on the order in which the folders happened to be listed on disk. A mapping
# that changes between sessions would silently mismatch any saved checkpoint.
class_to_label = {
    class_name: label
    for label, class_name in enumerate(sorted(train_df['class'].unique()))
}
label_to_class = {label: class_name for class_name, label in class_to_label.items()}

## Define Augmentations

In [8]:
# Central experiment configuration read by the cells below.
hyp = dict(
    image_size=224,
    batch_size=128,
    epochs=15,
    arch='mobilenetv2_100',
    num_classes=100,
    early_stopping_patience=7,
    # Use the GPU when one is visible, otherwise fall back to the CPU.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

In [9]:
# Training pipeline: resize to the configured input size, apply random
# augmentations, then standardise and convert to a tensor.
train_transform = A.Compose([
    A.Resize(hyp['image_size'], hyp['image_size']),  # Square resize driven by the config, not a literal
    A.HorizontalFlip(p=0.5),  # Apply horizontal flip with a probability of 0.5
    A.VerticalFlip(p=0.5),  # Apply vertical flip with a probability of 0.5
    A.RandomRotate90(p=0.5),  # Randomly rotate the image by 90 degrees
    A.RandomBrightnessContrast(p=0.2),  # Adjust brightness and contrast
    A.GaussNoise(p=0.2),  # Add random Gaussian noise
    # Standardise each channel with the library's default mean/std.
    # The result is zero-centred, NOT confined to the range [0, 1].
    A.Normalize(),
    ToTensorV2(),  # Convert the image to a PyTorch tensor
])

# Validation pipeline: the same deterministic steps, with no random augmentation.
val_transform = A.Compose([
    A.Resize(hyp['image_size'], hyp['image_size']),
    A.Normalize(),
    ToTensorV2(),
])

## Define PyTorch Dataset

In [10]:
class CustomImageNetDataset(torch.utils.data.Dataset):
    """Dataset that loads images listed in a DataFrame.

    Args:
        df: DataFrame whose first column holds the image path and whose
            second column holds the class name (columns are read by position).
        transform: Optional callable invoked as ``transform(image=ndarray)``
            that returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, transform=None):
        self.data = df
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The label is looked up in the module-level ``class_to_label`` mapping.
        Without a transform the image is returned as an RGB PIL image.
        """
        img_name = self.data.iloc[idx, 0]
        label = class_to_label[self.data.iloc[idx, 1]]

        # The context manager closes the underlying file deterministically,
        # even if decoding fails part-way; `convert` returns an independent
        # image that stays valid after the file is closed.
        with Image.open(img_name) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image=np.array(image))['image']

        return image, label


## Define Model

In [11]:
import torch.nn.functional as F

class CustomModel(nn.Module):
    """timm backbone followed by a small MLP classification head.

    ``forward`` returns raw, unnormalised class scores (logits). No softmax is
    applied inside the model: ``nn.CrossEntropyLoss`` applies log-softmax
    itself, so feeding it probabilities would squash the gradients. Apply
    ``softmax`` to the output explicitly if probabilities are needed; argmax
    predictions are the same either way.

    The removed ``nn.Softmax`` layer had no parameters and was the last entry
    of the head, so parameter names in saved state dicts are unchanged.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        num_classes: Number of output classes.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, num_classes, pretrained=True):
        super().__init__()
        # num_classes=0: build the backbone without timm's own classifier
        # layer; its output feeds the head defined below.
        self.base_model = timm.create_model(model_name, pretrained=pretrained, num_classes=0)

        # Classification head sized from the backbone's feature width.
        in_features = self.base_model.num_features
        self.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return class logits for the input batch ``x``."""
        features = self.base_model(x)
        return self.classifier(features)

## Ready the data

In [12]:
# Wrap each index frame in a dataset with its matching transform pipeline.
train_dataset = CustomImageNetDataset(df=train_df, transform=train_transform)
val_dataset = CustomImageNetDataset(df=val_df, transform=val_transform)

# Only the training loader shuffles; both use 4 workers and pinned memory.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=hyp['batch_size'],
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=hyp['batch_size'],
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [13]:
# Perform a sanity check on the data loaders
def check_data_loader(loader):
    """Print the image and label shapes of the first three batches.

    Args:
        loader: Iterable yielding ``(images, labels)`` pairs whose elements
            expose a ``.shape`` attribute.
    """
    # Pairing with range(3) stops after three batches without pulling a
    # fourth one from the loader.
    for batch_idx, (images, labels) in zip(range(3), loader):
        print(f"Batch {batch_idx + 1}:")
        print("Image shape:", images.shape)
        print("Label shape:", labels.shape)
        print("\n")

# Run the same shape check on both loaders, training first.
for banner, data_loader in (
    ("Train DataLoader Sanity Check:", train_loader),
    ("Validation DataLoader Sanity Check:", val_loader),
):
    print(banner)
    check_data_loader(data_loader)


Train DataLoader Sanity Check:
Batch 1:
Image shape: torch.Size([128, 3, 224, 224])
Label shape: torch.Size([128])


Batch 2:
Image shape: torch.Size([128, 3, 224, 224])
Label shape: torch.Size([128])


Batch 3:
Image shape: torch.Size([128, 3, 224, 224])
Label shape: torch.Size([128])


Validation DataLoader Sanity Check:
Batch 1:
Image shape: torch.Size([128, 3, 224, 224])
Label shape: torch.Size([128])


Batch 2:
Image shape: torch.Size([128, 3, 224, 224])
Label shape: torch.Size([128])


Batch 3:
Image shape: torch.Size([128, 3, 224, 224])
Label shape: torch.Size([128])




## Train with PyTorch Lightning

In [14]:
class PLModel(pl.LightningModule):
    """Lightning module that trains a ``CustomModel`` image classifier.

    Args:
        name: Architecture name forwarded to ``CustomModel``.
        num_classes: Number of target classes.
        learning_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, name, num_classes, learning_rate=1e-3):
        super().__init__()
        self.name = name
        self.num_classes = num_classes
        self.learning_rate = learning_rate

        # Network being trained
        self.model = CustomModel(name, num_classes)

        # Loss function
        self.loss_fn = nn.CrossEntropyLoss()

        # Metrics (separate instances so train/val state never mixes)
        self.train_acc = Accuracy('multiclass', num_classes=num_classes)
        self.valid_acc = Accuracy('multiclass', num_classes=num_classes)

    def forward(self, x):
        """Run the wrapped model on a batch of images."""
        return self.model(x)

    def configure_optimizers(self):
        """Return a plain Adam optimizer running at ``self.learning_rate``.

        No scheduler object is built here on purpose. Constructing a
        ``OneCycleLR`` mutates the optimizer immediately: it resets the
        learning rate to the schedule's starting value and overrides Adam's
        first beta. Building one without returning it, as this method used to,
        left training at that reduced rate for the whole run. If a schedule is
        wanted, create it here AND return it under ``'lr_scheduler'`` with
        ``'interval': 'step'``.
        """
        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
        return {'optimizer': optimizer}

    def training_step(self, batch, batch_idx):
        """Compute and log loss and accuracy for one training batch."""
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', self.train_acc(logits, y), on_epoch=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log loss and accuracy for one validation batch."""
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', self.valid_acc(logits, y), on_epoch=True, logger=True)

In [15]:
# Stop training once validation loss has gone the configured number of
# validation checks without improving.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=hyp['early_stopping_patience'],
    verbose=True,
)

# Keep only the single checkpoint with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    dirpath='checkpoints/',
    filename=f"{hyp['arch']}_best_model",
    monitor='val_loss',
    mode='min',
    save_top_k=1,
)

In [16]:
# Authenticate this session with Weights & Biases before any logging starts
# (prompts for an API key when none is stored).
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [17]:
# Send Lightning's logged metrics to a Weights & Biases run named after the
# architecture.
wandb_logger = pl.loggers.wandb.WandbLogger(
    name=f"{hyp['arch']}_training",
    project="rai_ablation_study",
)

In [18]:
# Initialize model; the class count comes from the shared config rather than
# a repeated literal so the two cannot drift apart.
model = PLModel(hyp['arch'], num_classes=hyp['num_classes'])

model.safetensors:   0%|          | 0.00/14.2M [00:00<?, ?B/s]

In [19]:
# Build the Lightning Trainer: mixed-precision training, W&B logging, and the
# early-stopping / checkpoint callbacks.
trainer = pl.Trainer(
    logger=wandb_logger,
    callbacks=[early_stop_callback, checkpoint_callback],
    precision='16-mixed',  # Use mixed precision training
    max_epochs=hyp['epochs'],
)

In [None]:
# Run the training loop, validating on the held-out loader.
trainer.fit(
    model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

[34m[1mwandb[0m: Currently logged in as: [33mlawjarp[0m. Use [1m`wandb login --relogin`[0m to force relogin


/opt/conda/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /kaggle/working/checkpoints exists and is not empty.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [None]:
csv_path = 'all_models_metrics_df.csv'

# First use only: create the shared results file containing just a header row
# (summary columns followed by precision/recall/f1 for each class).
if not os.path.exists(csv_path):
    columns = ['model_name', 'accuracy_avg', 'precision_avg', 'recall_avg', 'f1_avg']
    columns += [
        column_name
        for i in range(100)
        for column_name in (
            f"{label_to_class[i]}_precision",
            f"{label_to_class[i]}_recall",
            f"{label_to_class[i]}_f1",
        )
    ]
    metrics_df = pd.DataFrame(columns=columns)
    metrics_df.to_csv(csv_path, index=False)

In [None]:
ls -la ./checkpoints

In [None]:
# rm all_models_metrics_df.csv

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns

# ---------------------------------------------------------------------------
# Evaluate the best checkpoint on the validation set, then record the metrics
# in the shared CSV and in Weights & Biases.
# ---------------------------------------------------------------------------
save_table_path = f"{hyp['arch']}_val.csv"  # not referenced in this cell

# Prefer the path recorded by the checkpoint callback during this session.
# The fixed file name may belong to an earlier run, because ModelCheckpoint
# adds a version suffix instead of overwriting a file that already exists.
# Fall back to the fixed name when nothing was trained in this session
# (the recorded path is then empty).
best_ckpt_path = checkpoint_callback.best_model_path or f"./checkpoints/{hyp['arch']}_best_model.ckpt"
model = PLModel.load_from_checkpoint(best_ckpt_path, name=hyp['arch'], num_classes=hyp['num_classes'])

device = hyp['device']
model = model.to(device)
model.eval()

true_labels = []
pred_labels = []

with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images.to(device))
        # Predicted class = index of the highest score along the class axis.
        predicted = outputs.argmax(dim=1)

        # Labels are only needed for host-side metrics, so they are not
        # copied to the device.
        true_labels.extend(labels.tolist())
        pred_labels.extend(predicted.tolist())

# Pass the full label set explicitly so every class gets a report entry and a
# confusion-matrix row/column even if it never occurs in this split.
all_labels = list(range(len(label_to_class)))

cr = classification_report(true_labels, pred_labels, labels=all_labels, output_dict=True)

# Calculate confusion matrix
cm = confusion_matrix(true_labels, pred_labels, labels=all_labels)

class_labels = [label_to_class[label] for label in all_labels]
# Convert confusion matrix to DataFrame for visualization.
# Rows are written without an index; they follow the same class order as the
# column header.
cm_df = pd.DataFrame(cm, index=class_labels, columns=class_labels)
cm_df.to_csv(f"{hyp['arch']}_cf.csv", index=False)

# Get metrics from classification report
new_row = {
    'model_name': hyp['arch'],
    'accuracy_avg': cr['accuracy'],
    'precision_avg': cr['macro avg']['precision'],
    'recall_avg': cr['macro avg']['recall'],
    'f1_avg': cr['macro avg']['f1-score'],
}
for i in all_labels:
    per_class = cr[str(i)]  # report entries are keyed by the label as a string
    new_row[f"{label_to_class[i]}_precision"] = per_class['precision']
    new_row[f"{label_to_class[i]}_recall"] = per_class['recall']
    new_row[f"{label_to_class[i]}_f1"] = per_class['f1-score']

# Append this model's row to the shared results file.
df = pd.read_csv(csv_path)
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
df.to_csv(csv_path, index=False)

total_params = sum(p.numel() for p in model.parameters())

# Upload the summary metrics to wandb
table = wandb.Table(columns=["Model Name", "Model Params", "Accuracy", "Precision", "Recall", "F1 Score"])
table.add_data(hyp['arch'], total_params, new_row['accuracy_avg'], new_row['precision_avg'], new_row['recall_avg'], new_row['f1_avg'])

wandb.log({"Metrics": table,})

In [None]:
# Display the accumulated per-model metrics table that was just written to csv_path.
df

In [None]:
# End the Weights & Biases run now that all metrics have been logged.
wandb.finish()