<a href="https://colab.research.google.com/github/Pavitra-khare/DA6401_ASS_2B/blob/main/DL_ASS_2B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install wandb



In [None]:
# import wandb
# wandb.login(key='c118c1c1779ae69dc5db59f0ff99465d34292bfe')
!wandb login

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33m3628-pavitrakhare[0m ([33m3628-pavitrakhare-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
from sklearn.model_selection import train_test_split
from torchvision import models
import wandb

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
from torchvision.models import resnet50, ResNet50_Weights

def pretrain_model(model_name, strategy):
    """
    Initialize a model with different pretrained layer freezing strategies:
    - strategy='freeze_all_except_last': Only the final FC layer is trainable
    - strategy='freeze_80_percent': Freeze 80% of the initial layers
    - strategy='freeze_fc_only': Only freeze the FC layer
    - strategy='train_from_scratch': Use untrained weights
    """
    def load_model(name, use_pretrained):
        if name == 'resnet':
            model_weights = ResNet50_Weights.DEFAULT if use_pretrained else None
            return resnet50(weights=model_weights)
        else:
            raise ValueError(f"Unsupported model: {name}")

    # Decide on using pretrained weights
    use_pretrained = (strategy != 'train_from_scratch')
    pretrained_model = load_model(model_name, use_pretrained)

    # Update the output layer for 10 classes
    num_features = pretrained_model.fc.in_features
    pretrained_model.fc = nn.Linear(num_features, 10)

    # Freezing strategies
    def freeze_except_fc(model):
        for name, param in model.named_parameters():
            if not name.startswith("fc"):
                param.requires_grad = False

    def freeze_fc_layer(model):
        for name, param in model.named_parameters():
            if name.startswith("fc"):
                param.requires_grad = False

    def freeze_initial_80_percent(model):
        children = list(model.named_children())
        freeze_upto = int(len(children) * 0.8)
        for idx, (_, module) in enumerate(children):
            if idx < freeze_upto:
                for param in module.parameters():
                    param.requires_grad = False

    if strategy == 'freeze_all_except_last':
        freeze_except_fc(pretrained_model)
    elif strategy == 'freeze_80_percent':
        freeze_initial_80_percent(pretrained_model)
    elif strategy == 'freeze_fc_only':
        freeze_fc_layer(pretrained_model)
    # No action for 'train_from_scratch' since all layers are already trainable

    return pretrained_model


In [None]:
def data_load(data_dir, data_augumentation):
    """
    Load training and validation data from a directory with optional augmentation.
    Splits data into 80% training and 20% validation.
    """

    def get_transform(augment):
        if augment == 'Yes':
            return transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
                transforms.RandomRotation(degrees=20),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
            ])
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
        ])

    # Apply transformations
    transform_pipeline = get_transform(data_augumentation)

    # Prepare dataset and split
    complete_dataset = ImageFolder(root=data_dir, transform=transform_pipeline)
    dataset_size = len(complete_dataset)
    indices = list(range(dataset_size))
    train_indices, val_indices = train_test_split(indices, test_size=0.2, random_state=42)

    # Data samplers for randomized loading
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    # Initialize DataLoaders
    train_loader = DataLoader(
        complete_dataset,
        batch_size=32,
        sampler=train_sampler,
        num_workers=4,
        pin_memory=True
    )

    val_loader = DataLoader(
        complete_dataset,
        batch_size=32,
        sampler=val_sampler,
        num_workers=4,
        pin_memory=True
    )

    return train_loader, val_loader


In [None]:
def train_on_train_data(model, train_data):
    """
    Trains the model on the provided training data for one epoch.
    Returns average loss and training accuracy.
    """
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.train()

    total_loss = 0.0
    correct_preds = 0
    total_samples = 0

    for batch_inputs, batch_labels in train_data:
        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_inputs)
        loss = loss_fn(logits, batch_labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predictions = torch.argmax(logits, dim=1)
        correct_preds += (predictions == batch_labels).sum().item()
        total_samples += batch_labels.size(0)

    epoch_loss = total_loss / len(train_data)
    accuracy = (correct_preds / total_samples) * 100

    return epoch_loss, accuracy


In [None]:
def test_on_valid_data(model, test_data):
    """
    Evaluates the model on the provided validation/test dataset.
    Returns the accuracy percentage.
    """
    model.eval()
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for batch_x, batch_y in test_data:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            predictions = model(batch_x)
            predicted_labels = torch.argmax(predictions, dim=1)
            total_correct += (predicted_labels == batch_y).sum().item()
            total_samples += batch_y.size(0)

    accuracy_percent = (total_correct / total_samples) * 100
    return accuracy_percent


In [None]:
def test_data_load(test_dir, data_augumentation='No'):
    """
    Loads the test dataset with optional augmentation and returns a DataLoader.
    """
    # Choose transformation pipeline based on augmentation flag
    if data_augumentation == 'Yes':
        augmentation_pipeline = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.RandomRotation(20),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
    else:
        augmentation_pipeline = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])

    # Apply transformations and load dataset
    dataset = ImageFolder(root=test_dir, transform=augmentation_pipeline)
    loader = DataLoader(
        dataset,
        batch_size=32,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )
    return loader


In [None]:
def model_train(model, train_data, val_data, test_data, epochs):
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    best_val_acc = 0
    patience = 2
    wait = 0
    best_model_state = None

    for epoch in range(epochs):
        train_loss, train_accuracy = train_on_train_data(model, train_data)

        print(f'Epoch {epoch + 1}/{epochs} | Training Loss: {train_loss:.4f} | Training Accuracy: {train_accuracy:.2f}%')
        wandb.log({'Train loss': train_loss, 'Train accuracy': train_accuracy})

        val_accuracy = test_on_valid_data(model, val_data)
        print(f'Epoch {epoch + 1}/{epochs} | Validation Accuracy: {val_accuracy:.2f}%')
        wandb.log({'val_accuracy': val_accuracy, 'epoch': epoch})

        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            wait = 0
            best_model_state = model.state_dict()  # Save best model state
        else:
            wait += 1
            if wait >= patience:
                print(f"Early stopping at epoch {epoch + 1}")
                break

    # Restore best model before testing
    if best_model_state:
        model.load_state_dict(best_model_state)

    # Final evaluation on test set
    test_accuracy = test_on_valid_data(model, test_data)
    print(f'Test Accuracy: {test_accuracy:.2f}%')
    wandb.log({'test_accuracy': test_accuracy})

    print('Model training Completed.')
    return model


In [None]:
# Update the sweep configuration to include the strategies
sweep_config = {
    'method': 'grid',  # Changed to grid to ensure all strategies are tested
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'model': {
            'values': ['resnet']
        },
        'strategy': {
            'values': [
                'freeze_all_except_last',  # Strategy 1
                'freeze_fc_only',          # Strategy 3
                'freeze_80_percent',       # Strategy 2
                'train_from_scratch'       # Strategy 4
            ]
        },
        'epoch': {
            'values': [10]
        }


    }
}

sweep_id = wandb.sweep(sweep_config, project='DL_ASS2_Prac')

Create sweep with ID: 5lto2ws8
Sweep URL: https://wandb.ai/3628-pavitrakhare-indian-institute-of-technology-madras/DL_ASS2_Prac/sweeps/5lto2ws8


In [None]:
def main():
    with wandb.init() as run:
        # Dynamically create a run name based on current configuration
        config = wandb.config
        run_name_parts = [
            f"ep{config.epoch}",
            f"strategy-{config.strategy}",
            f"model-{config.model}"
        ]
        wandb.run.name = "_".join(run_name_parts)

        # Set up the model with the specified training strategy
        selected_model = pretrain_model(model_name=config.model, strategy=config.strategy)
        selected_model = selected_model.to(device)

        # Prepare training and validation datasets
        training_path = '/kaggle/input/my-dataset/inaturalist_12K/train'
        train_data, val_data = data_load(training_path, data_augumentation='No')

        # Prepare test dataset
        testing_path = '/kaggle/input/my-dataset/inaturalist_12K/val'
        test_data = test_data_load(testing_path, data_augumentation='No')

        # Begin training and evaluate on test data
        model_train(selected_model, train_data, val_data, test_data, epochs=config.epoch)

wandb.agent(sweep_id, function=main, count=4)  # Execute with all 4 strategies
wandb.finish()


[34m[1mwandb[0m: Agent Starting Run: goa87lu1 with config:
[34m[1mwandb[0m: 	epoch: 10
[34m[1mwandb[0m: 	model: resnet
[34m[1mwandb[0m: 	strategy: freeze_all_except_last
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 220MB/s]


Epoch 1/10 | Training Loss: 1.2331 | Training Accuracy: 65.45%
Epoch 1/10 | Validation Accuracy: 74.75%
Epoch 2/10 | Training Loss: 0.7629 | Training Accuracy: 77.12%
Epoch 2/10 | Validation Accuracy: 78.50%
Epoch 3/10 | Training Loss: 0.6441 | Training Accuracy: 80.45%
Epoch 3/10 | Validation Accuracy: 78.85%
Epoch 4/10 | Training Loss: 0.5738 | Training Accuracy: 82.02%
Epoch 4/10 | Validation Accuracy: 80.40%
Epoch 5/10 | Training Loss: 0.5385 | Training Accuracy: 82.41%
Epoch 5/10 | Validation Accuracy: 79.60%
Epoch 6/10 | Training Loss: 0.4933 | Training Accuracy: 84.37%
Epoch 6/10 | Validation Accuracy: 80.55%
Epoch 7/10 | Training Loss: 0.4627 | Training Accuracy: 85.37%
Epoch 7/10 | Validation Accuracy: 80.85%
Epoch 8/10 | Training Loss: 0.4398 | Training Accuracy: 85.99%
Epoch 8/10 | Validation Accuracy: 80.75%
Epoch 9/10 | Training Loss: 0.4204 | Training Accuracy: 86.89%
Epoch 9/10 | Validation Accuracy: 80.65%
Early stopping at epoch 9
Test Accuracy: 81.20%
Model training C

0,1
Train accuracy,▁▅▆▆▇▇███
Train loss,█▄▃▂▂▂▁▁▁
epoch,▁▂▃▄▅▅▆▇█
test_accuracy,▁
val_accuracy,▁▅▆▇▇████

0,1
Train accuracy,86.88586
Train loss,0.4204
epoch,8.0
test_accuracy,81.2
val_accuracy,80.65


[34m[1mwandb[0m: Agent Starting Run: 4duvpe09 with config:
[34m[1mwandb[0m: 	epoch: 10
[34m[1mwandb[0m: 	model: resnet
[34m[1mwandb[0m: 	strategy: freeze_fc_only


Epoch 1/10 | Training Loss: 1.4916 | Training Accuracy: 49.69%
Epoch 1/10 | Validation Accuracy: 48.10%
Epoch 2/10 | Training Loss: 1.2020 | Training Accuracy: 58.99%
Epoch 2/10 | Validation Accuracy: 53.75%
Epoch 3/10 | Training Loss: 0.9671 | Training Accuracy: 67.65%
Epoch 3/10 | Validation Accuracy: 58.00%
Epoch 4/10 | Training Loss: 0.7323 | Training Accuracy: 75.75%
Epoch 4/10 | Validation Accuracy: 53.95%
Epoch 5/10 | Training Loss: 0.5838 | Training Accuracy: 80.57%
Epoch 5/10 | Validation Accuracy: 56.45%
Early stopping at epoch 5
Test Accuracy: 56.75%
Model training Completed.


0,1
Train accuracy,▁▃▅▇█
Train loss,█▆▄▂▁
epoch,▁▃▅▆█
test_accuracy,▁
val_accuracy,▁▅█▅▇

0,1
Train accuracy,80.57257
Train loss,0.58377
epoch,4.0
test_accuracy,56.75
val_accuracy,56.45


[34m[1mwandb[0m: Agent Starting Run: wx7637pk with config:
[34m[1mwandb[0m: 	epoch: 10
[34m[1mwandb[0m: 	model: resnet
[34m[1mwandb[0m: 	strategy: freeze_80_percent


Epoch 1/10 | Training Loss: 1.2355 | Training Accuracy: 64.16%
Epoch 1/10 | Validation Accuracy: 75.45%
Epoch 2/10 | Training Loss: 0.7560 | Training Accuracy: 77.92%
Epoch 2/10 | Validation Accuracy: 79.35%
Epoch 3/10 | Training Loss: 0.6368 | Training Accuracy: 80.45%
Epoch 3/10 | Validation Accuracy: 80.00%
Epoch 4/10 | Training Loss: 0.5779 | Training Accuracy: 82.26%
Epoch 4/10 | Validation Accuracy: 80.40%
Epoch 5/10 | Training Loss: 0.5245 | Training Accuracy: 83.52%
Epoch 5/10 | Validation Accuracy: 79.40%
Epoch 6/10 | Training Loss: 0.4935 | Training Accuracy: 84.66%
Epoch 6/10 | Validation Accuracy: 80.75%
Epoch 7/10 | Training Loss: 0.4638 | Training Accuracy: 85.34%
Epoch 7/10 | Validation Accuracy: 80.90%
Epoch 8/10 | Training Loss: 0.4412 | Training Accuracy: 86.25%
Epoch 8/10 | Validation Accuracy: 80.20%
Epoch 9/10 | Training Loss: 0.4213 | Training Accuracy: 87.09%
Epoch 9/10 | Validation Accuracy: 80.05%
Early stopping at epoch 9
Test Accuracy: 81.35%
Model training C

0,1
Train accuracy,▁▅▆▇▇▇▇██
Train loss,█▄▃▂▂▂▁▁▁
epoch,▁▂▃▄▅▅▆▇█
test_accuracy,▁
val_accuracy,▁▆▇▇▆██▇▇

0,1
Train accuracy,87.08589
Train loss,0.42128
epoch,8.0
test_accuracy,81.35
val_accuracy,80.05


[34m[1mwandb[0m: Agent Starting Run: dee4k05z with config:
[34m[1mwandb[0m: 	epoch: 10
[34m[1mwandb[0m: 	model: resnet
[34m[1mwandb[0m: 	strategy: train_from_scratch


Epoch 1/10 | Training Loss: 2.3077 | Training Accuracy: 17.54%
Epoch 1/10 | Validation Accuracy: 19.35%
Epoch 2/10 | Training Loss: 2.2162 | Training Accuracy: 19.74%
Epoch 2/10 | Validation Accuracy: 21.40%
Epoch 3/10 | Training Loss: 2.1737 | Training Accuracy: 20.40%
Epoch 3/10 | Validation Accuracy: 20.80%
Epoch 4/10 | Training Loss: 2.1385 | Training Accuracy: 21.77%
Epoch 4/10 | Validation Accuracy: 18.45%
Early stopping at epoch 4
Test Accuracy: 19.45%
Model training Completed.


0,1
Train accuracy,▁▅▆█
Train loss,█▄▂▁
epoch,▁▃▆█
test_accuracy,▁
val_accuracy,▃█▇▁

0,1
Train accuracy,21.76522
Train loss,2.13853
epoch,3.0
test_accuracy,19.45
val_accuracy,18.45
