# Imports

In [None]:
import torchvision.models as models
import torch
from PIL import Image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import numpy as np
import random
from sklearn.metrics import confusion_matrix, accuracy_score
import torch, random, numpy as np, os
import timm


# Helper Functions

In [8]:
def train_one_epoch(mod, train_loader, optimizer, device):
    mod.train()
    loss_fn = torch.nn.CrossEntropyLoss()

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = mod(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

def evaluate(mod, val_loader, device):
    mod.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = mod(images)

            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return accuracy_score(all_labels, all_preds)


In [9]:
def seed_worker(worker_id):
    worker_seed = seed
    np.random.seed(worker_seed)
    random.seed(worker_seed)


# Base Model

In [None]:
os.environ["PYTHONHASHSEED"] = str(seed)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

torch.use_deterministic_algorithms(True)


In [11]:
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB")), # ensures each images has RGB components
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])


dataset = datasets.ImageFolder("data", transform = transform)


In [12]:
model = timm.create_model("efficientnet_b0", 
                            pretrained=True,
                            drop_rate=0.4,
                            drop_path_rate=0.1,
                            num_classes=2)
model.classifier = torch.nn.Linear(model.classifier.in_features, 2)
model.eval()

generator = torch.Generator().manual_seed(seed)

total_size = len(dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator = generator)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=0, worker_init_fn=seed_worker)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, generator = torch.Generator().manual_seed(seed), num_workers=0, worker_init_fn=seed_worker)



In [13]:
for m in model.modules():
    if m.__class__.__name__ == "Dropout":
        m.p = 0.0
    if m.__class__.__name__ == "DropPath":
        m.drop_prob = 0.0

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

device = "cuda" if torch.cuda.is_available() else "cpu"

train_one_epoch(model, train_loader, optimizer, device)

accuracy = evaluate(model, val_loader, device)

print(accuracy)

0.6907216494845361


# Drop Rate and Drop Path Rate Tuning

In [18]:
def rates(drop_rate, drop_path_rate, seed = 42):
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    generator = torch.Generator().manual_seed(seed)
    total_size = len(dataset)
    train_size = int(0.8 * total_size)
    val_size = total_size - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator = generator)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=0, worker_init_fn=seed_worker)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, generator = torch.Generator().manual_seed(seed), num_workers=0, worker_init_fn=seed_worker)

    model = timm.create_model("efficientnet_b0", 
                            pretrained=True,
                            drop_rate=drop_rate,
                            drop_path_rate=drop_path_rate,
                            num_classes=2)
    model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

    for m in model.modules():
        if m.__class__.__name__ == "Dropout":
            m.p = 0.0
        if m.__class__.__name__ == "DropPath":
            m.drop_prob = 0.0

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    train_one_epoch(model, train_loader, optimizer, device)

    acc = evaluate(model, val_loader, device)
    return acc




In [17]:
drop_rates = [0.0, 0.2, 0.3, 0.4]
drop_path_rates = [0.0, 0.1, 0.2]
results = {}

for dr in drop_rates:
    for dpr in drop_path_rates:
        acc = rates(dr, dpr)
        results[(dr, dpr)] = acc
        print(f"DR: {dr} DPR: {dpr} Accuracy: {acc:.4f}")

DR: 0.0 DPR: 0.0 Accuracy: 0.7216
DR: 0.0 DPR: 0.1 Accuracy: 0.7216
DR: 0.0 DPR: 0.2 Accuracy: 0.7216
DR: 0.2 DPR: 0.0 Accuracy: 0.7113
DR: 0.2 DPR: 0.1 Accuracy: 0.7113
DR: 0.2 DPR: 0.2 Accuracy: 0.7113
DR: 0.3 DPR: 0.0 Accuracy: 0.6907
DR: 0.3 DPR: 0.1 Accuracy: 0.6907
DR: 0.3 DPR: 0.2 Accuracy: 0.6907
DR: 0.4 DPR: 0.0 Accuracy: 0.6907
DR: 0.4 DPR: 0.1 Accuracy: 0.6907
DR: 0.4 DPR: 0.2 Accuracy: 0.6907


# Learning Rate and Weight Decay

In [22]:
def lr(learning_rate, weight_decay, seed = 42):
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    generator = torch.Generator().manual_seed(seed)
    total_size = len(dataset)
    train_size = int(0.8 * total_size)
    val_size = total_size - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator = generator)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=0, worker_init_fn=seed_worker)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, generator = torch.Generator().manual_seed(seed), num_workers=0, worker_init_fn=seed_worker)

    model = timm.create_model("efficientnet_b0", 
                            pretrained=True,
                            drop_rate=0,
                            drop_path_rate=0,
                            num_classes=2)
    model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

    for m in model.modules():
        if m.__class__.__name__ == "Dropout":
            m.p = 0.0
        if m.__class__.__name__ == "DropPath":
            m.drop_prob = 0.0

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    train_one_epoch(model, train_loader, optimizer, device)

    acc = evaluate(model, val_loader, device)
    return acc




In [23]:
lrs = [1e-5, 3e-5, 1e-4, 3e-4]
weight_decays = [0, 1e-5, 1e-4, 3e-4]
results = {}

for learning_rate in lrs:
    for wd in weight_decays:
        acc = lr(learning_rate, wd)
        results[(learning_rate, wd)] = acc
        print(f"LR: {learning_rate} WD: {wd} Accuracy: {acc:.4f}")

LR: 1e-05 WD: 0 Accuracy: 0.5258
LR: 1e-05 WD: 1e-05 Accuracy: 0.5258
LR: 1e-05 WD: 0.0001 Accuracy: 0.5258
LR: 1e-05 WD: 0.0003 Accuracy: 0.5258
LR: 3e-05 WD: 0 Accuracy: 0.6082
LR: 3e-05 WD: 1e-05 Accuracy: 0.6082
LR: 3e-05 WD: 0.0001 Accuracy: 0.6082
LR: 3e-05 WD: 0.0003 Accuracy: 0.6082
LR: 0.0001 WD: 0 Accuracy: 0.7216
LR: 0.0001 WD: 1e-05 Accuracy: 0.7216
LR: 0.0001 WD: 0.0001 Accuracy: 0.7216
LR: 0.0001 WD: 0.0003 Accuracy: 0.7216
LR: 0.0003 WD: 0 Accuracy: 0.7113
LR: 0.0003 WD: 1e-05 Accuracy: 0.7113
LR: 0.0003 WD: 0.0001 Accuracy: 0.7113
LR: 0.0003 WD: 0.0003 Accuracy: 0.7113


# Batch Size

In [28]:
def batch(batch_size, seed = 42):
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    generator = torch.Generator().manual_seed(seed)
    total_size = len(dataset)
    train_size = int(0.8 * total_size)
    val_size = total_size - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator = generator)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=0, worker_init_fn=seed_worker)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, generator = torch.Generator().manual_seed(seed), num_workers=0, worker_init_fn=seed_worker)

    model = timm.create_model("efficientnet_b0", 
                            pretrained=True,
                            drop_rate=0,
                            drop_path_rate=0,
                            num_classes=2)
    model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

    for m in model.modules():
        if m.__class__.__name__ == "Dropout":
            m.p = 0.0
        if m.__class__.__name__ == "DropPath":
            m.drop_prob = 0.0

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    train_one_epoch(model, train_loader, optimizer, device)

    acc = evaluate(model, val_loader, device)
    return acc




In [29]:
batch_sizes = [16, 32, 64]
results = {}

for batch_size in batch_sizes:
    acc = batch(batch_size)
    results[batch_size] = acc
    print(f"Batch Size: {batch_size} Accuracy: {acc:.4f}")

Batch Size: 16 Accuracy: 0.7320
Batch Size: 32 Accuracy: 0.7216
Batch Size: 64 Accuracy: 0.6701


# Learning Rate Schedulers

In [37]:
# Cosine Annealing
def cosine_annealing(seed=42):
    
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    generator = torch.Generator().manual_seed(seed)
    total_size = len(dataset)
    train_size = int(0.8 * total_size)
    val_size = total_size - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator = generator)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=False, num_workers=0, worker_init_fn=seed_worker)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True, generator = torch.Generator().manual_seed(seed), num_workers=0, worker_init_fn=seed_worker)

    model = timm.create_model("efficientnet_b0", 
                            pretrained=True,
                            drop_rate=0,
                            drop_path_rate=0,
                            num_classes=2)
    model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

    for m in model.modules():
        if m.__class__.__name__ == "Dropout":
            m.p = 0.0
        if m.__class__.__name__ == "DropPath":
            m.drop_prob = 0.0

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=1)

    train_one_epoch(model, train_loader, optimizer, device)
    scheduler.step()

    acc = evaluate(model, val_loader, device)

    return acc

result = cosine_annealing()
print(result)


0.7319587628865979


In [38]:
# Cosine Annealing Warm Restarts
def cosine_annealing_wr(seed=42):
    
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    generator = torch.Generator().manual_seed(seed)
    total_size = len(dataset)
    train_size = int(0.8 * total_size)
    val_size = total_size - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator = generator)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=False, num_workers=0, worker_init_fn=seed_worker)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True, generator = torch.Generator().manual_seed(seed), num_workers=0, worker_init_fn=seed_worker)

    model = timm.create_model("efficientnet_b0", 
                            pretrained=True,
                            drop_rate=0,
                            drop_path_rate=0,
                            num_classes=2)
    model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

    for m in model.modules():
        if m.__class__.__name__ == "Dropout":
            m.p = 0.0
        if m.__class__.__name__ == "DropPath":
            m.drop_prob = 0.0

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, 
        T_0=1,
        T_mult=1)

    train_one_epoch(model, train_loader, optimizer, device)
    scheduler.step()

    acc = evaluate(model, val_loader, device)

    return acc

result = cosine_annealing_wr()
print(result)


0.7319587628865979


# Data Augmentation

In [42]:
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB")), # ensures each images has RGB components
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),

    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(
        brightness=0.2, contrast=0.2, saturation=0.2
    ),
    transforms.ToTensor()
])


dataset = datasets.ImageFolder("data", transform = transform)


In [None]:
def best_model(seed=42):
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    generator = torch.Generator().manual_seed(seed)
    total_size = len(dataset)
    train_size = int(0.8 * total_size)
    val_size = total_size - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator = generator)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=False, num_workers=0, worker_init_fn=seed_worker)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True, generator = torch.Generator().manual_seed(seed), num_workers=0, worker_init_fn=seed_worker)

    model = timm.create_model("efficientnet_b0", 
                            pretrained=True,
                            drop_rate=0,
                            drop_path_rate=0,
                            num_classes=2)
    model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

    for m in model.modules():
        if m.__class__.__name__ == "Dropout":
            m.p = 0.0
        if m.__class__.__name__ == "DropPath":
            m.drop_prob = 0.0

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    train_one_epoch(model, train_loader, optimizer, device)

    acc = evaluate(model, val_loader, device)
    return acc

result = best_model()
print(result)


0.7319587628865979


# Final Paramters
- Drop Rate: 0
- Drop Path Rate: 0
- Learning Rate: 1e-4
- Batch Size: 16
- Learning Rate Scheduler: Doesn't need one
- Data Augmentation: Doesn't need one