In [None]:
%pip install torch torchvision peft tqdm

In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm


In [30]:
# Preprocessing pipeline handed to both CIFAR-10 dataset objects:
#   1. resize each image to 224x224,
#   2. convert it to a tensor,
#   3. normalise each of the three channels with a fixed mean / std.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Load CIFAR-10 dataset (downloaded into ./data on first use). Both splits
# share the same preprocessing pipeline defined in `transform`.
full_train_dataset = datasets.CIFAR10(root='./data', train=True,
                                      download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False,
                                download=True, transform=transform)

# Split training data into training (80%) and validation (remainder) sets.
train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# A dedicated, fixed-seed generator makes the partition identical on every
# kernel restart, so validation accuracies from different runs are measured
# on the same held-out images and remain comparable.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=split_generator,
)

In [36]:
# Early Stopping Class
class EarlyStopping:
    """Signal when validation accuracy has stopped improving.

    Call the instance once per epoch with the latest validation accuracy.
    A strictly higher accuracy than the best one seen so far is recorded and
    the model's ``state_dict`` is written to ``save_path``. Any other value
    (including a tie) counts as a stale epoch. Once ``patience`` stale epochs
    have accumulated since the last improvement, ``early_stop`` is set to
    ``True``; it is never reset afterwards.

    Attributes:
        patience: Number of stale epochs tolerated before stopping.
        counter: Stale epochs seen since the last improvement.
        best_acc: Highest validation accuracy so far (``None`` before the
            first call).
        early_stop: Flag the training loop should poll after each call.
    """

    def __init__(self, patience=5):
        self.early_stop = False
        self.best_acc = None
        self.counter = 0
        self.patience = patience

    def __call__(self, val_acc, model, save_path):
        """Record one epoch's validation accuracy and checkpoint on improvement.

        Args:
            val_acc: Validation accuracy of the epoch that just finished.
            model: Object exposing ``state_dict()``; saved when ``val_acc``
                is a new best.
            save_path: Destination passed straight to ``torch.save``.
        """
        is_first_call = self.best_acc is None
        improved = is_first_call or self.best_acc < val_acc

        if not improved:
            # Stale epoch: spend one unit of patience.
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # The very first observation only establishes the baseline; later
        # improvements also restore the full patience budget.
        if not is_first_call:
            self.counter = 0
        self.best_acc = val_acc
        self.save_checkpoint(model, save_path)

    def save_checkpoint(self, model, save_path):
        """Persist ``model.state_dict()`` to ``save_path`` via ``torch.save``."""
        weights = model.state_dict()
        torch.save(weights, save_path)

In [33]:
# Single-epoch training pass (early stopping is handled by the caller)
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Network to train; switched to training mode before the pass.
        loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer whose gradients are cleared before, and whose
            ``step`` is applied after, every backward pass.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(loader)``.
    """
    model.train()

    loss_total = 0.0
    for batch_inputs, batch_labels in tqdm(loader, desc="Training", leave=False):
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()

    num_batches = len(loader)
    return loss_total / num_batches

In [34]:
# Evaluation function
def evaluate(model, loader, device, criterion):
    """Score ``model`` on ``loader`` without computing gradients.

    Args:
        model: Network to score; switched to evaluation mode first.
        loader: Sized iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the summed per-batch loss divided by
        ``len(loader)``, and the fraction of samples whose highest-scoring
        output along dimension 1 equals the label.
    """
    model.eval()

    loss_total = 0.0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in tqdm(loader, desc="Evaluating", leave=False):
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            loss_total += criterion(logits, batch_labels).item()

            # Predicted class = index of the largest output along dim 1.
            predicted = logits.max(dim=1).indices
            num_correct += (predicted == batch_labels).sum().item()
            num_seen += batch_labels.size(0)

    mean_loss = loss_total / len(loader)
    accuracy = num_correct / num_seen
    return mean_loss, accuracy

In [None]:
# Preview cell: print parameter counts for the stock ResNet-18 and then for
# the same network after freezing, swapping the head and wrapping with LoRA.
def _count_parameters(module):
    """Return ``(total, trainable)`` parameter counts for ``module``."""
    total = sum(p.numel() for p in module.parameters())
    trainable = sum(p.numel() for p in module.parameters() if p.requires_grad)
    return total, trainable


model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
total_params, trainable_params = _count_parameters(model)
print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")

# Freeze original model parameters
for param in model.parameters():
    param.requires_grad = False

# Modify the final layer for CIFAR-10
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

# Configure LoRA using PEFT with specified target modules.
# The targets are conv1/conv2 of both blocks in layer1..layer4 plus the
# classifier head. They are generated rather than typed out: the hand-written
# list repeated "layer2.0.conv2" three times and therefore never adapted
# layer3.0.conv2 or layer4.0.conv2.
lora_target_modules = [
    f"layer{stage}.{block}.conv{conv}"
    for stage in range(1, 5)
    for block in range(2)
    for conv in (1, 2)
] + ["fc"]

lora_config = LoraConfig(
    # Integer scaling factor matching the value used in the hyperparameter
    # grid (previously the float 0.1). It does not affect parameter counts.
    lora_alpha=16,
    lora_dropout=0.1,
    r=16,
    target_modules=lora_target_modules,
)

model = get_peft_model(model, lora_config)
total_params, trainable_params = _count_parameters(model)
print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")

In [None]:
# Hyperparameter tuning
param_grid = {
    'lr': [0.0001],
    'lora_r': [14],
    'lora_alpha': [16]
}

best_acc = 0
best_params = {}
best_model_path = 'best_model.pth'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for params in ParameterGrid(param_grid):
    print(f"Testing parameters: {params}")
    
    # Update data loaders with new batch size
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
    
    # Initialize pre-trained ResNet-18 model
    model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    
    # Freeze original model parameters
    for param in model.parameters():
        param.requires_grad = False
    
    # Modify the final layer for CIFAR-10
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 10)
    
    # Configure LoRA using PEFT with specified target modules
    lora_config = LoraConfig(
        lora_alpha=params['lora_alpha'],
        lora_dropout=0.1,
        r=params['lora_r'],
        target_modules=["layer1.0.conv1", "layer1.0.conv2",
                        "layer1.1.conv1", "layer1.1.conv2", 
                        "layer2.0.conv1", "layer2.0.conv2", 
                        "layer2.1.conv1", "layer2.1.conv2", 
                        "layer3.0.conv1", "layer2.0.conv2", 
                        "layer3.1.conv1", "layer3.1.conv2", 
                        "layer4.0.conv1", "layer2.0.conv2", 
                        "layer4.1.conv1", "layer4.1.conv2", 
                        "fc"]
    )
    
    model = get_peft_model(model, lora_config)
    
    model.to(device)
    
    # Define loss function and optimizer with new learning rate
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=params['lr'])
    
    # Initialize Early Stopping
    early_stopping = EarlyStopping(patience=7)
    
    # Training loop with epochs
    num_epochs = 100
    for epoch in range(num_epochs):

        train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = evaluate(model, val_loader, device, criterion)
        print(f"Epoch {epoch+1}/{num_epochs} - Training Loss: {train_loss:.4f} - Validation Loss: {val_loss:.4f} - Validation Accuracy: {val_acc:.4f}")
        
        # Early Stopping check
        early_stopping(val_acc, model, 'best_model.pth')
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break
    
    # Load the best model for this hyperparameter configuration
    model.load_state_dict(torch.load('best_model.pth'))
    
    # Validate the best model
    _, best_val_acc = evaluate(model, val_loader, device, criterion)
    print(f"Best Validation Accuracy for this configuration: {best_val_acc:.4f}")
    
    if best_val_acc > best_acc:
        best_acc = best_val_acc
        best_params = params.copy()
        # Save the best model across all configurations
        torch.save(model.state_dict(), 'best_model_overall.pth')
    
    print("-" * 50)

In [None]:
print(f'Best Validation Accuracy: {best_acc}')
print(f'Best Parameters: {best_params}')

In [None]:
# Load the best overall model
model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

lora_config = LoraConfig(
    lora_alpha=best_params['lora_alpha'],
    lora_dropout=0.1,
    r=best_params['lora_r'],
    target_modules=["layer1.0.conv1", "layer1.0.conv2",
                    "layer1.1.conv1", "layer1.1.conv2", 
                    "layer2.0.conv1", "layer2.0.conv2", 
                    "layer2.1.conv1", "layer2.1.conv2", 
                    "layer3.0.conv1", "layer2.0.conv2", 
                    "layer3.1.conv1", "layer3.1.conv2", 
                    "layer4.0.conv1", "layer2.0.conv2", 
                    "layer4.1.conv1", "layer4.1.conv2", 
                    "fc"]
)
model = get_peft_model(model, lora_config)
model.load_state_dict(torch.load('best_model_overall.pth'))
model.to(device)

In [None]:
# Score the reloaded best model on the held-out CIFAR-10 test split.
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)
test_loss, test_acc = evaluate(
    model=model,
    loader=test_loader,
    device=device,
    criterion=nn.CrossEntropyLoss(),
)
print(f'Final Test Accuracy: {test_acc:.4f}')

In [None]:
# Baseline run: train a pre-trained ResNet-18 with a new 10-class head and no
# LoRA wrapper or parameter freezing, using the learning rate stored in
# `best_params`.

# Data loaders for the training and validation splits.
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, num_workers=2
)
val_loader = DataLoader(
    val_dataset, batch_size=32, shuffle=False, num_workers=2
)

# Pre-trained backbone with its classifier replaced by a 10-way linear layer.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)
model.to(device)

# Loss and optimizer; every model parameter is handed to Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])

# Stop once validation accuracy has not improved for 7 epochs.
early_stopping = EarlyStopping(patience=7)

num_epochs = 100
for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, device, criterion)
    print(
        f"Epoch {epoch+1}/{num_epochs} - Training Loss: {train_loss:.4f}"
        f" - Validation Loss: {val_loss:.4f}"
        f" - Validation Accuracy: {val_acc:.4f}"
    )

    # Checkpoints to 'best_model.pth' whenever validation accuracy improves.
    early_stopping(val_acc, model, 'best_model.pth')
    if not early_stopping.early_stop:
        continue
    print("Early stopping triggered.")
    break

# Restore the weights of the best epoch of this baseline run.
model.load_state_dict(torch.load('best_model.pth'))

# Final evaluation on the test set
test_loader = DataLoader(
    test_dataset, batch_size=32, shuffle=False, num_workers=2
)
test_loss, test_acc = evaluate(model, test_loader, device, nn.CrossEntropyLoss())
print(f'Final Test Accuracy: {test_acc:.4f}')