In [1]:
# Essential imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
from tqdm import tqdm

In [2]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# split dataset (one time operation)
import os
import shutil
from sklearn.model_selection import train_test_split

# Source folder (one sub-folder per class) and destination for the reduced copy
dataset_path = '../data/animals_dataset/raw-img/'
output_path = '../data/animals_dataset_reduced/'

# Fraction of each class's images to keep in the reduced dataset
sample_ratio = 0.05

# Split ratios applied to the sampled subset
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# Create output directories
for split in ['train', 'val', 'test']:
    split_path = os.path.join(output_path, split)
    os.makedirs(split_path, exist_ok=True)

# Process each class folder.
# os.listdir returns entries in arbitrary order, so both listings are sorted:
# with a fixed random_state the sampled files and the train/val/test
# assignment then depend only on the file names, not on the filesystem.
for class_name in sorted(os.listdir(dataset_path)):
    class_folder = os.path.join(dataset_path, class_name)
    if os.path.isdir(class_folder):
        # Get all image file paths for the class (regular files only)
        image_files = sorted(
            os.path.join(class_folder, img)
            for img in os.listdir(class_folder)
            if os.path.isfile(os.path.join(class_folder, img))
        )

        # Randomly keep `sample_ratio` of the images; the remainder is discarded
        sampled_files, _ = train_test_split(image_files, test_size=(1 - sample_ratio), random_state=42)

        # Split the sampled files into train, val, and test:
        # first carve off the training share, then divide what is left
        # between val and test in proportion to their ratios.
        train_files, temp_files = train_test_split(sampled_files, test_size=(1 - train_ratio), random_state=42)
        val_files, test_files = train_test_split(temp_files, test_size=(test_ratio / (val_ratio + test_ratio)), random_state=42)

        # Copy files to <output_path>/<split>/<class_name>/
        for split, files in zip(['train', 'val', 'test'], [train_files, val_files, test_files]):
            split_class_folder = os.path.join(output_path, split, class_name)
            os.makedirs(split_class_folder, exist_ok=True)
            for file in files:
                shutil.copy(file, os.path.join(split_class_folder, os.path.basename(file)))

print("Reduced dataset split completed!")

Reduced dataset split completed!


# Baseline Implementation

In [4]:
# Data pre-processing and augmentation
from torchvision import transforms

# Shared preprocessing constants: the standard ImageNet input size and the
# ImageNet per-channel statistics used for normalisation.
_INPUT_SIZE = (224, 224)
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline for the baseline: the only augmentation is a random
# horizontal flip, applied after resizing and before tensor conversion.
_train_steps = [
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
]
baseline_transform_train = transforms.Compose(_train_steps)

# Validation pipeline: same resize / tensor conversion / normalisation,
# without the random flip.
_val_steps = [
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
]
baseline_transform_val = transforms.Compose(_val_steps)

class AnimalDataset(Dataset):
    """Image dataset read from a directory tree with one sub-folder per class.

    Every sub-directory of ``root_dir`` is treated as a class and every regular
    file inside it as one sample of that class. Class names are sorted and
    numbered consecutively from 0, so the label assigned to a class name does
    not depend on the order in which the operating system lists the
    directory. Two dataset roots with the same set of class folders (e.g. a
    train and a val split) therefore get the same name-to-label mapping.
    Non-directory entries directly under ``root_dir`` are ignored and do not
    consume a label index.

    Attributes:
        root_dir: The directory passed to the constructor.
        transform: Optional callable applied to each loaded image.
        classes: Sorted list of class (sub-directory) names.
        class_to_idx: Mapping from class name to integer label.
        image_paths: Paths of all samples, grouped by class, sorted by name.
        labels: Integer label of each entry in ``image_paths``.
    """

    def __init__(self, root_dir, transform=None):
        """Index all files found under the class folders of ``root_dir``.

        Args:
            root_dir: Directory containing one sub-directory per class.
            transform: Optional callable applied to the image in
                ``__getitem__``.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only sub-directories are classes. Sorting makes the label mapping
        # deterministic, because os.listdir returns entries in arbitrary order.
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

        # Load image paths and labels
        for class_name in self.classes:
            label = self.class_to_idx[class_name]
            class_folder = os.path.join(root_dir, class_name)
            for img_file in sorted(os.listdir(class_folder)):
                img_path = os.path.join(class_folder, img_file)
                if os.path.isfile(img_path):
                    self.image_paths.append(img_path)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(image, label)``.

        The file is opened with PIL and converted to RGB; if a transform was
        supplied, its result is returned in place of the PIL image.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaving it to garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, label



In [5]:
from torch.utils.data import DataLoader

# Locations of the training and validation splits of the reduced dataset
train_dir = '../data/animals_dataset_reduced/train'
val_dir = '../data/animals_dataset_reduced/val'

# One dataset per split, each paired with its own preprocessing pipeline
train_dataset = AnimalDataset(train_dir, transform=baseline_transform_train)
val_dataset = AnimalDataset(val_dir, transform=baseline_transform_val)

# Batch iterators: the training loader shuffles, the validation loader
# keeps the dataset order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32, num_workers=4)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=32, num_workers=4)

In [6]:
# Load pretrained MobileNetV2 model
from torchvision import models
import torch.nn as nn

# Start from torchvision's MobileNetV2 with its pretrained weights
model = models.mobilenet_v2(pretrained=True)

# Swap the last classifier layer for a fresh linear layer whose output size
# equals the number of classes
num_classes = 10  # Update this based on your dataset
model.classifier[1] = nn.Linear(in_features=model.last_channel, out_features=num_classes)

# Run on the GPU when CUDA is available, otherwise on the CPU.
# The model is moved only after the new layer has been attached, so the
# replacement layer ends up on the same device as the rest of the network.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)



In [7]:
#loss function and optimizer

import torch.optim as optim

# Cross-entropy loss for the classification head
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the model, with a step size of 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [8]:
# Training Config
# Define training parameters
num_epochs = 10

# Rebuild the data loaders for training: batch size 16 and pinned memory.
# These rebind `train_loader` / `val_loader`, replacing the loaders created
# earlier in the notebook.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)

In [None]:
import time
class TrainingLogger:
    """Collects per-epoch metrics and draws loss / accuracy curves from them.

    Each call to ``log_epoch`` appends one value to each of the five history
    lists, so entry ``i`` of every list belongs to the ``i``-th logged epoch.
    """

    def __init__(self):
        """Start with empty histories."""
        self.train_losses, self.val_losses = [], []
        self.train_accuracies, self.val_accuracies = [], []
        self.learning_rates = []

    def log_epoch(self, train_loss, val_loss, train_acc, val_acc, lr):
        """Append one epoch's metrics to the corresponding history lists."""
        histories = (
            self.train_losses,
            self.val_losses,
            self.train_accuracies,
            self.val_accuracies,
            self.learning_rates,
        )
        values = (train_loss, val_loss, train_acc, val_acc, lr)
        for history, value in zip(histories, values):
            history.append(value)

    def plot_metrics(self):
        """Plot loss and accuracy histories side by side, save and show them.

        The figure is written to ``baseline_training_curves.png`` in the
        current working directory before being displayed. The learning-rate
        history is not plotted.
        """
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))

        # (axis, train series, val series, train label, val label, y label, title)
        panels = [
            (axes[0], self.train_losses, self.val_losses,
             'Train Loss', 'Val Loss', 'Loss', 'Training and Validation Loss'),
            (axes[1], self.train_accuracies, self.val_accuracies,
             'Train Acc', 'Val Acc', 'Accuracy', 'Training and Validation Accuracy'),
        ]
        for ax, train_series, val_series, train_label, val_label, y_label, title in panels:
            ax.plot(train_series, label=train_label)
            ax.plot(val_series, label=val_label)
            ax.set_xlabel('Epoch')
            ax.set_ylabel(y_label)
            ax.legend()
            ax.set_title(title)

        plt.tight_layout()
        plt.savefig('baseline_training_curves.png')
        plt.show()

def train_one_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            It is assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the average loss per sample and the
        accuracy in percent over all samples seen in this pass.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(train_loader, desc='Training'):
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so that a smaller final
        # batch does not count as much as a full one in the epoch average.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

    epoch_loss = loss_sum / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating its weights.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            It is assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the average loss per sample and the
        accuracy in percent over all samples in the loader.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc='Validation'):
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = labels.size(0)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight the batch-mean loss by the batch size so that a smaller
            # final batch does not count as much as a full one.
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("val_loader yielded no samples; cannot compute epoch metrics")

    epoch_loss = loss_sum / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def run_baseline_experiment(config):
    """Train and validate a ResNet-18 classifier as described by ``config``.

    Args:
        config: Dict with the keys ``'train_loader'``, ``'val_loader'``,
            ``'pretrained'``, ``'num_classes'``, ``'learning_rate'``,
            ``'weight_decay'`` and ``'num_epochs'``.

    Returns:
        Tuple ``(model, logger)``: the model as it is after the last epoch
        (not reloaded from the best checkpoint) and the ``TrainingLogger``
        holding the per-epoch metrics.

    Side effects:
        Prints progress, writes ``baseline_best_model.pth`` whenever the
        validation accuracy improves, and plots the curves at the end.
    """
    # Pick the compute device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Data
    train_loader = config['train_loader']
    val_loader = config['val_loader']

    # ResNet-18 with its final fully connected layer resized to the class count
    net = models.resnet18(pretrained=config['pretrained'])
    net.fc = nn.Linear(net.fc.in_features, config['num_classes'])
    net = net.to(device)

    # Loss and optimizer
    loss_fn = nn.CrossEntropyLoss()
    base_lr = config['learning_rate']
    adam = optim.Adam(net.parameters(), lr=base_lr, weight_decay=config['weight_decay'])

    history = TrainingLogger()

    total_epochs = config['num_epochs']
    best_val_acc = 0.0
    for epoch_number in range(1, total_epochs + 1):
        print(f"\nEpoch {epoch_number}/{total_epochs}")

        # Time the full train + validation pass for this epoch
        tick = time.time()
        train_loss, train_acc = train_one_epoch(net, train_loader, loss_fn, adam, device)
        val_loss, val_acc = validate(net, val_loader, loss_fn, device)
        elapsed = time.time() - tick
        print(f"Epoch {epoch_number} took {elapsed:.2f} seconds")

        # The configured learning rate is what gets logged each epoch
        history.log_epoch(train_loss, val_loss, train_acc, val_acc, base_lr)

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        # Checkpoint only on a strict improvement of validation accuracy
        improved = val_acc > best_val_acc
        if improved:
            best_val_acc = val_acc
            torch.save(net.state_dict(), 'baseline_best_model.pth')
            print(f"Saved best model with validation accuracy: {val_acc:.2f}%")

    # Plot results
    history.plot_metrics()

    return net, history

# Example configuration
config = {
    'train_loader': train_loader,
    'val_loader': val_loader,
    'num_classes': 10,
    'pretrained': True,
    # Adam step size. The previous value of 1 was a leftover override of the
    # intended 0.001, which is also the rate used for Adam earlier in the
    # notebook.
    'learning_rate': 0.001,
    'weight_decay': 1e-4,
    'num_epochs': 5
}

# Run the experiment
model, logger = run_baseline_experiment(config)



Using device: cpu

Epoch 1/5

Epoch 1/5


