## TEAM MEMBERS 
* * *
Rajan Ghimire , STUDENT ID: C0924991

* * *

Prajwal Luitel , STUDENT ID: C0927658

* * *



In [9]:
import torch

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f"Total Memory: {torch.cuda.get_device_properties(i).total_memory / (1024 ** 3):.2f} GB")
else:
    print("No GPU available.")


GPU 0: NVIDIA GeForce RTX 4070 Ti SUPER
Total Memory: 15.70 GB


In [1]:
import kagglehub

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
from pathlib import Path

# Download latest version
path = kagglehub.dataset_download("anshtanwar/microscopic-fungi-images")

print("Path to dataset files:", path)


from torchvision.models import vgg16
vgg_model = vgg16()


  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /home/aeroserver/.cache/kagglehub/datasets/anshtanwar/microscopic-fungi-images/versions/2


In [2]:


class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):

        self.root_dir = Path(root_dir)
        self.transform = transform
        self.classes = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))])
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}
        
        self.samples = []
        for class_name in self.classes:
            class_dir = self.root_dir / class_name
            for img_name in os.listdir(class_dir)[:100]:
                if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    self.samples.append((
                        str(class_dir / img_name),
                        self.class_to_idx[class_name]
                    ))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        
        image = Image.open(img_path).convert('RGB')
        
        # Apply transforms if any
        if self.transform:
            image = self.transform(image)
        
        return image, label

def create_dataloaders(data_dir, batch_size=32, num_workers=4):
    # Define transforms
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])
    
    # Create dataset
    dataset = CustomImageDataset(
        root_dir=data_dir,
        transform=transform
    )
    
    # Create dataloader
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers
    )
    
    return dataloader, dataset.classes



In [3]:
# Usage example

batch_size = 32

train_loader, classes = create_dataloaders(
data_dir=f"{path}/train",
batch_size=batch_size
)
test_loader, classes = create_dataloaders(
data_dir=f"{path}/test",
batch_size=batch_size
)
valid_loader, classes = create_dataloaders(
data_dir=f"{path}/valid",
batch_size=batch_size
)


In [4]:
def print_model_structure(model, indent=0):
    for name, child in model.named_children():
        params = sum(p.numel() for p in child.parameters())
        print(f"{'  ' * indent}└─ {name}: {child.__class__.__name__} ({params:,} parameters)")
        
        # Recursively print child modules if they exist
        if list(child.children()):
            print_model_structure(child, indent + 1)

# Print the model structure
print("VGG Architecture:")
print_model_structure(vgg_model)

VGG Architecture:
└─ features: Sequential (14,714,688 parameters)
  └─ 0: Conv2d (1,792 parameters)
  └─ 1: ReLU (0 parameters)
  └─ 2: Conv2d (36,928 parameters)
  └─ 3: ReLU (0 parameters)
  └─ 4: MaxPool2d (0 parameters)
  └─ 5: Conv2d (73,856 parameters)
  └─ 6: ReLU (0 parameters)
  └─ 7: Conv2d (147,584 parameters)
  └─ 8: ReLU (0 parameters)
  └─ 9: MaxPool2d (0 parameters)
  └─ 10: Conv2d (295,168 parameters)
  └─ 11: ReLU (0 parameters)
  └─ 12: Conv2d (590,080 parameters)
  └─ 13: ReLU (0 parameters)
  └─ 14: Conv2d (590,080 parameters)
  └─ 15: ReLU (0 parameters)
  └─ 16: MaxPool2d (0 parameters)
  └─ 17: Conv2d (1,180,160 parameters)
  └─ 18: ReLU (0 parameters)
  └─ 19: Conv2d (2,359,808 parameters)
  └─ 20: ReLU (0 parameters)
  └─ 21: Conv2d (2,359,808 parameters)
  └─ 22: ReLU (0 parameters)
  └─ 23: MaxPool2d (0 parameters)
  └─ 24: Conv2d (2,359,808 parameters)
  └─ 25: ReLU (0 parameters)
  └─ 26: Conv2d (2,359,808 parameters)
  └─ 27: ReLU (0 parameters)
  └─ 28: C

In [5]:
import torch
import torch.nn as nn
from torchvision.models import vgg16, VGG16_Weights
from torch.utils.data import DataLoader, SubsetRandomSampler
from sklearn.model_selection import KFold
import optuna
import numpy as np
from typing import Dict, Tuple, List

class CustomVGG16(nn.Module):
    def __init__(self, num_classes: int, dropout_rate: float = 0.5):
        super(CustomVGG16, self).__init__()
        
        vgg = vgg16(weights=VGG16_Weights.DEFAULT)
        
        self.features = vgg.features
        
        for param in self.features.parameters():
            param.requires_grad = False
            
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(4096, 1024),
            nn.ReLU(True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(1024, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [6]:


class TrainingPipeline:
    def __init__(self, num_classes: int, device: str = 'cuda'):
        self.num_classes = num_classes
        self.device = device
        self.best_params = None
        self.best_model = None
        
    def objective(self, trial: optuna.Trial, train_loader: DataLoader, 
                 valid_loader: DataLoader) -> float:
        lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
        dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.7)
        batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
        
        model = CustomVGG16(self.num_classes, dropout_rate).to(self.device)
        
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.classifier.parameters(), lr=lr)
        
        epochs = 10  
        best_valid_loss = float('inf')
        
        for epoch in range(epochs):
            # Training phase
            model.train()
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
            
            # Validation phase
            model.eval()
            valid_loss = 0.0
            correct = 0
            total = 0
            
            with torch.no_grad():
                for inputs, labels in valid_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    valid_loss += loss.item()
                    
                    _, predicted = outputs.max(1)
                    total += labels.size(0)
                    correct += predicted.eq(labels).sum().item()
            
            valid_loss = valid_loss / len(valid_loader)
            accuracy = correct / total
            
            # Report intermediate value
            trial.report(valid_loss, epoch)
            
            if trial.should_prune():
                raise optuna.TrialPruned()
            
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
        
        return best_valid_loss

    def train_final_model(self, train_loader: DataLoader, 
                         valid_loader: DataLoader, 
                         test_loader: DataLoader,
                         k_folds: int = 5) -> Tuple[nn.Module, Dict]:
        kfold = KFold(n_splits=k_folds, shuffle=True)
        
        train_indices = np.arange(len(train_loader.dataset))
        
        fold_results = []
        
        # K-fold Cross Validation
        for fold, (train_ids, valid_ids) in enumerate(kfold.split(train_indices)):
            print(f'FOLD {fold}')
            print('--------------------------------')
            
            train_subsampler = SubsetRandomSampler(train_ids)
            valid_subsampler = SubsetRandomSampler(valid_ids)
            
            train_fold_loader = DataLoader(
                train_loader.dataset, 
                batch_size=self.best_params['batch_size'],
                sampler=train_subsampler
            )
            valid_fold_loader = DataLoader(
                train_loader.dataset,
                batch_size=self.best_params['batch_size'],
                sampler=valid_subsampler
            )
            
            model = CustomVGG16(
                self.num_classes,
                self.best_params['dropout_rate']
            ).to(self.device)
            
            # Training setup
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(
                model.classifier.parameters(),
                lr=self.best_params['lr']
            )
            
            # Training loop
            epochs = 30  
            best_valid_loss = float('inf')
            
            for epoch in range(epochs):
                # Training phase
                model.train()
                train_loss = 0.0
                for inputs, labels in train_fold_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    train_loss += loss.item()
                
                # Validation phase
                model.eval()
                valid_loss = 0.0
                correct = 0
                total = 0
                
                with torch.no_grad():
                    for inputs, labels in valid_fold_loader:
                        inputs = inputs.to(self.device)
                        labels = labels.to(self.device)
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        valid_loss += loss.item()
                        
                        _, predicted = outputs.max(1)
                        total += labels.size(0)
                        correct += predicted.eq(labels).sum().item()
                
                train_loss = train_loss / len(train_fold_loader)
                valid_loss = valid_loss / len(valid_fold_loader)
                accuracy = correct / total
                
                print(f'Epoch: {epoch+1}/{epochs}')
                print(f'Training Loss: {train_loss:.4f}')
                print(f'Validation Loss: {valid_loss:.4f}')
                print(f'Validation Accuracy: {accuracy:.4f}')
                
                if valid_loss < best_valid_loss:
                    best_valid_loss = valid_loss
                    if fold == 0:  # Save best model from first fold
                        self.best_model = model.state_dict()
            
            fold_results.append(best_valid_loss)
        
        print('\nK-FOLD CROSS VALIDATION RESULTS')
        print('--------------------------------')
        print(f'Average validation loss: {np.mean(fold_results):.4f}')
        print(f'Std of validation loss: {np.std(fold_results):.4f}')
        
        model = CustomVGG16(self.num_classes, self.best_params['dropout_rate']).to(self.device)
        model.load_state_dict(self.best_model)
        model.eval()
        
        test_loss = 0.0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
        
        test_loss = test_loss / len(test_loader)
        test_accuracy = correct / total
        
        print('\nFINAL TEST RESULTS')
        print('--------------------------------')
        print(f'Test Loss: {test_loss:.4f}')
        print(f'Test Accuracy: {test_accuracy:.4f}')
        
        return model, {
            'test_loss': test_loss,
            'test_accuracy': test_accuracy,
            'fold_results': fold_results
        }

    def find_best_hyperparameters(self, train_loader: DataLoader, 
                                 valid_loader: DataLoader, 
                                 n_trials: int = 100) -> Dict:
        study = optuna.create_study(direction='minimize')
        study.optimize(lambda trial: self.objective(trial, train_loader, valid_loader), 
                      n_trials=n_trials)
        
        self.best_params = study.best_params
        return study.best_params




In [7]:
def main():
    
    num_classes = len(classes)  
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    pipeline = TrainingPipeline(num_classes=num_classes, device=device)
    
    best_params = pipeline.find_best_hyperparameters(
        train_loader=train_loader,
        valid_loader=valid_loader,
        n_trials=10
    )
    print("Best hyperparameters:", best_params)
    
    model, results = pipeline.train_final_model(
        train_loader=train_loader,
        valid_loader=valid_loader,
        test_loader=test_loader,
        k_folds=5
    )
    
    print("Final results:", results)
    
    torch.save(model.state_dict(), 'best_model.pth')
    
    
main()

[I 2025-02-01 13:17:46,622] A new study created in memory with name: no-name-e4abfacb-9508-40e3-9f12-7694d8888d4f
[I 2025-02-01 13:18:05,158] Trial 0 finished with value: 1.4390889406204224 and parameters: {'lr': 0.008517362174549684, 'dropout_rate': 0.2672239664535954, 'batch_size': 64}. Best is trial 0 with value: 1.4390889406204224.
[I 2025-02-01 13:18:23,527] Trial 1 finished with value: 1.047280205147607 and parameters: {'lr': 0.0006797298690041322, 'dropout_rate': 0.24545322758358784, 'batch_size': 16}. Best is trial 1 with value: 1.047280205147607.
[I 2025-02-01 13:18:41,923] Trial 2 finished with value: 1.2471999270575387 and parameters: {'lr': 0.007062023853657059, 'dropout_rate': 0.27383006600049903, 'batch_size': 64}. Best is trial 1 with value: 1.047280205147607.
[I 2025-02-01 13:19:00,315] Trial 3 finished with value: 0.9099310551370893 and parameters: {'lr': 0.0006285257834221934, 'dropout_rate': 0.5970128392051451, 'batch_size': 32}. Best is trial 3 with value: 0.9099310

Best hyperparameters: {'lr': 0.0001188600851104574, 'dropout_rate': 0.6848642149849223, 'batch_size': 16}
FOLD 0
--------------------------------
Epoch: 1/30
Training Loss: 1.4902
Validation Loss: 1.3070
Validation Accuracy: 0.4400
Epoch: 2/30
Training Loss: 1.1089
Validation Loss: 1.1095
Validation Accuracy: 0.5800
Epoch: 3/30
Training Loss: 0.9057
Validation Loss: 0.9917
Validation Accuracy: 0.6400
Epoch: 4/30
Training Loss: 0.7559
Validation Loss: 0.9663
Validation Accuracy: 0.6600
Epoch: 5/30
Training Loss: 0.5508
Validation Loss: 0.9646
Validation Accuracy: 0.6300
Epoch: 6/30
Training Loss: 0.4002
Validation Loss: 1.1407
Validation Accuracy: 0.6000
Epoch: 7/30
Training Loss: 0.2739
Validation Loss: 1.1142
Validation Accuracy: 0.6800
Epoch: 8/30
Training Loss: 0.2058
Validation Loss: 0.9471
Validation Accuracy: 0.6400
Epoch: 9/30
Training Loss: 0.1396
Validation Loss: 1.0343
Validation Accuracy: 0.6000
Epoch: 10/30
Training Loss: 0.0946
Validation Loss: 1.3907
Validation Accuracy: 

The hyperparameter tuning results (using Optuna) show:

10 trials were attempted (0-9)
The best performing model was found in trial 4 with these parameters:

Learning rate: 0.0001188 (a relatively small learning rate)
Dropout rate: 0.6848 (fairly aggressive dropout to prevent overfitting)
Batch size: 16 (smaller batch size)


Trials 6-9 were "pruned" (stopped early) because they weren't performing well