In [1]:
"""
### Overview:
This script is designed for training an image classification model using a fine-tuned ResNet-50, incorporating data preprocessing,
augmentation, and hyperparameter optimization via Bayesian methods.

### Key Steps:

1. Label Encoding:
   - Converts categorical labels from the dataset into numerical values.
   - Saves the mapping dictionary (`label_to_idx.json`) to ensure consistency across datasets.

2. Custom Dataset Class (`CornDataset`):
   - Loads image paths and their corresponding labels from CSV files.
   - Facilitates easy integration with PyTorch’s `DataLoader`.

3. Data Augmentation:
   - Applies random transformations (cropping, flipping, rotation, color jittering) to the training images.
   - Aims to enhance model robustness and reduce overfitting.

4. Model Setup (ResNet-50 Fine-tuning):
   - Uses a pre-trained ResNet-50 as the feature extractor.
   - Unfreezes the last few layers to allow fine-tuning.
   - Replaces the final classification layer with a custom fully connected layer, including dropout for regularization.

5. Bayesian Optimization (Optuna for Hyperparameter Tuning):
   - Searches for the best hyperparameters to maximize validation accuracy.
   - Tunable parameters include:
     - **Batch size:** {16, 32, 64}
     - **Learning rate:** Continuous range from 1e-5 to 1e-3
     - **Dropout rate:** Continuous range from 0.2 to 0.7
     - **Weight decay:** Continuous range from 1e-5 to 1e-3

6. Training Process:
   - Each trial runs for a maximum of **10 epochs**.
   - Implements **early stopping** to halt training if validation accuracy plateaus.

7. Model Selection & Saving:
   - The best-performing model from all trials is saved as `best_resnet50_model.pth`.
   - Ensures optimal performance on unseen data.

This pipeline efficiently optimizes and fine-tunes the ResNet-50 model while leveraging Bayesian optimization for better hyperparameter selection.
"""

!pip install torch torchvision pandas pillow scikit-learn optuna tqdm
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pandas as pd
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder
import optuna
from optuna.samplers import TPESampler
from tqdm import tqdm
import requests
from io import BytesIO
import torch.cuda.amp as amp  # Import for mixed precision training

from google.colab import drive
drive.mount('/content/gdrive')

# Define paths
absolute_path = "/content/gdrive/My Drive/Projects/Multimodal/"
TRAIN_CSV = absolute_path + "Datasets/Corn_train_set_150.csv"
VAL_CSV = absolute_path + "Datasets/Corn_validation_set_150.csv"
LABEL_JSON = absolute_path + "Datasets/corn-label_to_idx-150.json"
BEST_MODEL_PATH = absolute_path + "Datasets/corn-best_resnet50_model-150.pth"

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Print GPU info
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory Allocated: {torch.cuda.memory_allocated(0)/1e9:.2f} GB")
    print(f"Memory Reserved: {torch.cuda.memory_reserved(0)/1e9:.2f} GB")
    print(f"Total Memory: {torch.cuda.get_device_properties(0).total_memory/1e9:.2f} GB")

BATCH_SIZES = [16, 32, 64, 128, 256]
NUM_WORKERS = 8
PREFETCH_FACTOR = 2

# Set up data prefetching and pinning
torch.multiprocessing.set_sharing_strategy('file_system')
torch.set_float32_matmul_precision('high')  # Use TF32 precision on A100

# Prepare data downloading and caching
class ImageCache:
    def __init__(self, capacity=1000):
        self.capacity = capacity
        self.cache = {}

    def get(self, url):
        if url in self.cache:
            return self.cache[url]

        try:
            response = requests.get(url)
            image = Image.open(BytesIO(response.content)).convert('RGB')

            # Keep cache size in check
            if len(self.cache) >= self.capacity:
                # Remove a random item
                self.cache.pop(next(iter(self.cache)))

            self.cache[url] = image
            return image
        except Exception as e:
            print(f"Error loading image from {url}: {e}")
            return Image.new('RGB', (224, 224), color='black')

# Global image cache
image_cache = ImageCache()

# Custom Dataset with optimized loading
class CornDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

        # Load the label encoding
        with open(LABEL_JSON, 'r') as f:
            self.label_to_idx = json.load(f)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_url = self.data.iloc[idx]['Image']
        category = self.data.iloc[idx]['Category']

        # Convert category to encoded label
        label = self.label_to_idx[category]

        # Load image from URL with caching
        image = image_cache.get(img_url)

        if self.transform:
            image = self.transform(image)

        return image, label

# Function to create the dataset and dataloaders
def create_data_loaders(batch_size):
    # Define transformations
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Create datasets
    train_dataset = CornDataset(TRAIN_CSV, transform=train_transform)
    val_dataset = CornDataset(VAL_CSV, transform=val_transform)

    # Create dataloaders with optimized settings
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True,
        prefetch_factor=PREFETCH_FACTOR,
        persistent_workers=True
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True,
        prefetch_factor=PREFETCH_FACTOR,
        persistent_workers=True
    )

    return train_loader, val_loader

# Function to perform label encoding
def perform_label_encoding():
    train_data = pd.read_csv(TRAIN_CSV)
    val_data = pd.read_csv(VAL_CSV)

    # Combine all categories
    all_categories = pd.concat([train_data['Category'], val_data['Category']]).unique()

    # Create encoding
    label_encoder = LabelEncoder()
    label_encoder.fit(all_categories)

    # Create label_to_idx dictionary
    label_to_idx = {category: int(idx) for category, idx in zip(all_categories, label_encoder.transform(all_categories))}

    # Save to json
    with open(LABEL_JSON, 'w') as f:
        json.dump(label_to_idx, f)

    print(f"Label encoding saved to {LABEL_JSON}")
    return len(label_to_idx)

# Function to train for one epoch with mixed precision
def train_epoch(model, loader, criterion, optimizer, device, scaler):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(loader, desc="Training"):
        inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

        # Zero the parameter gradients
        optimizer.zero_grad(set_to_none=True)  # More efficient than zero_grad()

        # Forward pass with mixed precision
        with amp.autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Backward and optimize with scaled gradients
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Statistics
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

# Function to validate with mixed precision
def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad(), amp.autocast():
        for inputs, labels in tqdm(loader, desc="Validating"):
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = running_loss / total
    val_acc = correct / total
    return val_loss, val_acc

# Function to initialize the model
def create_model(num_classes, dropout_rate=0.5):
    # Load with higher performance settings
    try:
        model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    except:
        model = models.resnet50(pretrained=True)

    # Freeze early layers
    for param in list(model.parameters())[:-4]:  # Freeze fewer layers
        param.requires_grad = False

    # Replace the final fully connected layer
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Dropout(dropout_rate),
        nn.Linear(num_ftrs, num_classes)
    )

    # Use channels_last memory format for better performance on A100
    model = model.to(device, memory_format=torch.channels_last)
    return model

# Objective function for Optuna
def objective(trial, num_classes):
    # Define hyperparameters to optimize
    batch_size = trial.suggest_categorical('batch_size', BATCH_SIZES)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.7)
    weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)

    # Create model and dataloaders
    model = create_model(num_classes, dropout_rate)
    train_loader, val_loader = create_data_loaders(batch_size)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Create gradient scaler for mixed precision training
    scaler = amp.GradScaler()

    # Train for a few epochs
    best_val_acc = 0
    patience = 0
    max_patience = 3

    for epoch in range(10):  # Maximum 10 epochs per trial
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device, scaler)
        val_loss, val_acc = validate(model, val_loader, criterion, device)

        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        # Print GPU memory usage
        if torch.cuda.is_available():
            print(f"GPU Memory: {torch.cuda.memory_allocated(0)/1e9:.2f}GB / {torch.cuda.get_device_properties(0).total_memory/1e9:.2f}GB")

        trial.report(val_acc, epoch)

        # Handle pruning
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        # Early stopping
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience = 0

            # Save the current model as a checkpoint for this trial
            trial_model_path = f"trial_{trial.number}_model.pth"
            torch.save(model.state_dict(), trial_model_path)
        else:
            patience += 1
            if patience >= max_patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    return best_val_acc

# Main function
def main():
    # Perform label encoding first
    num_classes = perform_label_encoding()
    print(f"Number of classes: {num_classes}")

    # Create the optuna study with A100-optimized settings
    study = optuna.create_study(direction='maximize', sampler=TPESampler())

    # Run fewer trials but with more GPU utilization
    n_trials = 30 # 10
    print(f"Running {n_trials} trials with larger batch sizes to maximize GPU usage")

    # Pass num_classes to objective function using a lambda function
    study.optimize(lambda trial: objective(trial, num_classes), n_trials=n_trials)

    # Get the best parameters
    best_params = study.best_params
    best_value = study.best_value
    print(f"Best trial: {study.best_trial.number}")
    print(f"Best validation accuracy: {best_value:.4f}")
    print(f"Best hyperparameters: {best_params}")

    # Load the best model from the best trial
    best_model = create_model(num_classes, best_params['dropout_rate'])
    best_model.load_state_dict(torch.load(f"trial_{study.best_trial.number}_model.pth"))

    # Save the best model
    torch.save(best_model.state_dict(), BEST_MODEL_PATH)
    print(f"Best model saved to {BEST_MODEL_PATH}")

    # Clean up trial model files
    for trial in study.trials:
        trial_model_path = f"trial_{trial.number}_model.pth"
        if os.path.exists(trial_model_path):
            os.remove(trial_model_path)

if __name__ == "__main__":
    main()

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvid

[I 2025-03-20 06:39:38,717] A new study created in memory with name: no-name-9a9a7c50-e128-4f58-9cf4-4f09d91a9484


Label encoding saved to /content/gdrive/My Drive/Projects/Multimodal/Datasets/corn-label_to_idx-150.json
Number of classes: 4
Running 30 trials with larger batch sizes to maximize GPU usage


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 202MB/s]
  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 4/4 [00:48<00:00, 12.01s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 1/1 [00:45<00:00, 45.84s/it]


Epoch 1, Train Loss: 1.4710, Train Acc: 0.2754, Val Loss: 1.4176, Val Acc: 0.2188
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:36<00:00,  9.03s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.71it/s]


Epoch 2, Train Loss: 1.4596, Train Acc: 0.2520, Val Loss: 1.4166, Val Acc: 0.2500
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:28<00:00,  7.06s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.93it/s]


Epoch 3, Train Loss: 1.4331, Train Acc: 0.2520, Val Loss: 1.3991, Val Acc: 0.2656
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:21<00:00,  5.31s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.88it/s]


Epoch 4, Train Loss: 1.4144, Train Acc: 0.2793, Val Loss: 1.3827, Val Acc: 0.2969
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:15<00:00,  3.76s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.88it/s]


Epoch 5, Train Loss: 1.4012, Train Acc: 0.2949, Val Loss: 1.3710, Val Acc: 0.3359
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:13<00:00,  3.42s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.91it/s]


Epoch 6, Train Loss: 1.4261, Train Acc: 0.2734, Val Loss: 1.3619, Val Acc: 0.3359
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:10<00:00,  2.60s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.94it/s]


Epoch 7, Train Loss: 1.3925, Train Acc: 0.2832, Val Loss: 1.3561, Val Acc: 0.3516
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:08<00:00,  2.09s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.91it/s]


Epoch 8, Train Loss: 1.4080, Train Acc: 0.2793, Val Loss: 1.3506, Val Acc: 0.3828
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:06<00:00,  1.56s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.94it/s]


Epoch 9, Train Loss: 1.3960, Train Acc: 0.2852, Val Loss: 1.3451, Val Acc: 0.3672
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 4/4 [00:04<00:00,  1.12s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.89it/s]
[I 2025-03-20 06:43:42,546] Trial 0 finished with value: 0.3828125 and parameters: {'batch_size': 128, 'learning_rate': 2.1010882623540563e-05, 'dropout_rate': 0.4987190691474953, 'weight_decay': 0.00014158168241863974}. Best is trial 0 with value: 0.3828125.


Epoch 10, Train Loss: 1.3596, Train Acc: 0.3105, Val Loss: 1.3397, Val Acc: 0.3594
GPU Memory: 0.11GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.92s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:24<00:00, 12.05s/it]


Epoch 1, Train Loss: 1.3286, Train Acc: 0.3555, Val Loss: 1.1342, Val Acc: 0.6250
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:21<00:00,  2.71s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00,  9.27it/s]


Epoch 2, Train Loss: 1.1352, Train Acc: 0.5293, Val Loss: 0.9778, Val Acc: 0.6797
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:20<00:00,  2.50s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00, 10.53it/s]


Epoch 3, Train Loss: 0.9937, Train Acc: 0.6406, Val Loss: 0.8502, Val Acc: 0.7891
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:16<00:00,  2.05s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00, 10.26it/s]


Epoch 4, Train Loss: 0.9087, Train Acc: 0.6797, Val Loss: 0.8029, Val Acc: 0.7266
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:15<00:00,  1.88s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00, 10.17it/s]


Epoch 5, Train Loss: 0.8345, Train Acc: 0.7070, Val Loss: 0.7083, Val Acc: 0.7812
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:12<00:00,  1.58s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00, 10.16it/s]


Epoch 6, Train Loss: 0.7581, Train Acc: 0.7363, Val Loss: 0.6558, Val Acc: 0.8125
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:12<00:00,  1.54s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00,  9.48it/s]


Epoch 7, Train Loss: 0.7007, Train Acc: 0.7598, Val Loss: 0.6348, Val Acc: 0.7891
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:13<00:00,  1.68s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00, 10.35it/s]


Epoch 8, Train Loss: 0.6573, Train Acc: 0.7910, Val Loss: 0.5759, Val Acc: 0.8281
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:10<00:00,  1.33s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00, 10.24it/s]


Epoch 9, Train Loss: 0.6253, Train Acc: 0.8008, Val Loss: 0.5841, Val Acc: 0.7969
GPU Memory: 0.11GB / 42.47GB


Training: 100%|██████████| 8/8 [00:07<00:00,  1.02it/s]
Validating: 100%|██████████| 2/2 [00:00<00:00,  9.43it/s]


Epoch 10, Train Loss: 0.6462, Train Acc: 0.7559, Val Loss: 0.5255, Val Acc: 0.8594
GPU Memory: 0.11GB / 42.47GB


[I 2025-03-20 06:46:43,899] Trial 1 finished with value: 0.859375 and parameters: {'batch_size': 64, 'learning_rate': 0.0005896994014605296, 'dropout_rate': 0.5680905482762781, 'weight_decay': 0.00023219198676951189}. Best is trial 1 with value: 0.859375.
  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 2/2 [01:32<00:00, 46.08s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 1/1 [00:45<00:00, 45.80s/it]


Epoch 1, Train Loss: 1.5250, Train Acc: 0.2129, Val Loss: 1.3701, Val Acc: 0.2969
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:45<00:00, 22.96s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.63it/s]


Epoch 2, Train Loss: 1.4783, Train Acc: 0.2500, Val Loss: 1.3570, Val Acc: 0.2656
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:23<00:00, 11.84s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.92it/s]


Epoch 3, Train Loss: 1.4263, Train Acc: 0.2871, Val Loss: 1.3431, Val Acc: 0.2969
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:13<00:00,  6.97s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.91it/s]


Epoch 4, Train Loss: 1.4167, Train Acc: 0.2949, Val Loss: 1.3264, Val Acc: 0.3594
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:06<00:00,  3.45s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.87it/s]


Epoch 5, Train Loss: 1.4238, Train Acc: 0.2773, Val Loss: 1.3069, Val Acc: 0.4141
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.92it/s]


Epoch 6, Train Loss: 1.4324, Train Acc: 0.2559, Val Loss: 1.2872, Val Acc: 0.5156
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:03<00:00,  1.52s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.90it/s]


Epoch 7, Train Loss: 1.4121, Train Acc: 0.2910, Val Loss: 1.2680, Val Acc: 0.5469
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:02<00:00,  1.22s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.91it/s]


Epoch 8, Train Loss: 1.4148, Train Acc: 0.3125, Val Loss: 1.2519, Val Acc: 0.5547
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:01<00:00,  1.24it/s]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.92it/s]


Epoch 9, Train Loss: 1.3456, Train Acc: 0.3418, Val Loss: 1.2375, Val Acc: 0.5703
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 2/2 [00:01<00:00,  1.05it/s]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.82it/s]


Epoch 10, Train Loss: 1.3603, Train Acc: 0.3301, Val Loss: 1.2231, Val Acc: 0.6094
GPU Memory: 0.21GB / 42.47GB


[I 2025-03-20 06:50:51,697] Trial 2 finished with value: 0.609375 and parameters: {'batch_size': 256, 'learning_rate': 0.00011498985635186809, 'dropout_rate': 0.6838310502116627, 'weight_decay': 0.0007792233860803548}. Best is trial 1 with value: 0.859375.
  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 4/4 [00:46<00:00, 11.59s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 1/1 [00:46<00:00, 46.19s/it]


Epoch 1, Train Loss: 1.4227, Train Acc: 0.2578, Val Loss: 1.4156, Val Acc: 0.2422
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 4/4 [00:35<00:00,  8.95s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.61it/s]


Epoch 2, Train Loss: 1.4211, Train Acc: 0.2246, Val Loss: 1.4016, Val Acc: 0.2422
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 4/4 [00:29<00:00,  7.26s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.71it/s]


Epoch 3, Train Loss: 1.3946, Train Acc: 0.2637, Val Loss: 1.3928, Val Acc: 0.2188
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 4/4 [00:19<00:00,  4.92s/it]
Validating: 100%|██████████| 1/1 [00:00<00:00,  2.94it/s]
[I 2025-03-20 06:53:50,700] Trial 3 finished with value: 0.2421875 and parameters: {'batch_size': 128, 'learning_rate': 4.640357322499083e-05, 'dropout_rate': 0.23917129916489321, 'weight_decay': 5.883775621066863e-05}. Best is trial 1 with value: 0.859375.


Epoch 4, Train Loss: 1.3758, Train Acc: 0.3125, Val Loss: 1.3818, Val Acc: 0.2344
GPU Memory: 0.21GB / 42.47GB
Early stopping at epoch 4


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.38it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:06<00:00,  1.29it/s]


Epoch 1, Train Loss: 1.3701, Train Acc: 0.3105, Val Loss: 1.2457, Val Acc: 0.4844
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:20<00:00,  1.54it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 53.03it/s]


Epoch 2, Train Loss: 1.2467, Train Acc: 0.4512, Val Loss: 1.1241, Val Acc: 0.6484
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:19<00:00,  1.63it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 60.14it/s]


Epoch 3, Train Loss: 1.1447, Train Acc: 0.5195, Val Loss: 1.0439, Val Acc: 0.5625
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:16<00:00,  1.89it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 56.94it/s]


Epoch 4, Train Loss: 1.0690, Train Acc: 0.6035, Val Loss: 0.9768, Val Acc: 0.7266
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:14<00:00,  2.24it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 52.75it/s]


Epoch 5, Train Loss: 1.0071, Train Acc: 0.6309, Val Loss: 0.9282, Val Acc: 0.5703
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:13<00:00,  2.40it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 50.74it/s]


Epoch 6, Train Loss: 0.9376, Train Acc: 0.6699, Val Loss: 0.8346, Val Acc: 0.8281
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:13<00:00,  2.34it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 57.22it/s]


Epoch 7, Train Loss: 0.9016, Train Acc: 0.6816, Val Loss: 0.7945, Val Acc: 0.7500
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:10<00:00,  2.93it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 57.33it/s]


Epoch 8, Train Loss: 0.8397, Train Acc: 0.7559, Val Loss: 0.7668, Val Acc: 0.7031
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:10<00:00,  3.13it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 51.53it/s]
[I 2025-03-20 06:56:22,545] Trial 4 finished with value: 0.828125 and parameters: {'batch_size': 16, 'learning_rate': 0.0001459255384359482, 'dropout_rate': 0.3515179968796186, 'weight_decay': 1.4980183258900475e-05}. Best is trial 1 with value: 0.859375.


Epoch 9, Train Loss: 0.8188, Train Acc: 0.7598, Val Loss: 0.7120, Val Acc: 0.7734
GPU Memory: 0.21GB / 42.47GB
Early stopping at epoch 9


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 4/4 [00:46<00:00, 11.53s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 1/1 [00:46<00:00, 46.10s/it]
[I 2025-03-20 06:57:55,285] Trial 5 pruned. 


Epoch 1, Train Loss: 1.4872, Train Acc: 0.2754, Val Loss: 1.3733, Val Acc: 0.2500
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 4/4 [00:46<00:00, 11.54s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 1/1 [00:46<00:00, 46.04s/it]
[I 2025-03-20 06:59:28,120] Trial 6 pruned. 


Epoch 1, Train Loss: 1.4475, Train Acc: 0.2383, Val Loss: 1.4318, Val Acc: 0.2188
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:05<00:00,  1.33it/s]


Epoch 1, Train Loss: 1.2719, Train Acc: 0.4219, Val Loss: 1.0910, Val Acc: 0.6094
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:21<00:00,  1.47it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 61.39it/s]


Epoch 2, Train Loss: 1.0150, Train Acc: 0.6602, Val Loss: 0.8658, Val Acc: 0.8516
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:18<00:00,  1.72it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 56.96it/s]


Epoch 3, Train Loss: 0.8987, Train Acc: 0.6895, Val Loss: 0.7614, Val Acc: 0.7422
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:17<00:00,  1.83it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 53.92it/s]


Epoch 4, Train Loss: 0.7815, Train Acc: 0.7656, Val Loss: 0.6642, Val Acc: 0.8359
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:15<00:00,  2.12it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 55.00it/s]


Epoch 5, Train Loss: 0.7472, Train Acc: 0.7734, Val Loss: 0.5890, Val Acc: 0.8828
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:14<00:00,  2.18it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 55.68it/s]


Epoch 6, Train Loss: 0.6891, Train Acc: 0.7930, Val Loss: 0.6261, Val Acc: 0.7812
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:11<00:00,  2.77it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 57.59it/s]


Epoch 7, Train Loss: 0.6808, Train Acc: 0.7656, Val Loss: 0.5920, Val Acc: 0.7656
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:11<00:00,  2.77it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 58.69it/s]
[I 2025-03-20 07:01:50,689] Trial 7 finished with value: 0.8828125 and parameters: {'batch_size': 16, 'learning_rate': 0.00033694443483078525, 'dropout_rate': 0.22675172571920388, 'weight_decay': 0.00010880650775211902}. Best is trial 7 with value: 0.8828125.


Epoch 8, Train Loss: 0.6150, Train Acc: 0.8145, Val Loss: 0.5232, Val Acc: 0.8203
GPU Memory: 0.21GB / 42.47GB
Early stopping at epoch 8


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 2/2 [01:32<00:00, 46.00s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 1/1 [00:45<00:00, 45.85s/it]
[I 2025-03-20 07:04:09,054] Trial 8 pruned. 


Epoch 1, Train Loss: 1.4402, Train Acc: 0.2227, Val Loss: 1.4119, Val Acc: 0.2422
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 16/16 [00:23<00:00,  1.46s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 4/4 [00:11<00:00,  2.90s/it]


Epoch 1, Train Loss: 1.2257, Train Acc: 0.4531, Val Loss: 0.9717, Val Acc: 0.6719
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:21<00:00,  1.32s/it]
Validating: 100%|██████████| 4/4 [00:00<00:00, 27.42it/s]


Epoch 2, Train Loss: 0.9467, Train Acc: 0.6562, Val Loss: 0.7335, Val Acc: 0.8047
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:19<00:00,  1.21s/it]
Validating: 100%|██████████| 4/4 [00:00<00:00, 25.41it/s]


Epoch 3, Train Loss: 0.7530, Train Acc: 0.7598, Val Loss: 0.6633, Val Acc: 0.8047
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:16<00:00,  1.06s/it]
Validating: 100%|██████████| 4/4 [00:00<00:00, 29.19it/s]


Epoch 4, Train Loss: 0.6702, Train Acc: 0.7891, Val Loss: 0.7418, Val Acc: 0.6641
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:16<00:00,  1.01s/it]
Validating: 100%|██████████| 4/4 [00:00<00:00, 29.68it/s]
[I 2025-03-20 07:05:59,244] Trial 9 finished with value: 0.8046875 and parameters: {'batch_size': 32, 'learning_rate': 0.0008655172463522219, 'dropout_rate': 0.37967200319919814, 'weight_decay': 0.00019590430162788502}. Best is trial 7 with value: 0.8828125.


Epoch 5, Train Loss: 0.6083, Train Acc: 0.7891, Val Loss: 0.5901, Val Acc: 0.7734
GPU Memory: 0.21GB / 42.47GB
Early stopping at epoch 5


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:05<00:00,  1.34it/s]


Epoch 1, Train Loss: 1.3448, Train Acc: 0.3242, Val Loss: 1.1620, Val Acc: 0.5703
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:21<00:00,  1.50it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 55.34it/s]


Epoch 2, Train Loss: 1.1175, Train Acc: 0.5645, Val Loss: 0.9071, Val Acc: 0.7891
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:19<00:00,  1.63it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 53.67it/s]


Epoch 3, Train Loss: 0.9246, Train Acc: 0.7207, Val Loss: 0.8187, Val Acc: 0.6406
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:17<00:00,  1.78it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 59.13it/s]


Epoch 4, Train Loss: 0.8063, Train Acc: 0.7539, Val Loss: 0.6978, Val Acc: 0.8125
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:16<00:00,  1.89it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 54.30it/s]


Epoch 5, Train Loss: 0.7237, Train Acc: 0.7949, Val Loss: 0.6412, Val Acc: 0.8125
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:14<00:00,  2.17it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 50.85it/s]


Epoch 6, Train Loss: 0.7125, Train Acc: 0.7617, Val Loss: 0.5607, Val Acc: 0.8516
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:12<00:00,  2.61it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 50.19it/s]


Epoch 7, Train Loss: 0.6743, Train Acc: 0.7891, Val Loss: 0.5364, Val Acc: 0.8203
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:12<00:00,  2.61it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 54.45it/s]


Epoch 8, Train Loss: 0.6604, Train Acc: 0.7676, Val Loss: 0.5443, Val Acc: 0.7891
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:11<00:00,  2.84it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 50.95it/s]
[I 2025-03-20 07:08:37,836] Trial 10 finished with value: 0.8515625 and parameters: {'batch_size': 16, 'learning_rate': 0.0003587791515362431, 'dropout_rate': 0.20278811396465785, 'weight_decay': 2.9254261236871295e-05}. Best is trial 7 with value: 0.8828125.


Epoch 9, Train Loss: 0.6537, Train Acc: 0.7559, Val Loss: 0.4686, Val Acc: 0.8516
GPU Memory: 0.21GB / 42.47GB
Early stopping at epoch 9


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.92s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:23<00:00, 11.57s/it]
[I 2025-03-20 07:09:24,833] Trial 11 pruned. 


Epoch 1, Train Loss: 1.3781, Train Acc: 0.3301, Val Loss: 1.1797, Val Acc: 0.4609
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.90s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:23<00:00, 11.59s/it]


Epoch 1, Train Loss: 1.4354, Train Acc: 0.2910, Val Loss: 1.2395, Val Acc: 0.5312
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 8/8 [00:20<00:00,  2.59s/it]
Validating: 100%|██████████| 2/2 [00:00<00:00,  8.74it/s]
[I 2025-03-20 07:10:32,991] Trial 12 pruned. 


Epoch 2, Train Loss: 1.2326, Train Acc: 0.4160, Val Loss: 1.1376, Val Acc: 0.5312
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.93s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:22<00:00, 11.46s/it]
[I 2025-03-20 07:11:19,991] Trial 13 pruned. 


Epoch 1, Train Loss: 1.2860, Train Acc: 0.3867, Val Loss: 1.1577, Val Acc: 0.4609
GPU Memory: 0.30GB / 42.47GB


Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 178, in close
    self

Epoch 1, Train Loss: 1.3483, Train Acc: 0.3672, Val Loss: 1.2143, Val Acc: 0.6094
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:21<00:00,  1.52it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 50.28it/s]


Epoch 2, Train Loss: 1.1656, Train Acc: 0.5312, Val Loss: 1.0390, Val Acc: 0.7344
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:20<00:00,  1.56it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 57.64it/s]


Epoch 3, Train Loss: 1.0488, Train Acc: 0.5801, Val Loss: 0.9830, Val Acc: 0.5391
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:16<00:00,  1.90it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 57.34it/s]


Epoch 4, Train Loss: 0.9835, Train Acc: 0.6152, Val Loss: 0.8541, Val Acc: 0.6641
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:14<00:00,  2.17it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 51.32it/s]
[I 2025-03-20 07:13:54,091] Trial 14 pruned. 


Epoch 5, Train Loss: 0.8756, Train Acc: 0.6875, Val Loss: 0.8330, Val Acc: 0.5938
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 16/16 [00:23<00:00,  1.45s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 4/4 [00:11<00:00,  2.89s/it]


Epoch 1, Train Loss: 1.3681, Train Acc: 0.3516, Val Loss: 1.0845, Val Acc: 0.7031
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:20<00:00,  1.29s/it]
Validating: 100%|██████████| 4/4 [00:00<00:00, 27.80it/s]


Epoch 2, Train Loss: 1.1337, Train Acc: 0.5254, Val Loss: 0.8896, Val Acc: 0.7656
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:20<00:00,  1.28s/it]
Validating: 100%|██████████| 4/4 [00:00<00:00, 27.74it/s]


Epoch 3, Train Loss: 0.9528, Train Acc: 0.6133, Val Loss: 0.7633, Val Acc: 0.7344
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:15<00:00,  1.01it/s]
Validating: 100%|██████████| 4/4 [00:00<00:00, 31.90it/s]


Epoch 4, Train Loss: 0.8244, Train Acc: 0.6855, Val Loss: 0.7010, Val Acc: 0.7422
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 16/16 [00:16<00:00,  1.03s/it]
Validating: 100%|██████████| 4/4 [00:00<00:00, 31.62it/s]
[I 2025-03-20 07:15:43,827] Trial 15 pruned. 


Epoch 5, Train Loss: 0.7785, Train Acc: 0.7031, Val Loss: 0.6668, Val Acc: 0.7422
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.93s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:23<00:00, 11.52s/it]
[I 2025-03-20 07:16:30,946] Trial 16 pruned. 


Epoch 1, Train Loss: 1.4457, Train Acc: 0.2695, Val Loss: 1.3301, Val Acc: 0.2578
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:05<00:00,  1.34it/s]
[I 2025-03-20 07:17:00,903] Trial 17 pruned. 


Epoch 1, Train Loss: 1.5213, Train Acc: 0.1934, Val Loss: 1.3770, Val Acc: 0.2500
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.93s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:23<00:00, 11.59s/it]
[I 2025-03-20 07:17:48,165] Trial 18 pruned. 


Epoch 1, Train Loss: 1.4409, Train Acc: 0.2832, Val Loss: 1.2501, Val Acc: 0.4375
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.38it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:06<00:00,  1.30it/s]


Epoch 1, Train Loss: 1.2692, Train Acc: 0.4141, Val Loss: 1.0684, Val Acc: 0.6094
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:22<00:00,  1.45it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 53.21it/s]


Epoch 2, Train Loss: 1.0566, Train Acc: 0.6270, Val Loss: 0.8730, Val Acc: 0.7266
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:18<00:00,  1.69it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 56.00it/s]


Epoch 3, Train Loss: 0.8908, Train Acc: 0.7012, Val Loss: 0.7436, Val Acc: 0.8281
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:16<00:00,  1.99it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 56.95it/s]


Epoch 4, Train Loss: 0.7851, Train Acc: 0.7754, Val Loss: 0.6956, Val Acc: 0.7812
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:15<00:00,  2.06it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 57.61it/s]


Epoch 5, Train Loss: 0.7589, Train Acc: 0.7617, Val Loss: 0.6785, Val Acc: 0.6953
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:13<00:00,  2.34it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 53.79it/s]
[I 2025-03-20 07:19:46,018] Trial 19 finished with value: 0.828125 and parameters: {'batch_size': 16, 'learning_rate': 0.00034371929039027165, 'dropout_rate': 0.3163182010896306, 'weight_decay': 0.00016288250146282186}. Best is trial 7 with value: 0.8828125.


Epoch 6, Train Loss: 0.7199, Train Acc: 0.7520, Val Loss: 0.6389, Val Acc: 0.7578
GPU Memory: 0.21GB / 42.47GB
Early stopping at epoch 6


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 16/16 [00:23<00:00,  1.46s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 4/4 [00:11<00:00,  2.95s/it]
[I 2025-03-20 07:20:21,701] Trial 20 pruned. 


Epoch 1, Train Loss: 1.4425, Train Acc: 0.2559, Val Loss: 1.3675, Val Acc: 0.3047
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.38it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:06<00:00,  1.28it/s]


Epoch 1, Train Loss: 1.2462, Train Acc: 0.4980, Val Loss: 1.1022, Val Acc: 0.6016
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:21<00:00,  1.52it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 49.85it/s]


Epoch 2, Train Loss: 1.0343, Train Acc: 0.6230, Val Loss: 0.9067, Val Acc: 0.7891
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:20<00:00,  1.56it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 57.19it/s]


Epoch 3, Train Loss: 0.8823, Train Acc: 0.7324, Val Loss: 0.8000, Val Acc: 0.7344
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:16<00:00,  1.98it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 61.19it/s]


Epoch 4, Train Loss: 0.8454, Train Acc: 0.7090, Val Loss: 0.7331, Val Acc: 0.7266
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:15<00:00,  2.13it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 52.71it/s]
[I 2025-03-20 07:22:05,722] Trial 21 finished with value: 0.7890625 and parameters: {'batch_size': 16, 'learning_rate': 0.00031492012101216145, 'dropout_rate': 0.20105310258414344, 'weight_decay': 3.15418545281269e-05}. Best is trial 7 with value: 0.8828125.


Epoch 5, Train Loss: 0.7585, Train Acc: 0.7461, Val Loss: 0.6597, Val Acc: 0.7578
GPU Memory: 0.21GB / 42.47GB
Early stopping at epoch 5


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:05<00:00,  1.34it/s]


Epoch 1, Train Loss: 1.1672, Train Acc: 0.4824, Val Loss: 0.9810, Val Acc: 0.6094
GPU Memory: 0.21GB / 42.47GB


Training: 100%|██████████| 32/32 [00:21<00:00,  1.47it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 52.71it/s]
[I 2025-03-20 07:22:57,565] Trial 22 pruned. 


Epoch 2, Train Loss: 0.8936, Train Acc: 0.6660, Val Loss: 0.7986, Val Acc: 0.6562
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.38it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:06<00:00,  1.32it/s]


Epoch 1, Train Loss: 1.3098, Train Acc: 0.4102, Val Loss: 1.1254, Val Acc: 0.5859
GPU Memory: 0.30GB / 42.47GB


Training: 100%|██████████| 32/32 [00:20<00:00,  1.55it/s]
Validating: 100%|██████████| 8/8 [00:00<00:00, 54.17it/s]
[I 2025-03-20 07:23:48,453] Trial 23 pruned. 


Epoch 2, Train Loss: 1.0396, Train Acc: 0.6543, Val Loss: 0.9533, Val Acc: 0.6406
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:05<00:00,  1.34it/s]
[I 2025-03-20 07:24:18,381] Trial 24 pruned. 


Epoch 1, Train Loss: 1.2572, Train Acc: 0.4121, Val Loss: 1.0844, Val Acc: 0.4375
GPU Memory: 0.30GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 2/2 [01:32<00:00, 46.16s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 1/1 [00:45<00:00, 45.71s/it]
[I 2025-03-20 07:26:37,063] Trial 25 pruned. 


Epoch 1, Train Loss: 1.4248, Train Acc: 0.2461, Val Loss: 1.4056, Val Acc: 0.2500
GPU Memory: 0.21GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.99s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:22<00:00, 11.48s/it]
[I 2025-03-20 07:27:24,559] Trial 26 pruned. 


Epoch 1, Train Loss: 1.3002, Train Acc: 0.4082, Val Loss: 1.1488, Val Acc: 0.4062
GPU Memory: 0.11GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:05<00:00,  1.34it/s]
[I 2025-03-20 07:27:54,499] Trial 27 pruned. 


Epoch 1, Train Loss: 1.3457, Train Acc: 0.3340, Val Loss: 1.1938, Val Acc: 0.5625
GPU Memory: 0.11GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 8/8 [00:05<00:00,  1.34it/s]
[I 2025-03-20 07:29:44,463] Trial 28 pruned. 


Epoch 1, Train Loss: 1.3334, Train Acc: 0.3418, Val Loss: 1.1732, Val Acc: 0.4688
GPU Memory: 0.11GB / 42.47GB


  scaler = amp.GradScaler()
  with amp.autocast():
Training: 100%|██████████| 8/8 [00:23<00:00,  2.91s/it]
  with torch.no_grad(), amp.autocast():
Validating: 100%|██████████| 2/2 [00:23<00:00, 11.51s/it]
[I 2025-03-20 07:30:31,380] Trial 29 pruned. 


Epoch 1, Train Loss: 1.4784, Train Acc: 0.2383, Val Loss: 1.3840, Val Acc: 0.3203
GPU Memory: 0.11GB / 42.47GB
Best trial: 7
Best validation accuracy: 0.8828
Best hyperparameters: {'batch_size': 16, 'learning_rate': 0.00033694443483078525, 'dropout_rate': 0.22675172571920388, 'weight_decay': 0.00010880650775211902}
Best model saved to /content/gdrive/My Drive/Projects/Multimodal/Datasets/corn-best_resnet50_model-150.pth


## Predictions Phase

In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import requests
from io import BytesIO
import json
import logging
import os
import time
from google.colab import drive
drive.mount('/content/gdrive')

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('prediction.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

class TestImageDataset(Dataset):
    """Custom Dataset for loading test images"""
    def __init__(self, csv_file, feature_col, transform=None):
        self.data = pd.read_csv(csv_file)
        self.feature_col = feature_col
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        try:
            img_url = self.data.iloc[idx][self.feature_col]

            # Download and open image
            response = requests.get(img_url, timeout=10)
            if response.status_code != 200:
                raise ValueError(f"Failed to fetch image: HTTP {response.status_code}")

            img = Image.open(BytesIO(response.content)).convert('RGB')

            if self.transform:
                img = self.transform(img)

            return img, idx

        except Exception as e:
            logger.error(f"Error loading image at index {idx}: {str(e)}")
            raise

def load_model_and_labels(model_path, label_to_idx_path):
    """Load the trained model and label mapping"""
    try:
        # Load label mapping
        with open(label_to_idx_path, 'r') as f:
            label_to_idx = json.load(f)

        # Create inverse mapping
        idx_to_label = {v: k for k, v in label_to_idx.items()}

        # Initialize model
        model = models.resnet50(weights=None)
        model.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(model.fc.in_features, len(label_to_idx))
        )

        # Load trained weights
        model.load_state_dict(torch.load(model_path))

        return model, idx_to_label

    except Exception as e:
        logger.error(f"Error loading model and labels: {str(e)}")
        raise

def predict_images(test_set_path, model_path, label_to_idx_path, batch_size,
                  prediction_col_name, output_path, feature_col='Image'):
    """
    Make predictions on test images and save results

    Parameters:
    - test_set_path: path to test CSV file
    - model_path: path to trained model weights
    - label_to_idx_path: path to label mapping JSON
    - batch_size: batch size for predictions
    - prediction_col_name: name for the new predictions column
    - output_path: path to save predictions CSV
    - feature_col: name of column containing image URLs

    Returns:
    - result_df: DataFrame with predictions
    - execution_time: Time taken for predictions in seconds
    - prediction_cost: Cost of predictions based on execution time
    """
    try:
        # Start timing
        start_time = time.time()

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {device}")

        # Load test data
        test_df = pd.read_csv(test_set_path)
        logger.info(f"Loaded test set with {len(test_df)} images")

        # Create transforms for test images
        test_transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Standard ResNet input size
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
        ])

        # Create dataset and dataloader
        test_dataset = TestImageDataset(test_set_path, feature_col, test_transform)
        test_loader = DataLoader(test_dataset,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=4)

        # Load model and label mapping
        model, idx_to_label = load_model_and_labels(model_path, label_to_idx_path)
        model = model.to(device)
        model.eval()

        # Make predictions
        predictions = []
        with torch.no_grad():
            for batch_images, batch_indices in test_loader:
                batch_images = batch_images.to(device)
                outputs = model(batch_images)
                _, predicted = torch.max(outputs.data, 1)

                # Convert indices to labels
                batch_predictions = [idx_to_label[idx.item()]
                                  for idx in predicted]

                # Store predictions with their indices
                for idx, pred in zip(batch_indices, batch_predictions):
                    predictions.append((idx.item(), pred))

        # Sort predictions by index to maintain original order
        predictions.sort(key=lambda x: x[0])
        predicted_labels = [pred[1] for pred in predictions]

        # Add predictions to dataframe
        test_df[prediction_col_name] = predicted_labels

        # Save results
        test_df.to_csv(output_path, index=False)

        # Calculate execution time and cost
        execution_time = time.time() - start_time
        prediction_cost = 0.000281392488 * execution_time

        logger.info(f"Predictions saved to {output_path}")
        logger.info(f"Prediction time: {execution_time:.2f} seconds")
        logger.info(f"Prediction cost: ${prediction_cost:.6f}")

        return test_df, execution_time, prediction_cost

    except Exception as e:
        logger.error(f"Error in prediction pipeline: {str(e)}")
        raise

absolute_path = "/content/gdrive/My Drive/Projects/Multimodal/"

if __name__ == "__main__":
    test_params = {
        'test_set_path': absolute_path + 'Datasets/Corn_test_set_150.csv',
        'model_path': absolute_path + 'Datasets/corn-best_resnet50_model-150.pth',
        'label_to_idx_path': absolute_path + 'Datasets/corn-label_to_idx-150.json',
        'batch_size': 16,
        'prediction_col_name': 'ResNet50-Predictions-Bayesian-Optimization',
        'output_path': absolute_path + 'Datasets/Corn-test_set_150_with_predictions.csv'
    }

    # Run predictions
    result_df, execution_time, prediction_cost = predict_images(**test_params)

    print("\nPrediction Results Summary:")
    print(f"Total prediction time: {execution_time:.2f} seconds")
    print(f"Total prediction cost: ${prediction_cost:.6f}")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).

Prediction Results Summary:
Total prediction time: 18.19 seconds
Total prediction cost: $0.005120
