In [None]:
!pip install optuna
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import requests
from io import BytesIO
import time
from tqdm import tqdm
import os
import logging
import gc
import json
from datetime import datetime
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
import multiprocessing
import optuna
from optuna.trial import TrialState
from google.colab import drive
drive.mount('/content/gdrive')

# Setup logging: INFO-and-above records are written both to the file
# 'training.log' (relative to the current working directory) and to the
# console stream, each prefixed with a timestamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('training.log'),
        logging.StreamHandler()
    ]
)
# Module-level logger shared by every function and class in this cell.
logger = logging.getLogger(__name__)

def save_best_model(model, label_to_idx, config, val_metrics, epoch, best_val_loss, best_model_filename):
    """Save the model weights and label mapping when validation loss improves.

    Args:
        model: Module whose ``state_dict()`` is written to disk.
        label_to_idx: Mapping from class label to class index; stored as JSON
            next to the weights file.
        config: Configuration dict; reads ``config['paths']['absolute_path']``
            and the ``learning_rate``, ``batch_size`` and ``image_size``
            entries of ``config['hyperparameters']`` (used in the file name).
        val_metrics: Dict containing at least the key ``'loss'``.
        epoch: Epoch number, used only in the log message.
        best_val_loss: Lowest validation loss seen so far.
        best_model_filename: Unused; kept so existing callers keep working.

    Returns:
        The updated best validation loss (unchanged when there was no
        improvement).
    """
    current_loss = val_metrics['loss']

    # `not (a < b)` rather than `a >= b` so that a NaN loss is never saved.
    if not current_loss < best_val_loss:
        return best_val_loss

    # Create the directory if it doesn't exist
    model_dir = os.path.join(config['paths']['absolute_path'], 'ConvNeXt-Trained-Models/')
    os.makedirs(model_dir, exist_ok=True)

    hyperparameters = config['hyperparameters']
    config_filename = (
        f"model_lr_{hyperparameters['learning_rate']}"
        f"_bs_{hyperparameters['batch_size']}"
        f"_es_{hyperparameters['image_size']}.pth"
    )
    # Build the path from the directory that was just created, so the save
    # works whether or not 'absolute_path' ends with a path separator.
    model_path = os.path.join(model_dir, config_filename)

    torch.save(model.state_dict(), model_path)

    # Swap only the final extension; a plain str.replace would also rewrite
    # any '.pth' that happens to appear in a directory name.
    label_to_idx_filename = os.path.splitext(model_path)[0] + '_label_to_idx.json'
    # JSON object keys are always strings; converting explicitly also avoids a
    # TypeError when the labels are numpy scalars (e.g. numeric categories).
    serializable_mapping = {str(label): int(idx) for label, idx in label_to_idx.items()}
    with open(label_to_idx_filename, 'w') as f:
        json.dump(serializable_mapping, f)

    logger.info(f"Best model saved at epoch {epoch} with loss {val_metrics['loss']}")

    return current_loss

class Config:
    """Namespace for configuration validation helpers."""

    @staticmethod
    def validate_config(config):
        """
        Validates the provided configuration to ensure all necessary paths, files, and parameters are correctly defined.
        - Checks that the required paths exist on disk.
        - Checks that the train/validation files exist inside the dataset path.
        - Checks that the feature and label column names are configured.
        - Ensures that hyperparameters like batch size, learning rate, and image size are positive.

        Returns:
            True when every check passes.

        Raises:
            ValueError: If a path or file is missing, a column name is not
                configured, or a hyperparameter is not positive.
        """

        required_paths = ['absolute_path', 'dataset_path']
        required_files = ['train_file', 'validation_file']
        required_columns = ['feature_col', 'label_col']

        # Validate paths
        paths = config['paths']
        for path in required_paths:
            if not os.path.exists(paths[path]):
                raise ValueError(f"Path not found: {paths[path]}")

        # Validate files
        for file in required_files:
            file_path = os.path.join(paths['dataset_path'], config['filenames'][file])
            if not os.path.exists(file_path):
                raise ValueError(f"File not found: {file_path}")

        # Validate column names (the datasets index config['columns'] directly,
        # so fail here with a clear message instead of a later KeyError).
        columns = config.get('columns') or {}
        for column in required_columns:
            if not columns.get(column):
                raise ValueError(f"Column not configured: {column}")

        # Validate hyperparameters
        hyperparameters = config['hyperparameters']
        if hyperparameters['batch_size'] <= 0:
            raise ValueError("Batch size must be positive")
        if hyperparameters['learning_rate'] <= 0:
            raise ValueError("Learning rate must be positive")
        if hyperparameters['image_size'] <= 0:
            raise ValueError("Image size must be positive")

        return True

class CustomImageDataset(Dataset):
    """
    A custom PyTorch Dataset for loading images from local files on Google Drive.

    Each CSV row provides an image URL (feature column) and a class label
    (label column). The URL is mapped onto a file below the dataset path.
    """

    def __init__(self, csv_file, config, transform=None, label_to_idx=None):
        """
        Args:
            csv_file: CSV file name, resolved relative to config['paths']['dataset_path'].
            config: Configuration dict; reads config['paths']['dataset_path'] and
                config['columns']['feature_col'] / ['label_col'].
            transform: Optional callable applied to each loaded PIL image.
            label_to_idx: Optional existing label -> index mapping to reuse
                (e.g. the training split's mapping) so that several splits
                share identical class indices. When omitted, the mapping is
                derived from the sorted unique labels of this CSV.
        """
        self.dataset_path = config['paths']['dataset_path']
        self.data = pd.read_csv(os.path.join(self.dataset_path, csv_file))
        self.transform = transform
        self.feature_col = config['columns']['feature_col']
        self.label_col = config['columns']['label_col']

        if label_to_idx is None:
            self.classes = sorted(self.data[self.label_col].unique())
            self.label_to_idx = {label: idx for idx, label in enumerate(self.classes)}
        else:
            self.label_to_idx = dict(label_to_idx)
            # Keep `classes` ordered by class index so classes[i] is label i.
            self.classes = sorted(self.label_to_idx, key=self.label_to_idx.get)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def _get_local_path_from_url(self, url):
        """
        Convert a URL to a local file path.
        Example: https://applied-ai.gr/projects/computer-vision/400/1111786.jpg -> dataset_path/400/1111786.jpg
        """
        # Only the last two URL segments are used: <folder>/<filename>.
        # This assumes URLs follow a consistent pattern.
        folder, filename = url.split('/')[-2:]
        return os.path.join(self.dataset_path, folder, filename)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for row ``idx``.

        Any failure is logged together with the URL and local path (when they
        are already known) and then re-raised unchanged.
        """
        # Pre-bind so the error handler can log them even when the failure
        # happens before they are assigned (e.g. an out-of-range index).
        img_url = None
        local_path = None
        try:
            row = self.data.iloc[idx]
            img_url = row[self.feature_col]
            label = row[self.label_col]

            # Get the local path from the URL
            local_path = self._get_local_path_from_url(img_url)

            # Load the image; the context manager closes the underlying file
            # as soon as the RGB copy has been made.
            with Image.open(local_path) as image_file:
                img = image_file.convert('RGB')

            if self.transform:
                img = self.transform(img)

            label_idx = self.label_to_idx[label]
            return img, label_idx

        except Exception as e:
            logger.error(f"Error loading image at index {idx}: {str(e)}")
            logger.error(f"URL: {img_url}, Local path: {local_path}")
            raise

def clear_gpu_memory():
    """
    Clears GPU memory to avoid memory overflow issues during training.
    - Runs Python's garbage collector first so that unreachable objects
      (including tensors kept alive only by reference cycles) are freed.
    - Then, when CUDA is available, asks PyTorch to release its cached GPU
      blocks; doing this after the collection lets the newly freed blocks be
      released as well.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

class ModelTrainer:
    """Fine-tunes a pre-trained torchvision ConvNeXt classifier from a config dict.

    The config is expected to provide 'paths', 'filenames', 'columns' and
    'hyperparameters' sections (see the individual methods for the keys read)
    and optionally 'model_config' -> 'convnext_type'.
    """

    def __init__(self, config):
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.metrics_history = []
        self.label_to_idx = None
        # `or 'tiny'` also covers an explicit None value (the key being present
        # but unset), which would otherwise crash on `.lower()` later on.
        self.convnext_type = config.get('model_config', {}).get('convnext_type') or 'tiny'

    def _create_transforms(self):
        """Build the (train, validation) preprocessing pipelines.

        Training applies random augmentations; validation only resizes,
        converts to a tensor and normalizes.
        """
        image_size = self.config['hyperparameters']['image_size']
        # Mean / std values used for normalization (pre-trained model standards).
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

        # Define transformations for the training dataset.
        train_transform = transforms.Compose([
            # Resizes the image to a square defined by the configured image size.
            transforms.Resize((image_size, image_size)),
            # Randomly crop a portion of the image and resize it to the input size
            transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)),
            # Randomly flips the image horizontally.
            transforms.RandomHorizontalFlip(),
            # Randomly rotates the image by up to 15 degrees.
            transforms.RandomRotation(15),
            # Randomly changes brightness, contrast, saturation and hue.
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            # Random augmentation pipeline for improved robustness
            transforms.RandAugment(num_ops=2, magnitude=9),
            transforms.ToTensor(),  # Converts the image into a PyTorch tensor.
            transforms.Normalize(mean=mean, std=std)
        ])

        # Define transformations for the validation dataset (no data augmentation).
        val_transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])

        return train_transform, val_transform

    def _create_dataloaders(self, train_transform, val_transform):
        """Create the training and validation DataLoaders.

        Returns:
            (train_loader, validation_loader, classes) where ``classes`` is the
            class list of the training dataset.
        """
        train_dataset = CustomImageDataset(
            self.config['filenames']['train_file'],
            self.config,
            transform=train_transform
        )

        self.label_to_idx = train_dataset.label_to_idx  # Store the mapping

        validation_dataset = CustomImageDataset(
            self.config['filenames']['validation_file'],
            self.config,
            transform=val_transform
        )
        # Each dataset derives its mapping from its own CSV. If the validation
        # file lacks a class, its indices would silently disagree with the
        # training indices, so force both splits onto the training mapping.
        # A validation label unseen in training now fails loudly (KeyError).
        validation_dataset.label_to_idx = train_dataset.label_to_idx
        validation_dataset.classes = train_dataset.classes

        num_workers = min(multiprocessing.cpu_count(), 4)
        batch_size = self.config['hyperparameters']['batch_size']
        # Pinned host memory only helps host-to-GPU copies; skip it on CPU.
        pin_memory = self.device.type == 'cuda'

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            pin_memory=pin_memory
        )

        validation_loader = DataLoader(
            validation_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=pin_memory
        )

        return train_loader, validation_loader, train_dataset.classes

    def _create_model(self, num_classes):
        """Load a pre-trained ConvNeXt and prepare it for fine-tuning.

        The classifier head is replaced by one with ``num_classes`` outputs;
        everything is frozen except the last entry of ``model.features`` and
        the new classifier.
        """
        # variant name -> (constructor, pre-trained weights, classifier input features)
        variants = {
            'tiny': (models.convnext_tiny, models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1, 768),
            'small': (models.convnext_small, models.ConvNeXt_Small_Weights.IMAGENET1K_V1, 768),
            'base': (models.convnext_base, models.ConvNeXt_Base_Weights.IMAGENET1K_V1, 1024),
            'large': (models.convnext_large, models.ConvNeXt_Large_Weights.IMAGENET1K_V1, 1536),
        }

        variant = str(self.convnext_type).lower()
        if variant not in variants:
            # Default to tiny if not specified or invalid
            logger.warning(f"Invalid ConvNeXt type: {self.convnext_type}. Using ConvNeXt Tiny instead.")
            variant = 'tiny'

        builder, weights, feature_dim = variants[variant]
        model = builder(weights=weights)

        # Replace the classifier head
        model.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.LayerNorm(feature_dim),
            nn.Dropout(0.5),
            nn.Linear(feature_dim, num_classes)
        )

        # Move the model to the selected device
        model = model.to(self.device)

        # Freeze everything first ...
        for param in model.parameters():
            param.requires_grad = False

        # ... then unfreeze the last feature stage and the classifier.
        for param in model.features[-1].parameters():
            param.requires_grad = True
        for param in model.classifier.parameters():
            param.requires_grad = True

        return model

    def _train_epoch(self, model, train_loader, criterion, optimizer):
        """Run one training epoch.

        Returns:
            Dict with the mean batch 'loss', 'accuracy' in percent, and numpy
            arrays of all 'predictions' and 'labels' seen in the epoch.
        """
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        all_preds = []  # Predicted labels for the whole epoch.
        all_labels = []  # True labels for the whole epoch.

        for inputs, labels in tqdm(train_loader, desc='Training'):
            inputs, labels = inputs.to(self.device), labels.to(self.device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # Clips gradients to avoid exploding gradients during backpropagation.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            running_loss += loss.item()
            # Predicted class = index of the largest output per sample.
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        return {
            'loss': running_loss / len(train_loader),
            'accuracy': 100 * correct / total,
            'predictions': np.array(all_preds),
            'labels': np.array(all_labels)
        }

    def _validate(self, model, validation_loader, criterion):
        """Evaluate the model on the validation loader without gradient tracking.

        Returns:
            Dict with the same keys as ``_train_epoch``.
        """
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in validation_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        return {
            'loss': running_loss / len(validation_loader),
            'accuracy': 100 * correct / total,
            'predictions': np.array(all_preds),
            'labels': np.array(all_labels)
        }

    def train(self, patience=4):
        """Train with early stopping on validation loss.

        Args:
            patience: Number of consecutive epochs without a validation-loss
                improvement after which training stops.

        Returns:
            List with one dict per completed epoch ('epoch', 'train',
            'validation', 'time').

        Raises:
            Exception: Any error is logged and re-raised unchanged.
        """
        try:
            train_transform, val_transform = self._create_transforms()
            train_loader, validation_loader, classes = self._create_dataloaders(
                train_transform, val_transform)

            model = self._create_model(len(classes))
            criterion = nn.CrossEntropyLoss()

            hyperparameters = self.config['hyperparameters']
            base_lr = hyperparameters['learning_rate']
            weight_decay = hyperparameters.get('weight_decay', 0.05)
            num_epochs = hyperparameters['num_epochs']

            # AdamW with two parameter groups: the new classifier head trains
            # with a 10x larger learning rate than the unfrozen last stage.
            optimizer = torch.optim.AdamW([
                {'params': model.classifier.parameters(),
                 'lr': base_lr * 10,
                 'weight_decay': weight_decay},
                {'params': model.features[-1].parameters(),
                 'lr': base_lr,
                 'weight_decay': weight_decay}
            ])

            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=num_epochs,
                eta_min=base_lr / 100
            )

            best_val_loss = float('inf')
            early_stop_counter = 0
            epoch_metrics = []

            for epoch in range(num_epochs):
                epoch_start = time.time()

                train_metrics = self._train_epoch(model, train_loader, criterion, optimizer)
                clear_gpu_memory()
                val_metrics = self._validate(model, validation_loader, criterion)

                precision, recall, f1, _ = precision_recall_fscore_support(
                    val_metrics['labels'],
                    val_metrics['predictions'],
                    average='weighted',
                    zero_division=0
                )

                val_metrics.update({
                    'precision': precision,
                    'recall': recall,
                    'f1': f1
                })

                epoch_time = time.time() - epoch_start

                epoch_metrics.append({
                    'epoch': epoch + 1,
                    'train': train_metrics,
                    'validation': val_metrics,
                    'time': epoch_time
                })

                # Update the learning rate scheduler
                scheduler.step()

                # Record the improvement before save_best_model updates the best loss.
                improved = val_metrics['loss'] < best_val_loss

                # Save the model if it's better (and update best_val_loss)
                best_val_loss = save_best_model(
                    model, self.label_to_idx, self.config, val_metrics,
                    epoch + 1, best_val_loss, "best_model.pth"
                )

                # Update early stopping counter based on whether we improved
                if improved:
                    early_stop_counter = 0
                else:
                    early_stop_counter += 1

                if early_stop_counter >= patience:
                    logger.info("Early stopping triggered.")
                    break

            return epoch_metrics

        except Exception as e:
            logger.error(f"Training failed: {str(e)}")
            raise

    def track_training_results(self, config, metrics):
        """Summarize a finished run for reproducibility and analysis.

        Args:
            config: Configuration dict the run was trained with.
            metrics: Epoch list as returned by ``train``; only the LAST entry
                is reported (so 'training_time' is that epoch's duration).

        Returns:
            Dict with 'timestamp', 'configuration' and 'performance' sections.
        """
        final_epoch_metrics = metrics[-1]
        final_validation = final_epoch_metrics['validation']
        hyperparameters = config['hyperparameters']
        return {
            'timestamp': datetime.now().isoformat(),
            'configuration': {
                # Report the same fallback the trainer applies for a missing/None value.
                'convnext_type': config.get('model_config', {}).get('convnext_type') or 'tiny',
                'learning_rate': hyperparameters['learning_rate'],
                'batch_size': hyperparameters['batch_size'],
                'image_size': hyperparameters['image_size'],
                'weight_decay': hyperparameters.get('weight_decay', 0.05),
                'num_epochs': hyperparameters['num_epochs']
            },
            'performance': {
                'final_accuracy': final_validation['accuracy'],
                'final_loss': final_validation['loss'],
                'precision': final_validation['precision'],
                'recall': final_validation['recall'],
                'f1_score': final_validation['f1'],
                'training_time': final_epoch_metrics['time']
            }
        }

def objective(trial, base_config):
    """
    Objective function for Optuna optimization.
    Trains a model with parameters suggested by Optuna and returns validation metrics.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        base_config: Base configuration dict; it is deep-copied and never modified.

    Returns:
        The weighted validation F1 score of the LAST trained epoch (higher is better).
    """
    import copy

    # Deep copy: a shallow dict.copy() shares the nested 'hyperparameters'
    # dict, so the update below would leak each trial's values into
    # base_config and into every later trial.
    current_config = copy.deepcopy(base_config)

    # For ConvNeXt, we suggest different hyperparameter ranges
    current_config['hyperparameters'].update({
        'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-4, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [8, 16, 32, 64]),
        'image_size': trial.suggest_categorical('image_size', [400]),  # use only 400px images
        'weight_decay': trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True),
        'num_epochs': 50  # Set a high number, we'll use early stopping
    })

    # The ConvNeXt variant is currently fixed (it is not part of the search space).
    current_config['model_config'] = {
        'convnext_type': 'tiny'  # trial.suggest_categorical('convnext_type', ['tiny', 'base'])
    }

    # Create trainer with the suggested parameters
    trainer = ModelTrainer(current_config)

    # Train with patience for early stopping
    metrics = trainer.train(patience=5)

    # Get final validation metrics
    final_metrics = metrics[-1]['validation']
    hyperparameters = current_config['hyperparameters']

    # Log the trial results to a file
    trial_result = {
        'trial_number': trial.number,
        'timestamp': datetime.now().isoformat(),
        'parameters': {
            'convnext_type': current_config['model_config']['convnext_type'],
            'learning_rate': hyperparameters['learning_rate'],
            'batch_size': hyperparameters['batch_size'],
            'image_size': hyperparameters['image_size'],
            'weight_decay': hyperparameters['weight_decay'],
            'actual_epochs': len(metrics)
        },
        'metrics': {
            'accuracy': final_metrics['accuracy'],
            'loss': final_metrics['loss'],
            'precision': final_metrics['precision'],
            'recall': final_metrics['recall'],
            'f1': final_metrics['f1']
        }
    }

    # Append the trial as one JSON object per line
    results_file = os.path.join(base_config['paths']['absolute_path'], 'optuna_trials_convnext.txt')
    with open(results_file, 'a') as f:
        f.write(json.dumps(trial_result) + '\n')

    # Return the value to optimize (higher is better)
    return final_metrics['f1']  # Optimize for F1 score

def run_bayesian_optimization(base_config, n_trials=100, study_name="convnext_optimization"):
    """
    Performs Bayesian optimization using Optuna for hyperparameter tuning.
    - Creates (or resumes) a SQLite-backed study and optimizes the objective function
    - Saves the best parameters and a study summary as JSON files

    Args:
        base_config: Base configuration dict; reads base_config['paths']['absolute_path'].
        n_trials: Number of trials to run in this call.
        study_name: Name of the study; also used as the SQLite file name.

    Returns:
        Tuple ``(study, best_params)``.
    """
    # Create study directory
    study_dir = os.path.join(base_config['paths']['absolute_path'], 'optuna_studies_convnext')
    os.makedirs(study_dir, exist_ok=True)

    db_file = os.path.join(study_dir, f'{study_name}.db')
    storage_path = f"sqlite:///{db_file}"

    # load_if_exists=True already covers both "create" and "resume", so no
    # fallback is needed; a genuine storage error now propagates instead of
    # being hidden by a bare `except:`.
    study = optuna.create_study(
        study_name=study_name,
        storage=storage_path,
        load_if_exists=True,
        direction="maximize",  # Maximize F1 score
        sampler=optuna.samplers.TPESampler(seed=42)  # Use TPE sampler with fixed seed
    )
    previous_trials = len(study.trials)
    if previous_trials:
        logger.info(f"Loaded existing study '{study_name}' with {previous_trials} previous trials")
    else:
        logger.info(f"Starting study '{study_name}' with no previous trials")

    # Optimize the study
    study.optimize(lambda trial: objective(trial, base_config), n_trials=n_trials)

    # Get best trial and parameters
    # NOTE(review): Optuna presumably raises here if no trial completed — confirm.
    best_trial = study.best_trial
    best_params = best_trial.params

    # Save best parameters
    best_params_file = os.path.join(study_dir, 'best_params.json')
    with open(best_params_file, 'w') as f:
        json.dump({
            'best_params': best_params,
            'best_value': best_trial.value,
            'timestamp': datetime.now().isoformat()
        }, f, indent=2)

    # Create a summary report
    summary = {
        'study_name': study_name,
        'n_trials': n_trials,
        'best_params': best_params,
        'best_value': best_trial.value,
        'completed_trials': len(study.get_trials(states=[TrialState.COMPLETE])),
        'pruned_trials': len(study.get_trials(states=[TrialState.PRUNED])),
        'failed_trials': len(study.get_trials(states=[TrialState.FAIL]))
    }

    # Save summary
    summary_file = os.path.join(study_dir, 'study_summary.json')
    with open(summary_file, 'w') as f:
        json.dump(summary, f, indent=2)

    logger.info(f"Best parameters: {best_params}")
    logger.info(f"Best F1 score: {best_trial.value}")

    return study, best_params

def main():
    """
    The main entry point for running the training pipeline with Bayesian optimization.

    Steps: create the output directories, reset the per-trial results file,
    run the Optuna search, retrain a final model with the best parameters and
    write its results to 'final_convnext_model_results.json'.

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    import copy

    # Base configuration
    base_config = {
        'paths': {
            'absolute_path': "/content/gdrive/My Drive/Projects/Multimodal-Ecommerce/",
            'dataset_path': "/content/gdrive/My Drive/Projects/Multimodal-Ecommerce/Datasets/"
        },
        'filenames': {
            'train_file': 'train_set_400.csv',
            'validation_file': 'validation_set_400.csv'
        },
        'columns': {
            'feature_col': 'Image',
            'label_col': 'Category'
        },
        'hyperparameters': {  # Default values
            'learning_rate': 1e-5,
            'num_epochs': 50,  # Set high, we'll use early stopping
            'batch_size': 16,
            'image_size': 400,
            'weight_decay': 0.05  # Default weight decay for ConvNeXt is higher than ResNet
        },
        'model_config': {
            'convnext_type': 'tiny'  # Default to ConvNeXt Tiny
        }
    }

    try:
        # Create necessary directories
        os.makedirs(base_config['paths']['absolute_path'], exist_ok=True)
        os.makedirs(base_config['paths']['dataset_path'], exist_ok=True)

        # Reset the per-trial results file (it is truncated on every run)
        results_file = os.path.join(base_config['paths']['absolute_path'], 'optuna_trials_convnext.txt')
        with open(results_file, 'w') as f:
            f.write("# Optuna Trials Results for ConvNeXt\n")
            f.write("# Format: One JSON object per line\n")
            f.write("# Created: " + datetime.now().isoformat() + "\n\n")

        # Run Bayesian optimization
        study, best_params = run_bayesian_optimization(base_config, n_trials=100)

        # Train the final model with the best parameters.
        # Deep copy so the nested dicts of base_config are left untouched.
        final_config = copy.deepcopy(base_config)
        defaults = base_config['hyperparameters']
        final_config['hyperparameters'].update({
            # Fall back to the defaults for anything the study did not tune.
            'learning_rate': best_params.get('learning_rate', defaults['learning_rate']),
            'batch_size': best_params.get('batch_size', defaults['batch_size']),
            'weight_decay': best_params.get('weight_decay', defaults['weight_decay']),
            'image_size': 400  # Fixed image size
        })
        # The ConvNeXt variant is not part of the search space, so it is
        # absent from best_params; keep the configured default rather than
        # passing None on to the trainer.
        final_config['model_config'] = {
            'convnext_type': best_params.get('convnext_type')
            or base_config['model_config']['convnext_type']
        }

        logger.info("Training final model with best parameters...")
        final_trainer = ModelTrainer(final_config)
        final_metrics = final_trainer.train(patience=10)  # More patience for final model

        # Save the final model and results
        final_results = final_trainer.track_training_results(final_config, final_metrics)
        final_results_file = os.path.join(base_config['paths']['absolute_path'], 'final_convnext_model_results.json')
        with open(final_results_file, 'w') as f:
            json.dump(final_results, f, indent=2)

        logger.info("\nBayesian Optimization for ConvNeXt Completed!")
        logger.info(f"Final model trained with best parameters: {best_params}")
        logger.info(f"Check results in {base_config['paths']['absolute_path']}")

    except Exception as e:
        logger.error(f"Error in main: {str(e)}")
        raise

# Run the pipeline only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()



[I 2025-04-16 07:31:03,155] Using an existing study with name 'convnext_optimization' instead of creating a new one.


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Training: 100%|██████████| 105/105 [03:03<00:00,  1.75s/it]
Training: 100%|██████████| 105/105 [00:06<00:00, 16.34it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.80it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.84it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.81it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 16.95it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 16.02it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 16.05it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.91it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.73it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.48it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.82it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.80it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.45it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 15.71it/s]
Training: 100%|██████████| 105/105 [00:06<00:00, 16.18it/s]
Training: 100%|██████████| 105/105 [00:0

In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import time
import os
import json
import logging
from tqdm import tqdm
from google.colab import drive
drive.mount('/content/gdrive')

# Set up logging: INFO-and-above records go to 'convnext_prediction.log'
# (relative to the current working directory) and to the console stream.
# NOTE(review): if logging was already configured earlier in the same kernel,
# basicConfig presumably leaves the existing handlers in place — confirm.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('convnext_prediction.log'),
        logging.StreamHandler()
    ]
)
# Module-level logger used by the dataset, model loading and prediction code.
logger = logging.getLogger(__name__)

# Paths
# Project root and dataset directory on the mounted Google Drive.
BASE_PATH = "/content/gdrive/My Drive/Projects/Multimodal-Ecommerce/"
DATASET_PATH = "/content/gdrive/My Drive/Projects/Multimodal-Ecommerce/Datasets/"
# Trained weights and the matching label -> index JSON (same file stem).
MODEL_PATH = os.path.join(BASE_PATH, "ConvNeXt-without-pruning-same-params-than-resenet/ConvNeXt-Trained-Models/model_lr_2.1223297272287163e-05_bs_8_es_400.pth")
LABEL_TO_IDX_PATH = os.path.join(BASE_PATH, "ConvNeXt-without-pruning-same-params-than-resenet/ConvNeXt-Trained-Models/model_lr_2.1223297272287163e-05_bs_8_es_400_label_to_idx.json")
# Input CSV with the test rows and the CSV the predictions are written to.
TEST_FILE = os.path.join(DATASET_PATH, "test_set_400.csv")
OUTPUT_FILE = os.path.join(DATASET_PATH, "test_set_400_with_convnext_predictions.csv")

class TestImageDataset(Dataset):
    """Dataset for test images.

    Reads image URLs from the 'Image' column of a CSV file and loads the
    corresponding local files below DATASET_PATH. Items are returned as
    ``(image, row_index)`` so predictions can be matched back to CSV rows.
    """

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file: Path of the CSV file to read.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.dataset_path = DATASET_PATH

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def _get_local_path_from_url(self, url):
        """Convert URL to local file path (dataset_path/<folder>/<filename>)."""
        # Only the last two URL segments are used.
        folder, filename = url.split('/')[-2:]
        return os.path.join(self.dataset_path, folder, filename)

    def __getitem__(self, idx):
        """Return ``(image, idx)`` for row ``idx``.

        Any failure is logged together with the URL and local path (when they
        are already known) and then re-raised unchanged.
        """
        # Pre-bind so the error handler can log them even when the failure
        # happens before they are assigned (e.g. an out-of-range index).
        img_url = None
        local_path = None
        try:
            img_url = self.data.iloc[idx]['Image']  # Assuming 'Image' is the column with image URLs

            # Get local path from URL
            local_path = self._get_local_path_from_url(img_url)

            # Load image; the context manager closes the underlying file as
            # soon as the RGB copy has been made.
            with Image.open(local_path) as image_file:
                img = image_file.convert('RGB')

            if self.transform:
                img = self.transform(img)

            return img, idx  # Return image and index

        except Exception as e:
            logger.error(f"Error loading image at index {idx}: {str(e)}")
            logger.error(f"URL: {img_url}, Local path: {local_path}")
            raise

def load_model(model_path, label_to_idx_path, device):
    """Rebuild the ConvNeXt-tiny classifier and restore its trained weights.

    Args:
        model_path: Path to the saved ``state_dict`` file.
        label_to_idx_path: Path to the JSON file mapping label -> class index.
        device: Torch device the weights are mapped to and the model moved to.

    Returns:
        Tuple ``(model, idx_to_label)``: the model in evaluation mode on
        ``device`` and the inverted (class index -> label) mapping.
    """
    with open(label_to_idx_path, 'r') as mapping_file:
        label_to_idx = json.load(mapping_file)

    # Log the raw mapping to ease debugging of label mismatches.
    logger.info(f"Original label_to_idx: {label_to_idx}")

    # Invert the mapping; indices stored as strings are converted to int,
    # anything else is used as the key unchanged.
    idx_to_label = {
        (int(raw_idx) if isinstance(raw_idx, str) else raw_idx): label_name
        for label_name, raw_idx in label_to_idx.items()
    }

    logger.info(f"Created idx_to_label mapping with {len(idx_to_label)} entries")
    logger.info(f"Sample mapping: {list(idx_to_label.items())[:5]}")

    num_classes = len(label_to_idx)
    logger.info(f"Number of classes: {num_classes}")

    # Start from an untrained backbone: the trained weights are loaded below.
    network = models.convnext_tiny(weights=None)

    # Replace the classifier head with the layer stack used at training time
    # so the state dict keys and shapes line up.
    feature_dim = 768  # ConvNeXt Tiny has 768 features
    head_layers = [
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.LayerNorm(feature_dim),
        nn.Dropout(0.5),
        nn.Linear(feature_dim, num_classes),
    ]
    network.classifier = nn.Sequential(*head_layers)

    # Restore the trained parameters, then switch to inference mode.
    trained_state = torch.load(model_path, map_location=device)
    network.load_state_dict(trained_state)
    network = network.to(device)
    network.eval()

    return network, idx_to_label

def predict():
    """Make predictions on test set and save results.

    Loads the trained model, classifies every image listed in ``TEST_FILE``,
    stores the predicted labels in a new ``ConvNeXt-tiny-Predictions-400``
    column and writes the result to ``OUTPUT_FILE``. Prediction time, the
    estimated cost, a random sample of predictions, the predicted-class
    distribution and the number of unmapped predictions are reported.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info(f"Using device: {device}")

    # Load model and mappings
    logger.info("Loading model...")
    model, idx_to_label = load_model(MODEL_PATH, LABEL_TO_IDX_PATH, device)

    # Create test data transforms (same as validation transforms during training)
    test_transform = transforms.Compose([
        transforms.Resize((400, 400)),  # Same as model's image_size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Create test dataset and dataloader
    test_dataset = TestImageDataset(TEST_FILE, transform=test_transform)
    test_loader = DataLoader(
        test_dataset,
        batch_size=16,  # Smaller batch size for ConvNeXt as it might be more memory intensive
        shuffle=False,  # Keep CSV order
        num_workers=min(4, os.cpu_count() or 1),
        # Pinned host memory only speeds up host-to-GPU copies; skip it on CPU.
        pin_memory=(device.type == "cuda")
    )

    # Load test data
    test_df = pd.read_csv(TEST_FILE)
    predictions = [""] * len(test_df)  # Initialize prediction list
    # Counted where the fallback label is produced, so a genuine label that
    # happens to contain the text "Unknown" is never miscounted.
    unknown_count = 0

    # Start timing
    start_time = time.time()

    # Make predictions
    logger.info("Making predictions...")
    with torch.no_grad():  # No need to track gradients for inference
        for inputs, indices in tqdm(test_loader, desc="Predicting"):
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted_indices = torch.max(outputs, 1)

            # One transfer per batch instead of one .item() call per element.
            row_indices = indices.tolist()
            class_indices = predicted_indices.cpu().tolist()

            # Convert predicted indices to labels
            for row_idx, pred_idx in zip(row_indices, class_indices):
                if pred_idx in idx_to_label:
                    predictions[row_idx] = idx_to_label[pred_idx]
                else:
                    # Debugging info for unknown indices
                    logger.warning(f"Unknown index: {pred_idx}, not found in mapping")
                    predictions[row_idx] = f"Unknown-{pred_idx}"
                    unknown_count += 1

    # Calculate execution time and cost
    execution_time = time.time() - start_time
    # NOTE(review): presumably the per-second price (USD) of the runtime used
    # for inference -- confirm the origin of this rate.
    cost_per_second = 0.000281392488
    prediction_cost = cost_per_second * execution_time
    print(f"Prediction time: {execution_time:.2f} seconds")
    print(f"Prediction cost: ${prediction_cost:.2f}")

    # Add predictions to dataframe
    test_df['ConvNeXt-tiny-Predictions-400'] = predictions

    # Save updated dataframe
    test_df.to_csv(OUTPUT_FILE, index=False)
    logger.info(f"Predictions saved to {OUTPUT_FILE}")

    # Show sample of predictions
    sample_size = min(10, len(test_df))
    logger.info(f"\nSample of {sample_size} predictions:")
    sample = test_df.sample(sample_size)
    for _, row in sample.iterrows():
        logger.info(f"Image: {row['Image'].split('/')[-1]}, Predicted: {row['ConvNeXt-tiny-Predictions-400']}")

    # Calculate basic statistics
    category_counts = test_df['ConvNeXt-tiny-Predictions-400'].value_counts()
    logger.info("\nPrediction distribution:")
    logger.info(category_counts)

    # Count unknown predictions; guard the percentage against an empty test set.
    unknown_pct = unknown_count / len(predictions) * 100 if predictions else 0.0
    logger.info(f"Number of unknown predictions: {unknown_count} ({unknown_pct:.2f}%)")

# Entry point: run the full prediction pipeline when this code is executed
# directly rather than imported.
if __name__ == "__main__":
    predict()

Mounted at /content/gdrive


Predicting: 100%|██████████| 12/12 [00:20<00:00,  1.70s/it]

Prediction time: 20.42 seconds
Prediction cost: $0.01



