In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torchvision.models import resnet50, ResNet50_Weights, convnext_tiny, ConvNeXt_Tiny_Weights
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score
import itertools
from datetime import datetime
import copy

In [2]:
class RegionDataset(Dataset):
    """Image dataset backed by a CSV that lists file names and region labels.

    The CSV is expected to contain a ``filename`` column (resolved relative to
    ``img_dir``) and a ``Region_ID`` column. Labels are shifted down by one at
    load time, so the stored/returned label is ``Region_ID - 1``.

    Args:
        csv_file: Path of the label CSV, read with ``pd.read_csv``.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the resized PIL image.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        frame = pd.read_csv(csv_file)
        # Convert 1–15 to 0–14 (zero-based class indices)
        frame['Region_ID'] = frame['Region_ID'] - 1

        self.data = frame
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Number of rows in the label CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        record = self.data.iloc[idx]
        file_name = record['filename']
        label = int(record['Region_ID'])

        # Decode as RGB, then force a fixed 256x256 size (aspect ratio is not kept).
        picture = Image.open(os.path.join(self.img_dir, file_name)).convert('RGB')
        picture = picture.resize((256, 256))

        if self.transform:
            picture = self.transform(picture)

        return picture, label

# ------------------------------
# Model Definition
# ------------------------------
def get_model(model_name='convnext_tiny', num_classes=15, unfreeze_last=False, dropout_rate=0.3, stochastic_depth_prob=0.3):
    """Build a torchvision-pretrained backbone with a fresh classification head.

    Args:
        model_name: ``'resnet50'``, ``'convnext_tiny'`` or ``'efficientnet_b2'``.
        num_classes: Number of logits produced by the new head.
        unfreeze_last: Only honoured for ``resnet50`` and ``efficientnet_b2``:
            freeze every parameter, then re-enable gradients for one late
            backbone block plus the new head. Ignored for ``convnext_tiny``,
            where all parameters stay trainable.
        dropout_rate: Dropout probability used inside the new head.
        stochastic_depth_prob: Stochastic-depth probability handed to the
            ``convnext_tiny`` constructor. Unused by the other architectures.

    Returns:
        The constructed ``nn.Module`` (not yet moved to any device).

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == 'resnet50':
        weights = ResNet50_Weights.DEFAULT
        model = resnet50(weights=weights)
        in_features = model.fc.in_features

        # Add dropout before final FC layer
        model.fc = nn.Sequential(
            nn.Dropout(p=dropout_rate),
            nn.Linear(in_features, num_classes)
        )

        if unfreeze_last:
            # Freeze all layers first
            for param in model.parameters():
                param.requires_grad = False

            # Unfreeze only last block: layer4 and fc
            for param in model.layer4.parameters():
                param.requires_grad = True
            for param in model.fc.parameters():
                param.requires_grad = True

    elif model_name == 'convnext_tiny':
        weights = ConvNeXt_Tiny_Weights.DEFAULT
        # Stochastic depth must be configured through the constructor: the
        # previous post-hoc pass only rewrote nn.Dropout layers, so the
        # requested probability never reached the backbone. Stochastic depth
        # carries no parameters, so saved state dicts are unaffected.
        model = convnext_tiny(weights=weights, stochastic_depth_prob=stochastic_depth_prob)
        in_features = model.classifier[2].in_features

        # Head layout (indices 0..3) must stay stable: saved checkpoints
        # address these layers by position.
        model.classifier = nn.Sequential(
            nn.Flatten(),
            nn.LayerNorm(in_features, eps=1e-6),
            nn.Dropout(p=dropout_rate),
            nn.Linear(in_features, num_classes)
        )

    elif model_name == 'efficientnet_b2':
        model = models.efficientnet_b2(weights='IMAGENET1K_V1')
        in_features = model.classifier[1].in_features

        model.classifier = nn.Sequential(
            nn.Dropout(p=dropout_rate, inplace=True),
            nn.Linear(in_features, num_classes)
        )

        if unfreeze_last:
            for param in model.parameters():
                param.requires_grad = False

            # Unfreeze features[-3] and the classifier.
            # NOTE(review): features[-3] is the third stage from the end, so
            # features[-2] and features[-1] stay frozen — confirm this is the
            # block that was meant to be fine-tuned.
            for param in model.features[-3].parameters():
                param.requires_grad = True
            for param in model.classifier.parameters():
                param.requires_grad = True

    else:
        raise ValueError(f"Model {model_name} not supported")

    return model

def apply_stochastic_depth(module, drop_prob):
    """Overwrite the drop probability of every ``nn.Dropout`` inside ``module``.

    ``module`` itself and all of its descendants are visited; each layer that
    is an ``nn.Dropout`` instance gets ``p = drop_prob`` assigned in place.
    Nothing is returned.

    NOTE(review): despite the name, only ``nn.Dropout`` layers are touched;
    layers of any other type are left as they are — confirm that this is the
    intended effect for the architectures this is applied to.
    """
    dropout_layers = (layer for layer in module.modules() if isinstance(layer, nn.Dropout))
    for layer in dropout_layers:
        layer.p = drop_prob

# ------------------------------
# Training Function with best model tracking
# ------------------------------
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs, config_str=""):
    """Train ``model`` and keep the weights of the best validation epoch.

    After every epoch the model is scored with ``evaluate_model``. Each time
    the validation accuracy strictly improves, the weights are snapshotted in
    memory and also written to
    ``model_<config_str>_<timestamp>/best_epoch_<n>_acc_<acc>.pth``.
    When training ends the best snapshot is loaded back into ``model``.

    Args:
        model: Network to train; expected to already live on ``device``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Loader forwarded to ``evaluate_model``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Optional LR scheduler stepped once per epoch, or ``None``.
            ``ReduceLROnPlateau`` receives the validation accuracy; any other
            scheduler is stepped without arguments.
        device: ``torch.device`` (or device string) batches are moved to.
        num_epochs: Number of passes over ``train_loader``.
        config_str: Tag embedded in the checkpoint directory name.

    Returns:
        dict with ``train_loss`` (per-epoch mean loss), ``val_accuracy``
        (per-epoch accuracy), ``best_accuracy``, ``best_epoch`` (0-based) and
        ``best_model_path`` (last checkpoint written, or ``None`` if the
        accuracy never rose above 0).
    """
    train_losses = []
    val_accuracies = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_epoch = 0
    best_model_path = None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    model_save_path = f'model_{config_str}_{timestamp}'
    os.makedirs(model_save_path, exist_ok=True)

    # Only turn CUDA autocast on when the training device really is CUDA; on
    # any other device it is explicitly disabled instead of being requested
    # and then rejected with a warning.
    # NOTE(review): autocast is used without a gradient scaler; PyTorch's AMP
    # guidance pairs the two for float16 — confirm this is acceptable here.
    use_amp = torch.device(device).type == 'cuda'

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            with torch.amp.autocast(device_type='cuda', enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # An empty loader yields NaN rather than a ZeroDivisionError.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        train_losses.append(epoch_loss)

        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")
        val_accuracy = evaluate_model(model, val_loader, device)
        val_accuracies.append(val_accuracy)

        # Save the model if it has the best validation accuracy so far
        if val_accuracy > best_acc:
            best_acc = val_accuracy
            best_epoch = epoch
            best_model_wts = copy.deepcopy(model.state_dict())
            best_model_path = f"{model_save_path}/best_epoch_{epoch+1}_acc_{val_accuracy:.4f}.pth"
            torch.save(model.state_dict(), best_model_path)
            print(f"New best model saved! Epoch {epoch+1} with accuracy: {val_accuracy:.4f}")

        if scheduler is not None:
            # Only plateau schedulers consume a metric; epoch-based schedulers
            # (cosine, step, ...) must be stepped without one.
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_accuracy)
            else:
                scheduler.step()

    # Load best model weights
    model.load_state_dict(best_model_wts)

    print(f"Best val accuracy: {best_acc:.4f} at epoch {best_epoch+1}")

    return {
        'train_loss': train_losses,
        'val_accuracy': val_accuracies,
        'best_accuracy': best_acc,
        'best_epoch': best_epoch,
        'best_model_path': best_model_path,
    }

# ------------------------------
# Evaluation Function with accuracy return
# ------------------------------
def evaluate_model(model, val_loader, device):
    """Compute, print and return the top-1 accuracy of ``model`` on ``val_loader``.

    The model is switched to eval mode (and left in it); gradients are
    disabled while iterating. ``val_loader`` must yield ``(images, labels)``
    batches whose labels are CPU tensors.
    """
    model.eval()

    all_preds, all_targets = [], []
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            logits = model(batch_images.to(device))
            # Index of the largest logit per sample is the predicted class.
            batch_pred = torch.max(logits.data, 1).indices
            all_preds += list(batch_pred.cpu().numpy())
            all_targets += list(batch_labels.numpy())

    acc = accuracy_score(all_targets, all_preds)
    print(f"Validation Accuracy: {acc:.4f}")
    return acc


In [3]:
def _locate_saved_checkpoint(config_str, epoch_number, accuracy):
    """Return the newest on-disk checkpoint for a run, or ``None`` if absent.

    Checkpoints are looked up as
    ``model_<config_str>_<any timestamp>/best_epoch_<n>_acc_<acc>.pth``
    relative to the current working directory.
    """
    import glob

    pattern = f"model_{glob.escape(config_str)}_*/best_epoch_{epoch_number}_acc_{accuracy:.4f}.pth"
    matches = glob.glob(pattern)
    if not matches:
        return None
    # Several runs may share a config string; the most recently written wins.
    return max(matches, key=os.path.getmtime)


def test_hyperparameter_combinations(train_img_dir, train_csv, val_img_dir, val_csv):
    """Train one ConvNeXt-Tiny model per hyperparameter combination and rank them.

    Every combination of the grid defined below is trained with
    ``train_model``. A per-run summary row is appended to a results table that
    is written to ``hyperparameter_results_<timestamp>.csv`` after each run.

    Args:
        train_img_dir: Directory with the training images.
        train_csv: Label CSV for the training images.
        val_img_dir: Directory with the validation images.
        val_csv: Label CSV for the validation images.

    Returns:
        ``(results, results_df, best_config, best_model_path)`` where
        ``results`` maps each config string to its training history,
        ``results_df`` holds one summary row per run, ``best_config`` is the
        winning hyperparameter dict (``None`` if no run scored above 0) and
        ``best_model_path`` is the checkpoint file of the winning run.
    """
    # Debugging aid: makes CUDA kernel launches synchronous.
    # NOTE(review): this slows training down — drop it once debugging is done.
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # For debugging
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Define transforms
    train_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.2, 0.2, 0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    val_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    # Create datasets
    train_dataset = RegionDataset(train_csv, train_img_dir, transform=train_transforms)
    val_dataset = RegionDataset(val_csv, val_img_dir, transform=val_transforms)

    # Define hyperparameter grid
    hyperparams = {
        'learning_rate': [1e-4],
        'epochs': [50],
        'dropout_rate': [0.3],
        'stochastic_depth_prob': [0.3],
        'batch_size': [32]  # Keep batch size fixed for now, but can be varied if needed
    }

    # Generate all combinations of hyperparameters
    keys = list(hyperparams.keys())
    values = list(hyperparams.values())
    combinations = list(itertools.product(*values))

    results = {}
    best_accuracy = 0
    best_config = None
    best_model_path = None

    # Create a timestamp for saving results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Create results dataframe to track all experiments
    results_df = pd.DataFrame(columns=[
        'learning_rate', 'epochs', 'dropout_rate', 'stochastic_depth_prob',
        'batch_size', 'final_accuracy', 'best_accuracy', 'best_epoch'
    ])

    for i, combination in enumerate(combinations):
        config = {keys[j]: combination[j] for j in range(len(keys))}

        # Create config string for display and tracking
        config_str = f"LR={config['learning_rate']}_E={config['epochs']}_D={config['dropout_rate']}_SDP={config['stochastic_depth_prob']}_BS={config['batch_size']}"
        print(f"\n--- Testing Configuration {i+1}/{len(combinations)}: {config_str} ---")

        # Create data loaders with current batch size
        train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=2)
        val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=2)

        # Create model with current hyperparameters
        model = get_model(
            model_name='convnext_tiny',
            num_classes=15,
            dropout_rate=config['dropout_rate'],
            stochastic_depth_prob=config['stochastic_depth_prob']
        )
        model = model.to(device)

        # Training setup
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.AdamW(model.parameters(), lr=config['learning_rate'])

        # NOTE(review): `verbose` is deprecated in recent PyTorch releases —
        # confirm it is still accepted by the installed version.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',         # Reduce LR when the monitored quantity stops increasing
            factor=0.5,         # Multiply LR by this factor when reducing
            patience=1,         # Number of epochs with no improvement after which LR will be reduced
            verbose=True,       # Print a message when LR is reduced
            min_lr=1e-6         # Minimum LR
        )

        # Train the model
        history = train_model(
            model, train_loader, val_loader, criterion, optimizer, scheduler, device,
            num_epochs=config['epochs'], config_str=config_str
        )

        # Save results
        results[config_str] = history

        # Update results dataframe
        best_accuracy_this_run = history['best_accuracy']
        final_accuracy = history['val_accuracy'][-1]
        best_epoch = history['best_epoch']

        new_row = {
            'learning_rate': config['learning_rate'],
            'epochs': config['epochs'],
            'dropout_rate': config['dropout_rate'],
            'stochastic_depth_prob': config['stochastic_depth_prob'],
            'batch_size': config['batch_size'],
            'final_accuracy': final_accuracy,
            'best_accuracy': best_accuracy_this_run,
            'best_epoch': best_epoch + 1  # +1 because epochs are 0-indexed in code
        }
        results_df = pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True)

        # Check if this is the best configuration so far
        if best_accuracy_this_run > best_accuracy:
            best_accuracy = best_accuracy_this_run
            best_config = config
            # The checkpoint directory carries the timestamp taken inside
            # train_model, which is later than the one taken above, so the
            # path cannot be rebuilt from `timestamp`; look it up on disk.
            best_model_path = _locate_saved_checkpoint(config_str, best_epoch + 1, best_accuracy_this_run)
            if best_model_path is None:
                best_model_path = f"model_{config_str}_{timestamp}/best_epoch_{best_epoch+1}_acc_{best_accuracy_this_run:.4f}.pth"
                print(f"WARNING: checkpoint for {config_str} not found on disk; falling back to {best_model_path}")
            print(f"New best configuration found: {config_str}")
            print(f"Best model path: {best_model_path}")

        # Intermediate save of results
        results_df.to_csv(f'hyperparameter_results_{timestamp}.csv', index=False)

        # Clean up to save memory
        del model, optimizer, scheduler
        torch.cuda.empty_cache()

    # Final save of results
    results_df.to_csv(f'hyperparameter_results_{timestamp}.csv', index=False)

    print("\n--- Best Configuration ---")
    print(f"Best accuracy: {best_accuracy:.4f}")
    if best_config is None:
        # Happens when the grid is empty or no run scored above 0.
        print("No configuration achieved a validation accuracy above 0.")
    else:
        for param, value in best_config.items():
            print(f"{param}: {value}")
    print(f"Best model path: {best_model_path}")

    return results, results_df, best_config, best_model_path

# ------------------------------
# Plotting Functions
# ------------------------------
def _config_key(row):
    """Rebuild the config string used as key in ``results`` from a summary row."""
    return f"LR={row['learning_rate']}_E={row['epochs']}_D={row['dropout_rate']}_SDP={row['stochastic_depth_prob']}_BS={row['batch_size']}"


def _draw_accuracy_heatmap(results_df, index_col, columns_col, title, xlabel, ylabel):
    """Draw mean best accuracy for ``index_col`` x ``columns_col`` on the current axes.

    If the heatmap cannot be built, the reason is printed and a placeholder
    text is drawn instead so the remaining panels still render.
    """
    try:
        pivot_data = results_df.pivot_table(
            values='best_accuracy',
            index=index_col,
            columns=columns_col
        )
        plt.imshow(pivot_data, cmap='viridis', aspect='auto')
        plt.colorbar(label='Best Accuracy')
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.xticks(range(len(pivot_data.columns)), pivot_data.columns)
        plt.yticks(range(len(pivot_data.index)), pivot_data.index)
    except Exception as exc:
        # Keep the figure alive, but say why the panel is empty.
        print(f"Could not draw heatmap '{title}': {exc}")
        plt.text(0.5, 0.5, "Not enough data for heatmap", ha='center', va='center')
        plt.title(f'{title} (Error)')


def plot_results(results, results_df):
    """Plot and save a visual summary of a hyperparameter sweep.

    Two figures are produced, shown and written to the working directory:
    ``hyperparameter_comparison_<timestamp>.png`` (all accuracy curves, the
    top-3 curves and two heatmaps) and ``hyperparameter_ranking_<timestamp>.png``
    (bar chart of the best accuracy per configuration).

    Args:
        results: Mapping of config string to a history dict that contains a
            ``'val_accuracy'`` list.
        results_df: One row per run with the columns ``learning_rate``,
            ``epochs``, ``dropout_rate``, ``stochastic_depth_prob``,
            ``batch_size``, ``best_accuracy`` and ``best_epoch``.
    """
    # Object-typed columns are converted to proper numeric dtypes so sorting
    # and pivoting work on numbers.
    numeric_df = results_df.infer_objects()
    ranked_df = numeric_df.sort_values('best_accuracy', ascending=False)

    # 1. Plot all validation accuracy curves
    plt.figure(figsize=(15, 10))

    plt.subplot(2, 2, 1)
    for config, history in results.items():
        plt.plot(history['val_accuracy'], label=config)
    plt.title('Validation Accuracy for All Configurations')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='x-small')
    plt.grid(True)

    # 2. Plot top 3 configurations
    plt.subplot(2, 2, 2)
    # Rows are taken as per-column records: iterating with iterrows() would
    # upcast integer columns to float ("E=50.0"), and the rebuilt key would
    # then no longer match the keys of `results`.
    for row in ranked_df.head(3).to_dict('records'):
        config_str = _config_key(row)
        history = results.get(config_str)
        if history is None:
            print(f"Skipping '{config_str}': no matching entry in results")
            continue
        plt.plot(history['val_accuracy'], label=config_str)

    plt.title('Top 3 Configurations by Best Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(fontsize='small')
    plt.grid(True)

    # 3. Heatmap of dropout rate vs learning rate (using best accuracy)
    plt.subplot(2, 2, 3)
    _draw_accuracy_heatmap(
        numeric_df, 'dropout_rate', 'learning_rate',
        title='Dropout Rate vs Learning Rate',
        xlabel='Learning Rate Index',
        ylabel='Dropout Rate Index',
    )

    # 4. Stochastic depth prob vs epochs
    plt.subplot(2, 2, 4)
    _draw_accuracy_heatmap(
        numeric_df, 'stochastic_depth_prob', 'epochs',
        title='Stochastic Depth Prob vs Epochs',
        xlabel='Epochs',
        ylabel='Stochastic Depth Prob',
    )

    plt.tight_layout()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    plt.savefig(f'hyperparameter_comparison_{timestamp}.png')
    plt.show()

    # Create a summary table of results sorted by best accuracy
    plt.figure(figsize=(12, 8))

    # Plot a horizontal bar chart of best accuracies
    plt.barh(range(len(ranked_df)), ranked_df['best_accuracy'])
    # Add labels with configuration details
    config_labels = [
        f"LR={row['learning_rate']}, D={row['dropout_rate']}, SDP={row['stochastic_depth_prob']}, E={row['best_epoch']}/{row['epochs']}"
        for row in ranked_df.to_dict('records')
    ]
    plt.yticks(range(len(ranked_df)), config_labels)
    plt.xlabel('Best Validation Accuracy')
    plt.title('Hyperparameter Configurations Ranked by Best Accuracy')
    plt.grid(True, axis='x')
    plt.tight_layout()
    plt.savefig(f'hyperparameter_ranking_{timestamp}.png')
    plt.show()

# ------------------------------
# Function to evaluate the best model on the validation set
# ------------------------------
def evaluate_best_model(best_model_path, val_img_dir, val_csv, best_config):
    """Reload a saved ConvNeXt-Tiny checkpoint and score it on the validation set.

    NOTE: a later cell defines another ``evaluate_best_model`` with a
    different, six-argument signature. Whichever cell ran last wins, while
    ``main`` calls this four-argument form.

    Args:
        best_model_path: Path of a state-dict file written with ``torch.save``.
        val_img_dir: Directory with the validation images.
        val_csv: Label CSV for the validation images.
        best_config: Dict providing ``dropout_rate``, ``stochastic_depth_prob``
            and ``batch_size``.

    Returns:
        The validation accuracy reported by ``evaluate_model``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the model with the same hyperparameters
    model = get_model(
        model_name='convnext_tiny',
        num_classes=15,
        dropout_rate=best_config['dropout_rate'],
        stochastic_depth_prob=best_config['stochastic_depth_prob']
    )
    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only
    # host. torch.load unpickles the file: only load trusted checkpoints.
    state_dict = torch.load(best_model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)

    # Create validation dataset and loader
    val_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    val_dataset = RegionDataset(val_csv, val_img_dir, transform=val_transforms)
    val_loader = DataLoader(val_dataset, batch_size=best_config['batch_size'], shuffle=False, num_workers=2)

    # Evaluate
    final_accuracy = evaluate_model(model, val_loader, device)
    print(f"\n--- Final Evaluation of Best Model ---")
    print(f"Best model parameters:")
    for param, value in best_config.items():
        print(f"  {param}: {value}")
    print(f"Model path: {best_model_path}")
    print(f"Final validation accuracy: {final_accuracy:.4f}")

    return final_accuracy

In [4]:
def main(train_img_dir, train_csv, val_img_dir, val_csv):
    """Run the sweep, plot its results, then re-evaluate the winning checkpoint.

    Args:
        train_img_dir: Directory with the training images.
        train_csv: Label CSV for the training images.
        val_img_dir: Directory with the validation images.
        val_csv: Label CSV for the validation images.

    Returns:
        ``(best_config, best_model_path, final_accuracy)``.
    """
    print("Starting hyperparameter testing...")

    # Stage 1: train every configuration of the grid.
    sweep_output = test_hyperparameter_combinations(
        train_img_dir, train_csv, val_img_dir, val_csv
    )
    results, results_df, best_config, best_model_path = sweep_output

    # Stage 2: figures summarising the sweep.
    plot_results(results, results_df)

    # Stage 3: reload the best checkpoint and score it on the validation set.
    final_accuracy = evaluate_best_model(
        best_model_path, val_img_dir, val_csv, best_config
    )

    print("\n--- Hyperparameter Testing Completed ---")
    print(f"Best configuration: {best_config}")
    print(f"Best model path: {best_model_path}")
    print(f"Final validation accuracy: {final_accuracy:.4f}")

    return best_config, best_model_path, final_accuracy

In [5]:
# Kaggle input locations: image folders first, then the matching label CSVs.
train_img_dir = '/kaggle/input/images/images_train/images_train'
val_img_dir = '/kaggle/input/images/images_val/images_val'
train_csv = '/kaggle/input/labels/labels_train_updated.csv'
val_csv = '/kaggle/input/labels/labels_val_updated.csv'

# Runs the sweep, the plots and the final evaluation; main returns
# (best_config, best_model_path, final_accuracy).
main(
    train_img_dir=train_img_dir,
    train_csv=train_csv,
    val_img_dir=val_img_dir,
    val_csv=val_csv,
)

In [None]:
class _UnlabeledImageDataset(Dataset):
    """Images of one directory, in sorted file-name order, without labels.

    Defined at module level (rather than inside the function that uses it) so
    instances can be pickled by multi-worker data loaders.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

    def __init__(self, img_dir, transform=None, img_size=256, expected_count=369):
        self.img_dir = img_dir
        self.transform = transform
        self.img_size = img_size

        all_files = os.listdir(img_dir)
        # Case-insensitive extension match; sorted to keep a consistent order.
        self.image_files = sorted(
            f for f in all_files if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

        print(f"Found {len(self.image_files)} test images in {img_dir}")

        # Check if we found the expected number of images
        if len(self.image_files) != expected_count:
            print(f"WARNING: Expected {expected_count} test images but found {len(self.image_files)}")
            # List what else is in the directory to help diagnose the issue
            print(f"Total files in directory: {len(all_files)}")
            recognised = set(self.image_files)
            unusual_files = [f for f in all_files if f not in recognised]
            if unusual_files:
                print(f"Files not recognized as images: {unusual_files[:10]}")
                if len(unusual_files) > 10:
                    print(f"...and {len(unusual_files) - 10} more")

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.image_files[idx])
        image = Image.open(img_name).convert('RGB')

        width, height = image.size

        # Only shrink images that exceed the target size, keeping aspect ratio
        # so that neither side ends up larger than img_size.
        if width > self.img_size or height > self.img_size:
            if width > height:
                new_width = self.img_size
                new_height = int(height * (self.img_size / width))
            else:
                new_height = self.img_size
                new_width = int(width * (self.img_size / height))

            image = image.resize((new_width, new_height), Image.LANCZOS)

        if self.transform:
            image = self.transform(image)

        return image


def _pad_collate(batch):
    """Stack ``(C, H, W)`` tensors, zero-padding each to the batch's max H and W."""
    max_h = max(img.shape[1] for img in batch)
    max_w = max(img.shape[2] for img in batch)

    padded_batch = []
    for img in batch:
        c, h, w = img.shape
        padded_img = torch.zeros((c, max_h, max_w), dtype=img.dtype)
        # Original pixels go to the top-left corner; the rest stays zero.
        padded_img[:, :h, :w] = img
        padded_batch.append(padded_img)

    return torch.stack(padded_batch)


def evaluate_best_model(best_model_path, val_img_dir, test_img_dir, val_csv, best_config, submission_filename, expected_test_count=369):
    """Score a saved checkpoint on validation data and write a submission CSV.

    The submission has the columns ``id`` and ``Region_ID``. Validation rows
    come first with ids ``0 .. len(val) - 1``; test rows follow with ids that
    continue from there, in sorted file-name order. If fewer than
    ``expected_test_count`` test images are found, the remaining rows are
    filled with the most frequent predicted class (or 1 if there are no
    predictions at all).

    NOTE: this shares its name with an earlier, four-argument definition;
    whichever cell ran last wins, and ``main`` calls the four-argument form.

    Args:
        best_model_path: Path of a state-dict file written with ``torch.save``.
        val_img_dir: Directory with the validation images.
        test_img_dir: Directory with the unlabeled test images.
        val_csv: Label CSV for the validation images.
        best_config: Dict providing ``dropout_rate``, ``stochastic_depth_prob``
            and ``batch_size``.
        submission_filename: Output path of the submission CSV.
        expected_test_count: Number of test rows the submission must contain.

    Returns:
        ``(final_accuracy, submission_df)``.
    """
    from collections import Counter

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_size = best_config['batch_size']

    # Load the model with the same hyperparameters
    model = get_model(
        model_name='convnext_tiny',
        num_classes=15,
        dropout_rate=best_config['dropout_rate'],
        stochastic_depth_prob=best_config['stochastic_depth_prob']
    )
    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only
    # host. torch.load unpickles the file: only load trusted checkpoints.
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    model = model.to(device)
    model.eval()

    # CenterCrop yields a fixed 256x256 input; torchvision zero-pads inputs
    # that are smaller than the crop size before cropping. The former
    # Pad(0, ...) step added no pixels and has been dropped.
    # NOTE(review): RegionDataset stretches images to 256x256, whereas test
    # images keep their aspect ratio and are padded/cropped — confirm the
    # test-time preprocessing matches what the model was trained on.
    val_transforms = transforms.Compose([
        transforms.CenterCrop(256),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    val_dataset = RegionDataset(val_csv, val_img_dir, transform=val_transforms)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Evaluate
    final_accuracy = evaluate_model(model, val_loader, device)
    print(f"\n--- Final Evaluation of Best Model ---")
    print(f"Best model parameters:")
    for param, value in best_config.items():
        print(f"  {param}: {value}")
    print(f"Model path: {best_model_path}")
    print(f"Final validation accuracy: {final_accuracy:.4f}")

    # Generate predictions for validation set
    val_predictions = []
    with torch.no_grad():
        for images, _ in val_loader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs.data, 1)
            # Add 1 since Region_ID is 1-based in the label files
            val_predictions.extend((predicted.cpu() + 1).tolist())

    # The loader is not shuffled, so prediction order equals dataset order.
    val_df = pd.DataFrame({
        'id': list(range(len(val_predictions))),
        'Region_ID': val_predictions
    })

    # Test images have no labels, hence the dedicated dataset.
    # NOTE(review): ids follow lexicographic file-name order (e.g. "10" sorts
    # before "2") — confirm this matches the id scheme the submission expects.
    test_dataset = _UnlabeledImageDataset(
        test_img_dir, transform=val_transforms, expected_count=expected_test_count
    )

    if len(test_dataset) < expected_test_count:
        print(f"WARNING: Will add {expected_test_count - len(test_dataset)} placeholder entries for missing test images")

    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=2,
                             collate_fn=_pad_collate)

    # Generate predictions for test set
    test_predictions = []
    with torch.no_grad():
        for images in test_loader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs.data, 1)
            test_predictions.extend((predicted.cpu() + 1).tolist())

    # Pad with placeholder predictions so the file has the expected row count
    missing_count = expected_test_count - len(test_predictions)
    if missing_count > 0:
        print(f"Adding {missing_count} placeholder predictions for missing test images")

        # Use the most common class as placeholder (more likely to be correct than random)
        if test_predictions:
            most_common_class = Counter(test_predictions).most_common(1)[0][0]
        else:
            most_common_class = 1  # Default if we have no predictions at all

        test_predictions.extend([most_common_class] * missing_count)

    # Test ids continue right after the validation ids.
    first_test_id = len(val_dataset)
    test_df = pd.DataFrame({
        'id': list(range(first_test_id, first_test_id + len(test_predictions))),
        'Region_ID': test_predictions
    })

    # Combine validation and test predictions
    submission_df = pd.concat([val_df, test_df], ignore_index=True)

    # Save to CSV
    submission_df.to_csv(submission_filename, index=False)
    print(f"Submission file created: {submission_filename}")
    print(f"Total entries: {len(submission_df)} (Validation: {len(val_df)}, Test: {len(test_df)})")

    return final_accuracy, submission_df


# Example usage:
# Checkpoint and data locations for the final evaluation / submission run.
best_model_path = '/kaggle/input/best-model/best_epoch_16_acc_0.9675.pth'
val_img_dir = '/kaggle/input/images/images_val/images_val'
val_csv = '/kaggle/input/labels/labels_val_updated.csv'
test_img_dir = '/kaggle/input/images-test/images_test'  # Add path to test images

# Hyperparameters used to rebuild the network before loading the checkpoint.
best_config = dict(
    batch_size=32,
    dropout_rate=0.5,
    stochastic_depth_prob=0.5,
)

# Replace with your roll number and version
your_roll_no = '2022101113'
version = '2'
submission_filename = f'{your_roll_no}_{version}.csv'

final_accuracy, submission_df = evaluate_best_model(
    best_model_path=best_model_path,
    val_img_dir=val_img_dir,
    test_img_dir=test_img_dir,
    val_csv=val_csv,
    best_config=best_config,
    submission_filename=submission_filename,
)