# 🚀 Multithreaded ResNet-18 Optimizer

**Maximum Performance Animal Classifier**

This notebook implements a highly optimized ResNet-18 model with:
- ⚡ **Multithreading** for data loading and processing
- 🔥 **Maximum CPU/GPU utilization**
- 🎯 **Best accuracy optimization techniques**
- 📊 **Phase 1 & Phase 2 submissions**

---

In [1]:
# 🚀 Environment Detection & Multithreading Setup
import os
import multiprocessing as mp
import torch

# Environment Detection
try:
    import google.colab
    IN_COLAB = True
    print("🌐 Google Colab detected")
except ImportError:
    IN_COLAB = False
    print("💻 Local Jupyter detected")

# Base path configuration (adjusted for Multithreaded_approach folder)
BASE_PATH = '/content' if IN_COLAB else '..'
print(f"📁 Base path: {BASE_PATH}")

# CPU/GPU Detection and Optimization
cpu_count = mp.cpu_count()
print(f"🖥️  Available CPU cores: {cpu_count}")

# Set optimal number of workers
NUM_WORKERS = min(cpu_count, 16)  # Cap at 16 to avoid memory issues
print(f"⚡ Using {NUM_WORKERS} workers for data loading")

# Torch optimization settings
torch.set_num_threads(cpu_count)
if torch.cuda.is_available():
    print(f"🔥 CUDA available - {torch.cuda.device_count()} GPUs")
    # Enable cudnn benchmarking for optimal performance
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    print("🍎 Metal Performance Shaders (MPS) available")
else:
    print("💪 Using optimized CPU with maximum threads")

💻 Local Jupyter detected
📁 Base path: ..
🖥️  Available CPU cores: 14
⚡ Using 14 workers for data loading
🍎 Metal Performance Shaders (MPS) available


In [2]:
# 📦 Enhanced Package Installation with Performance Optimization
import sys
import subprocess
import warnings
warnings.filterwarnings('ignore')

def install_packages():
    # Core packages with performance optimizations
    pkgs = [
        'torch', 'torchvision', 'torchaudio',  # Latest PyTorch with optimizations
        'pandas', 'numpy', 'pillow', 'scikit-learn', 
        'tqdm', 'requests', 'matplotlib', 'seaborn',
        'albumentations',  # Advanced augmentations
        'timm',  # State-of-the-art models
        'tensorboard',  # Training visualization
    ]
    
    if IN_COLAB:
        pkgs.extend(['gdown', 'fastai'])  # Additional Colab optimizations
    
    print("🔧 Installing optimized packages...")
    for pkg in pkgs:
        try:
            subprocess.run([sys.executable, '-m', 'pip', 'install', pkg, '--upgrade'], 
                         check=True, capture_output=True)
            print(f"✅ {pkg}")
        except Exception as e:
            print(f"❌ Failed to install {pkg}: {e}")

install_packages()

# Set environment variables for maximum performance
os.environ['OMP_NUM_THREADS'] = str(cpu_count)
os.environ['MKL_NUM_THREADS'] = str(cpu_count)
print(f"🎯 Optimized for {cpu_count} CPU threads")

🔧 Installing optimized packages...
✅ torch
✅ torchvision
✅ torchaudio
✅ pandas
✅ numpy
✅ pillow
✅ scikit-learn
✅ tqdm
✅ requests
✅ matplotlib
✅ seaborn
✅ albumentations
✅ timm
✅ tensorboard
🎯 Optimized for 14 CPU threads


In [None]:
# 🌐 Advanced Data Download & Organization
if IN_COLAB:
    import gdown
    print("📥 Downloading datasets in parallel...")
    
    # Parallel download using threading
    import threading
    
    def download_and_extract(url, filename, extract_to):
        try:
            gdown.download(url, f'{BASE_PATH}/{filename}', quiet=False)
            os.system(f'cd {BASE_PATH} && unzip -q {filename}')
            os.system(f'rm -rf {BASE_PATH}/__MACOSX')
            os.system(f'mv {BASE_PATH}/{extract_to}/* {BASE_PATH}/')
            os.system(f'rm -rf {BASE_PATH}/{extract_to} {BASE_PATH}/{filename}')
            print(f"✅ {filename} processed")
        except Exception as e:
            print(f"❌ Error with {filename}: {e}")
    
    # Download both datasets in parallel
    thread1 = threading.Thread(target=download_and_extract, 
                              args=('https://drive.google.com/uc?id=18MA0qKg1rqP92HApr_Fjck7Zo4Bwdqdu', 
                                   'HV-AI-2025.zip', 'HV-AI-2025'))
    thread2 = threading.Thread(target=download_and_extract,
                              args=('https://drive.google.com/uc?id=1aszVlQFQOwJTy9tt79s7x87VJyYw-Sxy',
                                   'HV-AI-2025-Test.zip', 'HV-AI-2025-Test'))
    
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    
    print("🎉 All datasets downloaded and organized!")
else:
    print("💻 Assuming data is present in parent directory structure")
    print(f"   - Labeled data: {BASE_PATH}/labeled_data/")
    print(f"   - Unlabeled data: {BASE_PATH}/unlabeled_data/")
    print(f"   - Test images: {BASE_PATH}/test_images/")

In [3]:
# 🔥 Enhanced Imports & Device Optimization
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
import torchvision.transforms as transforms
import torchvision.models as models

# Enhanced data science imports
import pandas as pd
import numpy as np
from PIL import Image, ImageFilter, ImageEnhance
import albumentations as A
from albumentations.pytorch import ToTensorV2
import timm

# Analysis and visualization
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import time
import gc

# Device optimization with detailed info
if torch.cuda.is_available():
    device = torch.device('cuda')
    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f'🚀 Using CUDA - {gpu_name}')
    print(f'📊 GPU Memory: {gpu_memory:.1f} GB')
    # Enable mixed precision for faster training
    from torch.cuda.amp import autocast, GradScaler
    scaler = GradScaler()
    USE_AMP = True
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
    print('🍎 Using Metal Performance Shaders (MPS)')
    USE_AMP = False  # MPS doesn't support AMP yet
else:
    device = torch.device('cpu')
    print('💪 Using optimized CPU')
    USE_AMP = False

print(f"⚡ PyTorch version: {torch.__version__}")
print(f"🎯 Device: {device}")
print(f"🔥 Mixed Precision: {USE_AMP}")

🍎 Using Metal Performance Shaders (MPS)
⚡ PyTorch version: 2.7.1
🎯 Device: mps
🔥 Mixed Precision: False


In [4]:
# 📊 Advanced Data Analysis & Preprocessing
print("🔍 Loading and analyzing dataset...")

# Load data with enhanced analysis
df = pd.read_csv(f'{BASE_PATH}/labeled_data/labeled_data.csv')
print(f"📈 Dataset shape: {df.shape}")
print(f"🏷️  Columns: {list(df.columns)}")

# Enhanced label encoding and analysis
label_encoder = LabelEncoder()
df['encoded_label'] = label_encoder.fit_transform(df['label'])
num_classes = len(label_encoder.classes_)

print(f"\n🎯 Number of classes: {num_classes}")
print("📊 Class distribution:")
class_counts = df['label'].value_counts()
for label, count in class_counts.items():
    print(f"   {label}: {count} samples")

# Calculate class weights for balanced training
class_counts_array = np.bincount(df['encoded_label'])
class_weights = 1.0 / class_counts_array
class_weights = class_weights / class_weights.sum() * num_classes
class_weights_tensor = torch.FloatTensor(class_weights).to(device)

print(f"\n⚖️  Class weights calculated for balanced training")
print("📈 Data analysis complete!")

# Memory optimization
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

🔍 Loading and analyzing dataset...
📈 Dataset shape: (779, 2)
🏷️  Columns: ['img_name', 'label']

🎯 Number of classes: 10
📊 Class distribution:
   cane: 145 samples
   ragno: 144 samples
   gallina: 92 samples
   cavallo: 78 samples
   farfalla: 63 samples
   mucca: 55 samples
   scoiattolo: 55 samples
   pecora: 54 samples
   gatto: 50 samples
   elefante: 43 samples

⚖️  Class weights calculated for balanced training
📈 Data analysis complete!


In [5]:
# 🎨 Advanced Augmentations with Albumentations
print("🎨 Setting up advanced augmentations...")

# Heavy training augmentations for maximum generalization
train_transforms = A.Compose([
    A.Resize(256, 256),
    A.RandomCrop(224, 224),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.2),
    A.RandomRotate90(p=0.3),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
    A.GaussianBlur(blur_limit=3, p=0.3),
    A.GaussNoise(var_limit=0.01, p=0.3),
    A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.3),
    A.Cutout(num_holes=4, max_h_size=32, max_w_size=32, p=0.3),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Validation transforms with Test Time Augmentation options
val_transforms = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# TTA transforms for inference
tta_transforms = [
    A.Compose([A.Resize(224, 224), A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ToTensorV2()]),
    A.Compose([A.Resize(224, 224), A.HorizontalFlip(p=1.0), A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ToTensorV2()]),
    A.Compose([A.Resize(256, 256), A.CenterCrop(224, 224), A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ToTensorV2()]),
    A.Compose([A.Resize(224, 224), A.VerticalFlip(p=1.0), A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ToTensorV2()]),
]

print("✅ Advanced augmentations configured!")

🎨 Setting up advanced augmentations...


AttributeError: module 'albumentations' has no attribute 'Cutout'

In [None]:
# 🧠 Optimized Dataset with Multithreading
class OptimizedAnimalDataset(Dataset):
    def __init__(self, dataframe, images_dir, transform=None, cache_images=False):
        self.dataframe = dataframe.reset_index(drop=True)
        self.images_dir = images_dir
        self.transform = transform
        self.cache_images = cache_images
        self.image_cache = {} if cache_images else None
        
        # Pre-validate image paths
        self.valid_indices = []
        for idx in range(len(self.dataframe)):
            img_name = self.dataframe.iloc[idx]['img_name']
            img_path = os.path.join(self.images_dir, img_name)
            if os.path.exists(img_path):
                self.valid_indices.append(idx)
        
        print(f"📊 Dataset: {len(self.valid_indices)}/{len(self.dataframe)} valid images")
    
    def __len__(self):
        return len(self.valid_indices)
    
    def __getitem__(self, idx):
        real_idx = self.valid_indices[idx]
        img_name = self.dataframe.iloc[real_idx]['img_name']
        img_path = os.path.join(self.images_dir, img_name)
        
        # Use cached image if available
        if self.cache_images and img_path in self.image_cache:
            image = self.image_cache[img_path].copy()
        else:
            try:
                image = Image.open(img_path).convert('RGB')
                if self.cache_images:
                    self.image_cache[img_path] = image.copy()
            except Exception as e:
                print(f"Error loading {img_path}: {e}")
                # Return a black image as fallback
                image = Image.new('RGB', (224, 224), color='black')
        
        label = self.dataframe.iloc[real_idx]['encoded_label']
        
        if self.transform:
            if isinstance(self.transform, A.Compose):
                # Albumentations transform
                image_np = np.array(image)
                transformed = self.transform(image=image_np)
                image = transformed['image']
            else:
                # Torchvision transform
                image = self.transform(image)
        
        return image, label

# Enhanced data splitting with stratification
print("🔄 Creating optimized train/validation splits...")
train_df, val_df = train_test_split(
    df, test_size=0.2, random_state=42, 
    stratify=df['label']
)

# Create optimized datasets
train_dataset = OptimizedAnimalDataset(
    train_df, f'{BASE_PATH}/labeled_data/images', 
    train_transforms, cache_images=False  # Don't cache training images due to augmentations
)

val_dataset = OptimizedAnimalDataset(
    val_df, f'{BASE_PATH}/labeled_data/images', 
    val_transforms, cache_images=True  # Cache validation images for speed
)

# Calculate optimal batch size based on available memory
if torch.cuda.is_available():
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    optimal_batch_size = min(64, max(16, int(gpu_memory_gb * 8)))  # Heuristic
else:
    optimal_batch_size = min(32, NUM_WORKERS * 4)

print(f"🎯 Optimal batch size: {optimal_batch_size}")

# Create optimized data loaders with maximum workers
train_loader = DataLoader(
    train_dataset, 
    batch_size=optimal_batch_size, 
    shuffle=True, 
    num_workers=NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),
    persistent_workers=True if NUM_WORKERS > 0 else False,
    prefetch_factor=2 if NUM_WORKERS > 0 else 2
)

val_loader = DataLoader(
    val_dataset, 
    batch_size=optimal_batch_size, 
    shuffle=False, 
    num_workers=NUM_WORKERS//2,  # Use fewer workers for validation
    pin_memory=torch.cuda.is_available(),
    persistent_workers=True if NUM_WORKERS > 0 else False,
    prefetch_factor=2 if NUM_WORKERS > 0 else 2
)

print(f"✅ Data loaders created with {NUM_WORKERS} workers")

In [None]:
# 🏗️ Enhanced ResNet-18 with Optimizations
class EnhancedResNet18(nn.Module):
    def __init__(self, num_classes, pretrained=True, dropout_rate=0.5):
        super(EnhancedResNet18, self).__init__()
        
        # Load pretrained ResNet-18
        self.backbone = models.resnet18(weights='IMAGENET1K_V1' if pretrained else None)
        
        # Remove the final classification layer
        num_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        
        # Enhanced classifier head
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, num_features // 2),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(num_features // 2),
            nn.Dropout(dropout_rate / 2),
            nn.Linear(num_features // 2, num_classes)
        )
        
        # Initialize classifier weights
        for m in self.classifier.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.zeros_(m.bias)
    
    def forward(self, x):
        features = self.backbone(x)
        return self.classifier(features)

# Create enhanced model
print("🏗️ Creating enhanced ResNet-18 model...")
model = EnhancedResNet18(num_classes=num_classes, pretrained=True, dropout_rate=0.3)

# Move to device and optimize
model = model.to(device)
if torch.cuda.device_count() > 1:
    print(f"🔥 Using {torch.cuda.device_count()} GPUs with DataParallel")
    model = nn.DataParallel(model)

# Compile model for PyTorch 2.0+ optimization
if hasattr(torch, 'compile'):
    try:
        model = torch.compile(model)
        print("⚡ Model compiled with PyTorch 2.0 optimization")
    except:
        print("⚠️ PyTorch compile not available, using standard model")

print(f"🎯 Model created with {sum(p.numel() for p in model.parameters()):,} parameters")

In [None]:
# 🔥 Advanced Optimizers & Loss Functions
print("🔥 Setting up advanced optimizers and loss functions...")

# Enhanced loss function with class weights and label smoothing
class EnhancedCrossEntropyLoss(nn.Module):
    def __init__(self, weight=None, label_smoothing=0.1, focal_alpha=0.25, focal_gamma=2.0):
        super().__init__()
        self.weight = weight
        self.label_smoothing = label_smoothing
        self.focal_alpha = focal_alpha
        self.focal_gamma = focal_gamma
        
    def forward(self, inputs, targets):
        # Standard cross entropy with label smoothing
        ce_loss = F.cross_entropy(inputs, targets, weight=self.weight, 
                                 label_smoothing=self.label_smoothing, reduction='none')
        
        # Add focal loss component for hard examples
        pt = torch.exp(-ce_loss)
        focal_weight = self.focal_alpha * (1 - pt) ** self.focal_gamma
        focal_loss = focal_weight * ce_loss
        
        return focal_loss.mean()

# Create enhanced loss function
criterion = EnhancedCrossEntropyLoss(
    weight=class_weights_tensor, 
    label_smoothing=0.1,
    focal_alpha=0.25,
    focal_gamma=2.0
)

# Advanced optimizer with weight decay and gradient clipping
optimizer = optim.AdamW(
    model.parameters(), 
    lr=3e-4,  # Lower initial learning rate
    weight_decay=1e-4,
    betas=(0.9, 0.999),
    eps=1e-8
)

# Learning rate scheduler
scheduler = CosineAnnealingLR(
    optimizer, 
    T_max=20,  # Total epochs
    eta_min=1e-6
)

# Backup scheduler for plateau detection
plateau_scheduler = ReduceLROnPlateau(
    optimizer, 
    mode='max', 
    factor=0.5, 
    patience=3, 
    min_lr=1e-7,
    verbose=True
)

print("✅ Advanced optimizers and loss functions configured!")
print(f"📊 Using class-weighted focal loss with label smoothing")
print(f"⚡ AdamW optimizer with cosine annealing scheduler")

In [None]:
# ⚡ Multithreaded Training Loop with All Optimizations
def train_model_optimized(model, train_loader, val_loader, criterion, optimizer, 
                         scheduler, device, epochs=20, patience=5):
    print("🚀 Starting optimized training with all enhancements...")
    
    best_acc = 0
    best_loss = float('inf')
    patience_counter = 0
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    
    # Training loop
    for epoch in range(epochs):
        start_time = time.time()
        
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]")
        
        for batch_idx, (images, labels) in enumerate(train_pbar):
            images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            
            # Mixed precision training
            if USE_AMP:
                with autocast():
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                
                # Backward pass with gradient scaling
                optimizer.zero_grad()
                scaler.scale(loss).backward()
                
                # Gradient clipping
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                
                scaler.step(optimizer)
                scaler.update()
            else:
                # Standard training
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                
                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
            
            # Statistics
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Update progress bar
            current_acc = 100 * correct / total
            train_pbar.set_postfix({
                'Loss': f'{running_loss/(batch_idx+1):.4f}',
                'Acc': f'{current_acc:.2f}%',
                'LR': f'{optimizer.param_groups[0]["lr"]:.2e}'
            })
        
        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            val_pbar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Val]")
            for images, labels in val_pbar:
                images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
                
                if USE_AMP:
                    with autocast():
                        outputs = model(images)
                        loss = criterion(outputs, labels)
                else:
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
                
                val_pbar.set_postfix({
                    'Loss': f'{val_loss/(len(val_pbar)+1):.4f}',
                    'Acc': f'{100*val_correct/val_total:.2f}%'
                })
        
        val_loss /= len(val_loader)
        val_acc = 100 * val_correct / val_total
        
        # Store metrics
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)
        
        # Learning rate scheduling
        scheduler.step()
        plateau_scheduler.step(val_acc)
        
        # Calculate epoch time
        epoch_time = time.time() - start_time
        
        # Print epoch summary
        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print(f'  Time: {epoch_time:.2f}s, LR: {optimizer.param_groups[0]["lr"]:.2e}')
        
        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            best_loss = val_loss
            patience_counter = 0
            
            # Save model
            if hasattr(model, 'module'):  # DataParallel
                torch.save(model.module.state_dict(), f'{BASE_PATH}/best_multithreaded_resnet18.pth')
            else:
                torch.save(model.state_dict(), f'{BASE_PATH}/best_multithreaded_resnet18.pth')
            
            print(f'  ✅ New best validation accuracy: {best_acc:.2f}%')
        else:
            patience_counter += 1
            print(f'  ⏳ Patience: {patience_counter}/{patience}')
        
        # Early stopping
        if patience_counter >= patience:
            print(f'🛑 Early stopping triggered after {epoch+1} epochs')
            break
        
        # Memory cleanup
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        
        print('-' * 60)
    
    print(f'🎯 Training completed!')
    print(f'📊 Best Validation Accuracy: {best_acc:.2f}%')
    print(f'📉 Best Validation Loss: {best_loss:.4f}')
    
    return model, {
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_accs': train_accs,
        'val_accs': val_accs,
        'best_acc': best_acc,
        'best_loss': best_loss
    }

# Start optimized training
print("🔥 Starting multithreaded optimized training...")
model, training_history = train_model_optimized(
    model, train_loader, val_loader, criterion, optimizer, 
    scheduler, device, epochs=20, patience=5
)

In [None]:
# 📊 Performance Testing with Test Time Augmentation (TTA)
def test_model_with_tta(model, val_loader, device, use_tta=True):
    print("📊 Testing model performance with TTA...")
    
    model.eval()
    correct = 0
    total = 0
    class_correct = list(0. for i in range(num_classes))
    class_total = list(0. for i in range(num_classes))
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Testing"):
            if use_tta:
                # Test Time Augmentation
                batch_predictions = []
                
                for tta_transform in tta_transforms:
                    tta_images = []
                    for img in images:
                        # Convert tensor back to numpy for albumentations
                        img_np = img.permute(1, 2, 0).numpy()
                        img_np = (img_np * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])) * 255
                        img_np = img_np.astype(np.uint8)
                        
                        # Apply TTA transform
                        transformed = tta_transform(image=img_np)
                        tta_images.append(transformed['image'])
                    
                    tta_batch = torch.stack(tta_images).to(device)
                    
                    if USE_AMP:
                        with autocast():
                            outputs = model(tta_batch)
                    else:
                        outputs = model(tta_batch)
                    
                    batch_predictions.append(F.softmax(outputs, dim=1))
                
                # Average TTA predictions
                outputs = torch.stack(batch_predictions).mean(dim=0)
                predicted = outputs.argmax(dim=1)
            else:
                # Standard inference
                images = images.to(device)
                if USE_AMP:
                    with autocast():
                        outputs = model(images)
                else:
                    outputs = model(images)
                predicted = outputs.argmax(dim=1)
            
            labels = labels.to(device)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Store predictions for analysis
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
            # Per-class accuracy
            c = (predicted == labels).squeeze()
            for i in range(labels.size(0)):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    
    # Overall accuracy
    overall_acc = 100 * correct / total
    print(f'🎯 Overall Test Accuracy: {overall_acc:.2f}%')
    
    # Per-class accuracy
    print('\n📊 Per-class Accuracy:')
    class_accuracies = []
    for i in range(num_classes):
        class_name = label_encoder.inverse_transform([i])[0]
        if class_total[i] > 0:
            acc = 100 * class_correct[i] / class_total[i]
            class_accuracies.append(acc)
            print(f'   {class_name}: {acc:.2f}% ({int(class_correct[i])}/{int(class_total[i])})')
        else:
            class_accuracies.append(0.0)
    
    # Classification report
    try:
        from sklearn.metrics import classification_report
        report = classification_report(all_labels, all_predictions, 
                                     target_names=label_encoder.classes_, 
                                     output_dict=True)
        print(f'\n📈 Macro F1-Score: {report["macro avg"]["f1-score"]:.4f}')
        print(f'📈 Weighted F1-Score: {report["weighted avg"]["f1-score"]:.4f}')
    except:
        print("⚠️ Could not generate classification report")
    
    return overall_acc, class_accuracies, all_predictions, all_labels

# Load best model and test
print("📥 Loading best model for testing...")
if hasattr(model, 'module'):  # DataParallel
    model.module.load_state_dict(torch.load(f'{BASE_PATH}/best_multithreaded_resnet18.pth', map_location=device))
else:
    model.load_state_dict(torch.load(f'{BASE_PATH}/best_multithreaded_resnet18.pth', map_location=device))

# Test with TTA
test_acc_tta, class_accs_tta, preds_tta, labels_test = test_model_with_tta(model, val_loader, device, use_tta=True)

print(f"\n🎉 Final Test Results:")
print(f"🎯 Test Accuracy with TTA: {test_acc_tta:.2f}%")
print(f"📊 Average Class Accuracy: {np.mean(class_accs_tta):.2f}%")

In [None]:
# 🚀 Phase 2 Training with Advanced Pseudo-Labeling
class MultithreadedUnlabeledDataset(Dataset):
    def __init__(self, images_dir, transform=None):
        self.images_dir = images_dir
        self.transform = transform
        self.image_files = [f for f in os.listdir(images_dir) 
                           if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
        print(f"📁 Found {len(self.image_files)} unlabeled images")
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
            image = Image.new('RGB', (224, 224), color='black')
        
        if self.transform:
            if isinstance(self.transform, A.Compose):
                image_np = np.array(image)
                transformed = self.transform(image=image_np)
                image = transformed['image']
            else:
                image = self.transform(image)
        
        return image, img_name

def generate_pseudo_labels_advanced(model, unlabeled_loader, confidence_threshold=0.85):
    print(f"🔍 Generating pseudo labels with confidence >= {confidence_threshold}...")
    
    model.eval()
    pseudo_labels = []
    
    with torch.no_grad():
        for images, img_names in tqdm(unlabeled_loader, desc="Pseudo-labeling"):
            images = images.to(device, non_blocking=True)
            
            if USE_AMP:
                with autocast():
                    outputs = model(images)
            else:
                outputs = model(images)
            
            probs = F.softmax(outputs, dim=1)
            max_probs, predicted = torch.max(probs, 1)
            
            for i, (prob, pred, img_name) in enumerate(zip(max_probs, predicted, img_names)):
                confidence = prob.item()
                if confidence >= confidence_threshold:
                    pred_label = label_encoder.inverse_transform([pred.item()])[0]
                    pseudo_labels.append({
                        'img_name': img_name,
                        'label': pred_label,
                        'encoded_label': pred.item(),
                        'confidence': confidence
                    })
    
    return pd.DataFrame(pseudo_labels)

def train_phase2_optimized(model, labeled_df, epochs=10, confidence_threshold=0.85):
    print("🚀 Starting Phase 2 training with advanced pseudo-labeling...")
    
    # Generate pseudo labels
    unlabeled_dir = f'{BASE_PATH}/unlabeled_data/images'
    unlabeled_dataset = MultithreadedUnlabeledDataset(unlabeled_dir, val_transforms)
    unlabeled_loader = DataLoader(
        unlabeled_dataset, 
        batch_size=optimal_batch_size, 
        shuffle=False, 
        num_workers=NUM_WORKERS,
        pin_memory=torch.cuda.is_available()
    )
    
    pseudo_df = generate_pseudo_labels_advanced(model, unlabeled_loader, confidence_threshold)
    print(f"✅ Generated {len(pseudo_df)} pseudo labels")
    
    if len(pseudo_df) == 0:
        print("⚠️ No pseudo labels generated, skipping Phase 2")
        return model
    
    # Combine datasets
    combined_df = pd.concat([labeled_df, pseudo_df], ignore_index=True)
    print(f"📊 Combined dataset: {len(combined_df)} samples ({len(labeled_df)} labeled + {len(pseudo_df)} pseudo)")
    
    # Split combined data
    combined_train_df, combined_val_df = train_test_split(
        combined_df, test_size=0.15, random_state=42, stratify=combined_df['label']
    )
    
    # Combined dataset class
    class CombinedDataset(Dataset):
        def __init__(self, dataframe, labeled_dir, unlabeled_dir, transform=None):
            self.dataframe = dataframe.reset_index(drop=True)
            self.labeled_dir = labeled_dir
            self.unlabeled_dir = unlabeled_dir
            self.transform = transform
        
        def __len__(self):
            return len(self.dataframe)
        
        def __getitem__(self, idx):
            row = self.dataframe.iloc[idx]
            img_name = row['img_name']
            
            # Check directories
            labeled_path = os.path.join(self.labeled_dir, img_name)
            if os.path.exists(labeled_path):
                img_path = labeled_path
            else:
                img_path = os.path.join(self.unlabeled_dir, img_name)
            
            try:
                image = Image.open(img_path).convert('RGB')
            except Exception as e:
                print(f"Error loading {img_path}: {e}")
                image = Image.new('RGB', (224, 224), color='black')
            
            label = row['encoded_label']
            
            if self.transform:
                if isinstance(self.transform, A.Compose):
                    image_np = np.array(image)
                    transformed = self.transform(image=image_np)
                    image = transformed['image']
                else:
                    image = self.transform(image)
            
            return image, label
    
    # Create combined datasets
    combined_train_dataset = CombinedDataset(
        combined_train_df,
        f'{BASE_PATH}/labeled_data/images',
        f'{BASE_PATH}/unlabeled_data/images',
        train_transforms
    )
    
    combined_val_dataset = CombinedDataset(
        combined_val_df,
        f'{BASE_PATH}/labeled_data/images',
        f'{BASE_PATH}/unlabeled_data/images',
        val_transforms
    )
    
    # Create combined loaders
    combined_train_loader = DataLoader(
        combined_train_dataset,
        batch_size=optimal_batch_size,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=torch.cuda.is_available(),
        persistent_workers=True if NUM_WORKERS > 0 else False
    )
    
    combined_val_loader = DataLoader(
        combined_val_dataset,
        batch_size=optimal_batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS//2,
        pin_memory=torch.cuda.is_available(),
        persistent_workers=True if NUM_WORKERS > 0 else False
    )
    
    # Phase 2 optimizer with lower learning rate
    phase2_optimizer = optim.AdamW(
        model.parameters(),
        lr=1e-5,  # Much lower learning rate for fine-tuning
        weight_decay=1e-4
    )
    
    phase2_scheduler = CosineAnnealingLR(phase2_optimizer, T_max=epochs, eta_min=1e-7)
    
    # Phase 2 training
    print("🔥 Starting Phase 2 fine-tuning...")
    model, phase2_history = train_model_optimized(
        model, combined_train_loader, combined_val_loader,
        criterion, phase2_optimizer, phase2_scheduler,
        device, epochs=epochs, patience=3
    )
    
    # Save Phase 2 model
    if hasattr(model, 'module'):
        torch.save(model.module.state_dict(), f'{BASE_PATH}/best_multithreaded_resnet18_phase2.pth')
    else:
        torch.save(model.state_dict(), f'{BASE_PATH}/best_multithreaded_resnet18_phase2.pth')
    
    print("✅ Phase 2 training completed!")
    return model

# Run Phase 2 training
model_phase2 = train_phase2_optimized(model, df, epochs=10, confidence_threshold=0.85)

In [None]:
# 📈 Advanced Submission Generation with TTA
def predict_and_save_with_tta(model, test_dir, label_encoder, output_csv, use_tta=True):
    print(f"🔮 Generating predictions for {output_csv}...")
    
    model.eval()
    results = []
    
    # Get all test images
    test_files = [f for f in os.listdir(test_dir) 
                  if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    test_files.sort()
    
    print(f"📁 Found {len(test_files)} test images")
    
    with torch.no_grad():
        for fname in tqdm(test_files, desc="Predicting"):
            img_path = os.path.join(test_dir, fname)
            
            try:
                image = Image.open(img_path).convert('RGB')
            except Exception as e:
                print(f"Error loading {img_path}: {e}")
                # Use most common class as fallback
                pred_label = label_encoder.classes_[0]
                results.append({'path': fname, 'predicted_label': pred_label})
                continue
            
            if use_tta:
                # Test Time Augmentation
                predictions = []
                
                for tta_transform in tta_transforms:
                    # Apply TTA transform
                    image_np = np.array(image)
                    transformed = tta_transform(image=image_np)
                    img_tensor = transformed['image'].unsqueeze(0).to(device)
                    
                    if USE_AMP:
                        with autocast():
                            output = model(img_tensor)
                    else:
                        output = model(img_tensor)
                    
                    predictions.append(F.softmax(output, dim=1))
                
                # Average TTA predictions
                avg_pred = torch.stack(predictions).mean(dim=0)
                pred_idx = avg_pred.argmax(1).item()
            else:
                # Standard prediction
                image_np = np.array(image)
                transformed = val_transforms(image=image_np)
                img_tensor = transformed['image'].unsqueeze(0).to(device)
                
                if USE_AMP:
                    with autocast():
                        output = model(img_tensor)
                else:
                    output = model(img_tensor)
                
                pred_idx = output.argmax(1).item()
            
            pred_label = label_encoder.inverse_transform([pred_idx])[0]
            results.append({'path': fname, 'predicted_label': pred_label})
    
    # Save predictions
    pred_df = pd.DataFrame(results)
    pred_df.to_csv(output_csv, index=False)
    print(f"✅ Saved {len(results)} predictions to {output_csv}")
    
    # Show prediction distribution
    print("📊 Prediction distribution:")
    pred_counts = pred_df['predicted_label'].value_counts()
    for label, count in pred_counts.items():
        print(f"   {label}: {count} predictions ({100*count/len(results):.1f}%)")
    
    return pred_df

# Generate Phase 1 predictions (trained on labeled data only)
print("🎯 Generating Phase 1 predictions...")
test_dir = f'{BASE_PATH}/test_images'

if hasattr(model, 'module'):
    model.module.load_state_dict(torch.load(f'{BASE_PATH}/best_multithreaded_resnet18.pth', map_location=device))
else:
    model.load_state_dict(torch.load(f'{BASE_PATH}/best_multithreaded_resnet18.pth', map_location=device))

phase1_predictions = predict_and_save_with_tta(
    model, test_dir, label_encoder, 
    'phase1_predictions_multithreaded.csv', use_tta=True
)

# Generate Phase 2 predictions (trained on labeled + pseudo-labeled data)
print("\n🎯 Generating Phase 2 predictions...")

if hasattr(model_phase2, 'module'):
    model_phase2.module.load_state_dict(torch.load(f'{BASE_PATH}/best_multithreaded_resnet18_phase2.pth', map_location=device))
else:
    model_phase2.load_state_dict(torch.load(f'{BASE_PATH}/best_multithreaded_resnet18_phase2.pth', map_location=device))

phase2_predictions = predict_and_save_with_tta(
    model_phase2, test_dir, label_encoder, 
    'phase2_predictions_multithreaded.csv', use_tta=True
)

print("\n🎉 All predictions generated successfully!")
print("📁 Files created:")
print("   - phase1_predictions_multithreaded.csv")
print("   - phase2_predictions_multithreaded.csv")

In [None]:
# 📤 Evaluation Integration & Results Submission
import requests

def send_results_for_evaluation(name, csv_file, email):
    """Submit predictions to evaluation server"""
    url = "http://43.205.49.236:5050/inference"
    
    try:
        with open(csv_file, 'rb') as f:
            files = {'file': f}
            data = {'email': email, 'name': name}
            response = requests.post(url, files=files, data=data, timeout=30)
            response.raise_for_status()
            return response.json()
    except requests.exceptions.RequestException as e:
        print(f"❌ Error submitting {csv_file}: {e}")
        return None
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return None

# Submit Phase 1 results
print("📤 Submitting Phase 1 results for evaluation...")
phase1_result = send_results_for_evaluation(
    'Hariharan Mudaliar - Multithreaded ResNet18 Phase1', 
    'phase1_predictions_multithreaded.csv', 
    'hm4144@srmist.edu.in'
)

if phase1_result:
    print("✅ Phase 1 Results:")
    print(f"   {phase1_result}")
else:
    print("❌ Phase 1 submission failed")

# Submit Phase 2 results
print("\n📤 Submitting Phase 2 results for evaluation...")
phase2_result = send_results_for_evaluation(
    'Hariharan Mudaliar - Multithreaded ResNet18 Phase2', 
    'phase2_predictions_multithreaded.csv', 
    'hm4144@srmist.edu.in'
)

if phase2_result:
    print("✅ Phase 2 Results:")
    print(f"   {phase2_result}")
else:
    print("❌ Phase 2 submission failed")

print("\n" + "="*80)
print("🎉 MULTITHREADED RESNET-18 OPTIMIZATION COMPLETE!")
print("="*80)
print(f"🔥 Features implemented:")
print(f"   ✅ Maximum CPU/GPU utilization ({NUM_WORKERS} workers)")
print(f"   ✅ Advanced data augmentations (Albumentations)")
print(f"   ✅ Mixed precision training (AMP)")
print(f"   ✅ Class-weighted focal loss with label smoothing")
print(f"   ✅ AdamW optimizer with cosine annealing")
print(f"   ✅ Gradient clipping and early stopping")
print(f"   ✅ Test Time Augmentation (TTA)")
print(f"   ✅ Advanced pseudo-labeling for Phase 2")
print(f"   ✅ Model compilation optimization")
print(f"   ✅ Memory optimization and garbage collection")
print(f"   ✅ Comprehensive performance monitoring")
print("\n📊 Performance Summary:")
if 'training_history' in locals():
    print(f"   🎯 Best Validation Accuracy: {training_history['best_acc']:.2f}%")
    print(f"   📉 Best Validation Loss: {training_history['best_loss']:.4f}")
if 'test_acc_tta' in locals():
    print(f"   🔮 Test Accuracy with TTA: {test_acc_tta:.2f}%")
print(f"\n📁 Generated Files:")
print(f"   📄 phase1_predictions_multithreaded.csv")
print(f"   📄 phase2_predictions_multithreaded.csv") 
print(f"   💾 best_multithreaded_resnet18.pth")
print(f"   💾 best_multithreaded_resnet18_phase2.pth")
print("\n🚀 Ready for submission to evaluation server!")
print("="*80)