In [None]:
import torch
import numpy as np
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim
import matplotlib.pyplot as plt
from collections import defaultdict
from torch.utils.data import DataLoader, random_split, Dataset

## Dataset

In [None]:
def custom_collate(batch):
    """Custom collate function for digit dataset"""
    transformed = torch.stack([item['transformed'] for item in batch])
    labels = torch.stack([item['label'] for item in batch])
    return {
        'transformed': transformed,
        'label': labels
    }

class DigitDataset(Dataset):
    """Dataset class for digit data"""
    def __init__(self, images, labels):
        self.images = torch.FloatTensor(images).reshape(-1, 1, 40, 168)
        # Normalize but keep single channel
        self.model_input = (self.images - self.images.mean()) / self.images.std()
        self.labels = torch.LongTensor(labels)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return {
            'transformed': self.model_input[idx],
            'label': self.labels[idx]
        }

## Model

In [None]:
class CustomCNN(nn.Module):
    def __init__(self, num_classes=37):  # 0 to 36 inclusive
        super().__init__()
        
        # Feature extraction layers
        self.features = nn.Sequential(
            # First conv block
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.25),
            
            # Second conv block
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.25),
            
            # Third conv block
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.25)
        )
        
        # Calculate size of flattened features
        self._to_linear = self._get_conv_output_size((1, 40, 168))
        
        # Classification layers
        self.classifier = nn.Sequential(
            nn.Linear(self._to_linear, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )
        
        # Initialize weights
        self._initialize_weights()
    
    def _get_conv_output_size(self, shape):
        """Calculate the size of flattened features after convolutions"""
        batch_size = 1
        input = torch.autograd.Variable(torch.rand(batch_size, *shape))
        output_feat = self.features(input)
        n_size = output_feat.data.view(batch_size, -1).size(1)
        return n_size
    
    def _initialize_weights(self):
        """Initialize model weights"""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

## Training

In [None]:
def train_model(train_loader, val_loader, model, criterion, optimizer, device, num_epochs=100, lr=1e-3, batch_size=256):
    """Train the model and return training history"""
    history = defaultdict(list)
    best_val_acc = 0
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0
        
        train_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        for batch in train_bar:
            inputs = batch['transformed'].to(device)
            labels = batch['label'].to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()
            
            # Update progress bar
            train_bar.set_postfix({
                'loss': f'{train_loss/train_total:.3f}',
                'acc': f'{100.*train_correct/train_total:.2f}%'
            })
        
        # Validation phase
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            val_bar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Val]')
            for batch in val_bar:
                inputs = batch['transformed'].to(device)
                labels = batch['label'].to(device)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()
                
                # Update progress bar
                val_bar.set_postfix({
                    'loss': f'{val_loss/val_total:.3f}',
                    'acc': f'{100.*val_correct/val_total:.2f}%'
                })
        
        # Save metrics
        train_acc = 100. * train_correct / train_total
        val_acc = 100. * val_correct / val_total
        
        history['train_loss'].append(train_loss / train_total)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss / val_total)
        history['val_acc'].append(val_acc)
        
        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), f'best_digit_sum_cnn_batch{batch_size}_lr{lr}.pt')
            print(f'New best model saved with validation accuracy: {val_acc:.2f}%')
    
    return history

def plot_training_history(history):
    """Plot training and validation metrics"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
    
    # Plot losses
    ax1.plot(history['train_loss'], label='Train Loss')
    ax1.plot(history['val_loss'], label='Validation Loss')
    ax1.set_title('Loss over epochs')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    
    # Plot accuracies
    ax2.plot(history['train_acc'], label='Train Accuracy')
    ax2.plot(history['val_acc'], label='Validation Accuracy')
    ax2.set_title('Accuracy over epochs')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy (%)')
    ax2.legend()
    
    plt.tight_layout()
    plt.savefig('training_history_cnn.png')
    plt.close()

In [1]:
def evaluate_model(model, test_loader, criterion, device):
    """Evaluate model on test set"""
    model.eval()
    test_loss = 0
    test_correct = 0
    test_total = 0
    
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        test_bar = tqdm(test_loader, desc='Testing')
        for batch in test_bar:
            inputs = batch['transformed'].to(device)
            labels = batch['label'].to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            test_total += labels.size(0)
            test_correct += predicted.eq(labels).sum().item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
            test_bar.set_postfix({
                'loss': f'{test_loss/test_total:.3f}',
                'acc': f'{100.*test_correct/test_total:.2f}%'
            })
    
    test_acc = 100. * test_correct / test_total
    print(f'\nTest Accuracy: {test_acc:.2f}%')
    print(f'Test Loss: {test_loss/test_total:.3f}')
    
    return test_acc, test_loss/test_total, all_preds, all_labels

def run_hyperparameter_search(train_dataset, val_dataset, test_dataset, device):
    """Run grid search over batch sizes and learning rates"""
    
    # Define hyperparameter grid
    batch_sizes = [64]
    # batch_sizes = [64, 128, 256]
    learning_rates = [1e-4]
    # learning_rates = [1e-3, 5e-4, 1e-4]
    
    # Store results
    results = []
    
    # Open file for logging results
    with open('hyperparameter_search_results.txt', 'w') as f:
        f.write("Hyperparameter Search Results\n")
        f.write("============================\n\n")
        
        for batch_size in tqdm(batch_sizes, desc="Batch sizes"):
            for lr in learning_rates:
                print(f"\nTesting batch_size={batch_size}, learning_rate={lr}")
                f.write(f"\nBatch Size: {batch_size}, Learning Rate: {lr}\n")
                f.write("-----------------------------------------\n")
                
                # Create data loaders
                train_loader = DataLoader(
                    train_dataset, 
                    batch_size=batch_size, 
                    shuffle=True, 
                    collate_fn=custom_collate,
                    num_workers=4
                )
                val_loader = DataLoader(
                    val_dataset, 
                    batch_size=batch_size, 
                    shuffle=False, 
                    collate_fn=custom_collate,
                    num_workers=4
                )
                test_loader = DataLoader(
                    test_dataset, 
                    batch_size=batch_size, 
                    shuffle=False, 
                    collate_fn=custom_collate,
                    num_workers=4
                )
                
                # Initialize model
                model = CustomCNN().to(device)
                
                # Define loss function and optimizer
                criterion = nn.CrossEntropyLoss()
                optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=5e-4)
                
                # Train model
                history = train_model(
                    train_loader,
                    val_loader,
                    model,
                    criterion,
                    optimizer,
                    device,
                    num_epochs=100,
                    lr=lr,
                    batch_size=batch_size
                )
                
                # Save training curves for this configuration
                plt.figure(figsize=(15, 5))
                plt.subplot(1, 2, 1)
                plt.plot(history['train_loss'], label='Train Loss')
                plt.plot(history['val_loss'], label='Val Loss')
                plt.title(f'Loss (batch={batch_size}, lr={lr})')
                plt.xlabel('Epoch')
                plt.ylabel('Loss')
                plt.legend()
                
                plt.subplot(1, 2, 2)
                plt.plot(history['train_acc'], label='Train Acc')
                plt.plot(history['val_acc'], label='Val Acc')
                plt.title(f'Accuracy (batch={batch_size}, lr={lr})')
                plt.xlabel('Epoch')
                plt.ylabel('Accuracy (%)')
                plt.legend()
                
                plt.tight_layout()
                plt.savefig(f'training_curves_batch{batch_size}_lr{lr}.png')
                plt.close()
                
                # Evaluate on test set
                test_acc, test_loss, preds, labels = evaluate_model(
                    model, test_loader, criterion, device
                )
                
                # Store results
                result = {
                    'batch_size': batch_size,
                    'learning_rate': lr,
                    'final_train_loss': history['train_loss'][-1],
                    'final_val_loss': history['val_loss'][-1],
                    'final_train_acc': history['train_acc'][-1],
                    'final_val_acc': history['val_acc'][-1],
                    'test_acc': test_acc,
                    'test_loss': test_loss
                }
                results.append(result)
                
                # Log results
                f.write(f"Final Train Loss: {result['final_train_loss']:.4f}\n")
                f.write(f"Final Val Loss: {result['final_val_loss']:.4f}\n")
                f.write(f"Final Train Accuracy: {result['final_train_acc']:.2f}%\n")
                f.write(f"Final Val Accuracy: {result['final_val_acc']:.2f}%\n")
                f.write(f"Test Accuracy: {result['test_acc']:.2f}%\n")
                f.write(f"Test Loss: {result['test_loss']:.4f}\n\n")
                    
    # Find best configuration
    best_result = max(results, key=lambda x: x['test_acc'])
    print("\nBest Configuration:")
    print(f"Batch Size: {best_result['batch_size']}")
    print(f"Learning Rate: {best_result['learning_rate']}")
    print(f"Test Accuracy: {best_result['test_acc']:.2f}%")
    
    # Save all results to numpy file
    np.save('hyperparameter_search_results.npy', results)
    
    return results

def main():
    # Set random seed for reproducibility
    torch.manual_seed(42)
    
    # Load data
    print("Loading data...")
    data_path = '/scratch/gaurav.bhole/MLNS_data/'
    
    # Load data files with progress bar
    data_files = {
        'data0': 'data0.npy',
        'data1': 'data1.npy',
        'data2': 'data2.npy',
        'lab0': 'lab0.npy',
        'lab1': 'lab1.npy',
        'lab2': 'lab2.npy'
    }
    
    loaded_data = {}
    for name, filename in tqdm(data_files.items(), desc="Loading data files"):
        loaded_data[name] = np.load(data_path + filename)
    
    # Combine the data
    print("\nCombining datasets...")
    train_data = np.concatenate(
        (loaded_data['data0'], loaded_data['data1'], loaded_data['data2']), 
        axis=0
    )
    lab_data = np.concatenate(
        (loaded_data['lab0'], loaded_data['lab1'], loaded_data['lab2']), 
        axis=0
    )
    
    print(f"Final data shapes - Images: {train_data.shape}, Labels: {lab_data.shape}")
    
    # Create dataset
    print("\nCreating dataset...")
    dataset = DigitDataset(train_data, lab_data)
    
    # Calculate split sizes
    total_size = len(dataset)
    train_size = int(0.75 * total_size)
    val_size = int(0.10 * total_size)
    test_size = total_size - train_size - val_size
    
    print(f"\nSplitting dataset:")
    print(f"Train size: {train_size}")
    print(f"Validation size: {val_size}")
    print(f"Test size: {test_size}")
    
    # Split dataset
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_size, val_size, test_size]
    )
    
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'\nUsing device: {device}')
    
    # Run hyperparameter search
    print("\nStarting hyperparameter search...")
    results = run_hyperparameter_search(train_dataset, val_dataset, test_dataset, device)
    
    # Plot final results
    print("\nPlotting hyperparameter search results...")
    plt.figure(figsize=(12, 6))
    
    # Create scatter plot of results
    batch_sizes = sorted(list(set([r['batch_size'] for r in results])))
    learning_rates = sorted(list(set([r['learning_rate'] for r in results])))
    colors = ['red', 'blue', 'green']
    
    for i, bs in enumerate(batch_sizes):
        bs_results = [r for r in results if r['batch_size'] == bs]
        plt.plot(
            [r['learning_rate'] for r in bs_results],
            [r['test_acc'] for r in bs_results],
            'o-',
            label=f'Batch Size {bs}',
            color=colors[i]
        )
    
    plt.xscale('log')
    plt.xlabel('Learning Rate')
    plt.ylabel('Test Accuracy (%)')
    plt.title('Hyperparameter Search Results')
    plt.legend()
    plt.grid(True)
    plt.savefig('hyperparameter_search_plot.png')
    plt.close()
    
    print("\nDone! Check the results in:")
    print("1. hyperparameter_search_results.txt")
    print("2. hyperparameter_search_results.npy")
    print("3. hyperparameter_search_plot.png")
    print("4. Individual training curves in training_curves_*.png files")

if __name__ == '__main__':
    main()

Loading data...


Loading data files: 100%|██████████| 6/6 [00:00<00:00, 66.66it/s]


Combining datasets...
Final data shapes - Images: (30000, 40, 168), Labels: (30000,)

Creating dataset...






Splitting dataset:
Train size: 22500
Validation size: 3000
Test size: 4500

Using device: cuda

Starting hyperparameter search...


Batch sizes:   0%|          | 0/1 [00:00<?, ?it/s]


Testing batch_size=64, learning_rate=0.0001


Epoch 1/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 18.82it/s, loss=0.051, acc=5.83%]
Epoch 1/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.62it/s, loss=0.050, acc=5.87%]


New best model saved with validation accuracy: 5.87%


Epoch 2/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.42it/s, loss=0.050, acc=6.28%]
Epoch 2/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 45.14it/s, loss=0.050, acc=7.00%]


New best model saved with validation accuracy: 7.00%


Epoch 3/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.69it/s, loss=0.050, acc=6.44%]
Epoch 3/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.72it/s, loss=0.050, acc=6.57%]
Epoch 4/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.48it/s, loss=0.050, acc=6.46%]
Epoch 4/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.41it/s, loss=0.050, acc=6.97%]
Epoch 5/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.76it/s, loss=0.049, acc=6.44%]
Epoch 5/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 45.45it/s, loss=0.049, acc=7.00%]
Epoch 6/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.74it/s, loss=0.048, acc=7.56%]
Epoch 6/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.19it/s, loss=0.047, acc=9.00%]


New best model saved with validation accuracy: 9.00%


Epoch 7/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.79it/s, loss=0.046, acc=8.57%]
Epoch 7/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.26it/s, loss=0.043, acc=10.47%]


New best model saved with validation accuracy: 10.47%


Epoch 8/100 [Train]: 100%|██████████| 352/352 [00:20<00:00, 17.11it/s, loss=0.044, acc=9.95%]
Epoch 8/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 46.21it/s, loss=0.042, acc=12.63%]


New best model saved with validation accuracy: 12.63%


Epoch 9/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.10it/s, loss=0.042, acc=11.19%]
Epoch 9/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.14it/s, loss=0.044, acc=8.67%]
Epoch 10/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.35it/s, loss=0.041, acc=12.00%]
Epoch 10/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.95it/s, loss=0.040, acc=12.43%]
Epoch 11/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.68it/s, loss=0.040, acc=12.79%]
Epoch 11/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.18it/s, loss=0.039, acc=13.90%]


New best model saved with validation accuracy: 13.90%


Epoch 12/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.42it/s, loss=0.040, acc=13.51%]
Epoch 12/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 41.44it/s, loss=0.038, acc=15.37%]


New best model saved with validation accuracy: 15.37%


Epoch 13/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.43it/s, loss=0.039, acc=14.30%]
Epoch 13/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.40it/s, loss=0.037, acc=16.53%]


New best model saved with validation accuracy: 16.53%


Epoch 14/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.00it/s, loss=0.038, acc=14.49%]
Epoch 14/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.38it/s, loss=0.037, acc=15.10%]
Epoch 15/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.75it/s, loss=0.038, acc=14.74%]
Epoch 15/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.13it/s, loss=0.037, acc=15.67%]
Epoch 16/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.34it/s, loss=0.037, acc=15.37%]
Epoch 16/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.08it/s, loss=0.036, acc=17.87%]


New best model saved with validation accuracy: 17.87%


Epoch 17/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.77it/s, loss=0.037, acc=15.83%]
Epoch 17/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.06it/s, loss=0.037, acc=15.33%]
Epoch 18/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.67it/s, loss=0.036, acc=16.06%]
Epoch 18/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.75it/s, loss=0.035, acc=18.10%]


New best model saved with validation accuracy: 18.10%


Epoch 19/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.42it/s, loss=0.036, acc=16.32%]
Epoch 19/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.62it/s, loss=0.036, acc=17.23%]
Epoch 20/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.34it/s, loss=0.036, acc=17.05%]
Epoch 20/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.55it/s, loss=0.036, acc=16.60%]
Epoch 21/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.34it/s, loss=0.035, acc=17.51%]
Epoch 21/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.79it/s, loss=0.034, acc=22.43%]


New best model saved with validation accuracy: 22.43%


Epoch 22/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.80it/s, loss=0.035, acc=17.67%]
Epoch 22/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 36.96it/s, loss=0.033, acc=20.90%]
Epoch 23/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.56it/s, loss=0.035, acc=18.04%]
Epoch 23/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 35.45it/s, loss=0.034, acc=19.33%]
Epoch 24/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.75it/s, loss=0.034, acc=18.61%]
Epoch 24/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.81it/s, loss=0.032, acc=22.17%]
Epoch 25/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.45it/s, loss=0.034, acc=18.88%]
Epoch 25/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.67it/s, loss=0.034, acc=19.30%]
Epoch 26/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.85it/s, loss=0.034, acc=19.62%]
Epoch 26/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 41.67it/s, loss=0.032, acc=23.57%]


New best model saved with validation accuracy: 23.57%


Epoch 27/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.37it/s, loss=0.034, acc=19.75%]
Epoch 27/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.08it/s, loss=0.032, acc=22.83%]
Epoch 28/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.37it/s, loss=0.033, acc=19.96%]
Epoch 28/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 36.98it/s, loss=0.032, acc=22.97%]
Epoch 29/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.41it/s, loss=0.033, acc=19.95%]
Epoch 29/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 34.64it/s, loss=0.033, acc=20.57%]
Epoch 30/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.44it/s, loss=0.033, acc=20.89%]
Epoch 30/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.25it/s, loss=0.031, acc=24.30%]


New best model saved with validation accuracy: 24.30%


Epoch 31/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.26it/s, loss=0.033, acc=20.78%]
Epoch 31/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.45it/s, loss=0.031, acc=24.63%]


New best model saved with validation accuracy: 24.63%


Epoch 32/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.57it/s, loss=0.032, acc=21.04%]
Epoch 32/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.63it/s, loss=0.032, acc=22.07%]
Epoch 33/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.39it/s, loss=0.032, acc=21.25%]
Epoch 33/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.39it/s, loss=0.031, acc=24.63%]
Epoch 34/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.37it/s, loss=0.032, acc=21.50%]
Epoch 34/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.76it/s, loss=0.030, acc=26.43%]


New best model saved with validation accuracy: 26.43%


Epoch 35/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.59it/s, loss=0.032, acc=21.61%]
Epoch 35/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.90it/s, loss=0.031, acc=24.20%]
Epoch 36/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.51it/s, loss=0.032, acc=22.24%]
Epoch 36/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.52it/s, loss=0.030, acc=26.47%]


New best model saved with validation accuracy: 26.47%


Epoch 37/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.39it/s, loss=0.031, acc=22.31%]
Epoch 37/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.52it/s, loss=0.030, acc=27.23%]


New best model saved with validation accuracy: 27.23%


Epoch 38/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.69it/s, loss=0.031, acc=22.97%]
Epoch 38/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.10it/s, loss=0.030, acc=25.67%]
Epoch 39/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.46it/s, loss=0.031, acc=22.64%]
Epoch 39/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.68it/s, loss=0.030, acc=25.50%]
Epoch 40/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.52it/s, loss=0.031, acc=23.89%]
Epoch 40/100 [Val]: 100%|██████████| 47/47 [00:00<00:00, 47.27it/s, loss=0.029, acc=28.60%]


New best model saved with validation accuracy: 28.60%


Epoch 41/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.54it/s, loss=0.031, acc=23.21%]
Epoch 41/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.30it/s, loss=0.030, acc=26.57%]
Epoch 42/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.42it/s, loss=0.030, acc=24.30%]
Epoch 42/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.45it/s, loss=0.029, acc=29.30%]


New best model saved with validation accuracy: 29.30%


Epoch 43/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.60it/s, loss=0.030, acc=24.08%]
Epoch 43/100 [Val]: 100%|██████████| 47/47 [00:00<00:00, 50.14it/s, loss=0.029, acc=28.73%]
Epoch 44/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.39it/s, loss=0.030, acc=24.04%]
Epoch 44/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.45it/s, loss=0.029, acc=29.13%]
Epoch 45/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.33it/s, loss=0.030, acc=24.70%]
Epoch 45/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 36.93it/s, loss=0.030, acc=26.70%]
Epoch 46/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.61it/s, loss=0.030, acc=25.13%]
Epoch 46/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 35.41it/s, loss=0.028, acc=29.40%]


New best model saved with validation accuracy: 29.40%


Epoch 47/100 [Train]: 100%|██████████| 352/352 [00:20<00:00, 16.98it/s, loss=0.030, acc=25.27%]
Epoch 47/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.26it/s, loss=0.028, acc=29.63%]


New best model saved with validation accuracy: 29.63%


Epoch 48/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.66it/s, loss=0.030, acc=24.91%]
Epoch 48/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.23it/s, loss=0.028, acc=30.87%]


New best model saved with validation accuracy: 30.87%


Epoch 49/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.10it/s, loss=0.029, acc=25.42%]
Epoch 49/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.78it/s, loss=0.029, acc=27.53%]
Epoch 50/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.39it/s, loss=0.030, acc=25.77%]
Epoch 50/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.29it/s, loss=0.028, acc=31.10%]


New best model saved with validation accuracy: 31.10%


Epoch 51/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.35it/s, loss=0.029, acc=26.13%]
Epoch 51/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 41.75it/s, loss=0.028, acc=31.57%]


New best model saved with validation accuracy: 31.57%


Epoch 52/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.30it/s, loss=0.029, acc=26.45%]
Epoch 52/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.28it/s, loss=0.027, acc=33.50%]


New best model saved with validation accuracy: 33.50%


Epoch 53/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.38it/s, loss=0.029, acc=26.15%]
Epoch 53/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 40.31it/s, loss=0.028, acc=33.00%]
Epoch 54/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.74it/s, loss=0.029, acc=27.00%]
Epoch 54/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 42.70it/s, loss=0.028, acc=33.17%]
Epoch 55/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.42it/s, loss=0.029, acc=27.04%]
Epoch 55/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.17it/s, loss=0.029, acc=25.87%]
Epoch 56/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.37it/s, loss=0.028, acc=27.38%]
Epoch 56/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.28it/s, loss=0.027, acc=33.20%]
Epoch 57/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.66it/s, loss=0.028, acc=27.45%]
Epoch 57/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 40.59it/s, loss=0.027, acc=31.07%]
Epoch 58/100 [Train]: 100%|██████████| 352/352 [00:18<00:00,

New best model saved with validation accuracy: 34.63%


Epoch 61/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.45it/s, loss=0.028, acc=28.08%]
Epoch 61/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.19it/s, loss=0.027, acc=34.33%]
Epoch 62/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.44it/s, loss=0.028, acc=28.37%]
Epoch 62/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.13it/s, loss=0.027, acc=31.60%]
Epoch 63/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.02it/s, loss=0.028, acc=28.94%]
Epoch 63/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.73it/s, loss=0.026, acc=34.83%]


New best model saved with validation accuracy: 34.83%


Epoch 64/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.45it/s, loss=0.028, acc=28.71%]
Epoch 64/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.89it/s, loss=0.026, acc=36.23%]


New best model saved with validation accuracy: 36.23%


Epoch 65/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.31it/s, loss=0.027, acc=29.20%]
Epoch 65/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 45.29it/s, loss=0.028, acc=29.90%]
Epoch 66/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.77it/s, loss=0.027, acc=29.14%]
Epoch 66/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.39it/s, loss=0.026, acc=34.30%]
Epoch 67/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.04it/s, loss=0.027, acc=29.39%]
Epoch 67/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 36.02it/s, loss=0.027, acc=33.23%]
Epoch 68/100 [Train]: 100%|██████████| 352/352 [00:20<00:00, 17.05it/s, loss=0.027, acc=29.58%]
Epoch 68/100 [Val]: 100%|██████████| 47/47 [00:00<00:00, 48.06it/s, loss=0.027, acc=32.90%]
Epoch 69/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.42it/s, loss=0.027, acc=29.77%]
Epoch 69/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.30it/s, loss=0.027, acc=31.93%]
Epoch 70/100 [Train]: 100%|██████████| 352/352 [00:18<00:00,

New best model saved with validation accuracy: 36.63%


Epoch 73/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.19it/s, loss=0.027, acc=30.53%]
Epoch 73/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.94it/s, loss=0.026, acc=35.87%]
Epoch 74/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.98it/s, loss=0.027, acc=30.79%]
Epoch 74/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.35it/s, loss=0.026, acc=36.63%]
Epoch 75/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.18it/s, loss=0.026, acc=31.16%]
Epoch 75/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.52it/s, loss=0.026, acc=36.50%]
Epoch 76/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.31it/s, loss=0.026, acc=31.12%]
Epoch 76/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.86it/s, loss=0.025, acc=38.43%]


New best model saved with validation accuracy: 38.43%


Epoch 77/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 20.46it/s, loss=0.026, acc=31.09%]
Epoch 77/100 [Val]: 100%|██████████| 47/47 [00:00<00:00, 56.33it/s, loss=0.026, acc=37.37%]
Epoch 78/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.25it/s, loss=0.026, acc=31.44%]
Epoch 78/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.53it/s, loss=0.026, acc=35.93%]
Epoch 79/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.18it/s, loss=0.026, acc=32.48%]
Epoch 79/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.13it/s, loss=0.025, acc=38.40%]
Epoch 80/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.61it/s, loss=0.026, acc=31.84%]
Epoch 80/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 35.81it/s, loss=0.026, acc=35.80%]
Epoch 81/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.55it/s, loss=0.026, acc=32.42%]
Epoch 81/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 38.86it/s, loss=0.025, acc=38.00%]
Epoch 82/100 [Train]: 100%|██████████| 352/352 [00:18<00:00,

New best model saved with validation accuracy: 40.27%


Epoch 91/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.27it/s, loss=0.025, acc=34.17%]
Epoch 91/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 39.06it/s, loss=0.025, acc=39.90%]
Epoch 92/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.67it/s, loss=0.025, acc=34.41%]
Epoch 92/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 46.68it/s, loss=0.025, acc=38.87%]
Epoch 93/100 [Train]: 100%|██████████| 352/352 [00:17<00:00, 19.92it/s, loss=0.025, acc=34.94%]
Epoch 93/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 37.87it/s, loss=0.025, acc=38.13%]
Epoch 94/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.24it/s, loss=0.025, acc=33.90%]
Epoch 94/100 [Val]: 100%|██████████| 47/47 [00:01<00:00, 36.87it/s, loss=0.025, acc=38.03%]
Epoch 95/100 [Train]: 100%|██████████| 352/352 [00:18<00:00, 19.14it/s, loss=0.025, acc=34.17%]
Epoch 95/100 [Val]: 100%|██████████| 47/47 [00:00<00:00, 50.72it/s, loss=0.025, acc=37.80%]
Epoch 96/100 [Train]: 100%|██████████| 352/352 [00:17<00:00,


Test Accuracy: 38.84%
Test Loss: 0.025

Best Configuration:
Batch Size: 64
Learning Rate: 0.0001
Test Accuracy: 38.84%

Plotting hyperparameter search results...

Done! Check the results in:
1. hyperparameter_search_results.txt
2. hyperparameter_search_results.npy
3. hyperparameter_search_plot.png
4. Individual training curves in training_curves_*.png files
