In [7]:
#pip installs
!pip3 install -q ipython-autotime
!pip3 install h5py -q

In [8]:
%reload_ext autotime
import numpy as np
from matplotlib import pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.utils.class_weight import compute_class_weight
from itertools import islice

import gc
import h5py
from tqdm import tqdm
import librosa
import random

time: 6.28 s (started: 2024-11-19 05:56:00 +00:00)


In [24]:
class SplitDataset(Dataset):
    def __init__(self, h5_file, indices):
        self.h5_file = h5_file
        self.indices = indices
    
    def __len__(self):
        return len(self.indices)
    
    def __getitem__(self, idx):
        with h5py.File(self.h5_file, 'r') as hf:
            index = self.indices[idx]
            spectrogram = hf['spectrograms'][index] / 255.0  # Load one item and normalize
            label = hf['labels'][index]
            spectrogram = torch.tensor(spectrogram, dtype=torch.float32)
            spectrogram = spectrogram.unsqueeze(0)
            label = torch.tensor(label, dtype=torch.long)

            # Call garbage collector periodically
            if idx % 1000 == 0:
                gc.collect()
            
            return spectrogram, label

time: 934 µs (started: 2024-11-19 05:58:26 +00:00)


In [25]:
def create_splits(h5_path, test_split=0.1, val_split=0.2, seed=42):
    """Create dataset splits without loading entire dataset"""
    # Get dataset size without loading data
    with h5py.File(h5_path, 'r') as f:
        total_size = len(f['spectrograms'])
        labels = f['labels'][:]

    labels = labels.argmax(axis=1)
    class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
    class_weights = torch.tensor(class_weights, dtype=torch.float32)

    # Generate globally shuffled indices
    rng = np.random.RandomState(seed)
    all_indices = np.arange(total_size)
    rng.shuffle(all_indices)
    
    # Calculate split sizes
    test_size = int(test_split * total_size)
    val_size = int(val_split * total_size)
    train_size = total_size - test_size - val_size
    
    # Create index lists for each split
    test_indices = all_indices[:test_size]
    val_indices = all_indices[test_size:test_size + val_size]
    train_indices = all_indices[test_size + val_size:]
    
    # Create datasets using your SplitDataset class
    train_dataset = SplitDataset(h5_path, train_indices)
    val_dataset = SplitDataset(h5_path, val_indices)
    test_dataset = SplitDataset(h5_path, test_indices)
    
    return train_dataset, val_dataset, test_dataset, class_weights

time: 1.21 ms (started: 2024-11-19 05:58:27 +00:00)


In [26]:
def create_data_loaders(h5_path, batch_size=32, test_split=0.1, val_split=0.2,
                       seed=42, num_workers=4):
    """Create data loaders with memory-efficient splitting"""
    
    # Clear memory before creating splits
    gc.collect()
    torch.cuda.empty_cache()
    
    # Create datasets
    train_dataset, val_dataset, test_dataset, class_weights = create_splits(
        h5_path, test_split, val_split, seed
    )
    
    # Create data loaders with optimized settings
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        prefetch_factor=2,
        persistent_workers=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,  # No need to shuffle validation
        num_workers=num_workers,
        pin_memory=True,
        prefetch_factor=2,
        persistent_workers=True
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,  # No need to shuffle test
        num_workers=num_workers,
        pin_memory=True,
        prefetch_factor=2,
        persistent_workers=True
    )
    
    return train_loader, val_loader, test_loader, class_weights

time: 1.04 ms (started: 2024-11-19 05:58:29 +00:00)


In [27]:
DATASET_SIZE = 105526
BATCH_SIZE = 32
TEST_SPLIT = 0.1
VAL_SPLIT = 0.2
NUM_WORKERS = 4
H5_PATH = '/kaggle/input/raga-ieee-preprocessing/processed_spectrograms.h5'

train_loader, val_loader, test_loader, class_weights = create_data_loaders(
        h5_path=H5_PATH,
        batch_size=BATCH_SIZE,
        test_split=TEST_SPLIT,
        val_split=VAL_SPLIT,
        num_workers=NUM_WORKERS,
        seed=420
    )

print(f"\nDataset splits:")
print(f"Train size: {len(train_loader.dataset)}")
print(f"Val size: {len(val_loader.dataset)}")
print(f"Test size: {len(test_loader.dataset)}")


Dataset splits:
Train size: 73869
Val size: 21105
Test size: 10552
time: 2.37 s (started: 2024-11-19 05:58:30 +00:00)


In [8]:
# from collections import defaultdict

# def label_distribution(dataloader):
#     label_counts = defaultdict(int)
    
#     for _, labels in dataloader:
#         # Convert one-hot encoded labels to class indices
#         label_indices = labels.argmax(dim=1)  # Get the index of the 1 in each one-hot label
        
#         for label in label_indices:
#             label_counts[label.item()] += 1
    
#     # Calculate the distribution as percentages
#     total_count = sum(label_counts.values())
#     label_distribution = {label: count / total_count * 100 for label, count in label_counts.items()}
    
#     print("Label distribution (in percentages):")
#     for label, percentage in label_distribution.items():
#         print(f"Class {label}: {percentage:.2f}% ({label_counts[label]} samples)")
    
#     return label_counts


# x = label_distribution(test_loader)
# plt.bar(x.keys(),x.values())

time: 10.7 ms (started: 2024-11-18 21:17:11 +00:00)


In [9]:
# def plot_mel_spectrogram(mel_spectrogram, sr=16000, hop_length=512, title="Mel Spectrogram"):
#     plt.figure(figsize=(10, 4))
#     librosa.display.specshow(mel_spectrogram, 
#                              sr=sr, 
#                              hop_length=hop_length, 
#                              x_axis='time', 
#                              y_axis='mel')
#     plt.colorbar(format='%+2.0f dB')
#     plt.title(title)
#     plt.tight_layout()
#     plt.show()

time: 9.2 ms (started: 2024-11-18 21:17:11 +00:00)


In [10]:
# # Get one random batch from the train_loader
# data_iter = iter(train_loader)
# spectrograms, labels = next(data_iter)

# # Select a random index within the batch
# random_idx = random.randint(0, spectrograms.size(0) - 1)

# # Get the spectrogram and label at that index
# spectrogram = spectrograms[random_idx].squeeze().numpy()
# label = labels[random_idx].argmax().item()  # Assuming one-hot encoded labels

# plot_mel_spectrogram(spectrogram, title=f"Mel Spectrogram for {label}")

time: 4.88 ms (started: 2024-11-18 21:17:11 +00:00)


In [11]:
# class RagaFeatureExtractorCNN(nn.Module):
#     def __init__(self, input_channels=1, num_classes=10):
#         super(RagaFeatureExtractorCNN, self).__init__()
        
#         # 1st Block: Conv + BatchNorm + ReLU + MaxPool
#         self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, padding=3)
#         self.bn1 = nn.BatchNorm2d(64)
        
#         # 2nd Block: Conv + BatchNorm + ReLU + MaxPool
#         self.conv2 = nn.Conv2d(64, 128, kernel_size=5, padding=2)
#         self.bn2 = nn.BatchNorm2d(128)
        
#         # 3rd Block: Depthwise Separable Convolution + BatchNorm + ReLU + MaxPool
#         self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
#         self.bn3 = nn.BatchNorm2d(256)
        
#         # 4th Block: Depthwise Separable Convolution + BatchNorm + ReLU + MaxPool
#         self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
#         self.bn4 = nn.BatchNorm2d(512)
        
#         # 5th Block: Conv + BatchNorm + ReLU
#         self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
#         self.bn5 = nn.BatchNorm2d(1024)
        
#         # 6th Block: Conv + BatchNorm + ReLU
#         self.conv6 = nn.Conv2d(1024, 2048, kernel_size=3, padding=1)
#         self.bn6 = nn.BatchNorm2d(2048)
        
#         # Global Average Pooling Layer
#         self.global_pool = nn.AdaptiveAvgPool2d(1)
        
#         # Fully connected layers for classification
#         self.fc = nn.Linear(2048, num_classes)
        
#         # Dropout for regularization
#         self.dropout = nn.Dropout(0.5)

#         # Attention (Squeeze-and-Excitation) blocks
#         self.se1 = SEBlock(64)
#         self.se2 = SEBlock(128)
#         self.se3 = SEBlock(256)
#         self.se4 = SEBlock(512)
#         self.se5 = SEBlock(1024)

#     def forward(self, x):
#         x = F.relu(self.bn1(self.conv1(x)))
#         x = self.se1(x)
#         x = F.max_pool2d(x, 2, 2)
        
#         x = F.relu(self.bn2(self.conv2(x)))
#         x = self.se2(x)
#         x = F.max_pool2d(x, 2, 2)
    
#         x = F.relu(self.bn3(self.conv3(x)))
#         x = self.se3(x)
#         x = F.max_pool2d(x, 2, 2)
    
#         x = F.relu(self.bn4(self.conv4(x)))
#         x = self.se4(x)
#         x = F.max_pool2d(x, 2, 2)
        
#         x = F.relu(self.bn5(self.conv5(x)))
#         x = self.se5(x)
        
#         x = F.relu(self.bn6(self.conv6(x)))
    
#         x = self.global_pool(x)
    
#         x = torch.flatten(x, 1)  # Flatten to feed into FC layer
    
#         x = self.dropout(self.fc(x))
#         return x

# class SEBlock(nn.Module):
#     def __init__(self, in_channels, reduction=16):
#         super(SEBlock, self).__init__()
#         self.fc1 = nn.Linear(in_channels, in_channels // reduction)
#         self.fc2 = nn.Linear(in_channels // reduction, in_channels)
        
#     def forward(self, x):
#         # Squeeze operation
#         b, c, _, _ = x.size()
#         y = F.adaptive_avg_pool2d(x, (1, 1))
#         y = y.view(b, c)
        
#         # Excitation operation
#         y = F.relu(self.fc1(y))
#         y = torch.sigmoid(self.fc2(y)).view(b, c, 1, 1)
        
#         # Scale the feature map
#         return x * y

time: 5.54 ms (started: 2024-11-18 21:17:11 +00:00)


In [13]:
class CNNForSpectrograms(nn.Module):
    def __init__(self):
        super(CNNForSpectrograms, self).__init__()
        
        # First block
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # (256x256) -> (256x256)
        self.pool1 = nn.MaxPool2d(2, 2)  # (256x256) -> (128x128)
        self.dropout1 = nn.Dropout(0.2)  # Dropout after first block
        
        # Second block
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # (128x128) -> (128x128)
        self.pool2 = nn.MaxPool2d(2, 2)  # (128x128) -> (64x64)
        self.dropout2 = nn.Dropout(0.2)  # Dropout after second block
        
        # Third block
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)  # (64x64) -> (64x64)
        self.pool3 = nn.MaxPool2d(2, 2)  # (64x64) -> (32x32)
        self.dropout3 = nn.Dropout(0.3)  # Dropout after third block
        
        # Fourth block
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)  # (32x32) -> (32x32)
        self.pool4 = nn.MaxPool2d(2, 2)  # (32x32) -> (16x16)
        self.dropout4 = nn.Dropout(0.3)  # Dropout after fourth block
        
        # Final fully connected layer to reduce to a 16x16 feature space
        self.fc = nn.Linear(256 * 16 * 16, 1024)  # Flattened 16x16x256 -> 1024
        self.dropout_fc = nn.Dropout(0.4)  # Dropout before final output
        
        # Output layer (for classification)
        self.output = nn.Linear(1024, 10)  # For 10 thaat classes

    def forward(self, x):
        # Forward pass through convolutional blocks
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.dropout1(x)  # Apply dropout
        
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.dropout2(x)  # Apply dropout
        
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        x = self.dropout3(x)  # Apply dropout
        
        x = F.relu(self.conv4(x))
        x = self.pool4(x)
        x = self.dropout4(x)  # Apply dropout
        
        # Flatten the output from convolutional layers
        x = x.view(x.size(0), -1)  # Flatten: batch_size x (256*16*16)
        
        # Fully connected layer with dropout
        x = F.relu(self.fc(x))
        x = self.dropout_fc(x)  # Apply dropout
        
        # Output layer for classification
        x = self.output(x)
        
        return x

time: 2.29 ms (started: 2024-11-19 05:57:17 +00:00)


In [18]:
def validate_model(model, val_loader, criterion, device):
    model.eval()
    val_loss = 0.0
    val_predictions = []
    val_labels = []
    
    # Create progress bar for validation
    val_pbar = tqdm(val_loader, desc='Validation')
    
    with torch.no_grad():
        for spectrograms, labels in val_pbar:
            spectrograms = spectrograms.to(device)
            labels = torch.argmax(labels, dim=1).to(device).long()
            
            outputs = model(spectrograms)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_predictions.extend(predicted.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())
            
            val_pbar.set_postfix({'loss': loss.item()})
    
    val_loss = val_loss / len(val_loader)
    val_accuracy = accuracy_score(val_labels, val_predictions)
    
    return val_loss, val_accuracy

time: 1.07 ms (started: 2024-11-19 05:58:05 +00:00)


In [30]:
def test_model(model, test_loader, device):
    model.eval()
    test_predictions = []
    test_labels = []
    
    # Create progress bar for testing
    test_pbar = tqdm(test_loader, desc='Testing')
    
    with torch.no_grad():
        for spectrograms, labels in test_pbar:
            spectrograms = spectrograms.to(device)
            labels = labels.to(device)
            
            outputs = model(spectrograms)
            _, predicted = torch.max(outputs.data, 1)  # Get predicted class indices
            
            test_predictions.extend(predicted.cpu().numpy())  # Convert to numpy array of class indices
            test_labels.extend(torch.argmax(labels, dim=1).cpu().numpy())  # Convert one-hot to class indices
    
    # Convert to numpy arrays for consistency
    test_predictions = np.array(test_predictions)
    test_labels = np.array(test_labels)
    
    # Calculate metrics
    accuracy = accuracy_score(test_labels, test_predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(test_labels, test_predictions, average='weighted')
    
    print("\nTest Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    
    return accuracy, precision, recall, f1

time: 1.47 ms (started: 2024-11-19 06:20:06 +00:00)


In [20]:
def plot_training_history(train_losses, val_losses, train_accuracies, val_accuracies):
    plt.figure(figsize=(12, 4))
    
    # Plot losses
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    # Plot accuracies
    plt.subplot(1, 2, 2)
    plt.plot(train_accuracies, label='Train Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plt.tight_layout()
    plt.show()

time: 806 µs (started: 2024-11-19 05:58:06 +00:00)


In [21]:
def train_model(model, train_loader, val_loader, num_epochs, class_weights, device='cuda'):
    # Initialize model, loss, and optimizer
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=3e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
    
    # Initialize lists to store metrics
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_predictions = []
        train_labels = []
        
        # Create progress bar for training
        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        
        for spectrograms, labels in train_pbar:
            # Move data to device
            spectrograms = spectrograms.to(device)
            labels = torch.argmax(labels, dim=1).to(device).long()
            # Zero the gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(spectrograms)
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            # Update metrics
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            train_predictions.extend(predicted.cpu().numpy())
            train_labels.extend(labels.cpu().numpy())
            
            # Update progress bar
            train_pbar.set_postfix({'loss': loss.item()})
        
        # Calculate epoch metrics
        train_loss = train_loss / len(train_loader)
        train_accuracy = accuracy_score(train_labels, train_predictions)
        
        # Validation phase
        val_loss, val_accuracy = validate_model(model, val_loader, criterion, device)
        
        # Learning rate scheduling
        scheduler.step(val_loss)
        
        # Store metrics
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_accuracy)
        val_accuracies.append(val_accuracy)
        
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            # Save best model
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss,
            }, 'best_model.pth')
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= 5:  # Early stopping patience
                print(f'\nEarly stopping triggered after {epoch+1} epochs')
                break
        
        # Print epoch metrics
        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        torch.cuda.empty_cache()
    
    return train_losses, val_losses, train_accuracies, val_accuracies

time: 3.62 ms (started: 2024-11-19 05:58:07 +00:00)


In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = RagaFeatureExtractorCNN(input_channels=1, num_classes=10).to(device)
model = CNNForSpectrograms().to(device)
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)

print(model)

Using 2 GPUs!
DataParallel(
  (module): CNNForSpectrograms(
    (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout1): Dropout(p=0.2, inplace=False)
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout2): Dropout(p=0.2, inplace=False)
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout3): Dropout(p=0.3, inplace=False)
    (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout4): Dropout(p=0.3, inplace=False)
    (fc): Linear(in_features=65536, out_features=1024, bias=True)
    (dropout_fc): Dr

In [None]:
history = train_model(model, train_loader, val_loader, num_epochs=25, class_weights=class_weights.to(device), device=device)

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 1/25 [Train]: 100%|██████████| 2309/2309 [08:27<00:00,  4.55it/s, loss=2.17]
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:16<00:00,  4.84it/s, loss=2.11]



Epoch 1/25:
Train Loss: 2.2801, Train Accuracy: 0.1547
Val Loss: 2.2010, Val Accuracy: 0.1734


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 2/25 [Train]: 100%|██████████| 2309/2309 [09:25<00:00,  4.08it/s, loss=0.682]
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:20<00:00,  4.69it/s, loss=0.946]



Epoch 2/25:
Train Loss: 1.6638, Train Accuracy: 0.4095
Val Loss: 1.0369, Val Accuracy: 0.6669


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 3/25 [Train]: 100%|██████████| 2309/2309 [08:16<00:00,  4.65it/s, loss=0.92] 
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:52<00:00,  3.82it/s, loss=0.634]



Epoch 3/25:
Train Loss: 0.9485, Train Accuracy: 0.6731
Val Loss: 0.6292, Val Accuracy: 0.7949


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 4/25 [Train]: 100%|██████████| 2309/2309 [08:22<00:00,  4.59it/s, loss=1.25] 
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:21<00:00,  4.66it/s, loss=0.452] 



Epoch 4/25:
Train Loss: 0.6561, Train Accuracy: 0.7735
Val Loss: 0.4479, Val Accuracy: 0.8561


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 5/25 [Train]: 100%|██████████| 2309/2309 [08:19<00:00,  4.62it/s, loss=0.674] 
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:15<00:00,  4.87it/s, loss=0.296] 



Epoch 5/25:
Train Loss: 0.4962, Train Accuracy: 0.8253
Val Loss: 0.3748, Val Accuracy: 0.8821


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 6/25 [Train]: 100%|██████████| 2309/2309 [07:59<00:00,  4.81it/s, loss=0.255] 
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:06<00:00,  5.21it/s, loss=0.117] 



Epoch 6/25:
Train Loss: 0.3984, Train Accuracy: 0.8591
Val Loss: 0.3236, Val Accuracy: 0.8948


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 7/25 [Train]: 100%|██████████| 2309/2309 [07:46<00:00,  4.95it/s, loss=0.0301]
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:05<00:00,  5.27it/s, loss=0.0816]



Epoch 7/25:
Train Loss: 0.3344, Train Accuracy: 0.8818
Val Loss: 0.2752, Val Accuracy: 0.9159


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 8/25 [Train]: 100%|██████████| 2309/2309 [07:26<00:00,  5.18it/s, loss=0.534] 
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:00<00:00,  5.47it/s, loss=0.0894]



Epoch 8/25:
Train Loss: 0.2873, Train Accuracy: 0.8984
Val Loss: 0.2490, Val Accuracy: 0.9247


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 9/25 [Train]: 100%|██████████| 2309/2309 [07:48<00:00,  4.93it/s, loss=0.0221]
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:10<00:00,  5.06it/s, loss=0.0799] 



Epoch 9/25:
Train Loss: 0.2481, Train Accuracy: 0.9105
Val Loss: 0.2428, Val Accuracy: 0.9259


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 10/25 [Train]: 100%|██████████| 2309/2309 [07:56<00:00,  4.85it/s, loss=0.12]   
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:19<00:00,  4.74it/s, loss=0.0648] 



Epoch 10/25:
Train Loss: 0.2222, Train Accuracy: 0.9203
Val Loss: 0.2333, Val Accuracy: 0.9307


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 11/25 [Train]: 100%|██████████| 2309/2309 [08:14<00:00,  4.67it/s, loss=0.31]  
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:17<00:00,  4.80it/s, loss=0.0377] 



Epoch 11/25:
Train Loss: 0.2040, Train Accuracy: 0.9266
Val Loss: 0.2199, Val Accuracy: 0.9366


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 12/25 [Train]: 100%|██████████| 2309/2309 [07:55<00:00,  4.86it/s, loss=0.182]  
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Validation: 100%|██████████| 660/660 [02:09<00:00,  5.11it/s, loss=0.0314] 



Epoch 12/25:
Train Loss: 0.1875, Train Accuracy: 0.9323
Val Loss: 0.2162, Val Accuracy: 0.9378


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 13/25 [Train]:   6%|▌         | 136/2309 [00:29<05:10,  7.00it/s, loss=0.1]   

In [35]:
checkpoint = torch.load('best_model.pth', weights_only=True)
history = model.load_state_dict(checkpoint['model_state_dict'])

val_losses = checkpoint['val_loss']

model.eval()

DataParallel(
  (module): CNNForSpectrograms(
    (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout1): Dropout(p=0.2, inplace=False)
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout2): Dropout(p=0.2, inplace=False)
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout3): Dropout(p=0.3, inplace=False)
    (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout4): Dropout(p=0.3, inplace=False)
    (fc): Linear(in_features=65536, out_features=1024, bias=True)
    (dropout_fc): Dropout(p=0.4, i

time: 781 ms (started: 2024-11-19 07:27:44 +00:00)


In [33]:
plot_training_history(*history)

TypeError: plot_training_history() missing 2 required positional arguments: 'train_accuracies' and 'val_accuracies'

time: 16.9 ms (started: 2024-11-19 06:28:07 +00:00)


In [32]:
test_metrics = test_model(model, test_loader, device)

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Testing: 100%|██████████| 330/330 [01:21<00:00,  4.05it/s]

Test labels shape: (10552,)
Test predictions shape: (10552,)
Test labels: [0 8 3 9 0 6 3 4 2 4]
Test predictions: [0 8 3 9 0 7 3 2 2 4]

Test Results:
Accuracy: 0.9408
Precision: 0.9410
Recall: 0.9408
F1 Score: 0.9408
time: 1min 21s (started: 2024-11-19 06:22:02 +00:00)



