<a href="https://colab.research.google.com/github/Sazim2019331087/voice_model/blob/main/CNN_RNN_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
print("--- Installing required libraries ---")
!pip install --upgrade pip
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 # Install PyTorch with CUDA support
!pip install pandas scikit-learn joblib tqdm # tqdm for progress bars
!pip install ffmpeg-python # Python wrapper for ffmpeg
!apt-get update && apt-get install -y ffmpeg # Install ffmpeg on Colab for audio processing

--- Installing required libraries ---
Looking in indexes: https://download.pytorch.org/whl/cu118
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sou

In [6]:
#Setup and Install Libraries

print("--- Installing required libraries ---")
!pip install --upgrade pip
!pip install pandas scikit-learn joblib tqdm
!pip install ffmpeg-python
!apt-get update && apt-get install -y ffmpeg

# CRITICAL: Re-install PyTorch and TorchAudio to ensure CUDA version compatibility
# This command will usually select the correct torchaudio version automatically.
!pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
# We specifically target torch==2.0.1 and torchaudio==2.0.2 for cu118,

# After this, restart the runtime as before.
print("Installation complete. Please RESTART YOUR COLAB RUNTIME (Runtime -> Restart session) and then run all cells from the beginning.")

--- Installing required libraries ---
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Bu

In [7]:
# Mount Google Drive and Load Data

from google.colab import drive
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchaudio
from torchaudio.transforms import MFCC # For MFCC feature extraction
import numpy as np
from tqdm.notebook import tqdm # For progress bars
import joblib # For saving and loading the speaker mapping and model

# Mount Google Drive
print("\n--- Mounting Google Drive ---")
drive.mount('/content/drive')


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-package


--- Mounting Google Drive ---
Mounted at /content/drive


In [8]:
# Define the paths to your data
# IMPORTANT: Make sure these paths match your Google Drive structure exactly.

PROJECT_ROOT_DIR = '/content/drive/MyDrive/project'
CSV_PATH = os.path.join(PROJECT_ROOT_DIR, 'training.csv')
AUDIO_FOLDER_PATH = os.path.join(PROJECT_ROOT_DIR, 'voices')

# Check if the directories and files exist
if not os.path.exists(PROJECT_ROOT_DIR):
    raise FileNotFoundError(f"Error: Project folder '{PROJECT_ROOT_DIR}' not found. Please check the path and your Google Drive structure.")
elif not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"Error: CSV file '{CSV_PATH}' not found. Please ensure it's in the correct location.")
elif not os.path.exists(AUDIO_FOLDER_PATH):
    raise FileNotFoundError(f"Error: Audio folder '{AUDIO_FOLDER_PATH}' not found. Please check the path and upload your audio files.")
else:
    print(f"Successfully located project folder at: {PROJECT_ROOT_DIR}")

# Load the CSV file into a pandas DataFrame
print("\n--- Loading data from CSV ---")
df = pd.read_csv(CSV_PATH)

# Construct the full path to each audio file
df['audio_path'] = df['audio_file'].apply(lambda x: os.path.join(AUDIO_FOLDER_PATH, x))

# --- IMPORTANT: Filter out missing/unreadable audio files ---
# This step is crucial to prevent errors during audio loading.
print("\n--- Verifying audio file paths and formats... ---")
verified_data_for_df = [] # To store valid rows for the new DataFrame

# Create a temporary MFCC transform to test audio loading and feature shape
test_mfcc_transform = MFCC(
    sample_rate=16000,
    n_mfcc=40, # Assuming 40 MFCCs
    melkwargs={
        'n_fft': 400,
        'hop_length': 160,
        'n_mels': 128
    }
)

problematic_files = []

# Iterate through each row to verify audio files
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Verifying audio files"):
    audio_file_path = row['audio_path']

    if not os.path.exists(audio_file_path):
        problematic_files.append((row['audio_file'], row['email'], "File Not Found"))
        continue

    try:
        waveform, sample_rate = torchaudio.load(audio_file_path, frame_offset=0, num_frames=16000 * 2) # Load first 2 seconds
        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
            waveform = resampler(waveform)
        if waveform.shape[0] > 1: # Convert to mono
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        # Ensure waveform is 2D (channels, samples)
        elif waveform.ndim == 1:
            waveform = waveform.unsqueeze(0) # Add channel dimension if it was (samples,)

        # Test MFCC transformation
        temp_mfcc_features = test_mfcc_transform(waveform)

        # Squeeze the channel dimension for the test if it's there
        if temp_mfcc_features.ndim == 3 and temp_mfcc_features.shape[0] == 1:
            temp_mfcc_features = temp_mfcc_features.squeeze(0)

        # Verify it's 2D (num_mfcc, num_frames) after squeezing
        if temp_mfcc_features.ndim != 2:
            raise ValueError(f"MFCC features for {audio_file_path} unexpected shape after squeeze: {temp_mfcc_features.shape}")

        # If successfully processed, add the original row to our verified list
        verified_data_for_df.append(row.to_dict())

    except Exception as e:
        problematic_files.append((row['audio_file'], row['email'], f"Format Error: {e}"))

if problematic_files:
    print(f"\n--- {len(problematic_files)} Problematic audio files found and skipped ---")
    problematic_df = pd.DataFrame(problematic_files, columns=['audio_ffile', 'email', 'Reason'])
    print(problematic_df.to_markdown(index=False, numalign="left", stralign="left"))
    print("\nTip: Use `ffmpeg -i input.wav -ar 16000 -ac 1 -c:a pcm_s16le output_converted.wav` to convert problematic files.")
else:
    print("\nAll audio files verified successfully!")

# Create a new DataFrame with only verified files
if not verified_data_for_df:
    raise ValueError("No valid audio files found after verification. Please check your data.")

existing_files_df = pd.DataFrame(verified_data_for_df)
existing_files_df['speaker_id'] = existing_files_df['email'].astype('category').cat.codes
speaker_mapping = dict(enumerate(existing_files_df['email'].astype('category').cat.categories))
num_speakers = len(speaker_mapping)

print(f"\n--- Speaker Mapping (Total Unique Speakers: {num_speakers}) ---")
print(speaker_mapping)

# --- CRITICAL FIX for ValueError in train_test_split ---
# Split data into training and testing sets
# We use test_size=num_speakers to ensure there's at least one sample per speaker in the test set.
# This is required for stratification when the number of speakers is high and samples per speaker are low.
if len(existing_files_df) < num_speakers * 2: # Check if there are enough samples for a reasonable split
    print("\nWARNING: Dataset has very few samples per speaker. Stratification might be difficult.")
    print(f"Total samples: {len(existing_files_df)}, Unique speakers: {num_speakers}")
    # Fallback to a non-stratified split if stratification is impossible
    # Or adjust test_size to be a fraction that might pass, but not ideal for evaluation
    train_df, test_df = train_test_split(
        existing_files_df,
        test_size=max(1, min(int(0.2 * len(existing_files_df)), num_speakers)), # At least 1, max 20% or num_speakers
        random_state=42,
        stratify=None # Disable stratification as it's problematic with few samples per class
    )
    print("Proceeding with NON-STRATIFIED split due to limited samples per speaker.")
else:
    train_df, test_df = train_test_split(
        existing_files_df,
        test_size=num_speakers, # Set test_size to the absolute number of speakers for stratification
        random_state=42,
        stratify=existing_files_df['speaker_id'] # This now ensures each speaker is in the test set
    )
    print(f"Using STRATIFIED split with {len(test_df)} samples in test set.")

print(f"\n--- Dataset Split for Training and Testing ---")
print(f"Training samples: {len(train_df)}")
print(f"Testing samples: {len(test_df)}")

Successfully located project folder at: /content/drive/MyDrive/project

--- Loading data from CSV ---

--- Verifying audio file paths and formats... ---




Verifying audio files:   0%|          | 0/147 [00:00<?, ?it/s]


All audio files verified successfully!

--- Speaker Mapping (Total Unique Speakers: 52) ---
{0: 'Ayeshasiddika@email.com', 1: 'Hafsa2021bd@gmail.com', 2: 'Sowmickpaul668@gmail.com', 3: 'aaratrikutsav.16@gmail.com', 4: 'antordu@gmail.com', 5: 'apu121@gmail.com', 6: 'apusaha567@gmail.com', 7: 'ashesroy234@gmail.com', 8: 'ashrafijannat75@gmail.com', 9: 'avijit107@gmail.com', 10: 'bipul205@gmail.com', 11: 'dip123@gmail.com', 12: 'diponroydipu29@gmail.com', 13: 'hakim25@gmail.com', 14: 'hasibshikderpss@gmail.com', 15: 'ihsmurf22@gmail.com', 16: 'imranhosen@gmail.com', 17: 'indoniciasarver82@gmail.com', 18: 'istiak23234@gmail.com', 19: 'jamilemon2005@gmail.com', 20: 'jopu123@gmail.com', 21: 'mamun1234@gmail.com', 22: 'mdfizz@gmail.com', 23: 'mimroy2025@gmail.com', 24: 'mukul20@gmail.com', 25: 'musahid12@gmail.com', 26: 'noyonraydu@gmail.com', 27: 'pappu251618@gmail.com', 28: 'pipasha20@gmail.com', 29: 'pollobiroypoly10@gmail.com', 30: 'pordun2025@gmail.com', 31: 'porichoy25@gmail.com', 32: 

In [9]:
# Create a Custom PyTorch Dataset with MFCCs

class SpeakerDatasetMFCC(Dataset):
    def __init__(self, dataframe, target_sr=16000, num_mfcc=40, n_fft=400, hop_length=160):
        self.dataframe = dataframe
        self.target_sr = target_sr
        self.num_mfcc = num_mfcc
        self.max_len_sec = 30 # Fixed duration for training
        self.max_len_samples = self.max_len_sec * self.target_sr

        self.mfcc_transform = MFCC(
            sample_rate=target_sr, n_mfcc=num_mfcc, melkwargs={'n_fft': n_fft, 'hop_length': hop_length, 'n_mels': 128}
        )

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        audio_path = row['audio_path']
        label = row['speaker_id']

        try:
            waveform, sample_rate = torchaudio.load(audio_path)

            if sample_rate != self.target_sr:
                resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=self.target_sr)
                waveform = resampler(waveform)

            if waveform.shape[0] > 1: # Convert to mono if stereo
                waveform = torch.mean(waveform, dim=0, keepdim=True)
            # Ensure waveform is 2D (channels, samples) expected by MFCC transform
            elif waveform.ndim == 1:
                waveform = waveform.unsqueeze(0) # Add channel dimension if it's just (samples,)

            # Pad or truncate to a fixed length (max_len_samples)
            if waveform.shape[1] > self.max_len_samples:
                waveform = waveform[:, :self.max_len_samples]
            elif waveform.shape[1] < self.max_len_samples:
                padding = self.max_len_samples - waveform.shape[1]
                waveform = torch.nn.functional.pad(waveform, (0, padding))

            mfcc_features = self.mfcc_transform(waveform)

            # CRITICAL FIX: Squeeze the channel dimension (dim=0) here
            # mfcc_features original shape is (1, num_mfcc, num_frames) for mono audio
            # We want (num_mfcc, num_frames) for the Conv1d input after batching

            if mfcc_features.ndim == 3 and mfcc_features.shape[0] == 1:
                mfcc_features = mfcc_features.squeeze(0)

            return mfcc_features, torch.tensor(label, dtype=torch.long)

        except Exception as e:
            print(f"Error processing {audio_path}: {e}. Skipping this sample.")
            return None, None

# Custom collate_fn to handle None values (from failed audio loads) and ensure consistent stacking
def collate_fn(batch):
    batch = [item for item in batch if item[0] is not None]
    if not batch:
        return None, None

    mfccs, labels = zip(*batch)

    mfccs_stacked = torch.stack(mfccs)
    labels_stacked = torch.stack(labels)

    return mfccs_stacked, labels_stacked

# Create datasets and data loaders
train_dataset = SpeakerDatasetMFCC(train_df)
test_dataset = SpeakerDatasetMFCC(test_df)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

print(f"\n--- DataLoader created. Total training batches: {len(train_loader)} ---")


--- DataLoader created. Total training batches: 24 ---




In [10]:
# Define a Combined CNN-RNN Model from Scratch
# ==============================================================================
# This model uses CNN layers to extract local features from MFCCs and RNN (GRU)
# layers to capture temporal dependencies, followed by a classification head.

class SpeakerCNN_RNN(nn.Module):
    def __init__(self, num_speakers, num_mfcc=40, hidden_dim=128, rnn_layers=2, dropout_rate=0.3):
        super(SpeakerCNN_RNN, self).__init__()

        # CNN layers for feature extraction from MFCCs
        # Input: (batch_size, num_mfcc, sequence_length_frames)
        self.conv_layers = nn.Sequential(
            nn.Conv1d(num_mfcc, 64, kernel_size=5, padding=2), # Output: (B, 64, L)
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2), # Output: (B, 64, L/2)

            nn.Conv1d(64, 128, kernel_size=5, padding=2), # Output: (B, 128, L/2)
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2), # Output: (B, 128, L/4)

            nn.Conv1d(128, 256, kernel_size=5, padding=2), # Output: (B, 256, L/4)
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2) # Output: (B, 256, L/8)
        )

        # The output of CNN layers will be (batch_size, features, new_sequence_length)
        # We need to calculate the actual sequence length after pooling to correctly initialize RNN
        # Let's assume input sequence length for MFCCs of 30s @ 16kHz, hop_length=160
        # Frame length = (16000 * 30) = 480000 samples
        # Number of frames = (480000 - 400) / 160 + 1 = ~3000 frames
        # After 3 MaxPool1d(kernel_size=2), sequence length becomes 3000 / 2 / 2 / 2 = 375 frames

        rnn_input_size = 256 # Number of features from CNN output

        # RNN (GRU) layers for temporal modeling
        # Input to RNN: (batch_size, sequence_length_frames, features)
        self.rnn = nn.GRU(
            input_size=rnn_input_size,
            hidden_size=hidden_dim,
            num_layers=rnn_layers,
            bidirectional=True, # Bidirectional GRU for better context over time
            batch_first=True # Input and output tensors are provided as (batch, seq, feature)
        )

        # Global Average Pooling after RNN to get a fixed-size embedding for classification
        # We take the mean across the sequence dimension (dim=1)
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # Final fully connected layer for classification
        # hidden_dim * 2 because of bidirectional GRU
        self.fc_layer = nn.Linear(hidden_dim * 2, num_speakers)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # x shape: (batch_size, num_mfcc, sequence_length_frames)

        # CNN layers
        x = self.conv_layers(x) # Output shape: (batch_size, 256, reduced_sequence_length_frames)

        # Permute for RNN input: (batch_size, sequence_length_frames, features)
        x = x.permute(0, 2, 1)

        # RNN layers
        rnn_out, _ = self.rnn(x) # rnn_out shape: (batch_size, sequence_length_frames, hidden_dim * 2)

        # Apply Global Average Pooling across the sequence length dimension (dim=1)
        # Squeeze the resulting 1-dimensional output
        # Input for global_pool needs to be (batch_size, features, sequence_length)
        # So we permute rnn_out back
        pooled_output = self.global_pool(rnn_out.permute(0, 2, 1)).squeeze(-1) # Output: (batch_size, hidden_dim * 2)

        # Dropout for regularization
        x = self.dropout(pooled_output)

        # Final fully connected layer for classification
        x = self.fc_layer(x)
        return x

# Determine the device (GPU if available, else CPU)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"\n--- Initializing model on device: {device} ---")

# Initialize the model with the correct number of speakers and MFCC features
model = SpeakerCNN_RNN(num_speakers=num_speakers, num_mfcc=train_dataset.num_mfcc).to(device)

# Define Loss function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4) # Start with a small learning rate



--- Initializing model on device: cuda ---


In [11]:
# Train the Model

def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=100):
    model.train()
    print("\n--- Starting Training ---")
    best_accuracy = 0.0

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_train_predictions = 0
        total_train_samples = 0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Train)")
        for i, (inputs, labels) in enumerate(pbar):
            if inputs is None: # Skip batches with no valid samples
                pbar.set_postfix_str("Skipping empty batch")
                continue

            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad() # Zero the gradients
            outputs = model(inputs) # Forward pass
            loss = criterion(outputs, labels) # Calculate loss
            loss.backward() # Backward pass
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train_samples += labels.size(0)
            correct_train_predictions += (predicted == labels).sum().item()

            pbar.set_postfix({'loss': running_loss / (i+1), 'train_acc': 100 * correct_train_predictions / total_train_samples})

        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_accuracy = 100 * correct_train_predictions / total_train_samples

        # Evaluate on the test set after each epoch
        model.eval() # Set model to evaluation mode
        correct_test_predictions = 0
        total_test_samples = 0
        test_loss = 0.0

        with torch.no_grad(): # Disable gradient calculations for evaluation
            test_pbar = tqdm(test_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Test)")
            for inputs, labels in test_pbar:
                if inputs is None: continue
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total_test_samples += labels.size(0)
                correct_test_predictions += (predicted == labels).sum().item()
                test_pbar.set_postfix({'test_loss': test_loss / (test_pbar.n + 1), 'test_acc': 100 * correct_test_predictions / total_test_samples})

        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_accuracy = 100 * correct_test_predictions / total_test_samples

        print(f"Epoch {epoch+1} Summary: Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_accuracy:.2f}%, "
              f"Test Loss: {epoch_test_loss:.4f}, Test Acc: {epoch_test_accuracy:.2f}%")

        # Save the model if it's the best one so far (based on test accuracy)
        if epoch_test_accuracy > best_accuracy:
            best_accuracy = epoch_test_accuracy
            SAVE_DIR = os.path.join(PROJECT_ROOT_DIR, 'saved_models_scratch_mfcc_rnn')
            os.makedirs(SAVE_DIR, exist_ok=True)
            model_save_path = os.path.join(SAVE_DIR, 'speaker_cnn_rnn_best.pth')
            torch.save(model.state_dict(), model_save_path)
            print(f"New best model saved with Test Accuracy: {best_accuracy:.2f}%")

        model.train() # Set model back to training mode for the next epoch

train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=100) # Training for 100 epochs



--- Starting Training ---


Epoch 1/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 1/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 1 Summary: Train Loss: 3.9889, Train Acc: 2.11%, Test Loss: 3.8794, Test Acc: 7.69%
New best model saved with Test Accuracy: 7.69%


Epoch 2/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 2/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 2 Summary: Train Loss: 3.8509, Train Acc: 8.42%, Test Loss: 3.7961, Test Acc: 7.69%


Epoch 3/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 3/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 3 Summary: Train Loss: 3.7708, Train Acc: 7.37%, Test Loss: 3.7360, Test Acc: 11.54%
New best model saved with Test Accuracy: 11.54%


Epoch 4/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 4/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 4 Summary: Train Loss: 3.7011, Train Acc: 14.74%, Test Loss: 3.6692, Test Acc: 11.54%


Epoch 5/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 5/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 5 Summary: Train Loss: 3.6412, Train Acc: 13.68%, Test Loss: 3.6059, Test Acc: 15.38%
New best model saved with Test Accuracy: 15.38%


Epoch 6/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 6/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 6 Summary: Train Loss: 3.5434, Train Acc: 12.63%, Test Loss: 3.5440, Test Acc: 17.31%
New best model saved with Test Accuracy: 17.31%


Epoch 7/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 7/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 7 Summary: Train Loss: 3.5154, Train Acc: 16.84%, Test Loss: 3.4841, Test Acc: 7.69%


Epoch 8/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 8/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 8 Summary: Train Loss: 3.4470, Train Acc: 15.79%, Test Loss: 3.4271, Test Acc: 17.31%


Epoch 9/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 9/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 9 Summary: Train Loss: 3.3899, Train Acc: 21.05%, Test Loss: 3.3887, Test Acc: 19.23%
New best model saved with Test Accuracy: 19.23%


Epoch 10/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 10/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 10 Summary: Train Loss: 3.3401, Train Acc: 17.89%, Test Loss: 3.3261, Test Acc: 17.31%


Epoch 11/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 11/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 11 Summary: Train Loss: 3.2808, Train Acc: 23.16%, Test Loss: 3.2768, Test Acc: 21.15%
New best model saved with Test Accuracy: 21.15%


Epoch 12/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 12/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 12 Summary: Train Loss: 3.1763, Train Acc: 23.16%, Test Loss: 3.2222, Test Acc: 26.92%
New best model saved with Test Accuracy: 26.92%


Epoch 13/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 13/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 13 Summary: Train Loss: 3.1922, Train Acc: 24.21%, Test Loss: 3.1764, Test Acc: 25.00%


Epoch 14/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 14/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 14 Summary: Train Loss: 3.1003, Train Acc: 28.42%, Test Loss: 3.1259, Test Acc: 28.85%
New best model saved with Test Accuracy: 28.85%


Epoch 15/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 15/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 15 Summary: Train Loss: 3.0071, Train Acc: 34.74%, Test Loss: 3.0825, Test Acc: 28.85%


Epoch 16/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 16/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 16 Summary: Train Loss: 2.9335, Train Acc: 34.74%, Test Loss: 3.0398, Test Acc: 38.46%
New best model saved with Test Accuracy: 38.46%


Epoch 17/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 17/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 17 Summary: Train Loss: 2.9246, Train Acc: 33.68%, Test Loss: 2.9837, Test Acc: 36.54%


Epoch 18/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 18/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 18 Summary: Train Loss: 2.9469, Train Acc: 31.58%, Test Loss: 2.9469, Test Acc: 36.54%


Epoch 19/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 19/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 19 Summary: Train Loss: 2.7916, Train Acc: 36.84%, Test Loss: 2.8864, Test Acc: 40.38%
New best model saved with Test Accuracy: 40.38%


Epoch 20/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 20/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 20 Summary: Train Loss: 2.7746, Train Acc: 38.95%, Test Loss: 2.8259, Test Acc: 40.38%


Epoch 21/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 21/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 21 Summary: Train Loss: 2.7403, Train Acc: 48.42%, Test Loss: 2.7743, Test Acc: 42.31%
New best model saved with Test Accuracy: 42.31%


Epoch 22/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 22/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 22 Summary: Train Loss: 2.6392, Train Acc: 51.58%, Test Loss: 2.7500, Test Acc: 40.38%


Epoch 23/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 23/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 23 Summary: Train Loss: 2.6000, Train Acc: 48.42%, Test Loss: 2.6854, Test Acc: 44.23%
New best model saved with Test Accuracy: 44.23%


Epoch 24/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 24/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 24 Summary: Train Loss: 2.5310, Train Acc: 45.26%, Test Loss: 2.6160, Test Acc: 53.85%
New best model saved with Test Accuracy: 53.85%


Epoch 25/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 25/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 25 Summary: Train Loss: 2.4634, Train Acc: 52.63%, Test Loss: 2.6149, Test Acc: 46.15%


Epoch 26/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 26/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 26 Summary: Train Loss: 2.3873, Train Acc: 56.84%, Test Loss: 2.5476, Test Acc: 48.08%


Epoch 27/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 27/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 27 Summary: Train Loss: 2.2625, Train Acc: 54.74%, Test Loss: 2.5040, Test Acc: 48.08%


Epoch 28/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 28/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 28 Summary: Train Loss: 2.3143, Train Acc: 58.95%, Test Loss: 2.4510, Test Acc: 51.92%


Epoch 29/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 29/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 29 Summary: Train Loss: 2.1389, Train Acc: 58.95%, Test Loss: 2.4321, Test Acc: 51.92%


Epoch 30/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 30/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 30 Summary: Train Loss: 2.1836, Train Acc: 57.89%, Test Loss: 2.3646, Test Acc: 50.00%


Epoch 31/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 31/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 31 Summary: Train Loss: 2.1216, Train Acc: 57.89%, Test Loss: 2.3206, Test Acc: 55.77%
New best model saved with Test Accuracy: 55.77%


Epoch 32/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 32/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 32 Summary: Train Loss: 2.0722, Train Acc: 60.00%, Test Loss: 2.2744, Test Acc: 51.92%


Epoch 33/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 33/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 33 Summary: Train Loss: 2.0129, Train Acc: 58.95%, Test Loss: 2.2668, Test Acc: 57.69%
New best model saved with Test Accuracy: 57.69%


Epoch 34/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 34/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 34 Summary: Train Loss: 1.9202, Train Acc: 66.32%, Test Loss: 2.1914, Test Acc: 53.85%


Epoch 35/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 35/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 35 Summary: Train Loss: 1.9029, Train Acc: 62.11%, Test Loss: 2.1863, Test Acc: 59.62%
New best model saved with Test Accuracy: 59.62%


Epoch 36/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 36/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 36 Summary: Train Loss: 1.7869, Train Acc: 67.37%, Test Loss: 2.1388, Test Acc: 55.77%


Epoch 37/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 37/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 37 Summary: Train Loss: 1.7794, Train Acc: 65.26%, Test Loss: 2.0949, Test Acc: 55.77%


Epoch 38/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 38/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 38 Summary: Train Loss: 1.7011, Train Acc: 68.42%, Test Loss: 2.0645, Test Acc: 55.77%


Epoch 39/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 39/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 39 Summary: Train Loss: 1.7405, Train Acc: 64.21%, Test Loss: 2.0172, Test Acc: 59.62%


Epoch 40/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 40/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 40 Summary: Train Loss: 1.7075, Train Acc: 70.53%, Test Loss: 1.9767, Test Acc: 55.77%


Epoch 41/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 41/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 41 Summary: Train Loss: 1.6566, Train Acc: 71.58%, Test Loss: 1.9570, Test Acc: 59.62%


Epoch 42/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 42/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 42 Summary: Train Loss: 1.5849, Train Acc: 68.42%, Test Loss: 1.9121, Test Acc: 59.62%


Epoch 43/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 43/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 43 Summary: Train Loss: 1.6175, Train Acc: 66.32%, Test Loss: 1.9107, Test Acc: 59.62%


Epoch 44/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 44/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 44 Summary: Train Loss: 1.4810, Train Acc: 74.74%, Test Loss: 1.8734, Test Acc: 59.62%


Epoch 45/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 45/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 45 Summary: Train Loss: 1.4619, Train Acc: 74.74%, Test Loss: 1.8453, Test Acc: 63.46%
New best model saved with Test Accuracy: 63.46%


Epoch 46/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 46/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 46 Summary: Train Loss: 1.3718, Train Acc: 73.68%, Test Loss: 1.8429, Test Acc: 65.38%
New best model saved with Test Accuracy: 65.38%


Epoch 47/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 47/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 47 Summary: Train Loss: 1.3864, Train Acc: 76.84%, Test Loss: 1.8120, Test Acc: 63.46%


Epoch 48/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 48/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 48 Summary: Train Loss: 1.3144, Train Acc: 78.95%, Test Loss: 1.7757, Test Acc: 63.46%


Epoch 49/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 49/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 49 Summary: Train Loss: 1.3067, Train Acc: 77.89%, Test Loss: 1.7336, Test Acc: 63.46%


Epoch 50/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 50/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 50 Summary: Train Loss: 1.2513, Train Acc: 76.84%, Test Loss: 1.7224, Test Acc: 67.31%
New best model saved with Test Accuracy: 67.31%


Epoch 51/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 51/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 51 Summary: Train Loss: 1.1729, Train Acc: 84.21%, Test Loss: 1.6752, Test Acc: 61.54%


Epoch 52/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 52/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 52 Summary: Train Loss: 1.2036, Train Acc: 81.05%, Test Loss: 1.6329, Test Acc: 63.46%


Epoch 53/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 53/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 53 Summary: Train Loss: 1.2507, Train Acc: 76.84%, Test Loss: 1.6300, Test Acc: 63.46%


Epoch 54/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 54/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 54 Summary: Train Loss: 1.1721, Train Acc: 78.95%, Test Loss: 1.6361, Test Acc: 65.38%


Epoch 55/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 55/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 55 Summary: Train Loss: 1.1142, Train Acc: 81.05%, Test Loss: 1.5852, Test Acc: 67.31%


Epoch 56/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 56/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 56 Summary: Train Loss: 1.1096, Train Acc: 78.95%, Test Loss: 1.5664, Test Acc: 73.08%
New best model saved with Test Accuracy: 73.08%


Epoch 57/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 57/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 57 Summary: Train Loss: 1.0441, Train Acc: 81.05%, Test Loss: 1.5501, Test Acc: 65.38%


Epoch 58/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 58/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 58 Summary: Train Loss: 0.9940, Train Acc: 86.32%, Test Loss: 1.5033, Test Acc: 69.23%


Epoch 59/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 59/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 59 Summary: Train Loss: 0.9625, Train Acc: 86.32%, Test Loss: 1.4959, Test Acc: 63.46%


Epoch 60/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 60/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 60 Summary: Train Loss: 0.9363, Train Acc: 85.26%, Test Loss: 1.4663, Test Acc: 71.15%


Epoch 61/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 61/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 61 Summary: Train Loss: 0.8814, Train Acc: 91.58%, Test Loss: 1.4550, Test Acc: 69.23%


Epoch 62/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 62/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 62 Summary: Train Loss: 0.8392, Train Acc: 92.63%, Test Loss: 1.4215, Test Acc: 67.31%


Epoch 63/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 63/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 63 Summary: Train Loss: 0.8636, Train Acc: 90.53%, Test Loss: 1.4164, Test Acc: 67.31%


Epoch 64/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 64/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 64 Summary: Train Loss: 0.8098, Train Acc: 91.58%, Test Loss: 1.3724, Test Acc: 71.15%


Epoch 65/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 65/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 65 Summary: Train Loss: 0.7655, Train Acc: 92.63%, Test Loss: 1.3430, Test Acc: 71.15%


Epoch 66/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 66/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 66 Summary: Train Loss: 0.7390, Train Acc: 95.79%, Test Loss: 1.3550, Test Acc: 65.38%


Epoch 67/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 67/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 67 Summary: Train Loss: 0.7480, Train Acc: 94.74%, Test Loss: 1.3242, Test Acc: 71.15%


Epoch 68/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 68/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 68 Summary: Train Loss: 0.7131, Train Acc: 97.89%, Test Loss: 1.2752, Test Acc: 73.08%


Epoch 69/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 69/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 69 Summary: Train Loss: 0.7414, Train Acc: 94.74%, Test Loss: 1.2838, Test Acc: 71.15%


Epoch 70/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 70/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 70 Summary: Train Loss: 0.6431, Train Acc: 95.79%, Test Loss: 1.2850, Test Acc: 71.15%


Epoch 71/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 71/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 71 Summary: Train Loss: 0.6929, Train Acc: 93.68%, Test Loss: 1.2675, Test Acc: 69.23%


Epoch 72/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 72/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 72 Summary: Train Loss: 0.5990, Train Acc: 98.95%, Test Loss: 1.2440, Test Acc: 73.08%


Epoch 73/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 73/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 73 Summary: Train Loss: 0.5663, Train Acc: 96.84%, Test Loss: 1.2348, Test Acc: 73.08%


Epoch 74/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 74/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 74 Summary: Train Loss: 0.5203, Train Acc: 97.89%, Test Loss: 1.2070, Test Acc: 71.15%


Epoch 75/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 75/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 75 Summary: Train Loss: 0.5089, Train Acc: 98.95%, Test Loss: 1.2247, Test Acc: 71.15%


Epoch 76/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 76/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 76 Summary: Train Loss: 0.6009, Train Acc: 97.89%, Test Loss: 1.2514, Test Acc: 69.23%


Epoch 77/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 77/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 77 Summary: Train Loss: 0.5641, Train Acc: 97.89%, Test Loss: 1.1825, Test Acc: 76.92%
New best model saved with Test Accuracy: 76.92%


Epoch 78/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 78/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 78 Summary: Train Loss: 0.5604, Train Acc: 93.68%, Test Loss: 1.2244, Test Acc: 73.08%


Epoch 79/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 79/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 79 Summary: Train Loss: 0.4976, Train Acc: 95.79%, Test Loss: 1.1508, Test Acc: 75.00%


Epoch 80/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 80/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 80 Summary: Train Loss: 0.4642, Train Acc: 97.89%, Test Loss: 1.1285, Test Acc: 75.00%


Epoch 81/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 81/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 81 Summary: Train Loss: 0.4729, Train Acc: 98.95%, Test Loss: 1.1489, Test Acc: 73.08%


Epoch 82/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 82/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 82 Summary: Train Loss: 0.4669, Train Acc: 98.95%, Test Loss: 1.0912, Test Acc: 76.92%


Epoch 83/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 83/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 83 Summary: Train Loss: 0.4011, Train Acc: 98.95%, Test Loss: 1.1013, Test Acc: 75.00%


Epoch 84/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 84/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 84 Summary: Train Loss: 0.3683, Train Acc: 98.95%, Test Loss: 1.0773, Test Acc: 78.85%
New best model saved with Test Accuracy: 78.85%


Epoch 85/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 85/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 85 Summary: Train Loss: 0.3589, Train Acc: 100.00%, Test Loss: 1.0958, Test Acc: 76.92%


Epoch 86/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 86/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 86 Summary: Train Loss: 0.3523, Train Acc: 100.00%, Test Loss: 1.0930, Test Acc: 76.92%


Epoch 87/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 87/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 87 Summary: Train Loss: 0.3779, Train Acc: 100.00%, Test Loss: 1.0627, Test Acc: 78.85%


Epoch 88/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 88/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 88 Summary: Train Loss: 0.3420, Train Acc: 100.00%, Test Loss: 1.0695, Test Acc: 76.92%


Epoch 89/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 89/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 89 Summary: Train Loss: 0.2971, Train Acc: 100.00%, Test Loss: 1.0658, Test Acc: 76.92%


Epoch 90/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 90/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 90 Summary: Train Loss: 0.2971, Train Acc: 100.00%, Test Loss: 1.0240, Test Acc: 76.92%


Epoch 91/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 91/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 91 Summary: Train Loss: 0.2891, Train Acc: 100.00%, Test Loss: 1.0161, Test Acc: 78.85%


Epoch 92/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 92/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 92 Summary: Train Loss: 0.2700, Train Acc: 100.00%, Test Loss: 1.0393, Test Acc: 78.85%


Epoch 93/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 93/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 93 Summary: Train Loss: 0.2754, Train Acc: 100.00%, Test Loss: 1.0173, Test Acc: 75.00%


Epoch 94/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 94/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 94 Summary: Train Loss: 0.2742, Train Acc: 100.00%, Test Loss: 0.9996, Test Acc: 75.00%


Epoch 95/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 95/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 95 Summary: Train Loss: 0.2856, Train Acc: 98.95%, Test Loss: 1.0013, Test Acc: 78.85%


Epoch 96/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 96/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 96 Summary: Train Loss: 0.2417, Train Acc: 100.00%, Test Loss: 0.9926, Test Acc: 76.92%


Epoch 97/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 97/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 97 Summary: Train Loss: 0.2505, Train Acc: 100.00%, Test Loss: 0.9815, Test Acc: 76.92%


Epoch 98/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 98/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 98 Summary: Train Loss: 0.2254, Train Acc: 100.00%, Test Loss: 0.9960, Test Acc: 78.85%


Epoch 99/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 99/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 99 Summary: Train Loss: 0.2310, Train Acc: 98.95%, Test Loss: 0.9636, Test Acc: 76.92%


Epoch 100/100 (Train):   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 100/100 (Test):   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 100 Summary: Train Loss: 0.2139, Train Acc: 100.00%, Test Loss: 0.9892, Test Acc: 76.92%


In [12]:
# Step 6: Save the Trained Model and Speaker Mapping (Final Save)
# ==============================================================================
# Even if a "best" model was saved, we save the final state as well.

SAVE_DIR = os.path.join(PROJECT_ROOT_DIR, 'saved_models_scratch_mfcc_rnn')
os.makedirs(SAVE_DIR, exist_ok=True) # Ensure directory exists
model_final_save_path = os.path.join(SAVE_DIR, 'speaker_cnn_rnn_final.pth')
mapping_save_path = os.path.join(SAVE_DIR, 'speaker_mapping.joblib')

joblib.dump(speaker_mapping, mapping_save_path)
torch.save(model.state_dict(), model_final_save_path)

print(f"\n--- Final Trained Model Saved to: {model_final_save_path} ---")
print(f"Speaker Mapping Saved to: {mapping_save_path}")



--- Final Trained Model Saved to: /content/drive/MyDrive/project/saved_models_scratch_mfcc_rnn/speaker_cnn_rnn_final.pth ---
Speaker Mapping Saved to: /content/drive/MyDrive/project/saved_models_scratch_mfcc_rnn/speaker_mapping.joblib


In [1]:
# Inference (Detect a Person from a New Audio File - Interactive Upload)


from google.colab import files
import soundfile as sf
import numpy as np
import os # Ensure os is imported for path operations
import torch # Make sure torch is imported
from torchaudio.transforms import MFCC # Make sure MFCC is imported
import joblib # Make sure joblib is imported
import pandas as pd # Make sure pandas is imported

# --- The predict_speaker_from_audio function (remains the same as last fix) ---
def predict_speaker_from_audio(model, audio_file_path, speaker_mapping,
                               target_sr=16000, num_mfcc=40, n_fft=400, hop_length=160):
    model.eval() # Set model to evaluation mode
    mfcc_transform = MFCC(sample_rate=target_sr, n_mfcc=num_mfcc, melkwargs={'n_fft': n_fft, 'hop_length': hop_length})
    max_len_samples = 30 * target_sr # Ensure consistent audio length for inference

    try:
        if not os.path.exists(audio_file_path):
            raise FileNotFoundError(f"Audio file not found: {audio_file_path}")

        waveform, sample_rate = torchaudio.load(audio_file_path)
        if sample_rate != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sr)
            waveform = resampler(waveform)
        if waveform.shape[0] > 1: # Convert to mono
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        elif waveform.ndim == 1: # Add channel dimension if it was (samples,)
            waveform = waveform.unsqueeze(0)

        # Pad/truncate to max_len_samples
        if waveform.shape[1] > max_len_samples:
            waveform = waveform[:, :max_len_samples]
        elif waveform.shape[1] < max_len_samples:
            padding = max_len_samples - waveform.shape[1]
            waveform = torch.nn.functional.pad(waveform, (0, padding))

        # Convert to MFCCs
        mfcc_features = mfcc_transform(waveform)

        # CRITICAL FIX: Squeeze the channel dimension (dim=0) for inference
        if mfcc_features.ndim == 3 and mfcc_features.shape[0] == 1:
            mfcc_features = mfcc_features.squeeze(0)

        # Get device from model parameters
        input_tensor = mfcc_features.unsqueeze(0).to(next(model.parameters()).device)

        with torch.no_grad(): # No need to calculate gradients for inference
            outputs = model(input_tensor)
            probabilities = torch.softmax(outputs, dim=1)
            confidence, predicted_id_tensor = torch.max(probabilities, 1)

            predicted_id = predicted_id_tensor.item()
            predicted_confidence = confidence.item()

        predicted_email = speaker_mapping[predicted_id]

        return predicted_email, predicted_confidence

    except Exception as e:
        print(f"Error during inference for {audio_file_path}: {e}")
        return None, None


# --- Reload the trained model and speaker mapping for inference ---
# This section is now made more robust to run independently if needed.

# Define necessary paths and parameters (MUST MATCH TRAINING CONFIG)
PROJECT_ROOT_DIR = '/content/drive/MyDrive/project'
MODEL_SAVE_DIR = os.path.join(PROJECT_ROOT_DIR, 'saved_models_scratch_mfcc_rnn')
BEST_MODEL_PATH = os.path.join(MODEL_SAVE_DIR, 'speaker_cnn_rnn_best.pth')
FINAL_MODEL_PATH = os.path.join(MODEL_SAVE_DIR, 'speaker_cnn_rnn_final.pth')
MAPPING_PATH = os.path.join(MODEL_SAVE_DIR, 'speaker_mapping.joblib')
CSV_PATH = os.path.join(PROJECT_ROOT_DIR, 'main_data.csv')
AUDIO_FOLDER_PATH = os.path.join(PROJECT_ROOT_DIR, 'voices') # Needed for existing_files_df

# Ensure Google Drive is mounted if running this cell independently
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True) # force_remount=True can help if issues persist
except ImportError:
    print("Not in Colab environment. Skipping drive mount.")


# Load speaker mapping first to get num_speakers
try:
    loaded_speaker_mapping = joblib.load(MAPPING_PATH)
    num_speakers = len(loaded_speaker_mapping)
    print(f"Loaded speaker mapping. Total unique speakers: {num_speakers}")
except FileNotFoundError:
    print(f"Error: Speaker mapping not found at {MAPPING_PATH}. Please ensure training completed successfully.")
    # Fallback or exit if mapping is crucial
    num_speakers = 52 # Fallback to a known number if mapping not found, adjust as needed
    loaded_speaker_mapping = {i: f"unknown_speaker_{i}" for i in range(num_speakers)} # Dummy mapping
    print("Using dummy speaker mapping. Prediction results may be inaccurate.")
except Exception as e:
    print(f"Error loading speaker mapping: {e}")
    # Handle other potential errors during loading

# Define the model architecture (MUST MATCH TRAINING)
# Assuming num_mfcc was 40 during training.
num_mfcc_features_trained = 40 # This must match the `num_mfcc` used in SpeakerDatasetMFCC during training
from torch.nn import Sequential, Conv1d, BatchNorm1d, ReLU, MaxPool1d, GRU, AdaptiveAvgPool1d, Linear, Dropout # Import necessary layers
# Re-define the model class if not globally accessible (copy from Step 4)
class SpeakerCNN_RNN(nn.Module):
    def __init__(self, num_speakers, num_mfcc=40, hidden_dim=128, rnn_layers=2, dropout_rate=0.3):
        super(SpeakerCNN_RNN, self).__init__()
        self.conv_layers = Sequential(
            Conv1d(num_mfcc, 64, kernel_size=5, padding=2), BatchNorm1d(64), ReLU(), MaxPool1d(kernel_size=2),
            Conv1d(64, 128, kernel_size=5, padding=2), BatchNorm1d(128), ReLU(), MaxPool1d(kernel_size=2),
            Conv1d(128, 256, kernel_size=5, padding=2), BatchNorm1d(256), ReLU(), MaxPool1d(kernel_size=2)
        )
        rnn_input_size = 256
        self.rnn = GRU(input_size=rnn_input_size, hidden_size=hidden_dim, num_layers=rnn_layers, bidirectional=True, batch_first=True)
        self.global_pool = AdaptiveAvgPool1d(1)
        self.fc_layer = Linear(hidden_dim * 2, num_speakers)
        self.dropout = Dropout(dropout_rate)

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.permute(0, 2, 1)
        rnn_out, _ = self.rnn(x)
        pooled_output = self.global_pool(rnn_out.permute(0, 2, 1)).squeeze(-1)
        x = self.dropout(pooled_output)
        x = self.fc_layer(x)
        return x

# Determine the device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize and load the model state dict
loaded_model = SpeakerCNN_RNN(num_speakers=num_speakers, num_mfcc=num_mfcc_features_trained).to(device)

try:
    if os.path.exists(BEST_MODEL_PATH):
        loaded_model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=device))
        print(f"Loaded best model from: {BEST_MODEL_PATH}")
    elif os.path.exists(FINAL_MODEL_PATH):
        loaded_model.load_state_dict(torch.load(FINAL_MODEL_PATH, map_location=device))
        print(f"Loaded final model from: {FINAL_MODEL_PATH} (Best model not found)")
    else:
        raise FileNotFoundError("No trained model found. Please ensure training completed successfully.")
except Exception as e:
    print(f"Error loading model state dict: {e}")
    print("Please ensure the model was trained and saved correctly to Google Drive.")
    # Exit or handle gracefully if model loading fails
    # For now, we'll let it proceed, but prediction will fail without a loaded model.


# Load existing_files_df to get names
try:
    # Reload the full DataFrame and re-process it to get existing_files_df
    full_df = pd.read_csv(CSV_PATH)
    full_df['audio_path'] = full_df['audio_ffile'].apply(lambda x: os.path.join(AUDIO_FOLDER_PATH, x))

    # Re-filter for existing files to match the training data's `existing_files_df`
    # This part is simplified; in a real app, you'd save/load this filtered df
    existing_files_df_temp = full_df[full_df['audio_path'].apply(os.path.exists)].copy().reset_index(drop=True)
    existing_files_df_temp['speaker_id'] = existing_files_df_temp['email'].astype('category').cat.codes
    existing_files_df = existing_files_df_temp # Assign to the variable used in the main script
    print("Reloaded existing_files_df for name lookup.")

except Exception as e:
    print(f"Error reloading existing_files_df for name lookup: {e}")
    existing_files_df = pd.DataFrame({'email': [], 'name': []}) # Empty fallback


# --- Interactive File Upload for Prediction ---
print("\n--- Upload an audio file from your PC for speaker detection ---")
uploaded_files = files.upload() # This will open a file dialog

if uploaded_files:
    uploaded_file_name = list(uploaded_files.keys())[0]
    uploaded_file_path = os.path.join('/content/', uploaded_file_name)

    print(f"\nUploaded file: {uploaded_file_name}")
    print(f"File saved to: {uploaded_file_path}")

    print(f"\n--- Performing Inference on the uploaded audio file ---")

    # Predict the speaker using the uploaded file
    detected_email, confidence = predict_speaker_from_audio(
        loaded_model, uploaded_file_path, loaded_speaker_mapping,
        num_mfcc=num_mfcc_features_trained # Use the same num_mfcc as used for training
    )

    if detected_email:
        # Find the corresponding name from the original DataFrame
        detected_person_df = existing_files_df[existing_files_df['email'] == detected_email]
        detected_name = detected_person_df['name'].iloc[0] if not detected_person_df.empty else "Unknown"

        print("\n--- Detection Result ---")
        print(f"Corresponding Email ID: {detected_email}")
        print(f"Confidence: {confidence:.4f}")

    else:
        print("Detection failed for the uploaded file. No matching speaker found or an error occurred.")
        print("Ensure it's a clear recording of one of the trained speakers.")
        print(f"Best confidence achieved (if any): {confidence:.4f}") # Show confidence even if no match

else:
    print("No file was uploaded.")

Mounted at /content/drive
Loaded speaker mapping. Total unique speakers: 52


NameError: name 'nn' is not defined