In [1]:
!export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"

In [2]:
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: problems found:
        - require? [31m X[0m jupyter-js-widgets/extension


In [3]:
!export TF_CPP_MIN_LOG_LEVEL=2

In [4]:
import gc, torch
gc.collect()
torch.cuda.empty_cache()

In [5]:
import os
import torch
import numpy as np
import pandas as pd
import gc
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
import librosa
import json
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from transformers import EarlyStoppingCallback
import wandb
import colorama
from colorama import Fore, Back, Style

# Initialize colorama for cross-platform colored output
colorama.init()

class YAMNetBase(nn.Module):
    def __init__(self, num_classes=3):
        super(YAMNetBase, self).__init__()
        
        # YAMNet-like architecture
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        
        self.conv3 = nn.Sequential(
            nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        
        self.conv4 = nn.Sequential(
            nn.Conv1d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        
        # Adaptive pooling to handle variable length inputs
        self.adaptive_pool = nn.AdaptiveAvgPool1d(1)
        
        self.classifier = nn.Sequential(
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        # Input shape: (batch_size, 1, time_steps)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        
        # Global pooling
        x = self.adaptive_pool(x)
        x = x.flatten(1)
        
        # Classification
        x = self.classifier(x)
        return x

class AudioAugmenter:
    """Audio augmentation techniques with length preservation"""
    @staticmethod
    def pad_or_truncate(audio, max_length=32000):
        if len(audio) > max_length:
            return audio[:max_length]
        elif len(audio) < max_length:
            return np.pad(audio, (0, max_length - len(audio)), 'constant')
        return audio

    @staticmethod
    def add_noise(audio, noise_factor=0.005):
        try:
            noise = np.random.randn(len(audio))
            augmented = audio + noise_factor * noise
            augmented = np.clip(augmented, -1.0, 1.0)
            return augmented
        except Exception as e:
            print(f"Warning: Error in add_noise: {str(e)}")
            return audio

    @staticmethod
    def time_shift(audio, shift_max=0.1):
        try:
            shift = int(len(audio) * shift_max)
            return np.roll(audio, shift) if shift > 0 else audio
        except Exception as e:
            print(f"Warning: Error in time_shift: {str(e)}")
            return audio

    @staticmethod
    def change_speed(audio, speed_factor=0.2):
        try:
            audio = np.clip(audio, -1.0, 1.0)
            speed_change = np.random.uniform(low=0.9, high=1.1)
            
            augmented = librosa.effects.time_stretch(audio, rate=speed_change)
            augmented = np.clip(augmented, -1.0, 1.0)
            
            # Ensure fixed length after speed change
            augmented = AudioAugmenter.pad_or_truncate(augmented)
            
            return augmented
        except Exception as e:
            print(f"Warning: Error in change_speed: {str(e)}")
            return audio

    @staticmethod
    def augment(audio):
        audio = AudioAugmenter.pad_or_truncate(audio)
        audio = np.clip(audio, -1.0, 1.0)
        
        augmentation_list = ['noise', 'shift', 'speed']
        num_augments = np.random.randint(1, 3)
        selected_augments = np.random.choice(augmentation_list, num_augments, replace=False)
        
        augmented = audio.copy()
        for aug_type in selected_augments:
            try:
                if aug_type == 'noise':
                    augmented = AudioAugmenter.add_noise(augmented)
                elif aug_type == 'shift':
                    augmented = AudioAugmenter.time_shift(augmented)
                elif aug_type == 'speed':
                    augmented = AudioAugmenter.change_speed(augmented)
                
                augmented = np.clip(augmented, -1.0, 1.0)
                
            except Exception as e:
                print(f"Warning: Error during {aug_type} augmentation: {str(e)}")
                continue
        
        return augmented.astype(np.float32)

class AudioDataset(Dataset):
    def __init__(self, audio_data, labels, max_length=32000):
        self.audio_data = audio_data
        self.labels = labels
        self.max_length = max_length
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        audio = self.audio_data[idx].astype(np.float32)
        audio = self.pad_or_truncate(audio)
        
        # Add channel dimension for Conv1d
        audio = torch.FloatTensor(audio).unsqueeze(0)
        
        return {
            'input_values': audio,
            'label': torch.tensor(self.labels[idx])
        }
    
    def pad_or_truncate(self, audio):
        if len(audio) > self.max_length:
            return audio[:self.max_length]
        elif len(audio) < self.max_length:
            return np.pad(audio, (0, self.max_length - len(audio)), 'constant')
        return audio

class ConsoleVisualizer:
    """Handles console-based visualization of plots"""
    @staticmethod
    def plot_confusion_matrix(cm, labels):
        print("\nConfusion Matrix:")
        print("-" * 40)
        
        # Header
        print(f"{'':>10}", end='')
        for label in labels:
            print(f"{label:>10}", end='')
        print("\n")
        
        # Matrix
        for i, label in enumerate(labels):
            print(f"{label:>10}", end='')
            for j in range(len(labels)):
                if cm[i][j] == 0:
                    color = Fore.WHITE
                elif cm[i][j] == np.max(cm[i]):
                    color = Fore.GREEN
                else:
                    color = Fore.YELLOW
                print(f"{color}{cm[i][j]:>10}{Style.RESET_ALL}", end='')
            print()
        print("-" * 40)

    @staticmethod
    def plot_training_history(history):
        print("\nTraining History:")
        print("-" * 40)
        
        for epoch, metrics in enumerate(history):
            print(f"Epoch {epoch+1:>2}: "
                  f"Loss: {metrics['train_loss']:.4f} "
                  f"Val Loss: {metrics.get('val_loss', 'N/A')} "
                  f"Acc: {metrics.get('val_accuracy', 'N/A')}")

class YAMNetClassifier:
    def __init__(self, num_labels=3):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = YAMNetBase(num_labels).to(self.device)
        self.label_map = {'crying': 0, 'screaming': 1, 'normal': 2}
        self.augmenter = AudioAugmenter()
        self.visualizer = ConsoleVisualizer()
        self.max_length = 32000  # 2 seconds at 16kHz

    def load_audio_file(self, file_path, target_sr=16000):
        try:
            audio, sr = librosa.load(file_path, sr=target_sr, mono=True)
            audio = audio / (np.max(np.abs(audio)) + 1e-6)
            audio = AudioAugmenter.pad_or_truncate(audio, self.max_length)
            return audio.astype(np.float32)
        except Exception as e:
            print(f"Error processing {file_path}: {str(e)}")
            return None

    def prepare_dataset(self, data_dir, metadata_file, augment=False):
        print(f"Loading metadata from {metadata_file}")
        if not os.path.exists(metadata_file):
            raise FileNotFoundError(f"Metadata file not found: {metadata_file}")
        
        df = pd.read_csv(metadata_file)
        print(f"Loaded {len(df)} entries from metadata")
        
        audio_data = []
        labels = []
        
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Loading audio files"):
            file_path = os.path.join(data_dir, row['file_name'])
            if not os.path.exists(file_path):
                print(f"Warning: File not found: {file_path}")
                continue
                
            audio = self.load_audio_file(file_path)
            if audio is not None:
                audio_data.append(audio)
                labels.append(self.label_map[row['label']])
                
                if augment:
                    augmented_audio = self.augmenter.augment(audio)
                    audio_data.append(augmented_audio)
                    labels.append(self.label_map[row['label']])
        
        dataset = AudioDataset(audio_data, labels, self.max_length)
        print(f"Created dataset with {len(dataset)} examples")
        return dataset

    def save_model(self, path):
        os.makedirs(path, exist_ok=True)
        
        # Save model state
        torch.save(self.model.state_dict(), os.path.join(path, "model.pt"))
        
        # Save label map
        with open(os.path.join(path, "label_map.json"), "w") as f:
            json.dump(self.label_map, f)
        
        print(f"Model saved at {path}")

    def train_epoch(self, train_loader, optimizer, criterion):
        self.model.train()
        total_loss = 0
        correct = 0
        total = 0
        
        for batch in tqdm(train_loader, desc="Training"):
            optimizer.zero_grad()
            
            inputs = batch['input_values'].to(self.device)
            labels = batch['label'].to(self.device)
            
            outputs = self.model(inputs)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        return total_loss / len(train_loader), correct / total

    def evaluate(self, val_loader, criterion):
        self.model.eval()
        total_loss = 0
        correct = 0
        total = 0
        
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Evaluating"):
                inputs = batch['input_values'].to(self.device)
                labels = batch['label'].to(self.device)
                
                outputs = self.model(inputs)
                loss = criterion(outputs, labels)
                
                total_loss += loss.item()
                
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
                
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        return (
            total_loss / len(val_loader),
            correct / total,
            all_preds,
            all_labels
        )

    def train_kfold(self, dataset, output_dir, n_splits=5, use_wandb=False):
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        indices = np.arange(len(dataset))
        
        fold_metrics = []
        
        for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
            print(f"\nTraining Fold {fold + 1}/{n_splits}")
            
            train_fold = torch.utils.data.Subset(dataset, train_idx)
            val_fold = torch.utils.data.Subset(dataset, val_idx)
            
            train_loader = DataLoader(train_fold, batch_size=8, shuffle=True)
            val_loader = DataLoader(val_fold, batch_size=8)
            
            optimizer = torch.optim.AdamW(self.model.parameters(), lr=3e-5, weight_decay=0.01)
            criterion = nn.CrossEntropyLoss()
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.1, patience=2, verbose=True
            )
            
            best_val_loss = float('inf')
            patience = 3
            patience_counter = 0
            history = []
            
            for epoch in range(25):
                train_loss, train_acc = self.train_epoch(train_loader, optimizer, criterion)
                val_loss, val_acc, _, _ = self.evaluate(val_loader, criterion)
                
                metrics = {
                    'train_loss': train_loss,
                    'val_loss': val_loss,
                    'val_accuracy': val_acc
                }
                history.append(metrics)
                
                if use_wandb:
                    wandb.log(metrics)
                
                print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, "
                      f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
                
                scheduler.step(val_loss)
                
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    best_model_path = os.path.join(output_dir, f"fold_{fold + 1}_best.pt")
                    torch.save(self.model.state_dict(), best_model_path)
                    patience_counter = 0
                else:
                    patience_counter += 1
                
                if patience_counter >= patience:
                    print("Early stopping triggered")
                    break
            
            # Load best model for evaluation
            self.model.load_state_dict(torch.load(best_model_path))
            val_loss, val_acc, preds, labels = self.evaluate(val_loader, criterion)
            
            # Calculate and store fold metrics
            precision, recall, f1, _ = precision_recall_fscore_support(
                labels, preds, average='weighted'
            )
            
            fold_metrics.append({
                'val_loss': val_loss,
                'accuracy': val_acc,
                'f1': f1,
                'precision': precision,
                'recall': recall
            })
            
            # Visualize fold results
            self.visualizer.plot_training_history(history)
            
            # Clear CUDA memory
            del train_loader, val_loader
            torch.cuda.empty_cache()
            gc.collect()
        
        # Print average metrics across folds
        print("\nAverage Metrics Across Folds:")
        avg_metrics = {
            key: np.mean([fold[key] for fold in fold_metrics])
            for key in fold_metrics[0].keys()
        }
        print(json.dumps(avg_metrics, indent=2))
        
        return avg_metrics

    def generate_performance_report(self, test_dataset, output_dir):
        """Generate and visualize performance metrics"""
        test_loader = DataLoader(test_dataset, batch_size=8)
        criterion = nn.CrossEntropyLoss()
        
        # Evaluate model
        val_loss, accuracy, preds, labels = self.evaluate(test_loader, criterion)
        
        # Calculate metrics
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average='weighted'
        )
        
        metrics = {
            'test_loss': val_loss,
            'accuracy': accuracy,
            'f1': f1,
            'precision': precision,
            'recall': recall
        }
        
        # Generate and display confusion matrix
        cm = confusion_matrix(labels, preds)
        label_names = list(self.label_map.keys())
        self.visualizer.plot_confusion_matrix(cm, label_names)
        
        # Print metrics
        print("\nPerformance Metrics:")
        print("-" * 40)
        for metric, value in metrics.items():
            print(f"{metric}: {value:.4f}")
        
        # Save metrics
        with open(os.path.join(output_dir, 'metrics.json'), 'w') as f:
            json.dump(metrics, f, indent=4)
        
        return metrics

def main():
    # Set up output directory for YAMNet
    output_dir = "yamnet_model_output"
    os.makedirs(output_dir, exist_ok=True)
    
    try:
        print("Initializing YAMNet classifier...")
        classifier = YAMNetClassifier()
        
        print("\nPreparing datasets with augmentation...")
        dataset = classifier.prepare_dataset(
            "Split_Data/train",
            "Split_Data/train_metadata.csv",
            augment=True
        )
        
        # Train with k-fold cross validation
        print("\nStarting k-fold cross validation training...")
        metrics = classifier.train_kfold(dataset, output_dir, n_splits=5)
        
        # Save best model
        best_model_path = os.path.join(output_dir, "best_model")
        classifier.save_model(best_model_path)
        print(f"\nBest model saved at: {best_model_path}")
        
        # Generate performance report on test set
        print("\nPreparing test dataset...")
        test_dataset = classifier.prepare_dataset(
            "Split_Data/test",
            "Split_Data/test_metadata.csv",
            augment=False
        )
        
        print("\nGenerating performance report...")
        test_metrics = classifier.generate_performance_report(test_dataset, output_dir)
        
    except Exception as e:
        print(f"\nError during execution: {str(e)}")
        raise

if __name__ == "__main__":
    main()

Initializing YAMNet classifier...

Preparing datasets with augmentation...
Loading metadata from Split_Data/train_metadata.csv
Loaded 3129 entries from metadata


Loading audio files:   0%|          | 0/3129 [00:00<?, ?it/s]

Created dataset with 6258 examples

Starting k-fold cross validation training...

Training Fold 1/5


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 1: Train Loss: 0.8252, Val Loss: 0.6886, Val Acc: 0.6917


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 2: Train Loss: 0.7177, Val Loss: 0.6285, Val Acc: 0.7364


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 3: Train Loss: 0.6596, Val Loss: 0.5601, Val Acc: 0.7796


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 4: Train Loss: 0.6178, Val Loss: 0.5715, Val Acc: 0.7436


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 5: Train Loss: 0.5885, Val Loss: 0.6099, Val Acc: 0.7141


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 6: Train Loss: 0.5648, Val Loss: 0.4834, Val Acc: 0.8083


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 7: Train Loss: 0.5432, Val Loss: 0.5604, Val Acc: 0.7380


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 8: Train Loss: 0.5254, Val Loss: 0.4781, Val Acc: 0.8075


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 9: Train Loss: 0.5073, Val Loss: 0.4564, Val Acc: 0.8179


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 10: Train Loss: 0.5073, Val Loss: 0.4394, Val Acc: 0.8235


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 11: Train Loss: 0.4837, Val Loss: 0.5217, Val Acc: 0.7796


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 12: Train Loss: 0.4841, Val Loss: 0.4273, Val Acc: 0.8458


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 13: Train Loss: 0.4645, Val Loss: 0.4166, Val Acc: 0.8259


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 14: Train Loss: 0.4657, Val Loss: 0.4683, Val Acc: 0.7979


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 15: Train Loss: 0.4555, Val Loss: 0.4318, Val Acc: 0.8283


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 16: Train Loss: 0.4492, Val Loss: 0.4082, Val Acc: 0.8315


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 17: Train Loss: 0.4387, Val Loss: 0.3747, Val Acc: 0.8546


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 18: Train Loss: 0.4384, Val Loss: 0.3708, Val Acc: 0.8562


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 19: Train Loss: 0.4147, Val Loss: 0.3969, Val Acc: 0.8387


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 20: Train Loss: 0.4209, Val Loss: 0.3844, Val Acc: 0.8458


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 21: Train Loss: 0.4061, Val Loss: 0.4255, Val Acc: 0.8243
Early stopping triggered


Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]


Training History:
----------------------------------------
Epoch  1: Loss: 0.8252 Val Loss: 0.6885948266573013 Acc: 0.6916932907348243
Epoch  2: Loss: 0.7177 Val Loss: 0.6284679390822246 Acc: 0.7364217252396166
Epoch  3: Loss: 0.6596 Val Loss: 0.5600653132245799 Acc: 0.7795527156549521
Epoch  4: Loss: 0.6178 Val Loss: 0.5715206830175059 Acc: 0.7436102236421726
Epoch  5: Loss: 0.5885 Val Loss: 0.6098888739468945 Acc: 0.7140575079872205
Epoch  6: Loss: 0.5648 Val Loss: 0.4834252852163497 Acc: 0.8083067092651757
Epoch  7: Loss: 0.5432 Val Loss: 0.5603707989404916 Acc: 0.7380191693290735
Epoch  8: Loss: 0.5254 Val Loss: 0.478119468850315 Acc: 0.8075079872204473
Epoch  9: Loss: 0.5073 Val Loss: 0.456443318658194 Acc: 0.8178913738019169
Epoch 10: Loss: 0.5073 Val Loss: 0.4393716072960264 Acc: 0.8234824281150159
Epoch 11: Loss: 0.4837 Val Loss: 0.5217363264435416 Acc: 0.7795527156549521
Epoch 12: Loss: 0.4841 Val Loss: 0.42730150855840393 Acc: 0.8458466453674122
Epoch 13: Loss: 0.4645 Val Lo

Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 1: Train Loss: 0.4226, Val Loss: 0.4127, Val Acc: 0.8259


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 2: Train Loss: 0.4201, Val Loss: 0.3770, Val Acc: 0.8506


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 3: Train Loss: 0.4036, Val Loss: 0.3765, Val Acc: 0.8466


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 4: Train Loss: 0.4084, Val Loss: 0.4064, Val Acc: 0.8203


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 5: Train Loss: 0.4007, Val Loss: 0.3586, Val Acc: 0.8586


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 6: Train Loss: 0.3978, Val Loss: 0.3532, Val Acc: 0.8634


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 7: Train Loss: 0.3875, Val Loss: 0.3958, Val Acc: 0.8331


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 8: Train Loss: 0.3863, Val Loss: 0.3438, Val Acc: 0.8674


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 9: Train Loss: 0.3833, Val Loss: 0.3779, Val Acc: 0.8498


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 10: Train Loss: 0.3884, Val Loss: 0.3441, Val Acc: 0.8578


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 11: Train Loss: 0.3787, Val Loss: 0.4516, Val Acc: 0.7923
Early stopping triggered


Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]


Training History:
----------------------------------------
Epoch  1: Loss: 0.4226 Val Loss: 0.4126754237493132 Acc: 0.8258785942492013
Epoch  2: Loss: 0.4201 Val Loss: 0.3770373932969798 Acc: 0.8506389776357828
Epoch  3: Loss: 0.4036 Val Loss: 0.3765328195254514 Acc: 0.8466453674121406
Epoch  4: Loss: 0.4084 Val Loss: 0.4063609736455474 Acc: 0.8202875399361023
Epoch  5: Loss: 0.4007 Val Loss: 0.35862012254964015 Acc: 0.8586261980830671
Epoch  6: Loss: 0.3978 Val Loss: 0.35323838681979164 Acc: 0.8634185303514377
Epoch  7: Loss: 0.3875 Val Loss: 0.39578201557705356 Acc: 0.8330670926517572
Epoch  8: Loss: 0.3863 Val Loss: 0.343760255486912 Acc: 0.8674121405750799
Epoch  9: Loss: 0.3833 Val Loss: 0.37786255527738555 Acc: 0.8498402555910544
Epoch 10: Loss: 0.3884 Val Loss: 0.3440577535872247 Acc: 0.8578274760383386
Epoch 11: Loss: 0.3787 Val Loss: 0.4515590135269104 Acc: 0.792332268370607

Training Fold 3/5


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 1: Train Loss: 0.3948, Val Loss: 0.3384, Val Acc: 0.8570


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 2: Train Loss: 0.3842, Val Loss: 0.3190, Val Acc: 0.8666


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 3: Train Loss: 0.3767, Val Loss: 0.3293, Val Acc: 0.8642


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 4: Train Loss: 0.3713, Val Loss: 0.3867, Val Acc: 0.8347


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 5: Train Loss: 0.3631, Val Loss: 0.3400, Val Acc: 0.8674
Early stopping triggered


Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]


Training History:
----------------------------------------
Epoch  1: Loss: 0.3948 Val Loss: 0.3384090769490239 Acc: 0.8570287539936102
Epoch  2: Loss: 0.3842 Val Loss: 0.31896506615315273 Acc: 0.8666134185303515
Epoch  3: Loss: 0.3767 Val Loss: 0.32926075654045034 Acc: 0.8642172523961661
Epoch  4: Loss: 0.3713 Val Loss: 0.3866745247060706 Acc: 0.8346645367412141
Epoch  5: Loss: 0.3631 Val Loss: 0.3400428330252884 Acc: 0.8674121405750799

Training Fold 4/5


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 1: Train Loss: 0.3701, Val Loss: 0.3338, Val Acc: 0.8737


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 2: Train Loss: 0.3662, Val Loss: 0.3104, Val Acc: 0.8865


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 3: Train Loss: 0.3657, Val Loss: 0.3602, Val Acc: 0.8425


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 4: Train Loss: 0.3631, Val Loss: 0.3345, Val Acc: 0.8665


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 5: Train Loss: 0.3600, Val Loss: 0.3991, Val Acc: 0.8241
Early stopping triggered


Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]


Training History:
----------------------------------------
Epoch  1: Loss: 0.3701 Val Loss: 0.33378822778820233 Acc: 0.873701039168665
Epoch  2: Loss: 0.3662 Val Loss: 0.3103532313256507 Acc: 0.8864908073541167
Epoch  3: Loss: 0.3657 Val Loss: 0.36018256964102674 Acc: 0.8425259792166268
Epoch  4: Loss: 0.3631 Val Loss: 0.3345058272788479 Acc: 0.8665067945643485
Epoch  5: Loss: 0.3600 Val Loss: 0.3990614946670593 Acc: 0.82414068745004

Training Fold 5/5


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 1: Train Loss: 0.3676, Val Loss: 0.3392, Val Acc: 0.8569


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 2: Train Loss: 0.3671, Val Loss: 0.3112, Val Acc: 0.8809


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 3: Train Loss: 0.3611, Val Loss: 0.3328, Val Acc: 0.8617


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 4: Train Loss: 0.3592, Val Loss: 0.2922, Val Acc: 0.8833


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 5: Train Loss: 0.3536, Val Loss: 0.3375, Val Acc: 0.8689


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 6: Train Loss: 0.3489, Val Loss: 0.3286, Val Acc: 0.8577


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 7: Train Loss: 0.3518, Val Loss: 0.2878, Val Acc: 0.8897


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 8: Train Loss: 0.3490, Val Loss: 0.3012, Val Acc: 0.8929


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 9: Train Loss: 0.3341, Val Loss: 0.3665, Val Acc: 0.8457


Training:   0%|          | 0/626 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 10: Train Loss: 0.3392, Val Loss: 0.2953, Val Acc: 0.8857
Early stopping triggered


Evaluating:   0%|          | 0/157 [00:00<?, ?it/s]


Training History:
----------------------------------------
Epoch  1: Loss: 0.3676 Val Loss: 0.33917253628183325 Acc: 0.8569144684252598
Epoch  2: Loss: 0.3671 Val Loss: 0.3111643418432421 Acc: 0.8808952837729817
Epoch  3: Loss: 0.3611 Val Loss: 0.3327703738146147 Acc: 0.8617106314948042
Epoch  4: Loss: 0.3592 Val Loss: 0.292180992437491 Acc: 0.8832933653077538
Epoch  5: Loss: 0.3536 Val Loss: 0.33751254050642443 Acc: 0.8689048760991207
Epoch  6: Loss: 0.3489 Val Loss: 0.3285818680242938 Acc: 0.8577138289368506
Epoch  7: Loss: 0.3518 Val Loss: 0.28778177863995363 Acc: 0.8896882494004796
Epoch  8: Loss: 0.3490 Val Loss: 0.3012047198476495 Acc: 0.8928856914468425
Epoch  9: Loss: 0.3341 Val Loss: 0.36651169030577135 Acc: 0.8457234212629896
Epoch 10: Loss: 0.3392 Val Loss: 0.29530318508482284 Acc: 0.885691446842526

Average Metrics Across Folds:
{
  "val_loss": 0.326322738486965,
  "accuracy": 0.8732869295617819,
  "f1": 0.8728563493103503,
  "precision": 0.8765682022717295,
  "recall": 0.

Loading audio files:   0%|          | 0/671 [00:00<?, ?it/s]

Created dataset with 671 examples

Generating performance report...


Evaluating:   0%|          | 0/84 [00:00<?, ?it/s]


Confusion Matrix:
----------------------------------------
              crying screaming    normal

    crying       197        19         7
 screaming        43       172         9
    normal         9         3       212
----------------------------------------

Performance Metrics:
----------------------------------------
test_loss: 0.3484
accuracy: 0.8659
f1: 0.8653
precision: 0.8693
recall: 0.8659
