In [None]:
# Import libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from tqdm.notebook import tqdm

# Add src to path
sys.path.append('../../')
from src.models.cnn import AudioCNN
from src.models.lstm import AudioLSTM
from src.utils.config import load_config

# Choose the compute device: use CUDA when a GPU is visible, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Fix the NumPy and PyTorch seeds so shuffling and weight init are repeatable
np.random.seed(42)
torch.manual_seed(42)

## 1. Load Configuration and Data

In [None]:
# Load config
config = load_config('../../configs/config.yaml')

# Load preprocessed features
# os.path.join inserts separators where needed and leaves an absolute
# `processed_dir` untouched, which plain string concatenation would corrupt.
processed_dir = os.path.join('../../', config['data']['processed_dir'])
features = np.load(os.path.join(processed_dir, 'features.npy'))
labels = np.load(os.path.join(processed_dir, 'labels.npy'))

# Fail fast on misaligned arrays: every later cell indexes both with the same indices
if len(features) != len(labels):
    raise ValueError(
        f"features ({len(features)}) and labels ({len(labels)}) must contain the same number of samples"
    )

# Count the samples per class once and reuse it for both the class count and the table
unique, counts = np.unique(labels, return_counts=True)

print(f"Features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")
print(f"Number of classes: {len(unique)}")
print(f"\nClass distribution:")
for label, count in zip(unique, counts):
    print(f"  Class {label}: {count} samples")

## 2. Data Splitting and Preparation

In [None]:
# Split data
# Shuffle with a dedicated generator seeded with 42 instead of the global NumPy
# RNG. This makes the cell idempotent: re-running it, or running it after other
# code has drawn from np.random, always produces the same partition. On a fresh
# run it matches the permutation the globally seeded call would give, provided
# nothing else consumed the global RNG in between.
n_samples = len(features)
split_rng = np.random.RandomState(42)
indices = split_rng.permutation(n_samples)

# Cut points: [0, train_split) -> train, [train_split, val_split) -> val, rest -> test
train_fraction = config['data']['train_split']
val_fraction = config['data']['val_split']
train_split = int(n_samples * train_fraction)
val_split = int(n_samples * (train_fraction + val_fraction))

train_idx = indices[:train_split]
val_idx = indices[train_split:val_split]
test_idx = indices[val_split:]

X_train, y_train = features[train_idx], labels[train_idx]
X_val, y_val = features[val_idx], labels[val_idx]
X_test, y_test = features[test_idx], labels[test_idx]

print(f"Train set: {len(X_train)} samples")
print(f"Validation set: {len(X_val)} samples")
print(f"Test set: {len(X_test)} samples")

In [None]:
# Insert a singleton axis at position 1 of every split to act as the CNN's
# channel dimension (presumably yielding (N, 1, H, W) — confirm feature layout)
X_train_cnn, X_val_cnn, X_test_cnn = (
    np.expand_dims(split_features, axis=1)
    for split_features in (X_train, X_val, X_test)
)

print(f"CNN input shape: {X_train_cnn.shape}")

# Batch size comes from the training section of the config
batch_size = config['training']['batch_size']


def _as_tensor_dataset(inputs, targets):
    """Pair a feature array (as float tensor) with its labels (as long tensor)."""
    return TensorDataset(torch.FloatTensor(inputs), torch.LongTensor(targets))


train_dataset = _as_tensor_dataset(X_train_cnn, y_train)
val_dataset = _as_tensor_dataset(X_val_cnn, y_val)
test_dataset = _as_tensor_dataset(X_test_cnn, y_test)

# Only the training loader shuffles; validation and test keep their stored order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

print(f"✓ Data loaders created with batch size: {batch_size}")

## 3. Build Model

In [None]:
# Instantiate the CNN from the model section of the config and move it to the device
model_cfg = config['model']
model = AudioCNN(
    num_classes=model_cfg['num_classes'],
    input_channels=1,
    dropout=model_cfg['dropout'],
).to(device)

# Gather (size, trainable?) for every parameter tensor, then total both ways
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, is_trainable in param_sizes if is_trainable)

# NOTE(review): the architecture name printed below is read from the config,
# while the model built above is always AudioCNN — confirm the two agree.
print(f"Model Architecture: {model_cfg['architecture'].upper()}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"\nModel summary:")
print(model)

## 4. Training Setup

In [None]:
# Objective, optimiser and LR schedule (halve the LR after 5 epochs without a
# drop in the monitored value, which the training loop feeds with val loss)
train_cfg = config['training']
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=train_cfg['learning_rate'])
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Per-epoch metric history, one list per curve
history = {metric: [] for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}

# Early-stopping bookkeeping
best_val_acc = 0
epochs_no_improve = 0
patience = train_cfg['early_stopping']['patience']

# NOTE(review): the optimizer name printed below is read from the config,
# while the optimizer built above is always Adam — confirm the two agree.
print(f"Training configuration:")
print(f"  Epochs: {train_cfg['epochs']}")
print(f"  Learning rate: {train_cfg['learning_rate']}")
print(f"  Optimizer: {train_cfg['optimizer']}")
print(f"  Early stopping patience: {patience}")

## 5. Training Loop

In [None]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Train the model for one pass over ``loader``.

    Args:
        model: Module to optimise; switched to training mode.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable applied as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the mean of the per-batch loss values
        and the percentage (0-100) of samples whose highest-scoring class
        equals the target.

    Raises:
        ValueError: If ``loader`` yields no samples, since the metrics would
            otherwise be an opaque division by zero.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0  # counted while iterating so loaders without __len__ also work

    for data, target in tqdm(loader, desc='Training', leave=False):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = output.max(1)  # index of the highest score per sample
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        num_batches += 1

    if num_batches == 0 or total == 0:
        raise ValueError("train_epoch received an empty loader; nothing to train on")

    return total_loss / num_batches, 100. * correct / total

def validate(model, loader, criterion, device):
    """Evaluate the model on ``loader`` without updating its weights.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable applied as ``criterion(output, target)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the mean of the per-batch loss values
        and the percentage (0-100) of samples whose highest-scoring class
        equals the target.

    Raises:
        ValueError: If ``loader`` yields no samples, since the metrics would
            otherwise be an opaque division by zero.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0  # counted while iterating so loaders without __len__ also work

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for data, target in tqdm(loader, desc='Validation', leave=False):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            total_loss += loss.item()
            _, predicted = output.max(1)  # index of the highest score per sample
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            num_batches += 1

    if num_batches == 0 or total == 0:
        raise ValueError("validate received an empty loader; nothing to evaluate")

    return total_loss / num_batches, 100. * correct / total

In [None]:
# Training loop
# Read the epoch budget from the config so the run agrees with the "Epochs"
# value printed by the training-setup cell. Override here for quick experiments.
num_epochs = config['training']['epochs']

# torch.save does not create missing directories, so make sure the checkpoint
# folder exists before the first save.
best_model_path = '../../models/saved_models/best_model_notebook.pth'
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)

print("\nStarting training...\n")

for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    # Update scheduler (it monitors validation loss)
    scheduler.step(val_loss)

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    # Print progress
    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

    # Early stopping: track the best validation accuracy and checkpoint on improvement
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        epochs_no_improve = 0
        # Save best model
        torch.save(model.state_dict(), best_model_path)
        print(f"  ✓ New best model saved! (Val Acc: {val_acc:.2f}%)")
    else:
        epochs_no_improve += 1

    # Stop once validation accuracy has not improved for `patience` consecutive epochs
    if epochs_no_improve >= patience:
        print(f"\nEarly stopping triggered after {epoch+1} epochs")
        break
    print()

print(f"\n✓ Training complete! Best validation accuracy: {best_val_acc:.2f}%")

## 6. Visualize Training History

In [None]:
# Plot training curves
# Use 1-based epoch numbers on the x-axis so the curves line up with the
# "Epoch k/N" lines printed by the training loop.
epochs_ran = range(1, len(history['train_loss']) + 1)

fig, axes = plt.subplots(1, 2, figsize=(16, 5))

# (axis, history key suffix, legend suffix, y-axis label, title) for each panel
panels = [
    (axes[0], 'loss', 'Loss', 'Loss', 'Training and Validation Loss'),
    (axes[1], 'acc', 'Accuracy', 'Accuracy (%)', 'Training and Validation Accuracy'),
]

for ax, key, legend_name, y_label, title in panels:
    ax.plot(epochs_ran, history[f'train_{key}'], label=f'Train {legend_name}',
            linewidth=2, marker='o', markersize=4)
    ax.plot(epochs_ran, history[f'val_{key}'], label=f'Val {legend_name}',
            linewidth=2, marker='s', markersize=4)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories; ensure the output folder exists
os.makedirs('../../outputs', exist_ok=True)
plt.savefig('../../outputs/training_curves.png', dpi=300, bbox_inches='tight')
plt.show()

## 7. Test Set Evaluation

In [None]:
# Load best model
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
best_state = torch.load('../../models/saved_models/best_model_notebook.pth', map_location=device)
model.load_state_dict(best_state)

# Evaluate on test set
test_loss, test_acc = validate(model, test_loader, criterion, device)

print(f"\n{'='*50}")
print(f"TEST SET RESULTS")
print(f"{'='*50}")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}%")
print(f"{'='*50}\n")

## 8. Confusion Matrix

In [None]:
# Get predictions
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device)
        output = model(data)
        _, predicted = output.max(1)  # index of the highest score per sample

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(target.numpy())

# Animal category names
# NOTE(review): assumes integer label i corresponds to the i-th category in
# sorted order — confirm against the preprocessing that produced labels.npy.
animal_categories = sorted(config['data']['animal_categories'])

# Compute confusion matrix
# Passing every class id explicitly keeps the matrix K x K (aligned with the
# tick labels) even when a class is missing from both test labels and predictions.
class_ids = np.arange(len(animal_categories))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

# Row-normalise; rows with no true samples stay all-zero instead of dividing by zero
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = cm.astype('float') / np.maximum(row_totals, 1)

# Plot confusion matrix
plt.figure(figsize=(12, 10))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues',
            xticklabels=animal_categories, yticklabels=animal_categories,
            cbar_kws={'label': 'Proportion'})
plt.title('Confusion Matrix (Normalized)', fontsize=16, fontweight='bold', pad=20)
plt.ylabel('True Label', fontsize=13)
plt.xlabel('Predicted Label', fontsize=13)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
# savefig does not create missing directories; ensure the output folder exists
os.makedirs('../../outputs', exist_ok=True)
plt.savefig('../../outputs/confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

## 9. Per-Class Performance

In [None]:
# Calculate per-class metrics
# Accuracy per class = correct / true samples of that class. Classes with no
# test samples report 0.0 (with Total == 0) instead of a 0/0 NaN and a warning.
class_totals = cm.sum(axis=1)
per_class_acc = cm.diagonal() / np.maximum(class_totals, 1)

# Create DataFrame
perf_df = pd.DataFrame({
    'Animal': animal_categories,
    'Accuracy': per_class_acc,
    'Correct': cm.diagonal(),
    'Total': class_totals
})

perf_df = perf_df.sort_values('Accuracy', ascending=False)
print("\nPer-Class Performance:")
print(perf_df.to_string(index=False))

# Visualize
# Colour bars by performance band: >= 0.8 green, >= 0.6 orange, otherwise red
bar_colors = [
    'green' if acc >= 0.8 else 'orange' if acc >= 0.6 else 'red'
    for acc in per_class_acc
]

plt.figure(figsize=(12, 6))
bars = plt.bar(range(len(animal_categories)), per_class_acc,
               color=bar_colors, alpha=0.8)

plt.xlabel('Animal Category', fontsize=12)
plt.ylabel('Accuracy', fontsize=12)
plt.title('Per-Class Classification Accuracy', fontsize=14, fontweight='bold')
plt.xticks(range(len(animal_categories)), animal_categories, rotation=45, ha='right')
plt.ylim([0, 1])
plt.axhline(y=0.8, color='green', linestyle='--', alpha=0.5, label='Good (≥80%)')
plt.axhline(y=0.6, color='orange', linestyle='--', alpha=0.5, label='Fair (≥60%)')
plt.grid(axis='y', alpha=0.3)
plt.legend()
plt.tight_layout()
# savefig does not create missing directories; ensure the output folder exists
os.makedirs('../../outputs', exist_ok=True)
plt.savefig('../../outputs/per_class_accuracy.png', dpi=300, bbox_inches='tight')
plt.show()

## 10. Classification Report

In [None]:
# Print detailed classification report
# Listing every class id explicitly keeps the report aligned with
# `animal_categories` even when a class is absent from the test labels and
# predictions; without it, a mismatch between the classes present and the
# number of target names makes the report fail.
# NOTE(review): assumes integer label i corresponds to animal_categories[i] — confirm.
print("\n" + "="*60)
print("DETAILED CLASSIFICATION REPORT")
print("="*60)
print(classification_report(all_labels, all_preds,
                            labels=list(range(len(animal_categories))),
                            target_names=animal_categories,
                            digits=3))

## 11. Model Summary & Next Steps

### Summary:
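- **Best Validation Accuracy**: reported on the last line of the training-loop output (Section 5)
- **Test Accuracy**: reported under "TEST SET RESULTS" (Section 7)
- **Model**: CNN with mel spectrogram features
- **Best Performing Classes**: see the top rows of the per-class performance table (Section 9), which is sorted by accuracy
- **Worst Performing Classes**: see the bottom rows of the per-class performance table (Section 9)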

### Possible Improvements:
1. **Data Augmentation**: Time shift, pitch shift, add noise
2. **Architecture**: Try LSTM or Transformer models
3. **Features**: Experiment with different feature types (MFCC vs Mel-spec)
4. **Hyperparameters**: Tune learning rate, dropout, batch size
5. **Ensemble**: Combine multiple models
6. **Transfer Learning**: Use pre-trained audio models