In [None]:
import pandas as pd
import pretty_midi
import librosa
import numpy as np
from audio_midi_pipeline import process_files
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM, GRU, Dense, Flatten, Reshape

# Additional import for setting up the input shape
from tensorflow.keras.layers import Input

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Build the table for one recording with the project's
# audio_midi_pipeline.process_files helper; the cells below slice it
# column-wise into model inputs and labels.
# NOTE(review): the path ends in "wav." with no extension - presumably
# process_files appends the audio/MIDI suffixes itself; confirm against the helper.
df = process_files('songs/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.')

In [None]:
# Column-wise split of the processed table: the first 513 columns are the
# model inputs, every remaining column is a label column.
# NOTE(review): 513 presumably is the number of frequency bins per frame
# produced by process_files (e.g. a 1024-point STFT) - confirm against the pipeline.
labels = df.iloc[:, 513:]
inputs = df.iloc[:, :513]


In [None]:


class SpectrogramDataset(Dataset):
    """Serve (input row, label row) pairs from two row-aligned tables.

    Both tables are converted once, up front, to float32 tensors. Each item
    is returned as ``(features, labels)`` where ``features`` has shape
    ``(1, 1, n_features)`` so that a batch can be fed straight into a
    ``Conv2d`` stack as ``(batch, channels, time, freq)``.

    Args:
        inputs_df: 2-D table (DataFrame or array-like), one row per sample.
        labels_df: 2-D table holding the label row for each sample.

    Raises:
        ValueError: if the two tables do not have the same number of rows.
    """

    def __init__(self, inputs_df, labels_df):
        # np.array(..., dtype=float32) always copies into a fresh writable
        # buffer, so from_numpy never aliases (or warns about) caller data.
        self.inputs = torch.from_numpy(np.array(inputs_df, dtype=np.float32))
        self.labels = torch.from_numpy(np.array(labels_df, dtype=np.float32))

        if len(self.inputs) != len(self.labels):
            raise ValueError(
                f"inputs and labels must have the same number of rows, "
                f"got {len(self.inputs)} and {len(self.labels)}"
            )

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # The feature width is taken from the data instead of being hard-coded,
        # so tables with a different number of input columns also work.
        n_features = self.inputs.shape[-1]
        return self.inputs[idx].view(1, 1, n_features), self.labels[idx]  # [channels, time, freq]

class PitchDetectionModel(nn.Module):
    """Per-frame multi-label pitch classifier: three conv blocks + two dense layers.

    The convolutions and pools use ``(1, k)`` kernels, i.e. they only act
    along the last (frequency) axis. The first dense layer is sized for an
    input whose time axis has length 1, i.e. ``(batch, 1, 1, num_freq_bins)``.
    The output is ``(batch, num_pitches)`` of sigmoid activations in [0, 1].

    Args:
        num_pitches: number of output units (one per label column).
        num_freq_bins: width of the frequency axis of the input. Defaults to
            513, which reproduces the previously hard-coded ``128 * 128``
            flattened size.

    Raises:
        ValueError: if ``num_freq_bins`` is too small to survive both poolings.
    """

    def __init__(self, num_pitches=88, num_freq_bins=513):
        super().__init__()

        # Two MaxPool2d((1, 2)) layers (floor mode) each halve the frequency axis.
        pooled_bins = num_freq_bins // 2 // 2
        if pooled_bins < 1:
            raise ValueError(
                f"num_freq_bins must be at least 4, got {num_freq_bins}"
            )

        # Reduced number of pooling layers and smaller kernels
        self.conv_layers = nn.Sequential(
            # First conv block
            nn.Conv2d(1, 32, kernel_size=(1, 3), padding=(0, 1)),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d((1, 2)),  # Only pool frequency dimension

            # Second conv block
            nn.Conv2d(32, 64, kernel_size=(1, 3), padding=(0, 1)),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d((1, 2)),

            # Third conv block without pooling
            nn.Conv2d(64, 128, kernel_size=(1, 3), padding=(0, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )

        # Fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),  # Flatten all dimensions except batch
            # 128 channels x 1 time step x pooled frequency bins
            nn.Linear(128 * pooled_bins, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_pitches),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Map ``x`` of shape [batch, channels, time, freq] to pitch probabilities."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Puts the model in training mode, performs one optimizer step per batch
    and prints the loss of every tenth batch (starting with batch 0).

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0

    for step, (features, labels) in enumerate(train_loader):
        features = features.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        batch_loss = criterion(model(features), labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        if step % 10 == 0:
            print(f'Batch {step}, Loss: {batch_loss.item():.4f}')

    return running_loss / len(train_loader)

def validate(model, val_loader, criterion, device):
    """Compute the mean per-batch loss over ``val_loader`` without training.

    Switches the model to evaluation mode and disables gradient tracking for
    the whole pass; no parameters are updated.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    running_loss = 0

    with torch.no_grad():
        for features, labels in val_loader:
            features = features.to(device)
            labels = labels.to(device)
            predictions = model(features)
            running_loss += criterion(predictions, labels).item()

    return running_loss / len(val_loader)

def prepare_training(inputs_df, labels_df, batch_size=32, test_size=0.2, random_state=42):
    """Split the tables into train/validation parts and wrap each in a DataLoader.

    Rows are split with ``train_test_split`` using ``test_size`` and
    ``random_state``. Only the training loader shuffles its samples.

    Returns:
        A ``(train_loader, val_loader)`` tuple.
    """
    x_train, x_val, y_train, y_val = train_test_split(
        inputs_df,
        labels_df,
        test_size=test_size,
        random_state=random_state,
    )

    train_loader = DataLoader(
        SpectrogramDataset(x_train, y_train),
        batch_size=batch_size,
        shuffle=True,
    )
    val_loader = DataLoader(
        SpectrogramDataset(x_val, y_val),
        batch_size=batch_size,
    )

    return train_loader, val_loader

def main(inputs_df, labels_df, num_epochs=30, batch_size=32, learning_rate=0.001,
         state_dict_path='best_pitch_model.pth', full_model_path='full_model.pth'):
    """Train a PitchDetectionModel and checkpoint the best epoch.

    The tables are split into train/validation loaders, the model is trained
    with Adam and binary cross-entropy, and whenever the validation loss
    improves the current weights are written to disk.

    Two files are written on each improvement:
      * ``state_dict_path`` - the model's ``state_dict``; this is the format
        ``evaluate_saved_model`` reads via ``load_state_dict``.
      * ``full_model_path`` - the whole pickled model object, kept so existing
        consumers of ``full_model.pth`` continue to work.

    Args:
        inputs_df: table of input rows (one row per sample).
        labels_df: table of label rows; its column count sets the number of
            model outputs.
        num_epochs: number of passes over the training data.
        batch_size: batch size for both loaders.
        learning_rate: Adam learning rate.
        state_dict_path: destination of the best ``state_dict``.
        full_model_path: destination of the best fully pickled model.
    """
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Print shapes for debugging
    print(f"Input DataFrame shape: {inputs_df.shape}")
    print(f"Labels DataFrame shape: {labels_df.shape}")

    # Initialize model and move to device
    model = PitchDetectionModel(num_pitches=labels_df.shape[1])
    model = model.to(device)

    # Create dataloaders
    train_loader, val_loader = prepare_training(inputs_df, labels_df, batch_size=batch_size)

    # The model ends in a Sigmoid, so plain BCELoss (not the logits variant) applies.
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss = validate(model, val_loader, criterion, device)

        print(f"Training Loss: {train_loss:.4f}")
        print(f"Validation Loss: {val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Weights only: loadable with load_state_dict on a fresh model.
            torch.save(model.state_dict(), state_dict_path)
            # Entire module object, as before.
            torch.save(model, full_model_path)
            print("Saved best model!")

# Train once. Inside a notebook kernel `__name__` is "__main__", so this guard
# always fires; an additional bare `main(...)` call would train a second model
# from scratch and overwrite the first run's checkpoint.
if __name__ == "__main__":
    main(inputs, labels)

In [None]:
import torch
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import numpy as np
import pandas as pd

def evaluate_model(model, data_loader, device, threshold=0.5):
    """Score a multi-label model on every batch of ``data_loader``.

    Model outputs are binarised with ``outputs > threshold`` and compared
    against the targets.
    NOTE(review): the sklearn metrics assume the targets are 0/1 indicators -
    confirm against the label pipeline.

    Args:
        model: module mapping a batch of inputs to per-label scores.
        data_loader: iterable of ``(data, target)`` batches.
        device: device the inputs are moved to before the forward pass.
        threshold: scores strictly above this value count as positive.

    Returns:
        ``(metrics_df, overall_metrics)`` where ``metrics_df`` has one row per
        label column (``Pitch``, ``Precision``, ``Recall``, ``F1 Score``,
        ``Accuracy``) and ``overall_metrics`` is a dict with ``Overall
        Accuracy/Precision/Recall/F1`` computed over the flattened arrays,
        i.e. treating every (row, label) entry as one binary decision.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    model.eval()
    batch_predictions = []
    batch_targets = []

    with torch.no_grad():
        for data, target in data_loader:
            outputs = model(data.to(device))

            # Binarise, then hand NumPy arrays on the CPU to sklearn.
            batch_predictions.append((outputs > threshold).float().cpu().numpy())
            # Targets are only compared on the CPU, so they never need to
            # visit the compute device.
            batch_targets.append(target.cpu().numpy())

    if not batch_predictions:
        raise ValueError("data_loader yielded no batches; nothing to evaluate")

    # Concatenate all batches
    all_predictions = np.vstack(batch_predictions)
    all_targets = np.vstack(batch_targets)

    # Per-pitch metrics. zero_division=0 yields the same 0.0 scores as the
    # default for labels without positives/predictions, but without emitting
    # an undefined-metric warning on each call.
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_targets, all_predictions, average=None, zero_division=0
    )

    # Overall metrics over every (row, label) entry
    overall_precision, overall_recall, overall_f1, _ = precision_recall_fscore_support(
        all_targets.flatten(),
        all_predictions.flatten(),
        average='binary',
        zero_division=0
    )

    # Fraction of rows predicted correctly, per label column
    pitch_accuracy = (all_targets == all_predictions).mean(axis=0)

    # Fraction of all (row, label) entries predicted correctly
    overall_accuracy = accuracy_score(all_targets.flatten(), all_predictions.flatten())

    # Create a DataFrame with all metrics
    metrics_df = pd.DataFrame({
        'Pitch': [f'Pitch_{i}' for i in range(len(precision))],
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Accuracy': pitch_accuracy
    })

    # Add overall metrics
    overall_metrics = {
        'Overall Accuracy': overall_accuracy,
        'Overall Precision': overall_precision,
        'Overall Recall': overall_recall,
        'Overall F1': overall_f1
    }

    return metrics_df, overall_metrics

def print_evaluation_results(metrics_df, overall_metrics):
    """Print a text report of the evaluation metrics.

    The report lists every entry of ``overall_metrics`` with four decimals,
    the five rows of ``metrics_df`` with the highest and with the lowest
    ``F1 Score``, and ``describe()`` statistics of the ``F1 Score`` column.
    """
    report_columns = ['Pitch', 'Precision', 'Recall', 'F1 Score', 'Accuracy']

    print("\n=== Overall Model Performance ===")
    for name in overall_metrics:
        print(f"{name}: {overall_metrics[name]:.4f}")

    # Both ranked tables share the same layout; only heading and selector differ.
    ranked_sections = (
        ("\n=== Top 5 Best Performing Pitches ===", metrics_df.nlargest),
        ("\n=== Top 5 Worst Performing Pitches ===", metrics_df.nsmallest),
    )
    for heading, select_rows in ranked_sections:
        print(heading)
        print(select_rows(5, 'F1 Score')[report_columns])

    # Calculate performance distribution
    print("\n=== Performance Distribution ===")
    print("\nF1 Score Distribution:")
    f1_summary = metrics_df['F1 Score'].describe()
    print(f1_summary)

# Usage example:
def evaluate_saved_model(model_path, inputs_df, labels_df, batch_size=32):
    """Load a checkpoint, score it on the given tables, print and return the metrics.

    Args:
        model_path: file written with ``torch.save``. Normally a
            ``PitchDetectionModel`` ``state_dict``; a checkpoint that
            deserialises to a whole ``nn.Module`` is used as-is.
        inputs_df: table of input rows (one row per sample).
        labels_df: table of label rows; its column count sets the number of
            outputs of the model built for a ``state_dict`` checkpoint.
        batch_size: batch size used while scoring.

    Returns:
        ``(metrics_df, overall_metrics)`` as produced by ``evaluate_model``.
    """
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # map_location moves the stored tensors onto the device chosen above, so a
    # checkpoint written during a CUDA run also loads on a CPU-only machine.
    # SECURITY: torch.load is pickle-based - only open checkpoints you trust.
    checkpoint = torch.load(model_path, map_location=device)

    if isinstance(checkpoint, torch.nn.Module):
        # Whole-model checkpoint (torch.save(model, ...)).
        # NOTE(review): recent torch releases may refuse to unpickle full
        # modules unless weights_only=False is requested - confirm for the
        # installed version before relying on this branch.
        model = checkpoint
    else:
        model = PitchDetectionModel(num_pitches=labels_df.shape[1])
        model.load_state_dict(checkpoint)
    model = model.to(device)

    # Create dataset and dataloader
    dataset = SpectrogramDataset(inputs_df, labels_df)
    data_loader = DataLoader(dataset, batch_size=batch_size)

    # Evaluate model
    metrics_df, overall_metrics = evaluate_model(model, data_loader, device)

    # Print results
    print_evaluation_results(metrics_df, overall_metrics)

    return metrics_df, overall_metrics

# Usage example, kept as an inert triple-quoted string (it is never executed):
"""
metrics_df, overall_metrics = evaluate_saved_model(
    model_path='best_pitch_model.pth',
    inputs_df=inputs,
    labels_df=labels
)

# To save the metrics to CSV:
metrics_df.to_csv('pitch_detection_metrics.csv', index=False)
"""

# Live evaluation run. Requires a checkpoint at 'best_pitch_model.pth' to
# exist before this cell is executed.
# `inputs`/`labels` are the full tables that were also handed to main() for
# training, so the reported numbers cover training rows as well and are not
# a held-out estimate.
metrics_df, overall_metrics = evaluate_saved_model(
    model_path='best_pitch_model.pth',
    inputs_df=inputs,
    labels_df=labels
)