# In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class GenreClassifier(nn.Module):
    """1-D convolutional classifier producing ``num_classes`` scores.

    The feature extractor is three ``Conv1d`` stages (kernel 4, stride 2,
    padding 1), each followed by ``LeakyReLU(0.2)``; the second and third
    stages also apply ``InstanceNorm1d``. An ``AdaptiveAvgPool1d(1)``
    collapses the remaining length to one step and a linear layer maps the
    pooled features to class scores.

    Args:
        input_channels: Number of input channels of the first convolution.
        ndf: Output channels of the first convolution; the following stages
            use ``2 * ndf`` and ``4 * ndf`` channels.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, input_channels=80, ndf=64, num_classes=2):
        super().__init__()
        widths = (ndf, ndf * 2, ndf * 4)
        conv_kwargs = dict(kernel_size=4, stride=2, padding=1)

        # The layer order (and therefore each layer's index inside the
        # Sequential) determines the state_dict key names, so it is kept
        # exactly: conv, act, (conv, norm, act) x 2, pool.
        layers = [
            nn.Conv1d(input_channels, widths[0], **conv_kwargs),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                nn.Conv1d(c_in, c_out, **conv_kwargs),
                nn.InstanceNorm1d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ]
        layers.append(nn.AdaptiveAvgPool1d(1))

        self.model = nn.Sequential(*layers)
        self.fc = nn.Linear(widths[-1], num_classes)

    def forward(self, x):
        """Return the class scores for ``x``.

        ``x`` is fed straight into ``Conv1d``, i.e. it is expected in
        (batch, input_channels, length) layout.
        """
        pooled = self.model(x)
        # Drop the length-1 axis left by the adaptive pool: (B, C, 1) -> (B, C).
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(flat)

# Function to load the classifier model
def load_model(model_path, num_classes, device, input_channels=80):
    """Build a ``GenreClassifier``, load saved weights into it and return it.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load`` whose
            content is accepted by ``load_state_dict``.
        num_classes: Number of outputs the classifier is built with; must
            match the checkpoint.
        device: Device the weights are mapped to and the model is moved to.
        input_channels: Number of input channels the classifier is built
            with. Defaults to 80, the value that used to be hard-coded here.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    model = GenreClassifier(input_channels=input_channels, num_classes=num_classes)

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source. Where the installed torch supports it, consider
    # passing weights_only=True.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)

    model.to(device)
    model.eval()
    return model

# Load mel spectrogram from .npy file
def load_mel_npy(file_path, target_time_steps=2580):
    """Load a 2-D array from a ``.npy`` file as a fixed-width float tensor.

    Leading singleton axes (for example a saved batch or channel axis) are
    removed until the data is 2-D. The last axis is then zero-padded on the
    right or truncated so that it has exactly ``target_time_steps`` entries.
    NOTE(review): the array is presumably laid out as (n_mels, time); this is
    inferred from the function name only, so confirm against the producer of
    the files.

    Args:
        file_path: Path to the ``.npy`` file.
        target_time_steps: Required size of the last axis of the result.

    Returns:
        A float32 tensor of shape ``(1, rows, target_time_steps)``.

    Raises:
        ValueError: If the array is not 2-D once leading singleton axes have
            been removed.
    """
    mel = torch.as_tensor(np.load(file_path), dtype=torch.float32)

    # Only strip axes that are actually redundant. Squeezing unconditionally
    # would turn a (1, T) array into a 1-D tensor and break the indexing
    # below.
    while mel.dim() > 2 and mel.shape[0] == 1:
        mel = mel.squeeze(0)

    if mel.dim() != 2:
        raise ValueError(
            f"Expected a 2-D array (optionally with leading singleton axes) "
            f"in {file_path!r}, got shape {tuple(mel.shape)}"
        )

    n_frames = mel.shape[1]
    if n_frames < target_time_steps:
        # F.pad with a 2-tuple pads only the last axis: (left, right).
        mel = F.pad(mel, (0, target_time_steps - n_frames))
    elif n_frames > target_time_steps:
        mel = mel[:, :target_time_steps]

    # Add the batch axis the model expects.
    return mel.unsqueeze(0)

# Evaluate a folder of .npy mel spectrograms
def evaluate_mel_folder(folder_path, model, label_map, expected_label_id, device):
    """Classify every ``.npy`` file in a folder and report the accuracy.

    All files are scored against the same expected class. Progress and the
    final summary are printed to stdout.

    Args:
        folder_path: Directory that is scanned (non-recursively) for names
            ending in ``.npy``.
        model: Callable classifier; its output is reduced with
            ``argmax(dim=1)`` to obtain the predicted class id.
        label_map: Mapping from class id to a printable label.
        expected_label_id: Class id every file is expected to be assigned.
        device: Device each loaded tensor is moved to before inference.

    Returns:
        The fraction of files predicted as ``expected_label_id``, or ``0.0``
        when the folder holds no ``.npy`` files.
    """
    npy_names = []
    for entry in os.listdir(folder_path):
        if entry.endswith(".npy"):
            npy_names.append(entry)

    n_files = len(npy_names)
    hits = 0

    for name in npy_names:
        full_path = os.path.join(folder_path, name)
        mel = load_mel_npy(full_path).to(device)
        print(f"Evaluating file: {name}, Mel shape: {mel.shape}")

        # Inference only: no autograd bookkeeping needed.
        with torch.no_grad():
            logits = model(mel)
        pred = logits.argmax(dim=1).item()

        print(f"Prediction: {label_map[pred]} (Expected: {label_map[expected_label_id]})")

        hits += int(pred == expected_label_id)

    if n_files > 0:
        acc = hits / n_files
    else:
        # Avoid dividing by zero for an empty folder.
        acc = 0.0
    print(f"Evaluation complete. Total: {n_files} samples, Accuracy: {acc:.4f} ({label_map[expected_label_id]})")

    return acc

if __name__ == "__main__":
    # Run on the GPU when torch can see one, otherwise on the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Class id -> name, plus the reverse lookup used to resolve the target.
    label_map = {0: "Pop", 1: "Rock"}
    inv_label_map = dict(zip(label_map.values(), label_map.keys()))

    # Restore the trained classifier from its checkpoint.
    model_path = "best_genre_classifier.pth"
    model = load_model(model_path, 2, device)

    # Folder of converted .npy files; every file is expected to be
    # classified as the target label.
    converted_mel_dir = "/DATA/music/converted_features"
    target_label = "Rock"

    # Score the whole folder against the target label.
    evaluate_mel_folder(
        converted_mel_dir,
        model,
        label_map,
        inv_label_map[target_label],
        device,
    )
