In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import utils
from sklearn.calibration import LabelEncoder

In [2]:
tracks = utils.load('data/fma_metadata/tracks.csv')
genres = utils.load('data/fma_metadata/genres.csv')
features = utils.load('data/fma_metadata/features.csv')
small = tracks[tracks['set', 'subset'] <= 'small']

In [3]:
print(tracks.head())

            album                                                          \
         comments        date_created date_released engineer favorites id   
track_id                                                                    
2               0 2008-11-26 01:44:45    2009-01-05      NaN         4  1   
3               0 2008-11-26 01:44:45    2009-01-05      NaN         4  1   
5               0 2008-11-26 01:44:45    2009-01-05      NaN         4  1   
10              0 2008-11-26 01:45:08    2008-02-06      NaN         4  6   
20              0 2008-11-26 01:45:05    2009-01-06      NaN         2  4   

                                                                           \
                                        information listens producer tags   
track_id                                                                    
2                                           <p></p>    6073      NaN   []   
3                                           <p></p>    6073      NaN   []  

In [4]:
# Load the features and metadata
features_df = pd.read_csv('data/fma_metadata/features.csv', index_col=0)
tracks_df = pd.read_csv('data/fma_metadata/tracks.csv', header=[0, 1], index_col=0)

# Flatten the multi-level column names in tracks.csv
tracks_df.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in tracks_df.columns]

# Filter the small dataset (subset)
small_tracks = tracks_df[tracks_df['set_subset'] == 'small']

# Extract relevant columns: track_id, features, and genre
small_tracks = small_tracks[['track_genre_top']].copy()
small_tracks.index = small_tracks.index.astype(int)  # Ensure track_id is an integer

# Merge features with genres
mapping_df = features_df.merge(small_tracks, left_index=True, right_index=True)

# Save the mapping to a new CSV file
mapping_df.to_csv('features_genre_mapping_small.csv', index=True)

print("Mapping of features to genres for the small dataset saved to 'features_genre_mapping_small.csv'.")

  features_df = pd.read_csv('data/fma_metadata/features.csv', index_col=0)


Mapping of features to genres for the small dataset saved to 'features_genre_mapping_small.csv'.


In [5]:
mapping_df.describe()

Unnamed: 0,chroma_cens,chroma_cens.1,chroma_cens.2,chroma_cens.3,chroma_cens.4,chroma_cens.5,chroma_cens.6,chroma_cens.7,chroma_cens.8,chroma_cens.9,...,tonnetz.40,tonnetz.41,zcr,zcr.1,zcr.2,zcr.3,zcr.4,zcr.5,zcr.6,track_genre_top
count,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,...,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918.0,7918
unique,7901.0,7901.0,7901.0,7900.0,7900.0,7900.0,7900.0,7901.0,7901.0,7901.0,...,7899.0,7900.0,7901.0,1573.0,7900.0,343.0,85.0,7900.0,7897.0,8
top,2.120035,-0.1481,0.687363,1.388522,-0.779828,-1.049356,-1.352663,-1.146229,0.13384,0.494833,...,0.023668,0.022414,101.444832,0.385742,0.040447,0.033691,0.0,5.669625,0.026036,Electronic
freq,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,6.0,6.0,6.0,20.0,6.0,99.0,3648.0,6.0,6.0,1000


In [6]:
# Filter relevant features: mfcc_* and spectral_*
# feature_columns = [col for col in mapping_df.columns if col.startswith('mfcc.') or col.startswith('spectral_')]
feature_columns = [col for col in mapping_df.columns]

X = mapping_df.drop(columns=['track_genre_top']).values

# Encode the genre labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(mapping_df['track_genre_top'])  # Labels matrix

# Check the shapes of the features and labels
print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")

# Optional: Save the label encoding mapping for later use
genre_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Genre mapping:", genre_mapping)

Features shape: (7918, 518)
Labels shape: (7918,)
Genre mapping: {'Electronic': 0, 'Experimental': 1, 'Folk': 2, 'Hip-Hop': 3, 'Instrumental': 4, 'International': 5, 'Pop': 6, 'Rock': 7}


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

In [8]:
# Define a custom Dataset class
class GenreDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Create Dataset and DataLoader
train_dataset = GenreDataset(X_train, y_train)
test_dataset = GenreDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the neural network
class GenreClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(GenreClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [18]:
# Initialize the model, loss function, and optimizer
input_size = 518  # Number of input features
num_classes = 8   # Number of classes
model = GenreClassifier(input_size, num_classes=8)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for data, labels in train_loader:
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_loss = total_loss / len(train_loader)
    train_acc = correct / total
    return train_loss, train_acc

# Testing loop
def test_model(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)

            # Forward pass
            outputs = model(features)
            loss = criterion(outputs, labels)

            # Track loss and accuracy
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    epoch_loss = running_loss / len(test_loader)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [19]:
# Training the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 20
for epoch in range(num_epochs):
    train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, epochs=10)
    test_loss, test_acc = test_model(model, test_loader, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# Save the trained model
torch.save(model.state_dict(), 'genre_classifier.pth')
print("Model saved to 'genre_classifier.pth'")

Epoch 1/20
Train Loss: 1.4521, Train Accuracy: 0.4847
Test Loss: 1.2609, Test Accuracy: 0.5612
Epoch 2/20
Train Loss: 1.1181, Train Accuracy: 0.6080
Test Loss: 1.1971, Test Accuracy: 0.5903
Epoch 3/20
Train Loss: 0.9738, Train Accuracy: 0.6514
Test Loss: 1.1885, Test Accuracy: 0.5966
Epoch 4/20
Train Loss: 0.8518, Train Accuracy: 0.7024
Test Loss: 1.2233, Test Accuracy: 0.5896
Epoch 5/20
Train Loss: 0.7585, Train Accuracy: 0.7384
Test Loss: 1.2281, Test Accuracy: 0.6061
Epoch 6/20
Train Loss: 0.6513, Train Accuracy: 0.7772
Test Loss: 1.2678, Test Accuracy: 0.5979
Epoch 7/20
Train Loss: 0.5562, Train Accuracy: 0.8118
Test Loss: 1.3424, Test Accuracy: 0.6048
Epoch 8/20
Train Loss: 0.4712, Train Accuracy: 0.8420
Test Loss: 1.4261, Test Accuracy: 0.6067
Epoch 9/20
Train Loss: 0.3850, Train Accuracy: 0.8735
Test Loss: 1.4868, Test Accuracy: 0.5934
Epoch 10/20
Train Loss: 0.3189, Train Accuracy: 0.9020
Test Loss: 1.6245, Test Accuracy: 0.5953
Epoch 11/20
Train Loss: 0.2362, Train Accuracy: 0

In [20]:
import random

# Function to test the model with 10 random songs
def test_random_songs(model, X, y, metadata_df, device, label_encoder):
    model.eval()  # Set the model to evaluation mode

    # Select 10 random indices from the test dataset
    random_indices = random.sample(range(len(X)), 10)

    # Get the corresponding features, true labels, and metadata
    random_features = X[random_indices]
    true_labels = y[random_indices].numpy()

    # Use the original indices of the test set to retrieve metadata
    metadata = metadata_df.iloc[random_indices]

    # Move features to the appropriate device
    random_features = random_features.to(device)

    # Predict genres
    with torch.no_grad():
        outputs = model(random_features)
        _, predicted_labels = torch.max(outputs, 1)
        predicted_labels = predicted_labels.cpu().numpy()  # Move predictions back to CPU

    # Decode the true and predicted labels
    true_genres = label_encoder.inverse_transform(true_labels)
    predicted_genres = label_encoder.inverse_transform(predicted_labels)

    # Display the results
    print(f"{'Track ID':<10} {'Title':<30} {'True Genre':<15} {'Predicted Genre':<15}")
    print("-" * 70)
    for i in range(len(random_indices)):
        track_id = metadata.index[i]
        title = metadata.iloc[i]['track_title'] if 'track_title' in metadata.columns else "Unknown"
        true_genre = true_genres[i]
        predicted_genre = predicted_genres[i]
        print(f"{track_id:<10} {title:<30} {true_genre:<15} {predicted_genre:<15}")

# Example usage
test_random_songs(model, X_test, y_test, tracks_df, device, label_encoder)

Track ID   Title                          True Genre      Predicted Genre
----------------------------------------------------------------------
1154       Hello Heartstring              Pop             Pop            
416        Smyrna Snow Walk               Rock            Rock           
1082       Nam Nhi-tu                     International   International  
481        Council Bluffs                 Folk            Folk           
891        Your One Mind                  Rock            Rock           
564        The Sugar Society              Electronic      International  
1620       Track 01                       Experimental    Rock           
1525       Scovil                         International   International  
458        Hunt Like Devil 4              Pop             Folk           
1127       Do I Tingle? Up?               Rock            Pop            
