In [1]:
# import librosa, Pytorch, magenta
import librosa
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
#import magenta
import matplotlib.pyplot as plt
import os

In [None]:


# Define the path to the FMA dataset
fma_dataset_path = './data/fma_small/000'  # Update this path to your local FMA dataset path

# Function to extract features from an audio file
def extract_features(file_path):
    try:
        # Load the audio file
        y, sr = librosa.load(file_path, sr=None)

        # Extract MFCCs
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfccs_mean = np.mean(mfccs, axis=1)

        # Extract Chroma Features
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)

        # Extract Spectral Contrast
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        spectral_contrast_mean = np.mean(spectral_contrast, axis=1)

        # Extract Tempo
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = np.array([tempo])  # Convert scalar to 1D array

        # Extract Harmonic Features
        harmonic = librosa.effects.harmonic(y=y)
        harmonic_mean = np.array([np.mean(harmonic)])  # Reduce harmonic to a single mean value

        # Flatten all features into a single 1D array
        features = np.concatenate([
            mfccs_mean, 
            chroma_mean, 
            spectral_contrast_mean, 
            tempo.flatten(),  # Flatten tempo to ensure it's 1D
            harmonic_mean.flatten()  # Flatten harmonic_mean to ensure it's 1D
        ])

        print(f"Extracted features from {file_path}: {features.shape}: {features}")

        return features

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

# Function to process all audio files in the dataset
def process_dataset(dataset_path):
    feature_list = []
    file_names = []

    # Iterate through all files in the dataset directory
    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith('.mp3'):  # Process only MP3 files
                file_path = os.path.join(root, file)
                print(f"Processing {file_path}...")

                # Extract features
                features = extract_features(file_path)
                if features is not None:
                    feature_list.append(features)
                    file_names.append(file)

    # Create a DataFrame to store the features
    feature_columns = [f"mfcc_{i}" for i in range(13)] + \
                      [f"chroma_{i}" for i in range(12)] + \
                      [f"spectral_contrast_{i}" for i in range(7)] + \
                      ["tempo", "harmonic_mean"]

    features_df = pd.DataFrame(feature_list, columns=feature_columns)
    features_df['file_name'] = file_names

    return features_df

# Process the dataset and save the features to a CSV file
features_df = process_dataset(fma_dataset_path)
features_df.to_csv('audio_features.csv', index=False)
print("Feature extraction complete. Saved to audio_features.csv.")

Processing ../fma/data/fma_small/000\000002.mp3...
Extracted features from ../fma/data/fma_small/000\000002.mp3: (34,): [-1.22713936e+02  1.17760078e+02 -4.23341751e+01  3.80610008e+01
 -2.30123215e+01  2.39203796e+01 -1.19448633e+01  1.36872883e+01
 -7.29458094e+00  4.04114103e+00 -4.85660362e+00 -1.66124249e+00
 -6.30609512e+00  7.23267317e-01  5.35243392e-01  4.09128755e-01
  3.80510360e-01  3.33038867e-01  2.71092415e-01  2.87743688e-01
  4.25965250e-01  4.04920131e-01  3.87355268e-01  4.11143363e-01
  5.20614505e-01  1.28846047e+01  1.24419593e+01  1.53645199e+01
  1.72508999e+01  1.70910647e+01  1.73183082e+01  5.67500673e+01
  1.66708669e+02  2.17195033e-04]
Processing ../fma/data/fma_small/000\000005.mp3...
Extracted features from ../fma/data/fma_small/000\000005.mp3: (34,): [-1.62232422e+02  1.31435989e+02 -1.84267788e+01  5.16313705e+01
 -1.54448442e+01  2.38600941e+01 -7.86766338e+00  1.06862335e+01
 -3.14591378e-01  1.28646460e+01 -9.88320541e+00  6.41043186e-02
  7.6622706

In [8]:
# Load the metadata file with multi-level headers
metadata_df = pd.read_csv('./data/fma_metadata/tracks.csv', header=[0, 1])

# Flatten the multi-level column names
metadata_df.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in metadata_df.columns]

# Rename the first column (track_id) for clarity
metadata_df.rename(columns={'Unnamed: 0_level_0_Unnamed: 0_level_1': 'track_id'}, inplace=True)

# Extract relevant columns
metadata_df = metadata_df[['track_id', 'track_genre_top']]
metadata_df['file_name'] = metadata_df['track_id'].apply(lambda x: f"{int(x):06d}.mp3")  # Format track_id to match file names

# Merge features with genres based on file names
merged_df = pd.merge(features_df, metadata_df, on='file_name', how='inner')

# Save the mapping to a new CSV file
merged_df.to_csv('features_genre_mapping.csv', index=False)

print("Mapping of features to genres complete. Saved to features_genre_mapping.csv.")

  metadata_df = pd.read_csv('./data/fma_metadata/tracks.csv', header=[0, 1])


ValueError: invalid literal for int() with base 10: 'track_id'