In [1]:
pip install pandas librosa numpy soundfile


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import random
import shutil
import pandas as pd
import librosa
import numpy as np
import time  # <-- import time module

##########################################
# Feature Extraction Functions
##########################################

def extract_mfcc_with_timing(waveform, sr=22050, n_mfcc=13):
    """
    Compute MFCCs for a waveform by delegating to librosa.

    Args:
        waveform: Audio samples, forwarded to librosa as ``y``.
        sr: Sampling rate forwarded to librosa.
        n_mfcc: Number of coefficients requested from librosa.

    Returns:
        The array returned by ``librosa.feature.mfcc``, laid out as
        (n_mfcc, num_frames).
    """
    return librosa.feature.mfcc(y=waveform, sr=sr, n_mfcc=n_mfcc)

def extract_chroma_with_timing(waveform, sr=22050):
    """
    Compute an STFT-based chromagram for a waveform by delegating to librosa.

    Args:
        waveform: Audio samples, forwarded to librosa as ``y``.
        sr: Sampling rate forwarded to librosa.

    Returns:
        The array returned by ``librosa.feature.chroma_stft``, laid out as
        (12, num_frames).
    """
    return librosa.feature.chroma_stft(y=waveform, sr=sr)

def extract_spectral_contrast_with_timing(waveform, sr=22050):
    """
    Compute spectral contrast for a waveform by delegating to librosa.

    Args:
        waveform: Audio samples, forwarded to librosa as ``y``.
        sr: Sampling rate forwarded to librosa.

    Returns:
        The array returned by ``librosa.feature.spectral_contrast``, laid out
        as (num_bands, num_frames).
    """
    return librosa.feature.spectral_contrast(y=waveform, sr=sr)

def extract_tonnetz_with_timing(waveform, sr=22050):
    """
    Compute tonnetz (tonal centroid) features by delegating to librosa.

    Args:
        waveform: Audio samples, forwarded to librosa as ``y``.
        sr: Sampling rate forwarded to librosa.

    Returns:
        The array returned by ``librosa.feature.tonnetz``, laid out as
        (tonnetz_features, num_frames).
    """
    return librosa.feature.tonnetz(y=waveform, sr=sr)

def extract_zero_crossing_rate_with_timing(waveform):
    """
    Compute the per-frame zero-crossing rate by delegating to librosa.

    Args:
        waveform: Audio samples, forwarded to librosa as ``y``.

    Returns:
        The array returned by ``librosa.feature.zero_crossing_rate``, laid out
        as (1, num_frames).
    """
    return librosa.feature.zero_crossing_rate(y=waveform)

def extract_features_with_timing(waveform, sr=22050):
    """
    Build a single time-major feature matrix for a waveform.

    Each per-feature extractor returns an array whose rows are feature
    dimensions and whose columns are frames. The arrays are stacked along the
    feature axis and then transposed.

    Args:
        waveform: Audio samples passed to every extractor.
        sr: Sampling rate passed to the extractors that accept one.

    Returns:
        Array of shape (num_time_steps, total_features).
    """
    # Row order of the stacked matrix: MFCC, chroma, spectral contrast,
    # tonnetz, zero-crossing rate.
    feature_blocks = [
        extract_mfcc_with_timing(waveform, sr),
        extract_chroma_with_timing(waveform, sr),
        extract_spectral_contrast_with_timing(waveform, sr),
        extract_tonnetz_with_timing(waveform, sr),
        extract_zero_crossing_rate_with_timing(waveform),
    ]

    # (total_features, num_frames) -> (num_time_steps, total_features)
    stacked = np.vstack(feature_blocks)
    return stacked.T

##########################################
# Processing and Data Handling
##########################################

def process_audio_to_features(path, sr=22050, timeout=120):
    """
    Load an audio file with librosa and extract its feature matrix.

    The timeout is checked only after loading and extraction have finished;
    it cannot interrupt a slow file. A file whose processing took longer than
    ``timeout`` seconds has its features discarded.

    Args:
        path: Path of the audio file to load.
        sr: Sampling rate requested from ``librosa.load`` and passed to the
            feature extractor.
        timeout: Maximum accepted processing time in seconds.

    Returns:
        The array from ``extract_features_with_timing``, or None if loading or
        extraction raised, or if processing exceeded ``timeout``.
    """
    # Use a monotonic clock: wall-clock time (time.time) can jump when the
    # system clock is adjusted, which would distort the elapsed measurement.
    started = time.monotonic()
    try:
        waveform, _ = librosa.load(path, sr=sr)  # Load full audio (no duration limit)
        features = extract_features_with_timing(waveform, sr)
    except MemoryError as me:
        print(f"MemoryError processing {path}: {me}")
        return None
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the batch.
        print(f"Error processing {path}: {e}")
        return None

    elapsed = time.monotonic() - started
    if elapsed > timeout:
        print(f"Processing {path} took {elapsed:.2f} seconds; skipping.")
        return None
    return features

def load_existing_metadata(csv_path):
    """
    Load an existing metadata CSV file.

    If the file is not found, return an empty DataFrame with the expected
    columns. The first column is named "path" so that it matches the metadata
    files produced by ``dict_to_df`` and the ``["path"]`` lookups performed on
    the returned frames.

    Args:
        csv_path: Location of the metadata CSV.

    Returns:
        The parsed CSV as a DataFrame, or an empty DataFrame with the expected
        columns when ``csv_path`` does not exist.
    """
    expected_columns = ["path", "valence", "liveness", "time_signature", "energy",
                        "speechiness", "instrumentalness", "acousticness", "tempo",
                        "loudness", "danceability", "key"]
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=expected_columns)

def build_dict_from_csv_pandas(csv_filename, songs_folder, existing_files=None, file_column="file_path", max_files=200, start_index=0):
    """
    Build a dictionary of extracted features and metadata from a CSV listing.

    Rows are visited in order starting at positional offset ``start_index``.
    A row is skipped when its file name is missing or not a string, when the
    file's base name is in ``existing_files``, when the file does not exist
    under ``songs_folder``, or when feature extraction returns None. File
    names ending in ".npy" are mapped to the corresponding ".mp3" name.

    Args:
        csv_filename: CSV containing ``file_column`` and the metadata columns.
        songs_folder: Folder that the file names are joined onto.
        existing_files: Container of base file names to skip. None (the
            default) means nothing is skipped.
        file_column: Name of the CSV column holding the file name.
        max_files: Maximum number of files to process successfully. A value
            of 0 or less yields an empty result.
        start_index: Positional row offset at which processing starts.

    Returns:
        Dict mapping each processed file's full path to
        ``{"metadata": {...}, "audio_tensor": <feature array>}``.
    """
    metadata_columns = ("valence", "liveness", "time_signature", "energy",
                        "speechiness", "instrumentalness", "acousticness", "tempo",
                        "loudness", "danceability", "key")

    # A None default avoids sharing one mutable set between calls.
    if existing_files is None:
        existing_files = set()

    df = pd.read_csv(csv_filename)
    # Only process rows starting from start_index
    df = df.iloc[start_index:]
    result = {}

    # The limit is otherwise only checked after a file has been processed,
    # so a non-positive limit has to be handled before the loop.
    if max_files <= 0:
        return result

    processed_count = 0
    for i, row in df.iterrows():
        file_name = row[file_column]
        if pd.isna(file_name) or not isinstance(file_name, str):
            print(f"Skipping row {i+1} due to invalid file path: {file_name}")
            continue

        # Replace .npy with .mp3 if needed
        if file_name.lower().endswith('.npy'):
            file_name = file_name[:-4] + ".mp3"

        full_path = os.path.join(songs_folder, file_name)
        base_file = os.path.basename(full_path)

        if base_file in existing_files:
            print(f"Skipping already processed file: {base_file}")
            continue
        if not os.path.exists(full_path):
            print(f"File does not exist: {full_path}")
            continue

        print(f"Processing row {i+1}: {full_path}")
        feature_tensor = process_audio_to_features(full_path)
        if feature_tensor is None:
            print(f"Skipping file due to processing error or timeout: {full_path}")
            continue

        result[full_path] = {
            "metadata": {column: row[column] for column in metadata_columns},
            "audio_tensor": feature_tensor
        }

        processed_count += 1
        if processed_count >= max_files:
            print(f"Reached maximum limit of {max_files} files.")
            break

    return result

def split_master_dict(data_dict, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, seed=None):
    """
    Randomly split the dictionary into training, validation, and testing subsets.

    The training and validation sizes are ``int(ratio * n)``; the test subset
    receives every remaining key.

    Args:
        data_dict: Dictionary whose keys are partitioned.
        train_ratio: Fraction of keys assigned to the training subset.
        val_ratio: Fraction of keys assigned to the validation subset.
        test_ratio: Not used in the computation; the test subset is whatever
            remains after the training and validation subsets are taken.
        seed: Optional seed for a reproducible split. When None, the
            module-level ``random`` state is used.

    Returns:
        Tuple ``(train_dict, val_dict, test_dict)`` of disjoint dictionaries
        that together contain every item of ``data_dict``.
    """
    keys = list(data_dict.keys())
    if seed is None:
        random.shuffle(keys)
    else:
        # A private generator keeps the split reproducible without touching
        # the global random state.
        random.Random(seed).shuffle(keys)

    n = len(keys)
    train_end = int(train_ratio * n)
    val_end = train_end + int(val_ratio * n)

    train_dict = {k: data_dict[k] for k in keys[:train_end]}
    val_dict = {k: data_dict[k] for k in keys[train_end:val_end]}
    test_dict = {k: data_dict[k] for k in keys[val_end:]}

    return train_dict, val_dict, test_dict

def dict_to_df(split_dict):
    """
    Turn a split dictionary into a metadata DataFrame.

    Each entry becomes one row: a 'path' column holding only the base file
    name of the key (not the full path), followed by the entry's metadata
    fields.
    """
    def _as_row(song_path, song_info):
        row = {"path": os.path.basename(song_path)}
        row.update(song_info["metadata"])
        return row

    rows = [_as_row(song_path, song_info) for song_path, song_info in split_dict.items()]
    return pd.DataFrame(rows)

def save_tensors_to_npy(split_dict, destination_folder):
    """
    Write the feature tensor of every entry in the split to a .npy file.

    The destination folder is created if needed. Each file is named after the
    base name of the entry's key with its extension replaced by ".npy".
    Entries without a tensor and targets that already exist are skipped; a
    failure on one file is reported and does not stop the others.
    """
    os.makedirs(destination_folder, exist_ok=True)
    saved = 0
    for source_path, entry in split_dict.items():
        features = entry["audio_tensor"]
        if features is None:
            continue

        stem, _extension = os.path.splitext(os.path.basename(source_path))
        target = os.path.join(destination_folder, stem + ".npy")
        try:
            # Never overwrite a tensor that was written earlier.
            if os.path.exists(target):
                continue
            np.save(target, features)
            saved += 1
        except Exception as e:
            print(f"Error saving .npy for {source_path}: {e}")
    print(f"Saved {saved} .npy files to {destination_folder}")

##########################################
# Main Execution
##########################################

if __name__ == "__main__":
    # Set paths – update these to match your environment or Google Drive structure.
    dataset_dir = r'C:\Users\furla\OneDrive\Desktop\Dataset'  # Root folder for songs, tensors and metadata
    new_csv_file = "Dataset_cleaned_new_proper.csv"  # CSV with new entries
    songs_folder = os.path.join(dataset_dir, "songs")  # Folder with original MP3 files

    # Paths for existing metadata CSV files (if used).
    # NOTE(review): these names are resolved against the current working
    # directory, while the metadata written at the end of this block goes to
    # dataset_dir. Confirm that both refer to the same files.
    train_csv = "train_metadata.csv"
    val_csv = "val_metadata.csv"
    test_csv = "test_metadata.csv"

    # Load existing metadata if available (to avoid reprocessing already handled files).
    train_df = load_existing_metadata(train_csv)
    val_df = load_existing_metadata(val_csv)
    test_df = load_existing_metadata(test_csv)

    # Collect the already processed file names. A frame without a "path"
    # column (for example an empty placeholder) contributes nothing instead of
    # raising KeyError.
    existing_files = set()
    for existing_df in (train_df, val_df, test_df):
        if "path" in existing_df.columns:
            existing_files.update(existing_df["path"])

    # Build the master dictionary from the CSV: start at the first row and
    # process at most 10000 valid files that are not already recorded.
    new_master_dict = build_dict_from_csv_pandas(new_csv_file, songs_folder, existing_files, file_column="file_path", max_files=10000, start_index=0)

    if not new_master_dict:
        # Nothing to do. Calling exit() here would shut down the notebook
        # kernel, so the remaining steps are simply skipped.
        print("No new files to process.")
    else:
        # Split the data into train, validation, and testing sets.
        new_train_dict, new_val_dict, new_test_dict = split_master_dict(new_master_dict)

        # Save new feature tensors as .npy files for each split.
        save_tensors_to_npy(new_train_dict, os.path.join(dataset_dir, "tensors_train5"))
        save_tensors_to_npy(new_val_dict, os.path.join(dataset_dir, "tensors_val5"))
        save_tensors_to_npy(new_test_dict, os.path.join(dataset_dir, "tensors_test5"))

        # Save metadata CSV files for each split.
        # NOTE(review): these files contain only the newly processed entries
        # and replace any file already at the same location; previously
        # recorded entries are not merged in. Confirm this is intended.
        new_train_df = dict_to_df(new_train_dict)
        new_val_df = dict_to_df(new_val_dict)
        new_test_df = dict_to_df(new_test_dict)

        new_train_df.to_csv(os.path.join(dataset_dir, "train_metadata.csv"), index=False)
        new_val_df.to_csv(os.path.join(dataset_dir, "val_metadata.csv"), index=False)
        new_test_df.to_csv(os.path.join(dataset_dir, "test_metadata.csv"), index=False)


Skipping already processed file: Alexandria_A_Hawk_And_A_Hacksaw.mp3
Processing row 1203: C:\Users\furla\OneDrive\Desktop\Dataset\songs\A_Broken_Road_Lined_with_Poplar_Trees_A_Hawk_And_A_Hacksaw.mp3
Processing row 1204: C:\Users\furla\OneDrive\Desktop\Dataset\songs\A_Song_for_Old_People__A_Song_for_Young_People_A_Hawk_And_A_Hacksaw.mp3
Processing row 1205: C:\Users\furla\OneDrive\Desktop\Dataset\songs\The_Shepherd_Dogs_Are_Calling_A_Hawk_And_A_Hacksaw.mp3
Processing row 1206: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Night_Sneaker_A_Hawk_And_A_Hacksaw.mp3
Processing row 1207: C:\Users\furla\OneDrive\Desktop\Dataset\songs\The_Magic_Spring_A_Hawk_And_A_Hacksaw.mp3
Processing row 1208: C:\Users\furla\OneDrive\Desktop\Dataset\songs\The_Sky_Is_Blue_The_Desert_Is_Yellow_A_Hawk_And_A_Hacksaw.mp3
Processing row 1209: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Babayaga_A_Hawk_And_A_Hacksaw.mp3
Processing row 1210: C:\Users\furla\OneDrive\Desktop\Dataset\songs\The_Washing_Bear_A_Hawk_And_

  return pitch_tuning(


Processing row 2605: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_8_Lull.mp3
Processing row 2606: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_9_Lull.mp3
Processing row 2607: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_10_Lull.mp3
Processing row 2608: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_11_Lull.mp3
Processing row 2609: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_12_Lull.mp3
Processing row 2610: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_13_Lull.mp3
Processing row 2611: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_14_Lull.mp3
Processing row 2612: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_15_Lull.mp3
Processing row 2613: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_16_Lull.mp3
Processing row 2614: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_17_Lull.mp3
Processing row 2615: C:\Users\furla\OneDrive\Desktop\Dataset\songs\Moment_18_Lull.mp3
Processing row 2616: C:\Users\furla\OneDrive\Desktop\Dat