# Feature extraction
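Input: MP3 preview files and the Spotify metadata CSV
Output: track metadata enriched with content-derived features (CLMR tag predictions and an audio-based energy score)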


In [None]:
import librosa
import pandas as pd
import os
import numpy as np

# In the preprocess module, I've declared most of my functions.
import preprocess
import importlib
# Re-import preprocess so that edits made to the module are picked up
# without restarting the kernel.
importlib.reload(preprocess)

# Render floats with three decimals whenever pandas displays them.
pd.set_option('display.float_format', lambda x: f'{x:.3f}')

## Timbre extraction

In [None]:
# Define filepaths
metadata_filepath = '../dataset/SpotifyAudioFeaturesApril2019_scraped_selection.csv'
mp3_folder = '../user_evaluation_app/static/mp3_previews'

# List the folder only once: os.listdir returns entries in arbitrary order and
# the folder could change between two calls, so deriving both lists from the
# same listing guarantees that mp3_files[i] and mp3_ids[i] refer to the same file.
mp3_filenames = [file for file in os.listdir(mp3_folder) if file.endswith('.mp3')]
mp3_files = [os.path.join(mp3_folder, file) for file in mp3_filenames]
# The id of a file is its name without the '.mp3' extension
mp3_ids = [file[:-len('.mp3')] for file in mp3_filenames]

# Folder name handed to the preprocess helpers for the CLMR predictions
data_folder = 'clmr_predictions'


In [None]:
# Obtain the CLMR predictions for all MP3 files through the preprocess helper.
# NOTE(review): judging by the helper's name it loads a previously saved
# DataFrame when one exists and otherwise processes the files and saves the
# result — confirm in preprocess.py.
clmr_predictions = preprocess.load_dataframe_if_exists_else_process_and_save(
    data_folder,
    mp3_files,
    preprocess.process_audio_files_CLMR,
    preprocess.save_dataframe_to_csv,
)
clmr_predictions

In [None]:
# Read the Spotify metadata table from its CSV file and display it
metadata = pd.read_csv(filepath_or_buffer=metadata_filepath)
metadata

## Energy/arousal extraction

In [None]:
n_files = len(mp3_ids)

# One slot per MP3 file for each content-derived feature
rms_energy_values = np.zeros(n_files)
contrast_values = np.zeros(n_files)
dynamic_range_values = np.zeros(n_files)

# The loop variable is named `track_id` rather than `id`: it stays defined at
# notebook scope after the loop and would otherwise shadow the builtin `id`.
for i, track_id in enumerate(mp3_ids):
    y, sr = librosa.load(f'{mp3_folder}/{track_id}.mp3')

    # Compute the RMS values once and reuse them for both the mean energy and
    # the dynamic range (this is the expensive part of the loop body).
    rms_energy_frames = librosa.feature.rms(y=y)
    # Spectral contrast is averaged along axis 1 first, then collapsed into a
    # single number per track below.
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1)

    # Store results
    rms_energy_values[i] = rms_energy_frames.mean()
    contrast_values[i] = np.mean(spectral_contrast)
    # Dynamic range: spread between the largest and smallest RMS value
    dynamic_range_values[i] = np.max(rms_energy_frames) - np.min(rms_energy_frames)

    # No Spotify energy lookup here: the value was never used, and indexing
    # `.values[0]` raised IndexError for any MP3 without a metadata row.
    # Tracks are matched with the metadata later via merges on 'track_id'.

# Create a DataFrame
energy_features_unscaled = pd.DataFrame({
    'track_id': mp3_ids,
    'RMS Energy': rms_energy_values,
    'Spectral Contrast': contrast_values,
    'Dynamic Range': dynamic_range_values,
})


In [None]:
# Summary statistics of the raw (unscaled) energy features
energy_features_unscaled.describe()

Min-max normalize each audio feature to the range 0 to 1 (inverted, i.e. 1 to 0, in the case of spectral contrast). The original values stay available in `energy_features_unscaled` for comparison.

In [None]:
# Min-max scaling done by hand
energy_features_scaled = energy_features_unscaled.copy()

# (source column, scaled column, invert?) — listed in the order in which the
# scaled columns are appended to the frame.
scaling_plan = (
    ('RMS Energy', 'RMS Energy Scaled', False),
    ('Dynamic Range', 'Dynamic Range Scaled', False),
    ('Spectral Contrast', 'Spectral Contrast Scaled', True),
)
for source_column, scaled_column, inverted in scaling_plan:
    raw_values = energy_features_scaled[source_column]
    lowest = raw_values.min()
    highest = raw_values.max()
    normalized = (raw_values - lowest) / (highest - lowest)
    if inverted:
        # Flip the scale so that the largest raw value maps to 0
        normalized = 1 - normalized
    energy_features_scaled[scaled_column] = normalized

# Only the scaled columns (plus track_id) are kept in this frame
energy_features_scaled = energy_features_scaled.drop(
    columns=['RMS Energy', 'Spectral Contrast', 'Dynamic Range']
)
energy_features_scaled.describe()

In [None]:
merged_df_energy = metadata.merge(energy_features_scaled, on='track_id')

# Lower bounds on the absolute correlation, checked from strongest to weakest;
# anything that exceeds none of them is reported as "Weak".
strength_labels = ((0.7, "Very Strong"), (0.5, "Strong"), (0.2, "Moderate"))

# Report how each scaled audio feature correlates with the Spotify energy
for feature in ('RMS Energy Scaled', 'Spectral Contrast Scaled', 'Dynamic Range Scaled'):
    correlation = merged_df_energy['energy'].corr(merged_df_energy[feature])
    print(f"Correlation between Spotify Energy and {feature}: {correlation}")
    magnitude = abs(correlation)
    label = next((name for bound, name in strength_labels if magnitude > bound), "Weak")
    print(f"{label} correlation between Spotify Energy and {feature}")
    print("--------------------\n\n\n")

In [None]:
# Weight applied to each scaled feature before the features are summed.
# NOTE(review): the weights presumably come from the correlations printed
# above — confirm where 0.78 / 0.54 / 0.78 originate.
feature_weights = {
    'RMS Energy Scaled': 0.78,
    'Dynamic Range Scaled': 0.54,
    'Spectral Contrast Scaled': 0.78,
}

energy_features_scaled_by_factor = energy_features_scaled.copy()
for weighted_column, weight in feature_weights.items():
    energy_features_scaled_by_factor[weighted_column] = (
        energy_features_scaled_by_factor[weighted_column] * weight
    )

In [None]:
# Combine the three weighted features into one row-wise sum per track
weighted_columns = ['RMS Energy Scaled', 'Dynamic Range Scaled', 'Spectral Contrast Scaled']

energy_feature_summed_unscaled = energy_features_scaled_by_factor[['track_id']].copy()
energy_feature_summed_unscaled['new_energy'] = (
    energy_features_scaled_by_factor[weighted_columns].sum(axis=1)
)

energy_feature_summed_unscaled

In [None]:
# Min-max scale the summed energy back onto the 0..1 range
energy_feature_summed_scaled = energy_feature_summed_unscaled.copy()

summed_energy = energy_feature_summed_scaled['new_energy']
lowest_sum = summed_energy.min()
highest_sum = summed_energy.max()
energy_feature_summed_scaled['new_energy'] = (summed_energy - lowest_sum) / (highest_sum - lowest_sum)

energy_feature_summed_scaled.describe()

In [None]:
merged_df_new_energy = metadata.merge(energy_feature_summed_scaled, on='track_id')

# Lower bounds on the absolute correlation, checked from strongest to weakest;
# anything that exceeds none of them is reported as "Weak".
strength_labels = ((0.7, "Very Strong"), (0.5, "Strong"), (0.2, "Moderate"))

# Report how the combined energy feature correlates with the Spotify energy
for feature in ('new_energy',):
    correlation = merged_df_new_energy['energy'].corr(merged_df_new_energy[feature])
    print(f"Correlation between Spotify Energy and {feature}: {correlation}")
    magnitude = abs(correlation)
    label = next((name for bound, name in strength_labels if magnitude > bound), "Weak")
    print(f"{label} correlation between Spotify Energy and {feature}")
    print("--------------------\n\n\n")

In [None]:
# Expose the combined feature under the column name 'energy'
new_energy_df = energy_feature_summed_scaled.rename(columns={'new_energy': 'energy'})


# Merge data

In [None]:
# Folder name passed to the save helper for the merged feature table
all_metadata_folder = 'audio_features'

In [None]:
# Prefix every column with its origin, then restore the plain 'track_id'
# name so that it can serve as the join key.
metadata_prefixed = (
    metadata
    .add_prefix('spotify ')
    .rename(columns={'spotify track_id': 'track_id'})
)
clmr_predictions_prefixed = (
    clmr_predictions
    .add_prefix('MTT ')
    .rename(columns={'MTT track_id': 'track_id'})
)

# Spotify columns that are not carried over into the final table
columns_to_remove = [
    'spotify acousticness',
    'spotify danceability',
    'spotify instrumentalness',
    'spotify key',
    'spotify liveness',
    'spotify loudness',
    'spotify mode',
    'spotify speechiness',
    'spotify tempo',
    'spotify time_signature',
    'spotify popularity',
    'spotify energy',
    'spotify duration_ms',
]

# Join metadata, CLMR predictions and the new energy feature on 'track_id',
# index the result by 'track_id' and drop the unwanted Spotify columns.
merged_df = (
    metadata_prefixed
    .merge(clmr_predictions_prefixed, on="track_id")
    .merge(new_energy_df, on='track_id')
    .set_index('track_id')
    .drop(columns=columns_to_remove)
)

# Keep the first occurrence of each track_id and remove subsequent duplicates
duplicates = merged_df.index.duplicated()
merged_df = merged_df[~duplicates] if duplicates.any() else merged_df

merged_df

In [None]:
# Save the merged table through the preprocess helper and keep its return value.
# NOTE(review): presumably the location of the written CSV, given that it is
# passed to pd.read_csv in the loading hint below — confirm in preprocess.py.
merged_df_loc = preprocess.save_dataframe_to_csv(merged_df, all_metadata_folder)

In [None]:
# load with
# pd.read_csv(merged_df_loc, index_col='track_id')