# Music emotion classifier

## Setup

### Imports

In [None]:
import lzma
import pickle

import librosa
import numpy as np
import opensmile
import pandas as pd
from sklearn import svm

### Constants

In [None]:
# Directory prefix (trailing slash included) that audio file names are
# appended to when clips are loaded.
AUDIO_PATH = "../data/processed/wav/"

# CSV read in the data-loading cell; it supplies the song_id, valence_mean
# and arousal_mean columns used below.
ANNOTATIONS_PATH = (
    "../data/processed/annotations/"
    "annotations averaged per song/song_level/"
    "static_annotations_averaged_songs_1_2000.csv"
)

## Data preparation

### Data loading

In [None]:
# Read the song-level annotation table from disk.
dataframe = pd.read_csv(ANNOTATIONS_PATH)

# Song ids identify the audio clips; the two mean columns are the targets.
audio_clips = dataframe["song_id"]
target_columns = ["valence_mean", "arousal_mean"]
targets = dataframe[target_columns]

# Quick visual check of what was loaded.
print(audio_clips)
print(*(targets[column] for column in target_columns))

### Data transformation

## Feature engineering

### Feature extraction

librosa features (use either this cell or the openSMILE cell below)

In [None]:
# Cap on how many songs are processed, so this exploratory cell stays fast.
LIBROSA_MAX_SONGS = 100

# Column order of the resulting table; also the keys of each per-song record.
LIBROSA_FEATURE_COLUMNS = [
    "mfcc",
    "rolloff",
    "centroid",
    "rms",
    "tempo",
    "onset_env",
    "zcr",
    "chromagram",
    "pitches",
    "magnitudes",
]

# One dict per song, so that every song becomes exactly one row.
# (Extending a flat list with the ten arrays would lose the row structure.)
features_list = []

# `audio_clips` is the song_id column loaded from the annotations CSV.
for song_id in audio_clips[:LIBROSA_MAX_SONGS]:
    # Same "<AUDIO_PATH><song_id>.wav" layout that the openSMILE cell reads.
    waveform, sample_rate = librosa.load(f"{AUDIO_PATH}{song_id}.wav")

    pitches, magnitudes = librosa.piptrack(y=waveform, sr=sample_rate)

    # Each value is the raw array returned by librosa; no aggregation is
    # applied here.
    features_list.append(
        {
            "mfcc": librosa.feature.mfcc(y=waveform, sr=sample_rate),
            "rolloff": librosa.feature.spectral_rolloff(y=waveform, sr=sample_rate),
            "centroid": librosa.feature.spectral_centroid(y=waveform, sr=sample_rate),
            "rms": librosa.feature.rms(y=waveform),
            "tempo": librosa.feature.tempo(y=waveform, sr=sample_rate),
            "onset_env": librosa.onset.onset_strength(y=waveform, sr=sample_rate),
            "zcr": librosa.feature.zero_crossing_rate(y=waveform),
            "chromagram": librosa.feature.chroma_stft(y=waveform, sr=sample_rate),
            "pitches": pitches,
            "magnitudes": magnitudes,
        }
    )

# Passing `columns` fixes the column order and yields a correctly-labelled
# empty table when no songs were processed.
features = pd.DataFrame(features_list, columns=LIBROSA_FEATURE_COLUMNS)

openSMILE

In [None]:
def get_matched_smile(audio, targets):
    """Extract openSMILE emobase functionals per clip and pair them with targets.

    Args:
        audio: Sized iterable of clip identifiers. Each clip is read from
            ``AUDIO_PATH + str(identifier) + ".wav"``.
        targets: Table with ``valence_mean`` and ``arousal_mean`` columns,
            expected to hold one row per entry of ``audio`` in the same order.

    Returns:
        A DataFrame with three columns: ``features`` (a flat list of the
        openSMILE values for the clip), ``valence`` and ``arousal``.
    """
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.emobase,
        feature_level=opensmile.FeatureLevel.Functionals,
    )

    total = len(audio)
    all_smiles = []  # one flat feature list per clip
    for position, clip_id in enumerate(audio, start=1):
        # Progress indicator, e.g. "3/10".
        print(f"{position}/{total}")

        filepath = f"{AUDIO_PATH}{clip_id}.wav"
        smile_feats = smile.process_file(filepath)
        # Flatten the returned table to a 1-D list of plain Python numbers.
        all_smiles.append(smile_feats.to_numpy().ravel().tolist())

    return pd.DataFrame(
        {
            'features': all_smiles,
            'valence': targets['valence_mean'],
            'arousal': targets['arousal_mean'],
        }
    )

# Build the matched feature/target table for the first 10 clips only.
matched_smile_df = get_matched_smile(audio_clips[:10], targets[:10])

# Optionally save the matched_smile_df DataFrame as an lzma-compressed pickle file
# with lzma.open("matched_smile.xz", "wb") as f:
#     pickle.dump(matched_smile_df, f)

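Load previously saved openSMILE features from the pickle file (requires `matched_smile.xz`, written by the optional save snippet above)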

In [None]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load an archive that this notebook produced itself.
with lzma.open('matched_smile.xz', 'rb') as archive:
    matched_smile_df = pickle.load(archive)

# Show the first rows, then the table dimensions.
for summary in (matched_smile_df.head(), matched_smile_df.shape):
    print(summary)

### Feature selection

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression

In [None]:
# Same openSMILE configuration as used for extraction; needed here only to
# recover the human-readable feature names.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.emobase,
    feature_level=opensmile.FeatureLevel.Functionals,
)
feature_names = smile.feature_names

# Stack the per-clip feature lists into a single 2-D array (one row per clip).
smile_features = np.stack(matched_smile_df['features'].to_numpy())
# Target columns, one value per clip.
valence_labels = matched_smile_df['valence']
arousal_labels = matched_smile_df['arousal']


def _fit_k_best(features, labels, k=100):
    """Fit a fresh SelectKBest(f_regression) and return (selector, reduced features).

    A new selector is created on every call so that fitting for one target
    does not overwrite the selection made for another.
    """
    k_best = SelectKBest(score_func=f_regression, k=k)  # keep the k best-scoring features
    reduced = k_best.fit_transform(features, labels)
    return k_best, reduced


valence_fit, valence_features = _fit_k_best(smile_features, valence_labels)
# Pass the openSMILE names so the output is meaningful instead of x0, x1, ...
print(valence_fit.get_feature_names_out(feature_names))

arousal_fit, arousal_features = _fit_k_best(smile_features, arousal_labels)
print(arousal_fit.get_feature_names_out(feature_names))

# Keep the original `selector` name; as before, it ends up referring to the
# arousal-fitted selector.
selector = arousal_fit

print(arousal_features.shape, valence_features.shape)



## Model

### Definition

In [None]:
# Create a support-vector classifier with default settings and fit it.
# NOTE(review): `X` and `y` are not defined in any visible cell — confirm which
# feature matrix / label vector is intended (e.g. the selected valence or
# arousal features) before running this cell.
# NOTE(review): SVC is a classifier, while the targets loaded above are
# valence/arousal means — presumably continuous; confirm whether the labels
# need to be discretised or a regressor should be used instead.
emotion_classifier = svm.SVC()
emotion_classifier.fit(X, y)

### Training

### Testing

### Validation

## Results

### Metrics

### Visualization