# Music emotion classifier

## Setup

In [None]:
!pip install opensmile

### Imports

In [2]:
import lzma
import os
import pickle

import librosa
import opensmile
import pandas as pd

### Constants

In [5]:
# Relative locations of the processed dataset files used by this notebook.
_PROCESSED_DIR = "../data/processed/"

# Directory prefix for the audio files; the trailing slash is kept because
# file names are appended to it by plain string concatenation.
AUDIO_PATH = _PROCESSED_DIR + "audio/"

# CSV file with the static, song-level annotations averaged per song.
ANNOTATIONS_PATH = (
    _PROCESSED_DIR
    + "annotations/"
    + "annotations averaged per song/song_level/"
    + "static_annotations_averaged_songs_1_2000.csv"
)

## Data preparation

### Data loading

In [33]:
# Read the annotations table from the CSV file configured in ANNOTATIONS_PATH.
dataframe = pd.read_csv(ANNOTATIONS_PATH)

# Song identifiers, in the order they appear in the annotations file.
song_id_list = dataframe.loc[:, "song_id"]

# Target columns: mean and standard deviation of the valence and arousal
# annotations (as indicated by the column names).
targets = dataframe.loc[
    :,
    ["valence_mean", "valence_std", "arousal_mean", "arousal_std"],
]

### Data transformation

## Feature engineering

### Feature extraction

Librosa features (run either this cell or the openSMILE cell below)

In [36]:
# Extract librosa descriptors for the first LIBROSA_SONG_LIMIT songs and collect
# them in the `features` DataFrame, one row per song. Each feature cell holds the
# object returned by the corresponding librosa call, without any aggregation.
LIBROSA_SONG_LIMIT = 100  # maximum number of songs processed by this cell

FEATURE_COLUMNS = [
    "song_id", "mfcc", "rolloff", "centroid", "rms", "tempo",
    "onset_env", "zcr", "chromagram", "pitches", "magnitudes",
]

features_list = []

# .head() caps the iteration without a manual counter; the previous counter was
# named `iter`, which shadowed the built-in iter() for every later cell.
for song_id in song_id_list.head(LIBROSA_SONG_LIMIT):
    # Audio files are named "<song_id>.mp3" under AUDIO_PATH; librosa.load is
    # called with its default arguments.
    waveform, sample_rate = librosa.load(AUDIO_PATH + f"{song_id}.mp3")

    mfcc = librosa.feature.mfcc(y=waveform, sr=sample_rate)
    rolloff = librosa.feature.spectral_rolloff(y=waveform, sr=sample_rate)
    centroid = librosa.feature.spectral_centroid(y=waveform, sr=sample_rate)
    rms = librosa.feature.rms(y=waveform)
    tempo = librosa.feature.tempo(y=waveform, sr=sample_rate)
    onset_env = librosa.onset.onset_strength(y=waveform, sr=sample_rate)
    zcr = librosa.feature.zero_crossing_rate(waveform)
    chromagram = librosa.feature.chroma_stft(y=waveform, sr=sample_rate)
    pitches, magnitudes = librosa.piptrack(y=waveform, sr=sample_rate)

    # Keep the value order in sync with FEATURE_COLUMNS.
    features_list.append([
        song_id, mfcc, rolloff, centroid, rms, tempo,
        onset_env, zcr, chromagram, pitches, magnitudes,
    ])

features = pd.DataFrame(data=features_list, columns=FEATURE_COLUMNS)

openSMILE features (alternative to the librosa features above)

In [None]:
import opensmile

def get_matched_smile(audio, valence, arousal, clips_dir='wav_clips/'):
    """Extract openSMILE features for each clip and pair them with its labels.

    Parameters
    ----------
    audio : sized iterable of str
        Clip file names, resolved relative to ``clips_dir``. Must support
        ``len()`` because the total is used for progress output.
    valence, arousal
        Label values placed in the ``valence`` and ``arousal`` columns. They are
        passed to the ``pandas.DataFrame`` constructor next to the per-clip
        feature list.
    clips_dir : str, optional
        Directory containing the clips. Defaults to ``'wav_clips/'``, the
        location that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``features``, ``valence`` and ``arousal``. Each ``features``
        cell holds whatever ``smile.process_file`` returned for that clip.
    """
    # emobase feature set, extracted at the "Functionals" feature level.
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.emobase,
        feature_level=opensmile.FeatureLevel.Functionals,
    )

    total = len(audio)  # hoisted: invariant across the loop
    all_smiles = []  # one openSMILE result per clip, in input order
    for position, file in enumerate(audio, start=1):
        print(f"{position}/{total}")  # progress, e.g. "3/120"
        all_smiles.append(smile.process_file(os.path.join(clips_dir, file)))

    return pd.DataFrame({'features': all_smiles, 'valence': valence, 'arousal': arousal})

# NOTE(review): audio_clips, valence and arousal are not defined in any of the
# cells shown above; they must be created before this cell can run on a fresh
# kernel.
matched_smile_df = get_matched_smile(audio_clips, valence, arousal)

# Optionally save the matched_smile_df DataFrame as an LZMA-compressed pickle file.
# Only load this file back from a trusted source: unpickling can execute arbitrary code.
with lzma.open("matched_smile.xz", "wb") as archive:
    pickle.dump(matched_smile_df, archive)

### Feature selection

## Model

### Training

### Testing

### Validation

## Results

### Metrics

### Visualization