In [None]:
import os
import csv
import IPython
from math import sqrt

#import opensmile
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, make_scorer
from catboost.utils import eval_metric
from catboost import CatBoostRegressor
from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
sns.set_palette('husl')

### Извлечение более 6 тысяч Low-level descriptors из аудиозаписей

In [None]:
# Build the openSMILE extractor: ComParE 2016 feature set at the functionals level.
# NOTE(review): `import opensmile` is commented out in the imports cell, so on a
# fresh kernel this cell raises NameError until that import is restored.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)
# Process a single track only to capture the names of the produced feature columns.
# NOTE(review): hard-coded absolute path; the file must exist in the runtime.
song_df = smile.process_file('/content/A003.mp3')
columns = song_df.columns

In [None]:
def exctract_features(columns,
                      music_dir='/content/gdrive/MyDrive/PMEmo/music',
                      annotation_path='/content/gdrive/MyDrive/PMEmo/annotation.csv',
                      extractor=None):
    """Extract features for every file in ``music_dir`` and attach its targets.

    Each file is run through the extractor, the ``start``/``end`` index levels are
    dropped, and the ``arousal``/``valence`` values of the matching annotation row
    are appended as columns.

    Args:
        columns: Feature column names; they define the layout of the result
            (``song_name`` + ``columns`` + ``arousal`` + ``valence``).
        music_dir: Directory holding the audio files.
        annotation_path: CSV with ``song_id``, ``arousal`` and ``valence`` columns.
        extractor: Object exposing ``process_file(path)``; defaults to the
            notebook-level ``smile`` extractor.

    Returns:
        pandas.DataFrame with one block of rows per processed file, re-indexed
        from 0 and aligned to the column layout described above.

    Raises:
        IndexError: If a file has no matching row in the annotation table.
    """
    if extractor is None:
        extractor = smile  # notebook-level openSMILE extractor

    expected_columns = ['song_name'] + list(columns) + ['arousal', 'valence']
    annotation = pd.read_csv(annotation_path)

    frames = []          # collected once, concatenated once (no quadratic concat)
    seen_names = set()   # song names already processed in this run

    for song in sorted(os.listdir(music_dir)):
        song_name = song.split('.')[0]
        if song_name in seen_names:
            print(f"{song_name} уже есть в данных")
            continue

        temp_df = extractor.process_file(os.path.join(music_dir, song)).reset_index()
        temp_df = temp_df.rename(columns={'file': 'song_name'}).drop(columns=['start', 'end'])
        # Derive the name from the file itself instead of a fixed position in the path.
        temp_df['song_name'] = song_name

        # NOTE(review): if `song_id` is parsed as integers it can never equal the
        # string file stem - confirm the dtype of the annotation column.
        matches = annotation[annotation['song_id'] == song_name]
        if matches.empty:
            raise IndexError(f"no annotation row for song {song_name!r}")
        temp_df['arousal'] = matches['arousal'].values[0]
        temp_df['valence'] = matches['valence'].values[0]

        frames.append(temp_df)
        seen_names.add(song_name)

    if not frames:
        return pd.DataFrame(columns=expected_columns)
    return pd.concat(frames, axis=0, ignore_index=True).reindex(columns=expected_columns)
    

### Подготовка данных

In [None]:
# Load the feature table from CSV (presumably the saved output of the extraction step).
# NOTE(review): absolute, machine-specific path - the notebook will not run elsewhere
# without editing it.
df = pd.read_csv('/Users/tanchik/Desktop/project_res/notebooks/low_level_features.csv')

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope,arousal,valence
count,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,...,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0,18212.0
mean,3.486758,0.470396,0.528729,3.321246,3.878902,4.422869,0.557655,0.543968,1.101623,2.541901,...,2.12028,2.11922,0.187541,0.489067,99.113189,45.70619,91.002601,46.511071,0.632719,0.606813
std,1.632748,0.304583,0.314771,1.467322,1.494135,1.495864,0.384867,0.369319,0.623077,1.353937,...,0.567953,0.556202,15.609393,0.115057,21.920489,15.852692,22.51223,15.345515,0.176778,0.160025
min,0.0,0.0,0.0,0.001034,0.001034,0.001034,0.0,0.0,0.0,0.001034,...,0.0,0.0,-20.0,0.0,0.0,0.0,0.0,0.0,0.097725,0.107512
25%,2.330396,0.193548,0.247312,2.163333,2.813091,3.451104,0.284032,0.290107,0.647101,1.422364,...,1.727306,1.733308,-15.473883,0.409227,84.092551,34.670298,75.603138,35.969055,0.514067,0.500014
50%,3.268075,0.462366,0.537634,3.350773,3.99758,4.597629,0.466273,0.455667,0.995874,2.422369,...,2.045469,2.04268,-2.779635,0.486257,96.931168,43.355446,89.343377,44.406956,0.657797,0.630831
75%,4.478256,0.741935,0.817204,4.462605,5.026137,5.539941,0.734821,0.699023,1.438475,3.564156,...,2.432932,2.419441,15.695458,0.565662,111.827034,54.13343,104.422391,54.473214,0.777056,0.735547
max,10.963795,0.989247,0.989247,7.457852,8.212877,8.547154,3.847158,3.903276,5.220629,7.094428,...,5.682448,5.678403,20.0,0.8947,221.27661,161.57304,224.80377,182.27615,0.968825,0.927344


In [None]:
# Regression targets: one series per emotion dimension.
y_arousal = df['arousal']
y_valence = df['valence']
# Feature matrix: everything except the two targets and the song identifier.
x = df.drop(columns=['arousal', 'valence', 'song_name'])

In [None]:
# Hold out 25% of the rows for testing, once per target, with a fixed seed.
# NOTE(review): the split is purely random over rows; if the table holds several
# rows per song, the same song can land in both train and test - TODO confirm and
# consider a group-wise split on the song identifier.
split_options = dict(test_size=0.25, random_state=42)

(x_train_arousal, x_test_arousal,
 y_train_arousal, y_test_arousal) = train_test_split(x, y_arousal, **split_options)

(x_train_valence, x_test_valence,
 y_train_valence, y_test_valence) = train_test_split(x, y_valence, **split_options)

In [None]:
def _as_frame(values, like):
    """Wrap a scaled array in a DataFrame carrying the columns and index of `like`."""
    return pd.DataFrame(values, columns=like.columns, index=like.index)


# One scaler per target: fitted on the training split, then applied to the test split.
ss_arousal = MinMaxScaler()
x_train_arousal = _as_frame(ss_arousal.fit_transform(x_train_arousal), x_train_arousal)
x_test_arousal = _as_frame(ss_arousal.transform(x_test_arousal), x_test_arousal)

ss_valence = MinMaxScaler()
x_train_valence = _as_frame(ss_valence.fit_transform(x_train_valence), x_train_valence)
x_test_valence = _as_frame(ss_valence.transform(x_test_valence), x_test_valence)

In [None]:
# Number of cross-validation folds.
K = 5
# Shuffled K-fold splitter with a fixed seed; handed to cat_boost_cross_val.
kf = KFold(n_splits = K, random_state = 42, shuffle = True)


def _best_validation_score(best_score):
    """Reduce the nested dict from ``get_best_score()`` to a single float.

    The dict maps a pool name to ``{metric_name: value}``. The first pool other
    than ``'learn'`` is used (falling back to ``'learn'`` when there is none), and
    the first metric reported for that pool is returned.
    """
    eval_pools = [name for name in best_score if name != 'learn']
    pool_scores = best_score[eval_pools[0]] if eval_pools else best_score['learn']
    return next(iter(pool_scores.values()))


def cat_boost_cross_val(model, x, y, kf):
    """Cross-validate a CatBoost model and report per-fold scores.

    For every split produced by ``kf`` the model is refit on the training part
    with the validation part as ``eval_set`` (``use_best_model=True``), then the
    MAE on the validation part and the model's best evaluation score are recorded.

    Args:
        model: CatBoost estimator; it is refit in place on every fold.
        x: Feature DataFrame (indexed positionally via ``.iloc``).
        y: Target Series aligned with ``x``.
        kf: Splitter exposing ``split(x)`` that yields (train, validation) indices.

    Returns:
        Mean of the per-fold best evaluation scores.
    """
    scores = []
    metrics = []
    for counter, (train_index, test_index) in enumerate(kf.split(x)):
        print(f'Folder {counter}')
        x_train, x_valid = x.iloc[train_index, :], x.iloc[test_index, :]
        y_train, y_valid = y.iloc[train_index], y.iloc[test_index]
        model.fit(x_train, y_train,
                  eval_set=(x_valid, y_valid),
                  verbose_eval=1000,
                  use_best_model=True)
        y_pred = model.predict(x_valid)
        # eval_metric returns a list of values; collapse it to one float per fold.
        metric = float(np.mean(eval_metric(y_valid, y_pred, 'MAE')))
        metrics.append(metric)
        # get_best_score() returns a nested dict, which cannot be averaged directly;
        # keep only the numeric evaluation score.
        best_score = _best_validation_score(model.get_best_score())
        scores.append(best_score)
        print(f'best score: {best_score}')
        print(f'best metric: {metric}')
    print(f'result mean score: {np.mean(scores)}')
    print(f'result mean metric: {np.mean(metrics)}')
    return np.mean(scores)


def rmse(y, y_pred):
    """Root mean squared error between the targets `y` and predictions `y_pred`."""
    mse = mean_squared_error(y, y_pred)
    return sqrt(mse)
# Scorer mapping for sklearn model selection. greater_is_better=False marks RMSE as
# a loss, so sklearn reports it sign-flipped (higher is better).
# NOTE(review): `scorer` is not referenced by any other cell visible in this notebook.
scorer = {'rmse': make_scorer(rmse, greater_is_better=False)}

In [None]:
# Step 1 - coarse grid search over tree depth and learning rate, using a short
# 100-iteration model on the arousal training split.
model_cat_arousal = CatBoostRegressor(iterations=100, random_seed=42,
                              loss_function='RMSE')

parameters = {'depth': [4, 6, 8, 10],
              'learning_rate' : [0.01, 0.05, 0.1]}

# NOTE(review): the search result is not stored; the depth / learning_rate used in
# the next step are hard-coded, presumably copied from this search's output.
model_cat_arousal.grid_search(parameters, x_train_arousal, y_train_arousal, plot=True)

# Step 2 - K-fold cross-validation of a fresh 500-iteration model on the training split.
model_cat_arousal = CatBoostRegressor(depth=8, iterations=500, learning_rate=0.1, random_seed=42,
                              loss_function='RMSE', task_type="CPU")

score = cat_boost_cross_val(model_cat_arousal, x_train_arousal, y_train_arousal, kf)

# Step 3 - refit on the whole training split and evaluate on the held-out test split.
# NOTE(review): the test split is also the eval_set with use_best_model=True, so the
# retained iteration is chosen using test data and the MAE below is optimistic.
model_cat_arousal.fit(x_train_arousal, y_train_arousal,
         eval_set=(x_test_arousal, y_test_arousal),
         verbose_eval=1000,
         use_best_model=True
         )
y_pred = model_cat_arousal.predict(x_test_arousal)
# MAE on the test split; computed here but not displayed by this cell.
metric = eval_metric(y_test_arousal, y_pred, 'MAE')

In [None]:
# Final arousal model: 200 iterations, depth 8, learning rate 0.1.
model_cat_arousal = CatBoostRegressor(depth=8, iterations=200, learning_rate=0.1, random_seed=42,
                              loss_function='RMSE', task_type="CPU")

# NOTE(review): the test split doubles as the eval_set used by use_best_model, so it
# takes part in model selection and is not a fully independent hold-out.
model_cat_arousal.fit(x_train_arousal, y_train_arousal,
         eval_set=(x_test_arousal, y_test_arousal),
         verbose_eval=1000,
         use_best_model=True)
y_pred = model_cat_arousal.predict(x_test_arousal)
# eval_metric returns a list, so a one-element list is printed.
metric = eval_metric(y_test_arousal, y_pred, 'MAE')
print(metric)
# Persist the trained model; the relative path resolves against the working directory.
model_cat_arousal.save_model("cat_boost_regressor_arousal.cbm")

0:	learn: 0.1683639	test: 0.1699252	best: 0.1699252 (0)	total: 2.33s	remaining: 7m 42s
199:	learn: 0.0720915	test: 0.1080772	best: 0.1080772 (199)	total: 5m	remaining: 0us

bestTest = 0.1080772472
bestIteration = 199

[0.08504634360525275]


In [None]:
# Final valence model: same hyper-parameters as the arousal model.
model_cat_valence = CatBoostRegressor(depth=8, iterations=200, learning_rate=0.1, random_seed=42,
                              loss_function='RMSE', task_type="CPU")

# NOTE(review): the test split doubles as the eval_set used by use_best_model, so it
# takes part in model selection and is not a fully independent hold-out.
model_cat_valence.fit(x_train_valence, y_train_valence,
         eval_set=(x_test_valence, y_test_valence),
         verbose_eval=1000,
         use_best_model=True)
y_pred = model_cat_valence.predict(x_test_valence)
# eval_metric returns a list, so a one-element list is printed.
metric = eval_metric(y_test_valence, y_pred, 'MAE')
print(metric)
# Persist the trained model; the relative path resolves against the working directory.
model_cat_valence.save_model("cat_boost_regressor_valence.cbm")

0:	learn: 0.1540388	test: 0.1558894	best: 0.1558894 (0)	total: 5.96s	remaining: 19m 46s
199:	learn: 0.0729553	test: 0.1060691	best: 0.1060691 (199)	total: 4m 34s	remaining: 0us

bestTest = 0.1060690837
bestIteration = 199

[0.08360578855032898]


### Make pipeline

In [None]:
# Load the saved arousal regressor for inference.
# NOTE(review): the constructor arguments are presumably superseded by the state read
# from the file; only the arousal model is loaded here, the valence model is not.
model = CatBoostRegressor(depth=8, iterations=200, learning_rate=0.1, random_seed=42,
                              loss_function='RMSE', task_type="CPU")
# NOTE(review): absolute path, whereas the training cell saves the model with a
# relative file name - the two only agree when the working directory is /content.
model.load_model('/content/cat_boost_regressor_arousal.cbm')

# Same openSMILE configuration as the one used for feature extraction.
# NOTE(review): needs `import opensmile`, which is commented out in the imports cell.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
  )

In [None]:
def music_emotion_pipeline(music_dir, music, scaler=None, predictor=None):
  """Extract features from one audio file and predict its emotion score.

  The training cells of this notebook fit the regressors on MinMax-scaled
  features, so the fitted scaler should be supplied here; without it the model
  receives raw openSMILE values on a different scale than it was trained on.

  Args:
    music_dir: Directory containing the audio file.
    music: File name inside ``music_dir``.
    scaler: Optional fitted transformer exposing ``transform``; applied to the
      extracted features before prediction. ``None`` passes them through as is.
    predictor: Optional model exposing ``predict``; defaults to the
      notebook-level ``model``.

  Returns:
    Whatever ``predictor.predict`` returns for the first extracted feature row.
  """
  if predictor is None:
    predictor = model  # notebook-level regressor loaded from disk

  features = smile.process_file(os.path.join(music_dir, music)).reset_index()
  # Keep only the feature columns, in the order produced by the extractor.
  features = features.drop(columns=['start', 'end', 'file'])
  if scaler is not None:
    features = scaler.transform(features)
  first_row = np.asarray(features)[0]
  return predictor.predict(first_row)