In [29]:
import h5py
import os
import numpy as np
import tensorflow as tf
import pandas as pd
import time

from dotenv import load_dotenv

from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

from my_functions.my_fun import *
from my_functions.my_models import vgg16,vgg16_svm
from my_functions.My_nn import My_nn


In [3]:
load_dotenv()

ASSETS_DIR = os.getenv('ASSETS_DIR')
ANOT_DIR = os.getenv('PMEMO_ANOTATIONS')
stat_anot_file = os.path.join(ANOT_DIR,'static_annotations.csv')
META_DIR = os.getenv('PMEMO_META')
aux_dir = META_DIR.split('.')[0]
META_DIR = f'{aux_dir}_total.csv'


In [4]:
chromas_m = load_spectrograms(filename='chromas_magnitud')

In [5]:
chromas_m.shape

(767, 224, 224, 3)

In [6]:
anot_df = pd.read_csv(stat_anot_file)
anot_df.head(10)

Unnamed: 0,musicId,Arousal(mean),Valence(mean)
0,1,0.4,0.575
1,4,0.2625,0.2875
2,5,0.15,0.2
3,6,0.5125,0.35
4,7,0.7,0.725
5,8,0.3875,0.225
6,9,0.45,0.2875
7,10,0.4375,0.425
8,12,0.6875,0.575
9,13,0.825,0.5875


In [7]:
labels = anot_df[['Arousal(mean)','Valence(mean)']].values
print(labels)
print(labels.shape)

[[0.4    0.575 ]
 [0.2625 0.2875]
 [0.15   0.2   ]
 ...
 [0.7125 0.6625]
 [0.875  0.775 ]
 [0.6625 0.575 ]]
(767, 2)


In [8]:
pmemo_meta = pd.read_csv(META_DIR)
pmemo_meta.head(2)

Unnamed: 0,musicId,fileName,title,artist,album,duration,chorus_start_time,chorus_end_time
0,1,1.mp3,Good Drank,2 Chainz,"Def Jam Presents: Direct Deposit, Vol. 2",32.1,02:35,03:05
1,4,4.mp3,X Bitch (feat. Future),21 Savage,Savage Mode,28.09,03:00,03:26


In [9]:
indices = np.arange(len(pmemo_meta))
idx = np.random.randint(0,indices.shape[0])
print(idx)
print(pmemo_meta.iloc[idx])

470
musicId                            603
fileName                       603.mp3
title                No More Sad Songs
artist                      Little Mix
album                No More Sad Songs
duration                          22.1
chorus_start_time                03:00
chorus_end_time                  03:20
Name: 470, dtype: object


In [10]:
start = time.time()

svm_16 = vgg16_svm()
features = svm_16.predict(chromas_m)

end_time = time.time()
elapsed_time = end_time - start
print(f'Tiempo para obtener caracteristicas profundas: {elapsed_time}')

print(features.shape)

arousal = labels[:,0]
valence = labels[:,1]

print(arousal.shape)
print(valence.shape)


Tiempo para obtener caracteristicas profundas: 49.488341331481934
(767, 512)
(767,)
(767,)


In [15]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


In [24]:
svr = SVR(kernel='linear')

# Envolver el modelo de SVR con MultiOutputRegressor para predecir múltiples salidas
multi_output_svr = MultiOutputRegressor(svr)
multi_output_svr.fit(X_train, y_train)

# Predecir los valores de Arousal y Valence
y_pred = multi_output_svr.predict(X_test)

# Evaluar el rendimiento usando el MSE para ambas salidas (Arousal y Valence)
mse = mean_squared_error(y_test, y_pred)
print(f'MSE para Arousal y Valence: {mse}')
# Calcular el R^2 score global
r2 = r2_score(y_test, y_pred)
print(f'R^2 score global: {r2}')

MSE para Arousal y Valence: 0.02709574154350609
R^2 score global: 0.049535824289244745


In [25]:
# Separar las predicciones
y_pred_arousal = y_pred[:, 0]  # Predicciones de Arousal
y_pred_valence = y_pred[:, 1]  # Predicciones de Valence

# Evaluar MSE para Arousal y Valence por separado
mse_arousal = mean_squared_error(y_test[:, 0], y_pred_arousal)
mse_valence = mean_squared_error(y_test[:, 1], y_pred_valence)

print(f'MSE para Arousal: {mse_arousal}')
print(f'MSE para Valence: {mse_valence}')

MSE para Arousal: 0.03136701272731712
MSE para Valence: 0.02282447035969508


In [26]:
print(np.sort(y_test))

[[0.2125     0.675     ]
 [0.4875     0.5625    ]
 [0.6        0.7375    ]
 [0.675      0.675     ]
 [0.65       0.725     ]
 [0.5125     0.675     ]
 [0.4875     0.6625    ]
 [0.675      0.6875    ]
 [0.55       0.575     ]
 [0.6375     0.825     ]
 [0.75       0.7625    ]
 [0.325      0.525     ]
 [0.525      0.6125    ]
 [0.275      0.3375    ]
 [0.45       0.725     ]
 [0.3        0.5625    ]
 [0.6875     0.775     ]
 [0.3875     0.6       ]
 [0.275      0.35      ]
 [0.525      0.5375    ]
 [0.6375     0.7125    ]
 [0.4875     0.7       ]
 [0.825      0.8375    ]
 [0.3375     0.4125    ]
 [0.175      0.825     ]
 [0.5125     0.65      ]
 [0.475      0.8375    ]
 [0.1375     0.275     ]
 [0.3625     0.5       ]
 [0.6375     0.725     ]
 [0.4        0.5       ]
 [0.6125     0.6625    ]
 [0.3125     0.4375    ]
 [0.45       0.6125    ]
 [0.6125     0.7       ]
 [0.675      0.7       ]
 [0.725      0.85      ]
 [0.725      0.7625    ]
 [0.6625     0.7875    ]
 [0.7625     0.8       ]


In [27]:
print(np.sort(y_pred))

[[0.64171127 0.6826784 ]
 [0.53646453 0.54201662]
 [0.59228066 0.62858497]
 [0.60888375 0.65031721]
 [0.54879068 0.5622192 ]
 [0.60037261 0.63084515]
 [0.54095168 0.54522456]
 [0.59278958 0.6072804 ]
 [0.64644681 0.6667943 ]
 [0.63938525 0.6488829 ]
 [0.58877467 0.62638289]
 [0.64139616 0.67641212]
 [0.64739804 0.71145121]
 [0.48425638 0.4966415 ]
 [0.63352692 0.68002213]
 [0.58923333 0.60977214]
 [0.60403017 0.6622334 ]
 [0.64413927 0.68314058]
 [0.52439211 0.53154161]
 [0.60765306 0.63489821]
 [0.63546924 0.68969418]
 [0.62172303 0.6548506 ]
 [0.64488801 0.68063417]
 [0.51213236 0.55015332]
 [0.51875199 0.53694011]
 [0.60382833 0.66457239]
 [0.63392375 0.68129984]
 [0.57065259 0.58362169]
 [0.58267182 0.59066373]
 [0.5674561  0.58956224]
 [0.33229561 0.39921814]
 [0.58212649 0.58929553]
 [0.67631488 0.71027748]
 [0.51357613 0.53918596]
 [0.62753768 0.65971494]
 [0.63916145 0.69204047]
 [0.63352303 0.68571871]
 [0.60535618 0.64899965]
 [0.60101279 0.60882402]
 [0.65828836 0.70306285]


In [30]:
# Crear el modelo de regresión lineal múltiple
multi_output_lr = LinearRegression()
multi_output_lr.fit(X_train, y_train)


In [31]:
# Predecir los valores de Arousal y Valence
y_pred = multi_output_lr.predict(X_test)


In [32]:
# Evaluar el rendimiento global
r2_global = r2_score(y_test, y_pred)
print(f'R^2 score global: {r2_global}')

# Separar las predicciones
y_pred_arousal = y_pred[:, 0]  # Predicciones de Arousal
y_pred_valence = y_pred[:, 1]  # Predicciones de Valence

# Evaluar MSE y R^2 para Arousal y Valence por separado
mse_arousal = mean_squared_error(y_test[:, 0], y_pred_arousal)
mse_valence = mean_squared_error(y_test[:, 1], y_pred_valence)

r2_arousal = r2_score(y_test[:, 0], y_pred_arousal)
r2_valence = r2_score(y_test[:, 1], y_pred_valence)

print(f'MSE para Arousal: {mse_arousal}')
print(f'MSE para Valence: {mse_valence}')
print(f'R^2 score para Arousal: {r2_arousal}')
print(f'R^2 score para Valence: {r2_valence}')


R^2 score global: 0.027639373848434956
MSE para Arousal: 0.0312994744697446
MSE para Valence: 0.02389562389531289
R^2 score para Arousal: 0.07667065499641668
R^2 score para Valence: -0.021391907299546542


In [33]:
print(np.sort(y_pred))

[[0.60633993 0.6870518 ]
 [0.48188043 0.5641608 ]
 [0.58885574 0.61371565]
 [0.561707   0.6375413 ]
 [0.5405364  0.55786514]
 [0.59306455 0.65774345]
 [0.60500073 0.64530087]
 [0.579546   0.6487298 ]
 [0.6503897  0.6645472 ]
 [0.6973939  0.7178931 ]
 [0.5991111  0.6056931 ]
 [0.64910626 0.6732178 ]
 [0.6580534  0.7262111 ]
 [0.38371015 0.43838024]
 [0.61708045 0.70065784]
 [0.56943846 0.6399746 ]
 [0.5728588  0.65416527]
 [0.6558695  0.6703701 ]
 [0.46887016 0.47551537]
 [0.6189966  0.65469027]
 [0.648057   0.67694044]
 [0.6253216  0.6301875 ]
 [0.65934944 0.66204953]
 [0.55646276 0.6212158 ]
 [0.5287039  0.54683447]
 [0.55711746 0.67353964]
 [0.64788795 0.70121145]
 [0.61235905 0.640712  ]
 [0.5323446  0.63825893]
 [0.55255413 0.57443595]
 [0.3035071  0.3206234 ]
 [0.53267956 0.5541601 ]
 [0.58918667 0.74598503]
 [0.51585317 0.53239965]
 [0.63670397 0.6652727 ]
 [0.6563349  0.7181916 ]
 [0.5909836  0.66044617]
 [0.5940697  0.6090069 ]
 [0.5885241  0.6026993 ]
 [0.6758406  0.7191615 ]
