In [1]:
# Load variables from a .env file into the process environment, then read the
# dataset location from DATASET_DIR.
from dotenv import load_dotenv
load_dotenv()
import os
DATASET_DIR = os.getenv("DATASET_DIR")
# Fail fast: os.getenv returns None for an unset variable, and the loaders
# below would otherwise crash inside os.path.join with an unrelated TypeError.
if DATASET_DIR is None:
    raise RuntimeError(
        "DATASET_DIR is not set; define it in the environment or in a .env file."
    )

In [2]:
import pandas as pd
def _load_static_features():
    """Read ``static_features.csv`` from ``DATASET_DIR`` into a DataFrame.

    The first CSV column becomes the row index of the returned frame.
    """
    return pd.read_csv(
        os.path.join(DATASET_DIR, 'static_features.csv'),
        index_col=0,
    )

def load_static_features_and_valence():
    """Return the static features joined with the ``mean_V`` column.

    Only ``musicId`` and ``mean_V`` are read from ``V_static.csv``; the first
    of the selected columns is used as the index. The annotation column is
    joined onto the feature frame by index and any row containing a missing
    value is dropped.
    """
    features = _load_static_features()
    valence_path = os.path.join(DATASET_DIR, 'V_static.csv')
    valence_scores = pd.read_csv(
        valence_path,
        usecols=['musicId', 'mean_V'],
        index_col=0,
    )
    merged = features.join(valence_scores)
    return merged.dropna()

def load_static_features_and_arousal():
    """Return the static features joined with the ``mean_A`` column.

    Only ``musicId`` and ``mean_A`` are read from ``A_static.csv``; the first
    of the selected columns is used as the index. The annotation column is
    joined onto the feature frame by index and any row containing a missing
    value is dropped.
    """
    features = _load_static_features()
    arousal_path = os.path.join(DATASET_DIR, 'A_static.csv')
    arousal_scores = pd.read_csv(
        arousal_path,
        usecols=['musicId', 'mean_A'],
        index_col=0,
    )
    merged = features.join(arousal_scores)
    return merged.dropna()

In [3]:
from sklearn.utils import shuffle
from math import sqrt, floor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

def _get_train_test_data(data):
    data = shuffle(data)
    features = data[data.columns[:-1]].values
    labels = data[data.columns[-1]].values

    n_samples = labels.shape[0]
    train_num = floor(n_samples*10/11)

    X_train, X_test, y_train, y_test = features[:train_num], features[train_num:], labels[:train_num], labels[train_num:]
    return X_train, X_test, y_train, y_test

def train_and_test(data):
    """Fit an RBF-kernel SVR on a random split of ``data`` and print test metrics.

    ``data`` is split by ``_get_train_test_data``; the model is fitted on the
    training part and evaluated on the held-out part. The root-mean-squared
    error and the Pearson correlation between true and predicted labels are
    printed. Nothing is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns followed by a single label column.
    """
    X_train, X_test, y_train, y_test = _get_train_test_data(data)

    # NOTE(review): epsilon is the width of the SVR no-penalty tube; if the
    # labels span a narrow range, 0.2 may put most targets inside the tube
    # and yield near-constant predictions -- confirm against the label scale.
    clf = SVR(kernel='rbf', gamma='scale', C=1.0, epsilon=0.2)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    rmse = sqrt(mean_squared_error(y_test, y_pred))
    # pearsonr yields (coefficient, p-value); only the coefficient is reported.
    r, _ = pearsonr(y_test, y_pred)
    print(f'RMSE: {rmse}, R: {r}')

In [7]:
# Load both annotated datasets up front, then train and evaluate one model
# per emotion dimension.
data_v = load_static_features_and_valence()
data_a = load_static_features_and_arousal()

for banner, dataset in (
    ('In Valence dimension...', data_v),
    ('In Arousal dimension...', data_a),
):
    print(banner)
    train_and_test(dataset)


In Valence dimension...
RMSE: 0.17665895499447476, R: 0.18387794030468066
In Arousal dimension...
RMSE: 0.16201368428281698, R: 2.9661822647556196e-16
