In [1]:
# Configuration: read the dataset location from the environment (.env supported).
import os

from dotenv import load_dotenv

load_dotenv()  # load .env variables into the process environment

# Directory that holds the CSV files read by the loading functions below.
DATASET_DIR = os.getenv("DATASET_DIR")
if DATASET_DIR is None:
    # os.getenv returns None for an unset variable; fail here with a clear
    # message instead of a TypeError from os.path.join further down.
    raise RuntimeError(
        "DATASET_DIR is not set; define it in the environment or in a .env file."
    )

In [2]:
import csv
import pandas as pd

def _read_dynamic_mean(datapath):
    
    fs = 2 # sampling rate of dynamic annotation
    num_omitted = fs * 0 # already omitted

    data = {}
    with open(datapath) as csvfile:
        reader = csv.reader(csvfile)
        next(reader)
        for row in reader:
            # skip music with too-short length
            if len(row)-1 <= num_omitted: 
                continue
            f_in_this_row = []
            for i in range(len(row)-1-num_omitted):
                f_in_this_row.append(float(row[i+1+num_omitted]))
            data[row[0]] = f_in_this_row
    return data

def _get_features_with_dylabel(features, label_type, dylabels):

    feature_fs = 2 # sampling rate of dynamic annotation
    features[0].append(label_type) # last column name is the label name

    index_without_dylabel = []

    for i in range(len(features)-1):
        musicId = features[i+1][0]
        time_index = float(features[i+1][1]) * feature_fs
        try:
            dy_for_this_song = dylabels[str(musicId)]
        except KeyError:
            index_without_dylabel.append(i+1)
            continue
        try:
            dy_for_this_point = dy_for_this_song[int(time_index)]
            features[i + 1].append(dy_for_this_point)
        except IndexError:
            index_without_dylabel.append(i+1)
    
    for i in range(len(index_without_dylabel)):
        # delete features without label. Attension: after deleting, the index behind will minus 1.
        index = index_without_dylabel[i] - i
        features.pop(index)

    return pd.DataFrame(data=features[1:], columns=features[0])

def load_dynamic_features_and_labels(label_type):
    """Load the dynamic features joined with the labels of one dimension.

    Reads the label file for ``label_type`` and ``dynamic_features.csv`` from
    ``DATASET_DIR`` and combines them with ``_get_features_with_dylabel``.

    Args:
        label_type: ``'valence'`` or ``'arousal'``.

    Returns:
        pandas.DataFrame returned by ``_get_features_with_dylabel``.

    Raises:
        ValueError: If ``label_type`` is neither ``'valence'`` nor
            ``'arousal'``.
    """
    # Validate first so a typo is reported before any file is touched.
    if label_type == 'valence':
        label_file = 'V_dynamic_mean.csv'
    elif label_type == 'arousal':
        label_file = 'A_dynamic_mean.csv'
    else:
        raise ValueError(
            f"label_type must be 'valence' or 'arousal', got {label_type!r}"
        )

    labels = _read_dynamic_mean(os.path.join(DATASET_DIR, label_file))

    # newline='' is how the csv module asks files to be opened for reading.
    with open(os.path.join(DATASET_DIR, 'dynamic_features.csv'), newline='') as f:
        features = list(csv.reader(f))

    return _get_features_with_dylabel(features, label_type, labels)


In [4]:
from sklearn.utils import shuffle
from math import sqrt, floor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

def _get_train_test_data(data):
    data = shuffle(data)
    features = data[data.columns[2:-1]].values
    labels = data[data.columns[-1]].values

    n_samples = labels.shape[0]
    train_num = floor(n_samples*10/11)

    X_train, X_test, y_train, y_test = features[:train_num], features[train_num:], labels[:train_num], labels[train_num:]
    return X_train, X_test, y_train, y_test

def train_and_test(data):
    """Fit an RBF-kernel SVR on a random split of ``data`` and print its scores.

    The split comes from ``_get_train_test_data``. The RMSE and the Pearson
    correlation between the test labels and the predictions are printed;
    nothing is returned.
    """
    train_x, test_x, train_y, test_y = _get_train_test_data(data)

    regressor = SVR(kernel='rbf', gamma='scale', C=1.0, epsilon=0.2)
    regressor.fit(train_x, train_y)
    predicted = regressor.predict(test_x)

    squared_error = mean_squared_error(test_y, predicted)
    rmse = sqrt(squared_error)
    # R^2 and the p-value are computed but are not part of the printed report.
    r2 = r2_score(test_y, predicted)
    r, p = pearsonr(test_y, predicted)

    print(f'RMSE: {rmse}, R: {r}')

In [5]:
# Load both emotion dimensions up front, then evaluate each one in turn.
data_v = load_dynamic_features_and_labels('valence')
data_a = load_dynamic_features_and_labels('arousal')

evaluation_runs = (
    ('In Valence dimension, local result...', data_v),
    ('In Arousal dimension, local result...', data_a),
)
for banner, dimension_data in evaluation_runs:
    print(banner)
    train_and_test(dimension_data)


In Valence dimension, local result...
RMSE: 0.16370525985080106, R: 0.13568560665668175
In Arousal dimension, local result...
RMSE: 0.17523943209680515, R: 0.23438263514062022
