# DEAM Dataset - Feed Forward Neural Network
## Essentia Best Overall & openSMILE emobase Featureset

## Import relevant libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torcheval.metrics import R2Score

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import math

import sys
sys.path.insert(1, '../../utils')
from paths import *

## Neural Network Training

### Import annotations dataset

In [2]:
df_annotations = pd.read_csv(get_deam_path('processed/annotations/deam_static_annotations.csv'))
df_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,2,-0.475,-0.500
1,3,-0.375,-0.425
2,4,0.175,0.125
3,5,-0.150,0.075
4,7,0.200,0.350
...,...,...,...
1739,1996,-0.275,0.225
1740,1997,0.075,-0.275
1741,1998,0.350,0.300
1742,1999,-0.100,0.100


### Import the featureset

In [3]:
df_essentia_best_overall_opensmile_emobase_features = pd.read_csv(get_deam_path('processed/features/integrated/essentia_best_overall_opensmile_emobase_features.csv'))

# drop Unnamed:0 column
df_essentia_best_overall_opensmile_emobase_features = df_essentia_best_overall_opensmile_emobase_features[df_essentia_best_overall_opensmile_emobase_features.columns[1:]]

df_essentia_best_overall_opensmile_emobase_features

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.dmean,lowlevel.melbands_kurtosis.dmean2,lowlevel.melbands_kurtosis.dvar,lowlevel.melbands_kurtosis.dvar2,lowlevel.melbands_kurtosis.max,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_kurtosis.median,lowlevel.melbands_kurtosis.min,lowlevel.melbands_kurtosis.stdev,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,2,9.415085,14.939523,309.038300,742.725952,328.973969,21.801605,11.508560,-1.316976,37.293823,...,34.224740,5.851549,0.815608,11.220467,-2.025291,0.0,1.749724,2.025291,1.749724,3.775015
1,3,17.002226,26.332752,547.728210,1229.172241,471.216980,76.052628,62.377014,-1.212672,54.853020,...,11.536975,3.397905,1.259240,29.973100,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,4,8.181362,12.390743,164.146927,348.634216,182.409042,16.516722,11.165314,-1.461427,19.692038,...,31.980932,5.655746,0.692436,9.787511,-1.309070,0.0,1.036872,1.309070,1.036872,2.345942
3,5,6.160454,9.577818,82.103508,203.349884,131.002609,14.615296,9.967463,-1.718094,15.931263,...,33.398550,5.780117,0.625557,9.257450,-1.881530,0.0,1.791142,1.881530,1.791142,3.672672
4,7,46.677437,67.838478,2727.447998,6007.274902,635.005981,99.851807,80.275414,-1.255423,84.649658,...,23.441298,4.842434,0.802300,13.485694,-1.159967,0.0,1.114847,1.159967,1.114847,2.274814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,1996,6.549002,10.465438,90.671638,229.975418,102.836098,12.534721,10.126063,-1.418290,11.308396,...,8.894216,2.983331,7.053555,145.043520,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1740,1997,5.866978,9.743977,73.712753,198.091965,95.177231,11.646033,7.612457,-1.583036,12.044659,...,2.125242,1.458878,1.890596,27.465874,-0.168976,0.0,0.000000,0.168976,0.000000,0.168976
1741,1998,8.790737,14.096998,345.523193,891.726868,402.426819,15.330372,10.513874,-1.687109,19.856863,...,11.348680,3.371960,0.621677,18.409992,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1742,1999,7.967627,12.597425,119.048744,285.454956,249.734558,20.406567,17.666672,-0.742111,17.207710,...,7.925314,2.818051,3.872501,60.610363,0.000000,0.0,0.000000,0.000000,0.000000,0.000000


In [4]:
df_essentia_best_overall_opensmile_emobase_features.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1744 entries, 0 to 1743
Data columns (total 1125 columns):
 #     Column                                     Dtype  
---    ------                                     -----  
 0     song_id                                    int64  
 1     lowlevel.melbands_kurtosis.dmean           float64
 2     lowlevel.melbands_kurtosis.dmean2          float64
 3     lowlevel.melbands_kurtosis.dvar            float64
 4     lowlevel.melbands_kurtosis.dvar2           float64
 5     lowlevel.melbands_kurtosis.max             float64
 6     lowlevel.melbands_kurtosis.mean            float64
 7     lowlevel.melbands_kurtosis.median          float64
 8     lowlevel.melbands_kurtosis.min             float64
 9     lowlevel.melbands_kurtosis.stdev           float64
 10    lowlevel.melbands_kurtosis.var             float64
 11    lowlevel.melbands_skewness.dmean           float64
 12    lowlevel.melbands_skewness.dmean2          float64
 13    lowlevel.melba

Join both the featureset and annotation set together

In [5]:
df_essentia_best_overall_opensmile_emobase_whole = pd.merge(df_essentia_best_overall_opensmile_emobase_features, df_annotations, how='inner', on='song_id')
df_essentia_best_overall_opensmile_emobase_whole = df_essentia_best_overall_opensmile_emobase_whole.drop('song_id', axis=1)
df_essentia_best_overall_opensmile_emobase_whole

Unnamed: 0,lowlevel.melbands_kurtosis.dmean,lowlevel.melbands_kurtosis.dmean2,lowlevel.melbands_kurtosis.dvar,lowlevel.melbands_kurtosis.dvar2,lowlevel.melbands_kurtosis.max,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_kurtosis.median,lowlevel.melbands_kurtosis.min,lowlevel.melbands_kurtosis.stdev,lowlevel.melbands_kurtosis.var,...,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3,valence_mean_mapped,arousal_mean_mapped
0,9.415085,14.939523,309.038300,742.725952,328.973969,21.801605,11.508560,-1.316976,37.293823,1390.829224,...,0.815608,11.220467,-2.025291,0.0,1.749724,2.025291,1.749724,3.775015,-0.475,-0.500
1,17.002226,26.332752,547.728210,1229.172241,471.216980,76.052628,62.377014,-1.212672,54.853020,3008.853760,...,1.259240,29.973100,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-0.375,-0.425
2,8.181362,12.390743,164.146927,348.634216,182.409042,16.516722,11.165314,-1.461427,19.692038,387.776367,...,0.692436,9.787511,-1.309070,0.0,1.036872,1.309070,1.036872,2.345942,0.175,0.125
3,6.160454,9.577818,82.103508,203.349884,131.002609,14.615296,9.967463,-1.718094,15.931263,253.805130,...,0.625557,9.257450,-1.881530,0.0,1.791142,1.881530,1.791142,3.672672,-0.150,0.075
4,46.677437,67.838478,2727.447998,6007.274902,635.005981,99.851807,80.275414,-1.255423,84.649658,7165.564941,...,0.802300,13.485694,-1.159967,0.0,1.114847,1.159967,1.114847,2.274814,0.200,0.350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,6.549002,10.465438,90.671638,229.975418,102.836098,12.534721,10.126063,-1.418290,11.308396,127.879837,...,7.053555,145.043520,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-0.275,0.225
1740,5.866978,9.743977,73.712753,198.091965,95.177231,11.646033,7.612457,-1.583036,12.044659,145.073792,...,1.890596,27.465874,-0.168976,0.0,0.000000,0.168976,0.000000,0.168976,0.075,-0.275
1741,8.790737,14.096998,345.523193,891.726868,402.426819,15.330372,10.513874,-1.687109,19.856863,394.295044,...,0.621677,18.409992,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.350,0.300
1742,7.967627,12.597425,119.048744,285.454956,249.734558,20.406567,17.666672,-0.742111,17.207710,296.105286,...,3.872501,60.610363,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-0.100,0.100


#### Prepare dataframes for the neural network

Perform splitting of the dataframe into training and testing sets

In [6]:
features = df_essentia_best_overall_opensmile_emobase_features.drop('song_id', axis=1)
features

Unnamed: 0,lowlevel.melbands_kurtosis.dmean,lowlevel.melbands_kurtosis.dmean2,lowlevel.melbands_kurtosis.dvar,lowlevel.melbands_kurtosis.dvar2,lowlevel.melbands_kurtosis.max,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_kurtosis.median,lowlevel.melbands_kurtosis.min,lowlevel.melbands_kurtosis.stdev,lowlevel.melbands_kurtosis.var,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,9.415085,14.939523,309.038300,742.725952,328.973969,21.801605,11.508560,-1.316976,37.293823,1390.829224,...,34.224740,5.851549,0.815608,11.220467,-2.025291,0.0,1.749724,2.025291,1.749724,3.775015
1,17.002226,26.332752,547.728210,1229.172241,471.216980,76.052628,62.377014,-1.212672,54.853020,3008.853760,...,11.536975,3.397905,1.259240,29.973100,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,8.181362,12.390743,164.146927,348.634216,182.409042,16.516722,11.165314,-1.461427,19.692038,387.776367,...,31.980932,5.655746,0.692436,9.787511,-1.309070,0.0,1.036872,1.309070,1.036872,2.345942
3,6.160454,9.577818,82.103508,203.349884,131.002609,14.615296,9.967463,-1.718094,15.931263,253.805130,...,33.398550,5.780117,0.625557,9.257450,-1.881530,0.0,1.791142,1.881530,1.791142,3.672672
4,46.677437,67.838478,2727.447998,6007.274902,635.005981,99.851807,80.275414,-1.255423,84.649658,7165.564941,...,23.441298,4.842434,0.802300,13.485694,-1.159967,0.0,1.114847,1.159967,1.114847,2.274814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,6.549002,10.465438,90.671638,229.975418,102.836098,12.534721,10.126063,-1.418290,11.308396,127.879837,...,8.894216,2.983331,7.053555,145.043520,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1740,5.866978,9.743977,73.712753,198.091965,95.177231,11.646033,7.612457,-1.583036,12.044659,145.073792,...,2.125242,1.458878,1.890596,27.465874,-0.168976,0.0,0.000000,0.168976,0.000000,0.168976
1741,8.790737,14.096998,345.523193,891.726868,402.426819,15.330372,10.513874,-1.687109,19.856863,394.295044,...,11.348680,3.371960,0.621677,18.409992,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1742,7.967627,12.597425,119.048744,285.454956,249.734558,20.406567,17.666672,-0.742111,17.207710,296.105286,...,7.925314,2.818051,3.872501,60.610363,0.000000,0.0,0.000000,0.000000,0.000000,0.000000


In [7]:
targets = df_annotations.drop('song_id', axis=1)
targets

Unnamed: 0,valence_mean_mapped,arousal_mean_mapped
0,-0.475,-0.500
1,-0.375,-0.425
2,0.175,0.125
3,-0.150,0.075
4,0.200,0.350
...,...,...
1739,-0.275,0.225
1740,0.075,-0.275
1741,0.350,0.300
1742,-0.100,0.100


Perform 80-20 train-test split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

Create tensors for X_train and X_test

In [9]:
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float64)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float64)

Create tensors for Y_train and Y_test

In [10]:
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float64)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float64)

Define neural network parameters and instantitate neural network

In [11]:
input_size = 1
hidden_size = 20 
output_size = 2  # Output size for valence and arousal
learning_rate = 0.001
criterion = nn.MSELoss()
num_epochs = 300

Define the neural network

In [12]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), 2)
        )

    def forward(self, x):
        return self.layers(x)

#### Training

Prepare input_train_data and target_train_labels

In [13]:
input_train_data = X_train_tensor.float()

# input_train_data = input_train_data.view(input_train_data.shape[1], -1)
print(input_train_data.shape)

target_train_labels = y_train_tensor

torch.Size([1395, 1124])


Training loop

In [14]:
model = NeuralNetwork(input_size=input_train_data.shape[1])
optimiser = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
  optimiser.zero_grad()
  
  # forward pass
  output = model(input_train_data)

  # calculate loss
  loss = torch.sqrt(criterion(output.float(), target_train_labels.float()))

  # backward pass
  loss.backward()
  # update weights
  optimiser.step()

  print(f'Epoch {epoch + 1}, Loss: {math.sqrt(loss.item())}')

print("Training completed.")

Epoch 1, Loss: 2.4747702674979934
Epoch 2, Loss: 1.8893633960161846
Epoch 3, Loss: 2.4913842512172093
Epoch 4, Loss: 2.2430087722082965
Epoch 5, Loss: 1.128186957013185
Epoch 6, Loss: 0.8681028192156875
Epoch 7, Loss: 1.0037144577935566
Epoch 8, Loss: 0.733049982953265
Epoch 9, Loss: 0.9375700924420156
Epoch 10, Loss: 0.9886872433930297
Epoch 11, Loss: 0.6673269032410856
Epoch 12, Loss: 1.0969523823712968
Epoch 13, Loss: 0.9023412729204535
Epoch 14, Loss: 0.8738779298506513
Epoch 15, Loss: 0.9125234574738688
Epoch 16, Loss: 0.7538170638228145
Epoch 17, Loss: 0.7381884804220862
Epoch 18, Loss: 0.7649409683364594
Epoch 19, Loss: 0.7791030757542219
Epoch 20, Loss: 1.566272530115874
Epoch 21, Loss: 1.3224796364267495
Epoch 22, Loss: 0.7675793674449232
Epoch 23, Loss: 0.8457631360758272
Epoch 24, Loss: 0.8559418506657981
Epoch 25, Loss: 0.7760443250026018
Epoch 26, Loss: 0.6039300625191235
Epoch 27, Loss: 0.6918645945165415
Epoch 28, Loss: 0.7761385215039761
Epoch 29, Loss: 0.59005094514339

#### Testing

Prepare input_test_data and target_test_labels

In [15]:
input_test_data = X_test_tensor.float()

# input_test_data = input_test_data.view(input_test_data.shape[1], -1)
print(input_test_data.shape)

target_test_labels = y_test_tensor

torch.Size([349, 1124])


Generating scores

In [16]:
with torch.no_grad():
  test_pred = model(input_test_data)
  test_loss = criterion(test_pred.float(), target_test_labels)

print(f'Test RMSE: {math.sqrt(test_loss.item())}')

metric = R2Score()
metric.update(test_pred, target_test_labels)
r2_score = metric.compute()
print(f'Test R^2 score: {r2_score.item()}')


Test RMSE: 0.26413040717435415
Test R2 score: 0.24464621330131125


True values (test set)

In [17]:
target_test_labels

tensor([[-0.1500, -0.1500],
        [-0.3000, -0.1000],
        [ 0.2000,  0.3500],
        [ 0.2250,  0.4500],
        [-0.1750, -0.2000],
        [-0.5250, -0.3000],
        [-0.2500, -0.7750],
        [ 0.3000,  0.3000],
        [-0.1750, -0.4000],
        [ 0.4500,  0.1500],
        [ 0.1750,  0.0250],
        [-0.1750, -0.0250],
        [-0.0500, -0.3000],
        [ 0.1250,  0.3000],
        [-0.0750, -0.1500],
        [-0.2000, -0.2750],
        [-0.6000, -0.2250],
        [ 0.1500, -0.2000],
        [ 0.2750,  0.6000],
        [-0.1500, -0.4500],
        [-0.2250, -0.6250],
        [-0.0250, -0.4500],
        [-0.5250, -0.1250],
        [ 0.0000,  0.3250],
        [ 0.1250,  0.3750],
        [ 0.1500, -0.2500],
        [ 0.4500,  0.3250],
        [ 0.2500,  0.2250],
        [-0.1000,  0.0750],
        [ 0.4250,  0.1250],
        [-0.4500, -0.3500],
        [-0.0500,  0.3750],
        [-0.4750, -0.2000],
        [-0.2750, -0.4000],
        [-0.4000, -0.2250],
        [ 0.1000, -0

Predicted values

In [18]:
test_pred

tensor([[-9.7112e-02, -8.8679e-02],
        [-2.6028e-01, -3.4438e-01],
        [ 7.0087e-02,  5.9717e-02],
        [-1.8041e-01, -1.0039e-01],
        [-1.8815e-01, -3.7730e-01],
        [-1.8396e-01, -2.5296e-01],
        [-2.5810e-01, -4.3348e-01],
        [ 9.8442e-02,  1.0943e-01],
        [-3.3154e-01, -2.5232e-01],
        [ 1.0270e-01,  9.7751e-02],
        [ 8.2965e-02,  4.3011e-02],
        [-2.2917e-01, -2.1015e-01],
        [-2.4800e-01, -3.2287e-01],
        [ 1.0260e-01,  8.1177e-02],
        [-2.1407e-01, -3.1729e-01],
        [-2.9021e-01, -2.6017e-01],
        [-2.5246e-01, -3.0516e-01],
        [ 8.7611e-02,  7.6864e-02],
        [ 9.9558e-02,  6.2187e-02],
        [-2.2533e-01, -3.3316e-01],
        [-2.5325e-01, -3.5551e-01],
        [-1.9643e-01, -1.7205e-01],
        [-3.7684e-01, -4.4052e-01],
        [-6.6947e-02, -1.8588e-01],
        [-2.1383e-02, -1.0764e-01],
        [ 7.7752e-02,  4.4567e-02],
        [ 9.8164e-02,  1.0566e-01],
        [ 5.2092e-02,  6.506