# DEAM Dataset - Feed Forward Neural Network
## Essentia Best Overall & openSMILE ComParE2016 Featureset

## Import relevant libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torcheval.metrics import R2Score

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import math

import sys
sys.path.insert(1, '../../utils')
from paths import *

## Neural Network Training

### Import annotations dataset

In [2]:
df_annotations = pd.read_csv(get_deam_path('processed/annotations/deam_static_annotations.csv'))
df_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,2,-0.475,-0.500
1,3,-0.375,-0.425
2,4,0.175,0.125
3,5,-0.150,0.075
4,7,0.200,0.350
...,...,...,...
1739,1996,-0.275,0.225
1740,1997,0.075,-0.275
1741,1998,0.350,0.300
1742,1999,-0.100,0.100


### Import the featureset

In [3]:
df_essentia_best_overall_opensmile_2016_features = pd.read_csv(get_deam_path('processed/features/integrated/essentia_best_overall_opensmile_compare2016_features.csv'))

# drop Unnamed:0 column
df_essentia_best_overall_opensmile_2016_features = df_essentia_best_overall_opensmile_2016_features[df_essentia_best_overall_opensmile_2016_features.columns[1:]]

df_essentia_best_overall_opensmile_2016_features

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.dmean,lowlevel.melbands_kurtosis.dmean2,lowlevel.melbands_kurtosis.dvar,lowlevel.melbands_kurtosis.dvar2,lowlevel.melbands_kurtosis.max,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_kurtosis.median,lowlevel.melbands_kurtosis.min,lowlevel.melbands_kurtosis.stdev,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,2,9.415085,14.939523,309.038300,742.725952,328.973969,21.801605,11.508560,-1.316976,37.293823,...,7.324224,0.534654,2.096779,2.095753,20.000000,0.647608,100.58043,50.135098,99.799490,49.985157
1,3,17.002226,26.332752,547.728210,1229.172241,471.216980,76.052628,62.377014,-1.212672,54.853020,...,6.627462,0.489874,1.832508,1.833608,-19.999998,0.570708,80.85080,43.208675,80.717380,45.814630
2,4,8.181362,12.390743,164.146927,348.634216,182.409042,16.516722,11.165314,-1.461427,19.692038,...,8.150249,0.506702,2.152296,2.151656,20.000000,0.624459,96.52063,51.825253,92.622505,45.681725
3,5,6.160454,9.577818,82.103508,203.349884,131.002609,14.615296,9.967463,-1.718094,15.931263,...,7.644168,0.486110,2.054498,2.052557,19.999930,0.592607,89.58958,48.105022,85.724660,47.280334
4,7,46.677437,67.838478,2727.447998,6007.274902,635.005981,99.851807,80.275414,-1.255423,84.649658,...,7.221351,0.562967,1.721128,1.722401,-19.999989,0.591437,80.62400,39.327477,79.446900,39.143200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,1996,6.549002,10.465438,90.671638,229.975418,102.836098,12.534721,10.126063,-1.418290,11.308396,...,7.014015,0.592036,1.998147,1.996065,19.999868,0.566083,93.15075,44.501705,96.057270,44.310146
1740,1997,5.866978,9.743977,73.712753,198.091965,95.177231,11.646033,7.612457,-1.583036,12.044659,...,8.635638,0.530015,2.202010,2.199063,19.999430,0.565890,101.92789,55.691982,102.434510,54.795270
1741,1998,8.790737,14.096998,345.523193,891.726868,402.426819,15.330372,10.513874,-1.687109,19.856863,...,7.182379,0.635415,1.889074,1.887913,19.999996,0.551982,86.53014,42.108500,89.627280,44.030266
1742,1999,7.967627,12.597425,119.048744,285.454956,249.734558,20.406567,17.666672,-0.742111,17.207710,...,7.847402,0.656611,1.920955,1.920288,20.000000,0.513915,95.08800,46.408382,94.678055,46.461388


In [4]:
df_essentia_best_overall_opensmile_2016_features.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1744 entries, 0 to 1743
Data columns (total 6510 columns):
 #     Column                                                    Dtype  
---    ------                                                    -----  
 0     song_id                                                   int64  
 1     lowlevel.melbands_kurtosis.dmean                          float64
 2     lowlevel.melbands_kurtosis.dmean2                         float64
 3     lowlevel.melbands_kurtosis.dvar                           float64
 4     lowlevel.melbands_kurtosis.dvar2                          float64
 5     lowlevel.melbands_kurtosis.max                            float64
 6     lowlevel.melbands_kurtosis.mean                           float64
 7     lowlevel.melbands_kurtosis.median                         float64
 8     lowlevel.melbands_kurtosis.min                            float64
 9     lowlevel.melbands_kurtosis.stdev                          float64
 10    lowlevel

Join both the featureset and annotation set together

In [5]:
df_essentia_best_overall_opensmile_compare2016_whole = pd.merge(df_essentia_best_overall_opensmile_2016_features, df_annotations, how='inner', on='song_id')
df_essentia_best_overall_opensmile_compare2016_whole = df_essentia_best_overall_opensmile_compare2016_whole.drop('song_id', axis=1)
df_essentia_best_overall_opensmile_compare2016_whole

Unnamed: 0,lowlevel.melbands_kurtosis.dmean,lowlevel.melbands_kurtosis.dmean2,lowlevel.melbands_kurtosis.dvar,lowlevel.melbands_kurtosis.dvar2,lowlevel.melbands_kurtosis.max,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_kurtosis.median,lowlevel.melbands_kurtosis.min,lowlevel.melbands_kurtosis.stdev,lowlevel.melbands_kurtosis.var,...,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope,valence_mean_mapped,arousal_mean_mapped
0,9.415085,14.939523,309.038300,742.725952,328.973969,21.801605,11.508560,-1.316976,37.293823,1390.829224,...,2.096779,2.095753,20.000000,0.647608,100.58043,50.135098,99.799490,49.985157,-0.475,-0.500
1,17.002226,26.332752,547.728210,1229.172241,471.216980,76.052628,62.377014,-1.212672,54.853020,3008.853760,...,1.832508,1.833608,-19.999998,0.570708,80.85080,43.208675,80.717380,45.814630,-0.375,-0.425
2,8.181362,12.390743,164.146927,348.634216,182.409042,16.516722,11.165314,-1.461427,19.692038,387.776367,...,2.152296,2.151656,20.000000,0.624459,96.52063,51.825253,92.622505,45.681725,0.175,0.125
3,6.160454,9.577818,82.103508,203.349884,131.002609,14.615296,9.967463,-1.718094,15.931263,253.805130,...,2.054498,2.052557,19.999930,0.592607,89.58958,48.105022,85.724660,47.280334,-0.150,0.075
4,46.677437,67.838478,2727.447998,6007.274902,635.005981,99.851807,80.275414,-1.255423,84.649658,7165.564941,...,1.721128,1.722401,-19.999989,0.591437,80.62400,39.327477,79.446900,39.143200,0.200,0.350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,6.549002,10.465438,90.671638,229.975418,102.836098,12.534721,10.126063,-1.418290,11.308396,127.879837,...,1.998147,1.996065,19.999868,0.566083,93.15075,44.501705,96.057270,44.310146,-0.275,0.225
1740,5.866978,9.743977,73.712753,198.091965,95.177231,11.646033,7.612457,-1.583036,12.044659,145.073792,...,2.202010,2.199063,19.999430,0.565890,101.92789,55.691982,102.434510,54.795270,0.075,-0.275
1741,8.790737,14.096998,345.523193,891.726868,402.426819,15.330372,10.513874,-1.687109,19.856863,394.295044,...,1.889074,1.887913,19.999996,0.551982,86.53014,42.108500,89.627280,44.030266,0.350,0.300
1742,7.967627,12.597425,119.048744,285.454956,249.734558,20.406567,17.666672,-0.742111,17.207710,296.105286,...,1.920955,1.920288,20.000000,0.513915,95.08800,46.408382,94.678055,46.461388,-0.100,0.100


#### Prepare dataframes for the neural network

Perform splitting of the dataframe into training and testing sets

In [6]:
features = df_essentia_best_overall_opensmile_2016_features.drop('song_id', axis=1)
features

Unnamed: 0,lowlevel.melbands_kurtosis.dmean,lowlevel.melbands_kurtosis.dmean2,lowlevel.melbands_kurtosis.dvar,lowlevel.melbands_kurtosis.dvar2,lowlevel.melbands_kurtosis.max,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_kurtosis.median,lowlevel.melbands_kurtosis.min,lowlevel.melbands_kurtosis.stdev,lowlevel.melbands_kurtosis.var,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,9.415085,14.939523,309.038300,742.725952,328.973969,21.801605,11.508560,-1.316976,37.293823,1390.829224,...,7.324224,0.534654,2.096779,2.095753,20.000000,0.647608,100.58043,50.135098,99.799490,49.985157
1,17.002226,26.332752,547.728210,1229.172241,471.216980,76.052628,62.377014,-1.212672,54.853020,3008.853760,...,6.627462,0.489874,1.832508,1.833608,-19.999998,0.570708,80.85080,43.208675,80.717380,45.814630
2,8.181362,12.390743,164.146927,348.634216,182.409042,16.516722,11.165314,-1.461427,19.692038,387.776367,...,8.150249,0.506702,2.152296,2.151656,20.000000,0.624459,96.52063,51.825253,92.622505,45.681725
3,6.160454,9.577818,82.103508,203.349884,131.002609,14.615296,9.967463,-1.718094,15.931263,253.805130,...,7.644168,0.486110,2.054498,2.052557,19.999930,0.592607,89.58958,48.105022,85.724660,47.280334
4,46.677437,67.838478,2727.447998,6007.274902,635.005981,99.851807,80.275414,-1.255423,84.649658,7165.564941,...,7.221351,0.562967,1.721128,1.722401,-19.999989,0.591437,80.62400,39.327477,79.446900,39.143200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,6.549002,10.465438,90.671638,229.975418,102.836098,12.534721,10.126063,-1.418290,11.308396,127.879837,...,7.014015,0.592036,1.998147,1.996065,19.999868,0.566083,93.15075,44.501705,96.057270,44.310146
1740,5.866978,9.743977,73.712753,198.091965,95.177231,11.646033,7.612457,-1.583036,12.044659,145.073792,...,8.635638,0.530015,2.202010,2.199063,19.999430,0.565890,101.92789,55.691982,102.434510,54.795270
1741,8.790737,14.096998,345.523193,891.726868,402.426819,15.330372,10.513874,-1.687109,19.856863,394.295044,...,7.182379,0.635415,1.889074,1.887913,19.999996,0.551982,86.53014,42.108500,89.627280,44.030266
1742,7.967627,12.597425,119.048744,285.454956,249.734558,20.406567,17.666672,-0.742111,17.207710,296.105286,...,7.847402,0.656611,1.920955,1.920288,20.000000,0.513915,95.08800,46.408382,94.678055,46.461388


In [7]:
targets = df_annotations.drop('song_id', axis=1)
targets

Unnamed: 0,valence_mean_mapped,arousal_mean_mapped
0,-0.475,-0.500
1,-0.375,-0.425
2,0.175,0.125
3,-0.150,0.075
4,0.200,0.350
...,...,...
1739,-0.275,0.225
1740,0.075,-0.275
1741,0.350,0.300
1742,-0.100,0.100


Perform 80-20 train-test split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

Create tensors for X_train and X_test

In [9]:
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float64)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float64)

Create tensors for Y_train and Y_test

In [10]:
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float64)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float64)

Define neural network parameters and instantitate neural network

In [11]:
input_size = 1
hidden_size = 20 
output_size = 2  # Output size for valence and arousal
learning_rate = 0.001
criterion = nn.MSELoss()
num_epochs = 300

Define the neural network

In [12]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), 2)
        )

    def forward(self, x):
        return self.layers(x)

#### Training

Prepare input_train_data and target_train_labels

In [13]:
input_train_data = X_train_tensor.float()

# input_train_data = input_train_data.view(input_train_data.shape[1], -1)
print(input_train_data.shape)

target_train_labels = y_train_tensor

torch.Size([1395, 6509])


Training loop

In [14]:
model = NeuralNetwork(input_size=input_train_data.shape[1])
optimiser = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
  optimiser.zero_grad()
  
  # forward pass
  output = model(input_train_data)

  # calculate loss
  loss = torch.sqrt(criterion(output.float(), target_train_labels.float()))

  # backward pass
  loss.backward()
  # update weights
  optimiser.step()

  print(f'Epoch {epoch + 1}, Loss: {math.sqrt(loss.item())}')

print("Training completed.")

Epoch 1, Loss: 353.98836933577354
Epoch 2, Loss: 646.4645678612247
Epoch 3, Loss: 87.40114672713825
Epoch 4, Loss: 551.4140062149311
Epoch 5, Loss: 237.65678213286907
Epoch 6, Loss: 469.21606829903004
Epoch 7, Loss: 268.2240638673197
Epoch 8, Loss: 484.4044345895277
Epoch 9, Loss: 332.1968778074231
Epoch 10, Loss: 403.90194509311294
Epoch 11, Loss: 251.58518531254975
Epoch 12, Loss: 319.1434948616374
Epoch 13, Loss: 157.9352678413691
Epoch 14, Loss: 280.5650966130677
Epoch 15, Loss: 128.70555209712205
Epoch 16, Loss: 338.3644484871305
Epoch 17, Loss: 247.4985874328377
Epoch 18, Loss: 258.29009550793853
Epoch 19, Loss: 230.46859533893115
Epoch 20, Loss: 131.1093740689429
Epoch 21, Loss: 183.48106382199228
Epoch 22, Loss: 42.50962092390086
Epoch 23, Loss: 168.96630234494984
Epoch 24, Loss: 89.80616333334952
Epoch 25, Loss: 171.52449487974013
Epoch 26, Loss: 162.75987633243642
Epoch 27, Loss: 65.27653764705585
Epoch 28, Loss: 173.83173100192323
Epoch 29, Loss: 157.98735980886255
Epoch 30,

#### Testing

Prepare input_test_data and target_test_labels

In [15]:
input_test_data = X_test_tensor.float()

# input_test_data = input_test_data.view(input_test_data.shape[1], -1)
print(input_test_data.shape)

target_test_labels = y_test_tensor

torch.Size([349, 6509])


Generating scores

In [16]:
with torch.no_grad():
  test_pred = model(input_test_data)
  test_loss = criterion(test_pred.float(), target_test_labels)

print(f'Test RMSE: {math.sqrt(test_loss.item())}')

metric = R2Score()
metric.update(test_pred, target_test_labels)
r2_score = metric.compute()
print(f'Test R2 score: {r2_score.item()}')


Test RMSE: 356.72881803108584
Test R2 score: -1265385.2449331307


True values (test set)

In [17]:
target_test_labels

tensor([[-0.1500, -0.1500],
        [-0.3000, -0.1000],
        [ 0.2000,  0.3500],
        [ 0.2250,  0.4500],
        [-0.1750, -0.2000],
        [-0.5250, -0.3000],
        [-0.2500, -0.7750],
        [ 0.3000,  0.3000],
        [-0.1750, -0.4000],
        [ 0.4500,  0.1500],
        [ 0.1750,  0.0250],
        [-0.1750, -0.0250],
        [-0.0500, -0.3000],
        [ 0.1250,  0.3000],
        [-0.0750, -0.1500],
        [-0.2000, -0.2750],
        [-0.6000, -0.2250],
        [ 0.1500, -0.2000],
        [ 0.2750,  0.6000],
        [-0.1500, -0.4500],
        [-0.2250, -0.6250],
        [-0.0250, -0.4500],
        [-0.5250, -0.1250],
        [ 0.0000,  0.3250],
        [ 0.1250,  0.3750],
        [ 0.1500, -0.2500],
        [ 0.4500,  0.3250],
        [ 0.2500,  0.2250],
        [-0.1000,  0.0750],
        [ 0.4250,  0.1250],
        [-0.4500, -0.3500],
        [-0.0500,  0.3750],
        [-0.4750, -0.2000],
        [-0.2750, -0.4000],
        [-0.4000, -0.2250],
        [ 0.1000, -0

Predicted values

In [18]:
test_pred

tensor([[-4.6954e+00, -3.3411e+01],
        [-1.7584e+00, -1.1960e+01],
        [-2.1323e+02, -1.4257e+03],
        [-1.2183e+02, -8.1716e+02],
        [-2.7070e+00, -1.7916e+01],
        [-1.0300e+01, -6.9287e+01],
        [ 1.1857e+01, -1.6062e+01],
        [-2.6579e+01, -2.1423e+02],
        [-7.1768e+00, -4.5950e+01],
        [-2.7517e+01, -1.7931e+02],
        [-3.0781e+01, -3.4402e+02],
        [-7.1827e-01, -5.2029e+00],
        [ 2.5874e-02, -1.1811e-01],
        [-5.0071e+00, -3.2815e+01],
        [-8.8834e+01, -3.7208e+02],
        [-3.8685e+00, -2.5316e+01],
        [-1.2010e-01, -1.4069e+00],
        [-2.2754e+01, -1.4917e+02],
        [-4.6165e+01, -2.9892e+02],
        [ 4.6033e-02, -1.0666e-01],
        [-1.5602e+00, -8.0663e+00],
        [-1.1657e-01, -1.0021e+00],
        [-1.2451e+00, -8.7026e+00],
        [-1.9968e+00, -1.9777e+01],
        [-5.5713e+01, -3.8558e+02],
        [-9.5642e+01, -6.5232e+02],
        [-4.4272e+01, -4.2739e+02],
        [-7.3510e+01, -5.034