# DEAM Dataset - Feed Forward Neural Network
## openSMILE emobase Featureset

## Import relevant libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torcheval.metrics import R2Score

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import math

import sys
sys.path.insert(1, '../../utils')
from paths import *

## Define Functions for Neural Networks

Define function for converting dataframes to PyTorch tensors

In [2]:
def string_to_numpy_array(str):
  values = str.strip('[]').split()
  return np.array([float(value)for value in values]).flatten()

def convert_ndarray_col_to_tensor(df_object_col):
  # the df_object_col may be of the type object, but it is actually a numpy.ndarray
  df_object_col = df_object_col.apply(string_to_numpy_array)
  return torch.tensor(df_object_col.tolist(), dtype=torch.float64)

## Neural Network Training

### Import annotations dataset

In [3]:
df_annotations = pd.read_csv(get_deam_path('processed/annotations/deam_static_annotations.csv'))
df_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,2,-0.475,-0.500
1,3,-0.375,-0.425
2,4,0.175,0.125
3,5,-0.150,0.075
4,7,0.200,0.350
...,...,...,...
1739,1996,-0.275,0.225
1740,1997,0.075,-0.275
1741,1998,0.350,0.300
1742,1999,-0.100,0.100


### Import the featureset

In [4]:
df_opensmile_emobase_features = pd.read_csv(get_deam_path('processed/features/opensmile_emobase_features.csv'))

# drop Unnamed:0 column
df_opensmile_emobase_features = df_opensmile_emobase_features[df_opensmile_emobase_features.columns[1:]]

df_opensmile_emobase_features

Unnamed: 0,song_id,pcm_intensity_sma_max,pcm_intensity_sma_min,pcm_intensity_sma_range,pcm_intensity_sma_maxPos,pcm_intensity_sma_minPos,pcm_intensity_sma_amean,pcm_intensity_sma_linregc1,pcm_intensity_sma_linregc2,pcm_intensity_sma_linregerrA,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,2,0.000100,0.000000e+00,0.000100,3821.0,0.0,0.000003,9.746145e-11,0.000003,0.000002,...,34.224740,5.851549,0.815608,11.220467,-2.025291,0.0,1.749724,2.025291,1.749724,3.775015
1,3,0.000051,0.000000e+00,0.000051,2809.0,0.0,0.000005,1.012040e-10,0.000004,0.000004,...,11.536975,3.397905,1.259240,29.973100,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,4,0.000088,0.000000e+00,0.000088,1800.0,0.0,0.000011,5.901333e-11,0.000011,0.000008,...,31.980932,5.655746,0.692436,9.787511,-1.309070,0.0,1.036872,1.309070,1.036872,2.345942
3,5,0.000060,0.000000e+00,0.000060,488.0,0.0,0.000006,-3.841752e-10,0.000007,0.000005,...,33.398550,5.780117,0.625557,9.257450,-1.881530,0.0,1.791142,1.881530,1.791142,3.672672
4,7,0.000113,0.000000e+00,0.000113,1061.0,0.0,0.000020,-4.199130e-11,0.000020,0.000011,...,23.441298,4.842434,0.802300,13.485694,-1.159967,0.0,1.114847,1.159967,1.114847,2.274814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,1996,0.000023,1.535268e-08,0.000023,2355.0,548.0,0.000005,-8.072022e-11,0.000005,0.000003,...,8.894216,2.983331,7.053555,145.043520,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1740,1997,0.000012,1.499340e-08,0.000012,3905.0,4487.0,0.000002,-4.315512e-11,0.000002,0.000001,...,2.125242,1.458878,1.890596,27.465874,-0.168976,0.0,0.000000,0.168976,0.000000,0.168976
1741,1998,0.000026,1.076092e-08,0.000026,3411.0,3137.0,0.000003,-1.908036e-10,0.000004,0.000003,...,11.348680,3.371960,0.621677,18.409992,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1742,1999,0.000030,5.323769e-09,0.000030,4167.0,1089.0,0.000002,9.297152e-11,0.000002,0.000002,...,7.925314,2.818051,3.872501,60.610363,0.000000,0.0,0.000000,0.000000,0.000000,0.000000


In [5]:
df_opensmile_emobase_features.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1744 entries, 0 to 1743
Data columns (total 989 columns):
 #    Column                           Dtype  
---   ------                           -----  
 0    song_id                          int64  
 1    pcm_intensity_sma_max            float64
 2    pcm_intensity_sma_min            float64
 3    pcm_intensity_sma_range          float64
 4    pcm_intensity_sma_maxPos         float64
 5    pcm_intensity_sma_minPos         float64
 6    pcm_intensity_sma_amean          float64
 7    pcm_intensity_sma_linregc1       float64
 8    pcm_intensity_sma_linregc2       float64
 9    pcm_intensity_sma_linregerrA     float64
 10   pcm_intensity_sma_linregerrQ     float64
 11   pcm_intensity_sma_stddev         float64
 12   pcm_intensity_sma_skewness       float64
 13   pcm_intensity_sma_kurtosis       float64
 14   pcm_intensity_sma_quartile1      float64
 15   pcm_intensity_sma_quartile2      float64
 16   pcm_intensity_sma_quartile3      float64

Join both the featureset and annotation set together

In [6]:
df_opensmile_emobase_whole = pd.merge(df_opensmile_emobase_features, df_annotations, how='inner', on='song_id')
df_opensmile_emobase_whole = df_opensmile_emobase_whole.drop('song_id', axis=1)
df_opensmile_emobase_whole

Unnamed: 0,pcm_intensity_sma_max,pcm_intensity_sma_min,pcm_intensity_sma_range,pcm_intensity_sma_maxPos,pcm_intensity_sma_minPos,pcm_intensity_sma_amean,pcm_intensity_sma_linregc1,pcm_intensity_sma_linregc2,pcm_intensity_sma_linregerrA,pcm_intensity_sma_linregerrQ,...,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3,valence_mean_mapped,arousal_mean_mapped
0,0.000100,0.000000e+00,0.000100,3821.0,0.0,0.000003,9.746145e-11,0.000003,0.000002,2.003038e-11,...,0.815608,11.220467,-2.025291,0.0,1.749724,2.025291,1.749724,3.775015,-0.475,-0.500
1,0.000051,0.000000e+00,0.000051,2809.0,0.0,0.000005,1.012040e-10,0.000004,0.000004,3.706416e-11,...,1.259240,29.973100,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-0.375,-0.425
2,0.000088,0.000000e+00,0.000088,1800.0,0.0,0.000011,5.901333e-11,0.000011,0.000008,1.247090e-10,...,0.692436,9.787511,-1.309070,0.0,1.036872,1.309070,1.036872,2.345942,0.175,0.125
3,0.000060,0.000000e+00,0.000060,488.0,0.0,0.000006,-3.841752e-10,0.000007,0.000005,4.100738e-11,...,0.625557,9.257450,-1.881530,0.0,1.791142,1.881530,1.791142,3.672672,-0.150,0.075
4,0.000113,0.000000e+00,0.000113,1061.0,0.0,0.000020,-4.199130e-11,0.000020,0.000011,1.996163e-10,...,0.802300,13.485694,-1.159967,0.0,1.114847,1.159967,1.114847,2.274814,0.200,0.350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,0.000023,1.535268e-08,0.000023,2355.0,548.0,0.000005,-8.072022e-11,0.000005,0.000003,1.198487e-11,...,7.053555,145.043520,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-0.275,0.225
1740,0.000012,1.499340e-08,0.000012,3905.0,4487.0,0.000002,-4.315512e-11,0.000002,0.000001,1.978644e-12,...,1.890596,27.465874,-0.168976,0.0,0.000000,0.168976,0.000000,0.168976,0.075,-0.275
1741,0.000026,1.076092e-08,0.000026,3411.0,3137.0,0.000003,-1.908036e-10,0.000004,0.000003,1.439331e-11,...,0.621677,18.409992,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.350,0.300
1742,0.000030,5.323769e-09,0.000030,4167.0,1089.0,0.000002,9.297152e-11,0.000002,0.000002,6.859520e-12,...,3.872501,60.610363,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-0.100,0.100


#### Prepare dataframes for the neural network

Perform splitting of the dataframe into training and testing sets

In [7]:
features = df_opensmile_emobase_features.drop('song_id', axis=1)
features

Unnamed: 0,pcm_intensity_sma_max,pcm_intensity_sma_min,pcm_intensity_sma_range,pcm_intensity_sma_maxPos,pcm_intensity_sma_minPos,pcm_intensity_sma_amean,pcm_intensity_sma_linregc1,pcm_intensity_sma_linregc2,pcm_intensity_sma_linregerrA,pcm_intensity_sma_linregerrQ,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,0.000100,0.000000e+00,0.000100,3821.0,0.0,0.000003,9.746145e-11,0.000003,0.000002,2.003038e-11,...,34.224740,5.851549,0.815608,11.220467,-2.025291,0.0,1.749724,2.025291,1.749724,3.775015
1,0.000051,0.000000e+00,0.000051,2809.0,0.0,0.000005,1.012040e-10,0.000004,0.000004,3.706416e-11,...,11.536975,3.397905,1.259240,29.973100,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,0.000088,0.000000e+00,0.000088,1800.0,0.0,0.000011,5.901333e-11,0.000011,0.000008,1.247090e-10,...,31.980932,5.655746,0.692436,9.787511,-1.309070,0.0,1.036872,1.309070,1.036872,2.345942
3,0.000060,0.000000e+00,0.000060,488.0,0.0,0.000006,-3.841752e-10,0.000007,0.000005,4.100738e-11,...,33.398550,5.780117,0.625557,9.257450,-1.881530,0.0,1.791142,1.881530,1.791142,3.672672
4,0.000113,0.000000e+00,0.000113,1061.0,0.0,0.000020,-4.199130e-11,0.000020,0.000011,1.996163e-10,...,23.441298,4.842434,0.802300,13.485694,-1.159967,0.0,1.114847,1.159967,1.114847,2.274814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739,0.000023,1.535268e-08,0.000023,2355.0,548.0,0.000005,-8.072022e-11,0.000005,0.000003,1.198487e-11,...,8.894216,2.983331,7.053555,145.043520,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1740,0.000012,1.499340e-08,0.000012,3905.0,4487.0,0.000002,-4.315512e-11,0.000002,0.000001,1.978644e-12,...,2.125242,1.458878,1.890596,27.465874,-0.168976,0.0,0.000000,0.168976,0.000000,0.168976
1741,0.000026,1.076092e-08,0.000026,3411.0,3137.0,0.000003,-1.908036e-10,0.000004,0.000003,1.439331e-11,...,11.348680,3.371960,0.621677,18.409992,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
1742,0.000030,5.323769e-09,0.000030,4167.0,1089.0,0.000002,9.297152e-11,0.000002,0.000002,6.859520e-12,...,7.925314,2.818051,3.872501,60.610363,0.000000,0.0,0.000000,0.000000,0.000000,0.000000


In [8]:
targets = df_annotations.drop('song_id', axis=1)
targets

Unnamed: 0,valence_mean_mapped,arousal_mean_mapped
0,-0.475,-0.500
1,-0.375,-0.425
2,0.175,0.125
3,-0.150,0.075
4,0.200,0.350
...,...,...
1739,-0.275,0.225
1740,0.075,-0.275
1741,0.350,0.300
1742,-0.100,0.100


Perform 80-20 train-test split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

Create tensors for X_train and X_test

In [10]:
# get the columns containing numpy.ndarray as its values
numpy_array_columns = X_train.select_dtypes(include=['object']).columns.tolist()

print(f'Numpy ndarray columns: {numpy_array_columns}')

# tensors for columns that are numpy.ndarray
X_train_tensors_ndarray = [convert_ndarray_col_to_tensor(X_train[col]) for col in numpy_array_columns]
X_test_tensors_ndarray = [convert_ndarray_col_to_tensor(X_test[col]) for col in numpy_array_columns]

# generate a tensor for the remaining dataframe (for columns where the type is not numpy.ndarray)
X_train = X_train.drop(numpy_array_columns, axis=1)
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float64)
print(f'X_train_tensor shape: {X_train_tensor.shape}')
X_test = X_test.drop(numpy_array_columns, axis=1)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float64)
print(f'X_test_tensor shape: {X_test_tensor.shape}')

Numpy ndarray columns: []
X_train_tensor shape: torch.Size([1395, 988])
X_test_tensor shape: torch.Size([349, 988])


Create tensors for Y_train and Y_test

In [11]:
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float64)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float64)

Define neural network parameters and instantitate neural network

In [12]:
input_size = len(numpy_array_columns) + 1 # total number of tensors
hidden_size = 20 
output_size = 2  # Output size for valence and arousal
learning_rate = 0.001
criterion = nn.MSELoss()
num_epochs = 300

Define the neural network

In [13]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), math.ceil((input_size**0.5) * 2)),
            nn.ReLU(),
            nn.Linear(math.ceil((input_size**0.5) * 2), 2)
        )

    def forward(self, x):
        return self.layers(x)

#### Training

Prepare input_train_data and target_train_labels

In [14]:
# X_train_tensors = [X_train_tensor]
# X_train_tensors.extend(X_train_tensors_ndarray)

# # Find the maximum shape among all tensors
# max_shape = max([tensor.shape for tensor in X_train_tensors])

# # Adjust tensor shapes to match the maximum shape without data loss
# X_train_tensors_adjusted = []

# for tensor in X_train_tensors:
#     padding = torch.zeros(*max_shape)
#     padding_slice = [slice(0, s) for s in tensor.shape]
#     padding[tuple(padding_slice)] = tensor
#     X_train_tensors_adjusted.append(padding)

# print(X_train_tensors_adjusted)

In [15]:
# input_train_data = torch.stack(tuple(X_train_tensors_adjusted), dim=0)
input_train_data = X_train_tensor.float()

# input_train_data = input_train_data.view(input_train_data.shape[1], -1)
print(input_train_data.shape)

target_train_labels = y_train_tensor

torch.Size([1395, 988])


Training loop

In [16]:
model = NeuralNetwork(input_size=input_train_data.shape[1])
optimiser = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
  optimiser.zero_grad()
  
  # forward pass
  output = model(input_train_data)

  # calculate loss
  loss = torch.sqrt(criterion(output.float(), target_train_labels.float()))

  # backward pass
  loss.backward()
  # update weights
  optimiser.step()

  print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

print("Training completed.")

Epoch 1, Loss: 0.3236202895641327
Epoch 2, Loss: 0.31169405579566956
Epoch 3, Loss: 0.30857226252555847
Epoch 4, Loss: 0.3073917329311371
Epoch 5, Loss: 0.3063937723636627
Epoch 6, Loss: 0.3049333393573761
Epoch 7, Loss: 0.3034643828868866
Epoch 8, Loss: 0.30203044414520264
Epoch 9, Loss: 0.3001430034637451
Epoch 10, Loss: 0.29839083552360535
Epoch 11, Loss: 0.29701390862464905
Epoch 12, Loss: 0.29646414518356323
Epoch 13, Loss: 0.29671287536621094
Epoch 14, Loss: 0.2915884852409363
Epoch 15, Loss: 0.29376715421676636
Epoch 16, Loss: 0.2891332507133484
Epoch 17, Loss: 0.288534551858902
Epoch 18, Loss: 0.28554460406303406
Epoch 19, Loss: 0.28259819746017456
Epoch 20, Loss: 0.2841258943080902
Epoch 21, Loss: 0.27836519479751587
Epoch 22, Loss: 0.2762415409088135
Epoch 23, Loss: 0.27783551812171936
Epoch 24, Loss: 0.2738196849822998
Epoch 25, Loss: 0.269182026386261
Epoch 26, Loss: 0.27085521817207336
Epoch 27, Loss: 0.28352683782577515
Epoch 28, Loss: 0.26767995953559875
Epoch 29, Loss: 

#### Testing

Prepare input_test_data and target_test_labels

In [17]:
# X_test_tensors = [X_test_tensor]
# X_test_tensors.extend(X_test_tensors_ndarray)

# # Find the maximum shape among all tensors
# max_shape = max([tensor.shape for tensor in X_test_tensors])

# # Adjust tensor shapes to match the maximum shape without data loss
# X_test_tensors_adjusted = []

# for tensor in X_test_tensors:
#     padding = torch.zeros(*max_shape)
#     padding_slice = [slice(0, s) for s in tensor.shape]
#     padding[tuple(padding_slice)] = tensor
#     X_test_tensors_adjusted.append(padding)

# print(X_test_tensors_adjusted)

In [18]:
# input_test_data = torch.stack(tuple(X_test_tensors_adjusted), dim=0)

# input_test_data = input_test_data.view(input_test_data.shape[1], -1)
input_test_data = X_test_tensor.float()
print(input_test_data.shape)

target_test_labels = y_test_tensor

torch.Size([349, 988])


Generating scores

In [19]:
with torch.no_grad():
  test_pred = model(input_test_data)
  test_loss = criterion(test_pred.float(), target_test_labels)

print(f'Test loss: {test_loss.item()}')

metric = R2Score()
metric.update(test_pred, target_test_labels)
r2_score = metric.compute()
print(f'R2 score: {r2_score.item()}')


Test loss: 0.10590348757461489
R2 score: -0.15105884023414295
