imports

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import torch
import torch.nn as nn
import torch.optim as optim

random seed

In [None]:
# Single seed shared by every source of randomness in this notebook.
seed = 42

# The sklearn splits receive `seed` explicitly, but torch draws the initial
# layer weights from its global generator. Without seeding it, the
# hyper-parameter search and the final metrics differ on every fresh run.
np.random.seed(seed)
torch.manual_seed(seed)

### Helper functions

In [None]:
# Three-way split helper: carve the data into train, validation and test parts.
def train_val_test_split(X, Y, train_split=0.8, val_split=0.1, test_split=0.1, random_seed=seed):
    """Split X and Y into train, validation and test subsets.

    The test part is separated first (fraction ``test_split`` of all rows).
    The validation part is then taken from the remainder using the relative
    fraction ``val_split / (train_split + val_split)``. Both steps call
    ``train_test_split`` with ``random_state=random_seed``.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    # Step 1: hold out the test set.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, Y, test_size=test_split, random_state=random_seed
    )

    # Step 2: divide what is left between training and validation.
    val_fraction = val_split/(train_split+val_split)
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_fraction, random_state=random_seed
    )

    return X_train, X_val, X_test, y_train, y_val, y_test
# Metric report for the two regression targets.
def evaluation(y_pred, y_test):
    """Print MSE, RMSE, MAE and R^2 for the valence and arousal targets.

    Both arguments are converted with ``np.array``; column 0 is read as
    valence and column 1 as arousal. The metrics are written to stdout and
    nothing is returned.
    """
    predicted = np.array(y_pred)
    actual = np.array(y_test)

    # Column 0 -> valence, column 1 -> arousal.
    valence_hat, arousal_hat = predicted[:, 0], predicted[:, 1]
    valence_true, arousal_true = actual[:, 0], actual[:, 1]

    # MSE is computed up front because RMSE is derived from it.
    mse_valence = mean_squared_error(valence_true, valence_hat)
    mse_arousal = mean_squared_error(arousal_true, arousal_hat)

    # All metrics are computed before anything is printed.
    report = [
        ("Valence MSE:", mse_valence),
        ("Arousal MSE:", mse_arousal),
        ("Valence RMSE:", np.sqrt(mse_valence)),
        ("Arousal RMSE:", np.sqrt(mse_arousal)),
        ("Valence MAE:", mean_absolute_error(valence_true, valence_hat)),
        ("Arousal MAE:", mean_absolute_error(arousal_true, arousal_hat)),
        ("Valence R^2 Score:", r2_score(valence_true, valence_hat)),
        ("Arousal R^2 Score:", r2_score(arousal_true, arousal_hat)),
    ]

    for label, value in report:
        print(label, value)

### Preprocessing

In [None]:
# Load the processed table that holds the audio columns, lyric columns and targets.
df_data = pd.read_csv("../data/processed_multi_modal.csv")

# Audio feature subsets; the top-9 list is the top-4 list plus five more columns.
audio_features_top4 = ["loudness", "instrumentalness", "time_signature", "energy"]
audio_features_top9 = audio_features_top4 + [
    "danceability",
    "tempo",
    "acousticness",
    "key",
    "speechiness",
]
audio_features_all = [
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "tempo",
    "time_signature",
]

# Lyric columns: four named columns followed by pca_tfidf0 ... pca_tfidf49.
lyric_features = ["compound", "neg", "pos", "neu"] + [
    f"pca_tfidf{component}" for component in range(50)
]

# Regression targets.
ys_features = ["valence", "arousal"]

# Column views used by the modelling cells.
df_audio = df_data[audio_features_top9]
df_lyric = df_data[lyric_features]
df_multi = df_data[audio_features_top9 + lyric_features]
df_ys = df_data[ys_features]

### NN-model

define dataset

In [None]:
# Inputs are the combined audio + lyric columns; targets are valence and arousal.
X, Y = df_multi, df_ys

# Outer split into train / validation / test using the helper's default fractions.
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(X, Y)

# Inner 80/20 split of the validation part, used by the hyper-parameter search.
X_val_train, X_val_test, y_val_train, y_val_test = train_test_split(
    X_val, y_val, test_size=0.2, random_state=seed
)

# Replace every DataFrame by a float tensor holding the same values.
X_train, X_val, X_test, y_train, y_val, y_test = (
    torch.tensor(frame.values, dtype=torch.float)
    for frame in (X_train, X_val, X_test, y_train, y_val, y_test)
)
X_val_train, X_val_test, y_val_train, y_val_test = (
    torch.tensor(frame.values, dtype=torch.float)
    for frame in (X_val_train, X_val_test, y_val_train, y_val_test)
)

define model

In [None]:
class NeuralNet(nn.Module):
    """Fully connected network with three ReLU hidden layers and a linear output layer."""

    def __init__(self, input_size, h0, h1, h2, output_size):
        """Create the four linear layers: input_size -> h0 -> h1 -> h2 -> output_size."""
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(input_size, h0)
        self.l2 = nn.Linear(h0, h1)
        self.l3 = nn.Linear(h1, h2)
        self.l4 = nn.Linear(h2, output_size)
        # One ReLU module is reused after each hidden layer.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply l1, l2, l3 each followed by ReLU, then l4 without an activation."""
        hidden = x
        for layer in (self.l1, self.l2, self.l3):
            hidden = self.relu(layer(hidden))
        return self.l4(hidden)

optimizing hyperparameters (grid search over hidden-layer sizes and learning rate)

In [None]:
# Network dimensions: one input per feature column of X_train, two outputs.
input_size = X_train.shape[-1]
output_size = 2

# Training-loop settings read by train_model: batch size, epoch count, log interval.
b_size, epochs, print_every = 32, 500, 100

# wrapper function for training
def train_model(h0, h1, h2, lr):
    """Train a NeuralNet with the given hidden sizes and learning rate, then score it.

    The model is fit on ``X_val_train`` / ``y_val_train`` with Adam and MSE
    loss for ``epochs`` epochs, walking the rows in order in mini-batches of
    ``b_size``. It is then scored on ``X_val_test`` / ``y_val_test``. These
    tensors, together with ``input_size``, ``output_size`` and
    ``print_every``, are notebook globals read at call time.

    Args:
        h0, h1, h2: Widths of the three hidden layers.
        lr: Learning rate passed to Adam.

    Returns:
        Tuple ``(nn_model, score)``: the trained model and its mean squared
        error on ``X_val_test`` (lower is better).
    """
    # model definition
    nn_model = NeuralNet(input_size, h0, h1, h2, output_size)

    # loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(nn_model.parameters(), lr=lr)

    # NOTE(review): fitting uses the validation sub-split rather than X_train,
    # presumably to keep the search cheap -- confirm this is intended.
    N = X_val_train.shape[0]
    n_epochs = epochs
    batch_size = b_size

    # training loop
    for epoch in range(n_epochs):
        for i in range(0, N, batch_size):
            inputs = X_val_train[i:i+batch_size]
            labels = y_val_train[i:i+batch_size]

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = nn_model(inputs)

            # Compute loss
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        # Print training progress; the value shown is the loss of the last
        # mini-batch of the epoch, not an epoch average.
        if (epoch+1) % print_every == 0:
            print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}")

    # Scoring is inference only, so do not record an autograd graph for it.
    with torch.no_grad():
        y_val_pred = nn_model(X_val_test)

    # sklearn's signature is (y_true, y_pred). Pass plain NumPy arrays rather
    # than relying on implicit tensor-to-array conversion.
    score = mean_squared_error(y_val_test.numpy(), y_val_pred.numpy())

    # Return the trained model and validation score
    return nn_model, score

# Search space: learning rates crossed with hidden-layer layouts [h0, h1, h2].
etas = [0.001, 0.01, 0.1]
neuron_sizes = [
    [32, 16, 8],
    [8, 8, 8],
    [16, 8, 4],
    [21, 7, 3],
]

# Train one model per (layout, learning rate) pair and keep its validation score.
model_lst = []
for neurons in neuron_sizes:
    for lr in etas:
        model, score = train_model(*neurons, lr)
        model_lst.append((model, score, neurons, lr))
        print("Neurons:", neurons, "Learning Rate:", lr, "Score:", score)

# The entry with the lowest validation MSE wins.
best_model, best_score, best_neurons, best_lr = min(model_lst, key=lambda entry: entry[1])
best_model, best_score, best_neurons, best_lr

training

In [None]:
# Final fit: train a fresh network with the best hyper-parameters on the
# full training split.
batch_size = 32
n_epochs = 500
print_every = 100
N = X_train.shape[0]


best_nn_model = NeuralNet(input_size, best_neurons[0], best_neurons[1], best_neurons[2], output_size)
# loss function and optimizer
criterion = nn.MSELoss()

optimizer = optim.Adam(best_nn_model.parameters(), lr=best_lr)
for epoch in range(n_epochs):
    # Rows are visited in order, in mini-batches of `batch_size`.
    for i in range(0, N, batch_size):
        inputs = X_train[i:i+batch_size]
        labels = y_train[i:i+batch_size]

        # zero the gradients
        optimizer.zero_grad()

        # forward pass
        outputs = best_nn_model(inputs)

        # compute loss
        loss = criterion(outputs, labels)

        # backward pass and optimization
        loss.backward()
        optimizer.step()

    # Print training progress at the interval configured above. This used to
    # be a hard-coded 10 that ignored `print_every`. The value shown is the
    # loss of the last mini-batch of the epoch.
    if (epoch+1) % print_every == 0:
        print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}")

prediction

In [None]:
# Predict on the test split; gradients are not needed for inference.
with torch.no_grad():
    y_pred = best_nn_model(X_test)

evaluation

In [None]:
# Print the valence / arousal metrics for the test-set predictions.
evaluation(y_pred=y_pred.numpy(), y_test=y_test.numpy())