## Load the data from pickle files

In [89]:
import pickle
import os
from tqdm import tqdm
import numpy as np

directory = 'data/train'

length_list = []
valence_values=[]

recordings = []

for filename in tqdm(os.listdir(directory)):
    if filename.endswith('.pkl'):
        file_path = os.path.join(directory, filename)
        with open(file_path, 'rb') as file:
            data = pickle.load(file)
            if data['valence'] != 2.333 and len(data['audio_data']) < 91000:
                length_list.append(len(data['audio_data']))
                valence_values.append(data['valence'])
                recordings.append(data['audio_data'])

valence_values = np.array(valence_values)
len(recordings)

100%|██████████| 10557/10557 [00:11<00:00, 951.98it/s]


10391

In [90]:
def round_to_nearest_quarter(number):
    # Assuming 'number' could be a numpy array with a single value
    if isinstance(number, np.ndarray) and number.size == 1:
        number = number.item()  # Convert single-item array to scalar
    
    # Ensure the number is within the 1 to 5 range before processing
    number = np.clip(number, 1, 5)
    # Scale number to shift quarters to whole numbers, round, and rescale
    rounded_number = np.round(number * 4) / 4
    # Clip again to ensure no out-of-range values after rounding
    rounded_number = np.clip(rounded_number, 1, 5)
    return rounded_number

In [91]:
valence_dict = {}

for i in valence_values:
    if i not in valence_dict:
        valence_dict[i] = 1
    else:
        valence_dict[i] += 1


In [92]:
valence_dict

{1.25: 140,
 3.5: 1173,
 3.25: 1273,
 3.75: 972,
 2.0: 523,
 2.75: 1014,
 2.25: 679,
 3.0: 1045,
 4.25: 606,
 1.0: 114,
 1.5: 227,
 1.75: 407,
 4.75: 151,
 4.0: 784,
 2.5: 890,
 5.0: 28,
 4.5: 365}

## **Padding to unify the length of the arrays**

In [93]:

max_length = max(len(array) for array in recordings)  # Find the maximum length

# Pad each array to have the maximum length
padded_arrays = np.array([np.pad(array, (0, max_length - len(array)), mode='constant') for array in recordings])

### Create train, test, validation sets

In [94]:
from sklearn.model_selection import train_test_split

# Split the data and labels into training and testing sets
X_train, X_test_help, y_train, y_test_help = train_test_split(padded_arrays, valence_values, test_size=0.4, random_state=42)

X_val, X_test, y_val, y_test = train_test_split(X_test_help, y_test_help, test_size=0.5, random_state=42)


## 1. Introduction: Create tensors to train the model

In [98]:
import torch
from torch.utils.data import TensorDataset, DataLoader

batch_size = 256  # You can adjust the batch size depending on your system's capability

# Convert input data and labels to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)  # Use float32 for input features
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)  # Use float32 for labels

# Create a dataset from tensors
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

# Create a DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

############################################################################################################
# Repeat the same process for the test set

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



############################################################################################################
# Repeat the same process for the validation set
X_validation_tensor = torch.tensor(X_val, dtype=torch.float32)
y_validation_tensor = torch.tensor(y_val, dtype=torch.float32)

validation_dataset = TensorDataset(X_validation_tensor, y_validation_tensor)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)




## 2. Definition of MLP model

In [103]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


class MLP(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, activation_function):
        """
        Initialize the MLP model.

        Parameters:
        - input_size (int): Size of the input features.
        - hidden_sizes (list): List containing the sizes of hidden layers.
        - output_size (int): Size of the output layer.
        - activation_function (torch.nn.Module): Activation function for hidden layers.
        """
        super(MLP, self).__init__()

        # Set random seed for reproducibility
        # torch.manual_seed(42)
        torch.manual_seed(2024)
        
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size

        # Create hidden layers and activations dynamically
        self.layers = nn.ModuleList()

        for i in range(len(hidden_sizes)):
            # Linear layer
            #self.layers.append(nn.Linear(input_size if i == 0 else hidden_sizes[i - 1], hidden_sizes[i]))
            
            #### HE WEIGHTS INITAILIZATION
            layer = nn.Linear(input_size if i == 0 else hidden_sizes[i - 1], hidden_sizes[i])
            self.layers.append(layer)
            # Initialize weights using He initialization
            nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
            # Initialize biases to zero
            nn.init.constant_(layer.bias, 0)
            
            # Activation function (except for the last layer)
            self.layers.append(activation_function())

            #### BATCH NORMALIZATION
            self.layers.append(nn.BatchNorm1d(hidden_sizes[i]))

        # Append the ouptu layer
        self.layers.append(nn.Linear(hidden_sizes[-1], output_size))
        

    def forward(self, x):
 
        # Flatten the input
        x = x.view(-1, self.input_size)

        # Forward pass through hidden layers with activation functions
        for layer in self.layers:
            x = layer(x)

        return x
    

    # Training Cycle

def train_model(MLP_model, optimizer, num_epochs):
    # Define the loss function
    criterion = nn.MSELoss()
    # Training loop
    for epoch in range(num_epochs):
        total_loss = 0

        for inputs, labels in train_loader:
            inputs = inputs.view(-1, max_length)  # Flatten the images
            outputs = MLP_model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute the loss
            loss.backward()  # Backward pass

            # Update weights using the step function of our custom ADAM optimizer
            optimizer.step()

            # Store the loss. loss.item() gets the value in a tensor. This only works for scalars.
            total_loss += loss.item()
    
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}')



def evaluate_model(MLP_model, test_loader):
    # Model Evaluation
    predicted_labels = []
    true_labels = []
    predicted_rounded_labels=[]
    mse_total = 0
    total_samples = 0
    total_correct_ratio= 0
    with torch.no_grad():
        MLP_model.eval()  # Set the model to evaluation mode

        for inputs, labels in test_loader:
            # Assumes inputs are already appropriately preprocessed (e.g., flattened if necessary)
            outputs = MLP_model(inputs)

            # Store predictions and true labels
            predicted_labels.extend(outputs.numpy())  # Convert to numpy array for MSE calculation
            true_labels.extend(labels.numpy())
            rounded_outputs = round_to_nearest_quarter(outputs.numpy())
            predicted_rounded_labels.extend(rounded_outputs)

            # Calculate MSE for the current batch
            mse = np.mean((outputs.numpy() - labels.numpy()) ** 2)
            mse_total += mse * labels.size(0)  # Aggregate MSE weighted by batch size
            total_samples += labels.size(0)

        # Calculate overall MSE
        overall_mse = mse_total / total_samples
        # Print the overall MSE and optionally display predictions and true values
        print(f"Mean Squared Error on Test Set: {overall_mse}")
        accuracy = np.mean(np.array(predicted_rounded_labels) == np.array(true_labels))
        print(f"Accuracy on Test Set: {int(accuracy*100)}%")
        print("Predicted Labels:", predicted_labels)
        print("True Labels:", true_labels)
        print("Predicted Rounded Labels:", predicted_rounded_labels)

## 3. Training the model and evaluation

In [108]:

# Initialize the model
input_size = max_length  # the longest input is 175000
hidden_size = [1024,1024,1024,1024]
output_size = 1  # Regression problem
activation_function = nn.ReLU
num_epochs = 50

# Create the model
model = MLP(input_size, hidden_size, output_size, activation_function)
print(model.layers)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Train the model
train_model(model, optimizer, num_epochs=num_epochs)

# Evaluate the model
evaluate_model(model, test_loader)


ModuleList(
  (0): Linear(in_features=90948, out_features=1024, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1024, out_features=1024, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1024, out_features=1024, bias=True)
  (5): ReLU()
  (6): Linear(in_features=1024, out_features=1024, bias=True)
  (7): ReLU()
  (8): Linear(in_features=1024, out_features=1, bias=True)
)
Epoch [1/50], Loss: 15626.7008
Epoch [2/50], Loss: 10.5487
Epoch [3/50], Loss: 8.9900
Epoch [4/50], Loss: 28.2108
Epoch [5/50], Loss: 29.5053
Epoch [6/50], Loss: 363762.1616
Epoch [7/50], Loss: 2.7025
Epoch [8/50], Loss: 4.3240
Epoch [9/50], Loss: 3.8732
Epoch [10/50], Loss: 5.7774
Epoch [11/50], Loss: 14.0779
Epoch [12/50], Loss: 2567.3053
Epoch [13/50], Loss: 21.7370
Epoch [14/50], Loss: 23.1122
Epoch [15/50], Loss: 24.1449
Epoch [16/50], Loss: 24.8502
Epoch [17/50], Loss: 754.2849
Epoch [18/50], Loss: 22.4702
Epoch [19/50], Loss: 18.5559
Epoch [20/50], Loss: 25.1987
Epoch [21/50], Loss: 26.1118
Epoch [22/50], Lo

In [83]:
evaluate_model(model, validation_loader)

Mean Squared Error on Test Set: 8.633212408935893
Accuracy on Test Set: 3%
Predicted Labels: [array([0.77644485], dtype=float32), array([0.77644485], dtype=float32), array([5.414227], dtype=float32), array([1.8379748], dtype=float32), array([0.77644485], dtype=float32), array([3.6503315], dtype=float32), array([0.77644485], dtype=float32), array([0.77644485], dtype=float32), array([2.0683098], dtype=float32), array([0.77644485], dtype=float32), array([1.345665], dtype=float32), array([0.77644485], dtype=float32), array([0.77644485], dtype=float32), array([10.788207], dtype=float32), array([0.77644485], dtype=float32), array([0.77644485], dtype=float32), array([1.3466148], dtype=float32), array([0.77644485], dtype=float32), array([3.6786823], dtype=float32), array([6.4522934], dtype=float32), array([0.77644485], dtype=float32), array([11.412561], dtype=float32), array([0.77644485], dtype=float32), array([1.5778544], dtype=float32), array([1.4925709], dtype=float32), array([1.7070644], d

# **New Try**

In [None]:
def round_to_nearest_quarter(x):
    