## Load the data from pickle files

In [None]:
import pickle
import os
from tqdm import tqdm
import numpy as np

# Folder scanned for pickled recordings (one '.pkl' file per record).
directory = 'data/train'

# Parallel lists: index k in each one refers to the same accepted recording.
recordings = []
valence_values = []
length_list = []

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only point this cell at data files from a trusted source.
for filename in tqdm(os.listdir(directory)):
    if not filename.endswith('.pkl'):
        continue

    file_path = os.path.join(directory, filename)
    with open(file_path, 'rb') as file:
        data = pickle.load(file)

    # Skip records labelled 2.333 and recordings with 91000 samples or more.
    if data['valence'] == 2.333 or len(data['audio_data']) >= 91000:
        continue

    length_list.append(len(data['audio_data']))
    valence_values.append(data['valence'])
    recordings.append(data['audio_data'])

valence_values = np.array(valence_values)
len(recordings)

In [None]:
def round_to_nearest_quarter(number):
    """Snap a value (or array of values) to the nearest multiple of 0.25 in [1, 5].

    A NumPy array holding exactly one element is first unwrapped to a plain
    scalar, so the result for such an input is a scalar rather than an array.
    Larger arrays are processed element-wise and returned as arrays.
    """
    value = number
    if isinstance(value, np.ndarray) and value.size == 1:
        value = value.item()

    # Limit to the valid range first, then round on a grid of quarters.
    bounded = np.clip(value, 1, 5)
    quarter_steps = np.round(bounded * 4)

    # Final clip mirrors the pre-rounding one and keeps the result in range.
    return np.clip(quarter_steps / 4, 1, 5)

In [None]:
# Histogram of the labels: each distinct valence value maps to its count.
valence_dict = {}

for i in valence_values:
    valence_dict[i] = valence_dict.get(i, 0) + 1


In [None]:
# Notebook display of the valence -> count mapping.
valence_dict

## **Padding to unify the length of the arrays**

In [None]:

# Length of the longest recording; every recording is padded up to it.
max_length = max(map(len, recordings))

# Append trailing zeros to each recording, then stack the results into a
# single array (2-D if every recording is 1-D).
padded_arrays = np.array([
    np.pad(recording, (0, max_length - len(recording)), mode='constant')
    for recording in recordings
])

### Create train, test, validation sets

In [None]:
from sklearn.model_selection import train_test_split

# First split: 60 % of the samples for training, 40 % held out.
X_train, X_test_help, y_train, y_test_help = train_test_split(
    padded_arrays,
    valence_values,
    test_size=0.4,
    random_state=42,
)

# Second split: the held-out part is halved into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_test_help,
    y_test_help,
    test_size=0.5,
    random_state=42,
)


## 1. Introduction: Create tensors to train the model

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Mini-batch size shared by all three loaders; reduce it if memory runs short.
batch_size = 256

# Features and regression targets of every split as float32 tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
X_validation_tensor = torch.tensor(X_val, dtype=torch.float32)
y_validation_tensor = torch.tensor(y_val, dtype=torch.float32)

# Pair each feature tensor with its targets.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
validation_dataset = TensorDataset(X_validation_tensor, y_validation_tensor)

# Only the training loader shuffles; test and validation keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)




# Balanced training batches (ImbalancedDatasetSampler)

In [None]:
from torchsampler import ImbalancedDatasetSampler
import torch
from torch.utils.data import TensorDataset, DataLoader

# Mini-batch size shared by all three loaders; reduce it if memory runs short.
batch_size = 256

# Training features and targets as float32 tensors, paired into one dataset.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

# Training batches are drawn through ImbalancedDatasetSampler instead of plain
# shuffling (presumably to over-sample rare labels; see the torchsampler docs).
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=ImbalancedDatasetSampler(train_dataset),
)

# Test and validation splits: same conversion, iterated in dataset order.
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
X_validation_tensor = torch.tensor(X_val, dtype=torch.float32)
y_validation_tensor = torch.tensor(y_val, dtype=torch.float32)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
validation_dataset = TensorDataset(X_validation_tensor, y_validation_tensor)

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Sanity check: print the tensor shapes of the first batch from train_loader.
for inputs, labels in train_loader:
    # NOTE(review): the tensors handed to the loader are not unsqueezed, so this
    # is presumably (batch, max_length); the channel axis is only added inside
    # MLP.forward — TODO confirm.
    print(f'Input batch shape: {inputs.shape}')
    print(f'Label batch shape: {labels.shape}')  # presumably torch.Size([batch]) — TODO confirm
    break  # Stop after the first batch to just see one example


## 2. Definition of MLP model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


class MLP(nn.Module):
    """One Conv1d + MaxPool1d stage followed by a stack of fully connected layers.

    Each hidden stage is Linear -> BatchNorm1d -> activation; a final Linear
    layer maps to ``output_size``. The forward pass flattens its result to 1-D.
    """

    def __init__(self, input_size, hidden_sizes, output_size, activation_function):
        """
        Initialize the MLP model.

        Parameters:
        - input_size (int): Number of input features per sample (signal length).
        - hidden_sizes (list): Sizes of the hidden layers. May be empty, in which
          case the output layer is attached directly to the pooled features.
        - output_size (int): Size of the output layer.
        - activation_function (callable): Zero-argument factory (e.g. ``nn.ReLU``)
          that is called once per hidden layer to create its activation module.
        """
        super().__init__()

        # Set random seed for reproducibility
        torch.manual_seed(42)

        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size

        # Container for the fully connected part of the network
        self.layers = nn.ModuleList()

        # Geometry of the convolution / pooling front end
        kernel_size_conv = 5
        stride_conv = 1
        padding = 0
        in_channels = 1
        pool_kernel = 2
        pool_stride = 2

        # 1 input channel, 1 output channel, kernel of width 5
        self.conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=1,
            kernel_size=kernel_size_conv,
            stride=stride_conv,
            padding=padding,
        )
        self.pool = nn.MaxPool1d(pool_kernel, stride=pool_stride)

        # Number of features left after convolution and pooling; this is the
        # input width of the first fully connected layer.
        conv_neurons = int((input_size + 2 * padding - kernel_size_conv) // stride_conv) + 1
        pool_neurons = int((conv_neurons - pool_kernel) // pool_stride) + 1

        # Track the width feeding the next Linear layer so that an empty
        # hidden_sizes list still produces a valid output layer.
        in_features = pool_neurons
        for width in hidden_sizes:
            # Linear layer
            layer = nn.Linear(in_features, width)
            self.layers.append(layer)
            # He (Kaiming) weight initialization, biases set to zero
            nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
            nn.init.constant_(layer.bias, 0)

            # Batch normalization (dropout is deliberately not combined with it)
            self.layers.append(nn.BatchNorm1d(width))

            # Activation after every hidden layer
            self.layers.append(activation_function())

            in_features = width

        # Append the output layer (no activation afterwards)
        self.layers.append(nn.Linear(in_features, output_size))

    def forward(self, x):
        """Run the network on ``x`` and return the outputs flattened to 1-D.

        ``x`` gets a channel axis inserted at position 1 before the
        convolution, so it is expected without one (presumably
        ``(batch, input_size)`` — TODO confirm against callers).
        """
        x = x.unsqueeze(1)  # Insert the channel dimension expected by Conv1d
        x = self.conv(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)  # Flatten for the linear layers

        # Forward pass through the fully connected stack
        for layer in self.layers:
            x = layer(x)

        return x.view(-1)

### CUDA
# Use a CUDA GPU when PyTorch reports one as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Training Cycle

def train_model(MLP_model, optimizer, num_epochs, *, loader=None, target_device=None):
    """Train ``MLP_model`` with mean-squared-error loss and print the mean loss per epoch.

    Parameters:
    - MLP_model (nn.Module): Model to optimize; called as ``MLP_model(inputs)``.
    - optimizer (torch.optim.Optimizer): Optimizer built over the model's parameters.
    - num_epochs (int): Number of passes over the training batches.
    - loader (iterable, optional): Yields ``(inputs, labels)`` batches and supports
      ``len()``. Defaults to the module-level ``train_loader``.
    - target_device (torch.device, optional): Device the batches are moved to.
      Defaults to the module-level ``device``.
    """
    batches = train_loader if loader is None else loader
    run_device = device if target_device is None else target_device

    # Define the loss function
    criterion = nn.MSELoss()

    # evaluate_model() leaves the model in eval mode; switch back so BatchNorm
    # uses batch statistics again while training.
    MLP_model.train()

    for epoch in range(num_epochs):
        total_loss = 0

        for inputs, labels in batches:
            inputs, labels = inputs.to(run_device), labels.to(run_device)

            # Gradients accumulate across backward() calls, so clear the ones
            # left over from the previous batch before computing new ones.
            optimizer.zero_grad()

            outputs = MLP_model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute the loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update the weights

            # loss.item() extracts the Python number from the scalar loss tensor
            total_loss += loss.item()

        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(batches):.4f}')



def evaluate_model(MLP_model, test_loader):
    """Evaluate a regression model and report MSE, rounded accuracy and a confusion matrix.

    The model is switched to eval mode and run without gradient tracking over
    every batch of ``test_loader``. Raw predictions are compared to the labels
    for the mean squared error; predictions rounded with
    ``round_to_nearest_quarter`` are compared for exact-match accuracy and
    plotted as a confusion-matrix heatmap. Everything is printed or shown;
    nothing is returned.

    Parameters:
    - MLP_model (nn.Module): Model to evaluate (left in eval mode afterwards).
    - test_loader (iterable): Yields ``(inputs, labels)`` batches.
    """
    predicted_labels = []
    true_labels = []
    predicted_rounded_labels = []
    mse_total = 0
    total_samples = 0
    with torch.no_grad():
        MLP_model.eval()  # Set the model to evaluation mode

        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = MLP_model(inputs)

            # Bring predictions and labels back to the CPU as NumPy arrays
            outputs_np = outputs.to('cpu').numpy()
            labels_np = labels.to('cpu').numpy()

            # Store predictions and true labels
            predicted_labels.extend(outputs_np)
            true_labels.extend(labels_np)

            # round_to_nearest_quarter unwraps a one-element array to a scalar,
            # which list.extend cannot iterate; atleast_1d keeps a batch of
            # size 1 (e.g. a short final batch) working.
            rounded_outputs = np.atleast_1d(round_to_nearest_quarter(outputs_np))
            predicted_rounded_labels.extend(rounded_outputs)

            # Calculate MSE for the current batch
            mse = np.mean((outputs_np - labels_np) ** 2)
            mse_total += mse * labels.size(0)  # Aggregate MSE weighted by batch size
            total_samples += labels.size(0)

        # Calculate overall MSE
        overall_mse = mse_total / total_samples

        # Print the overall MSE, the accuracy and the raw values
        print(f"Mean Squared Error on Test Set: {overall_mse}")
        accuracy = np.mean(np.array(predicted_rounded_labels) == np.array(true_labels))
        print(f"Accuracy on Test Set: {int(accuracy*100)}%")
        print("Predicted Labels:", predicted_labels)
        print("True Labels:", true_labels)
        print("Predicted Rounded Labels:", predicted_rounded_labels)


        ####################### Confusion Matrix ############################
        # Convert float labels to string labels
        true_labels_str = [str(label) for label in true_labels]
        predicted_rounded_labels_str = [str(label) for label in predicted_rounded_labels]
        confusion = confusion_matrix(true_labels_str, predicted_rounded_labels_str)

        # Unique sorted values of labels for axis ticks
        unique_values = sorted(set(true_labels_str).union(predicted_rounded_labels_str))
        plt.figure(figsize=(10, 7))
        sns.heatmap(confusion, annot=True, fmt="d", cmap='Blues', xticklabels=unique_values, yticklabels=unique_values)
        plt.title('Confusion Matrix')
        plt.ylabel('True Categories')
        plt.xlabel('Predicted Categories')
        plt.show()

## AdaGrad

In [None]:
# Model configuration
input_size = max_length  # one input feature per sample of a padded recording
hidden_size = [2048] * 5
output_size = 1  # Regression problem
activation_function = nn.ReLU
num_epochs = 20

# Build the network on the selected device and show its three parts
modelADAGRAD = MLP(input_size, hidden_size, output_size, activation_function).to(device)
print(modelADAGRAD.conv, modelADAGRAD.pool, modelADAGRAD.layers, sep='\n')

# Adagrad optimizer with a small weight decay
optimizer = torch.optim.Adagrad(
    modelADAGRAD.parameters(),
    lr=0.001,
    weight_decay=0.00001,
)

# Train, then evaluate on the test split
train_model(modelADAGRAD, optimizer, num_epochs=num_epochs)
evaluate_model(modelADAGRAD, test_loader)

In [None]:
# Same evaluation on the validation split. NOTE: evaluate_model's printed
# messages still say "Test Set" because that text is hard-coded in the function.
evaluate_model(modelADAGRAD, validation_loader)

### MAKE DATASET LABELS CATEGORICAL

In [5]:
import pickle
import os
from tqdm import tqdm
import numpy as np

# Folder scanned for pickled recordings (one '.pkl' file per record).
directory = 'data/train'

# Parallel lists: index k in each one refers to the same accepted recording.
recordings = []
valence_values = []
length_list = []

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only point this cell at data files from a trusted source.
for filename in tqdm(os.listdir(directory)):
    if not filename.endswith('.pkl'):
        continue

    file_path = os.path.join(directory, filename)
    with open(file_path, 'rb') as file:
        data = pickle.load(file)

    # Skip records labelled 2.333 and recordings with 91000 samples or more.
    if data['valence'] == 2.333 or len(data['audio_data']) >= 91000:
        continue

    length_list.append(len(data['audio_data']))
    # Bin the valence into steps of 0.25 counted from 1 (1.0 -> 0, 1.25 -> 1, ...).
    valence_values.append((data['valence']-1)//0.25)
    recordings.append(data['audio_data'])

valence_values = np.array(valence_values)
len(recordings)

100%|██████████| 10557/10557 [00:34<00:00, 307.48it/s]


10391

In [6]:

# Length of the longest recording; every recording is padded up to it.
max_length = max(map(len, recordings))

# Append trailing zeros to each recording, then stack the results into a
# single array (2-D if every recording is 1-D).
padded_arrays = np.array([
    np.pad(recording, (0, max_length - len(recording)), mode='constant')
    for recording in recordings
])

In [7]:
from sklearn.model_selection import train_test_split

# First split: 60 % of the samples for training, 40 % held out.
X_train, X_test_help, y_train, y_test_help = train_test_split(
    padded_arrays,
    valence_values,
    test_size=0.4,
    random_state=42,
)

# Second split: the held-out part is halved into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_test_help,
    y_test_help,
    test_size=0.5,
    random_state=42,
)


In [8]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Mini-batch size shared by all three loaders; reduce it if memory runs short.
batch_size = 64

# Features: float32 with an extra trailing axis added by unsqueeze(2).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(2)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(2)
X_validation_tensor = torch.tensor(X_val, dtype=torch.float32).unsqueeze(2)

# Labels: integer (long) tensors, as used with the cross-entropy loss.
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
y_validation_tensor = torch.tensor(y_val, dtype=torch.long)

# Pair each feature tensor with its labels.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
validation_dataset = TensorDataset(X_validation_tensor, y_validation_tensor)

# Only the training loader shuffles; test and validation keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)




In [None]:
# Shape of the full training feature tensor (first tensor stored in the dataset).
print(train_loader.dataset.tensors[0].shape)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


# LSTM

In [None]:
### Alternatives to try

#optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = torch.optim.Adagrad(model.parameters(), lr=0.001,weight_decay=0.00001)

# Target device for the model and the batches: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
### Hyperparameters

input_size = 1  # passed to LSTM(...), which only uses it to compute `downsampled_length`
sequence_len = 90948  # NOTE(review): not referenced by the code visible in this notebook
hidden_size = 128  # size of the LSTM hidden state
num_layers = 2  # number of stacked LSTM layers
num_classes = 17  # width of the final linear layer's output
num_epochs = 20  # passes over the training loader
learning_rate = 0.01    # NOTE(review): the optimizer cells hard-code lr=0.01 instead of reading this


class LSTM(nn.Module):
    """Strided Conv1d down-sampler followed by an LSTM and a linear classifier.

    The convolution (kernel 250, stride 250, 16 output channels) shortens the
    sequence before it reaches the LSTM; the LSTM output at the last time step
    is mapped to ``num_classes`` scores.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        """
        Parameters:
        - input_size (int): Only used to compute ``self.downsampled_length``;
          no layer is sized from it.
        - hidden_size (int): Size of the LSTM hidden state.
        - num_layers (int): Number of stacked LSTM layers.
        - num_classes (int): Number of output scores per sample.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.kernel_size = 250
        self.stride = 250

        # CNN layer: non-overlapping windows (stride equals the kernel size)
        self.conv1d = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=self.kernel_size, stride=self.stride)
        self.relu = nn.ReLU()

        # Convolution output-length formula applied to `input_size`.
        # Informational only: nothing else in the class reads this attribute.
        self.downsampled_length = (input_size - self.kernel_size) // self.stride + 1

        # LSTM over the 16 convolution channels, then the classifier head
        self.lstm = nn.LSTM(input_size=16, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``.

        ``x`` must have three axes; it is permuted with ``(0, 2, 1)`` before
        the convolution, i.e. it is expected as ``(batch, seq_len, 1)``.
        """
        x = x.permute(0, 2, 1)  # -> (batch, channels, seq_len) for Conv1d

        # CNN
        x = self.relu(self.conv1d(x))

        # Back to (batch, seq_len, features) for the batch_first LSTM. The
        # tensor is always 3-D here, so no extra reshaping branch is needed.
        x = x.permute(0, 2, 1)

        # Zero initial states created on the input's own device and dtype, so
        # the module does not depend on a module-level `device` variable.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        hidden_states = x.new_zeros(state_shape)
        cell_states = x.new_zeros(state_shape)

        out, _ = self.lstm(x, (hidden_states, cell_states))
        # Classify from the LSTM output at the last time step
        return self.output_layer(out[:, -1, :])

def trainLSTM(num_epochs,model,train_dataloader,loss_func):
    """Run the training loop and print the loss every 100th step.

    Relies on the module-level ``optimizer`` and ``device``; both must be
    defined before this function is called.

    Parameters:
    - num_epochs (int): Number of passes over ``train_dataloader``.
    - model (nn.Module): Network to train.
    - train_dataloader: Yields ``(recording, label)`` batches; supports ``len()``.
    - loss_func (callable): Loss applied as ``loss_func(outputs, label)``.
    """
    steps_per_epoch = len(train_dataloader)

    for epoch_idx in range(num_epochs):
        for step, (recording, label) in enumerate(train_dataloader, start=1):
            recording, label = recording.to(device), label.to(device)

            # Forward pass and loss
            loss = loss_func(model(recording), label)

            # Reset gradients, back-propagate, update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                print (f'Epoch [{epoch_idx+1}/{num_epochs}], Step [{step}/{steps_per_epoch}], Loss: {loss.item():.4f}')
def evaluate(model,test):
    """Print per-batch predictions and the overall accuracy; return the accuracy in percent.

    Relies on the module-level ``device``. The predicted class of a sample is
    the index of its largest output score.

    Parameters:
    - model (nn.Module): Network to evaluate.
    - test: Iterable of ``(recording, labels)`` batches.
    """
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for recording, labels in test:
            recording, labels = recording.to(device), labels.to(device)
            scores = model(recording)
            predicted = scores.data.max(dim=1).indices
            print(predicted)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    accuracy = 100 * n_correct / n_seen
    print(f'Accuracy of the network on the test set: {accuracy} %')
    return accuracy
    
    

    

In [None]:
# Build the network on the selected device and show its layers.
model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
print(model)

# trainLSTM reads `optimizer` from module scope, so it must exist before the call.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)
trainLSTM(
    num_epochs=num_epochs,
    model=model,
    train_dataloader=train_loader,
    loss_func=loss_func,
)

In [None]:
# Accuracy of the trained LSTM model on the test loader (also printed by evaluate).
evaluate(model,test_loader)



In [2]:
# Target device for the model and the batches: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# **LSTM WITH CONVOLUTIONAL LAYERS**

In [12]:
### Hyperparameters

input_size = 1  # NOTE(review): the CNN_LSTM cell passes max_length instead, and CNN_LSTM ignores the argument
sequence_len = 90948  # NOTE(review): not referenced by the code visible in this notebook
hidden_size = 128  # size of the LSTM hidden state
num_layers = 2  # number of stacked LSTM layers
num_classes = 17  # width of the final linear layer's output
num_epochs = 20  # passes over the training loader
learning_rate = 0.01    # NOTE(review): the optimizer cells hard-code lr=0.01 instead of reading this

class CNN_LSTM(nn.Module):
    """Two Conv1d/ReLU/MaxPool1d stages feeding an LSTM and a linear classifier.

    Each pooling stage halves the sequence length; the second convolution
    leaves 32 channels, which become the LSTM's per-step input features.
    ``input_size`` is accepted for signature compatibility but is not used.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()

        # Feature extractor: 1 -> 64 -> 32 channels, length halved twice
        feature_layers = [
            nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
        ]
        self.cnn = nn.Sequential(*feature_layers)

        # Sequence model over the 32 CNN channels, then the classifier head
        self.lstm = nn.LSTM(input_size=32, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``; prints the intermediate shapes."""
        print("Original shape:", x.shape)

        # Conv1d wants (batch, channels, seq_len)
        channels_first = x.permute(0, 2, 1)
        print("Post permute for CNN:", channels_first.shape)

        features = self.cnn(channels_first)
        print("Post CNN:", features.shape)

        # The batch_first LSTM wants (batch, seq_len, features)
        sequence = features.permute(0, 2, 1)
        print("Pre LSTM:", sequence.shape)

        lstm_out, _ = self.lstm(sequence)
        # Classify from the LSTM output at the last time step
        last_step = lstm_out[:, -1, :]
        return self.fc(last_step)


def train(num_epochs,model,train_dataloader,loss_func):
    """Run the training loop and print the loss every 100th step.

    Relies on the module-level ``optimizer`` and ``device``; both must be
    defined before this function is called.

    Parameters:
    - num_epochs (int): Number of passes over ``train_dataloader``.
    - model (nn.Module): Network to train.
    - train_dataloader: Yields ``(recording, label)`` batches; supports ``len()``.
    - loss_func (callable): Loss applied as ``loss_func(outputs, label)``.
    """
    steps_per_epoch = len(train_dataloader)

    for epoch_idx in range(num_epochs):
        for step, (recording, label) in enumerate(train_dataloader, start=1):
            recording, label = recording.to(device), label.to(device)

            # Forward pass and loss
            loss = loss_func(model(recording), label)

            # Reset gradients, back-propagate, update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                print (f'Epoch [{epoch_idx+1}/{num_epochs}], Step [{step}/{steps_per_epoch}], Loss: {loss.item():.4f}')
    
    

    

In [13]:
# Build the network on the selected device and show its layers.
LSTMmodel = CNN_LSTM(max_length, hidden_size, num_layers, num_classes).to(device)
print(LSTMmodel)

# train reads `optimizer` from module scope, so it must exist before the call.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(LSTMmodel.parameters(), lr=0.01)

train(
    num_epochs=num_epochs,
    model=LSTMmodel,
    train_dataloader=train_loader,
    loss_func=loss_func,
)

CNN_LSTM(
  (cnn): Sequential(
    (0): Conv1d(1, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(64, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (4): ReLU()
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (lstm): LSTM(32, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=17, bias=True)
)
Original shape: torch.Size([64, 90948, 1])
Post permute for CNN: torch.Size([64, 1, 90948])
Post CNN: torch.Size([64, 32, 22737])
Pre LSTM: torch.Size([64, 22737, 32])


OutOfMemoryError: CUDA out of memory. Tried to allocate 13.89 GiB (GPU 0; 4.00 GiB total capacity; 9.59 GiB already allocated; 0 bytes free; 9.75 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF