**Loading** CMU-MOSI **dataset**

In [1]:
!git clone https://github.com/pliang279/MultiBench.git

Cloning into 'MultiBench'...
remote: Enumerating objects: 6943, done.[K
remote: Counting objects: 100% (154/154), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 6943 (delta 72), reused 121 (delta 60), pack-reused 6789[K
Receiving objects: 100% (6943/6943), 51.07 MiB | 12.32 MiB/s, done.
Resolving deltas: 100% (4258/4258), done.


In [2]:
%cd MultiBench

/content/MultiBench


In [3]:
!mkdir data
!pip install gdown && gdown https://drive.google.com/u/0/uc?id=1szKIqO0t3Be_W91xvf6aYmsVVUa7wDHU

Downloading...
From (original): https://drive.google.com/u/0/uc?id=1szKIqO0t3Be_W91xvf6aYmsVVUa7wDHU
From (redirected): https://drive.google.com/uc?id=1szKIqO0t3Be_W91xvf6aYmsVVUa7wDHU&confirm=t&uuid=557f6c22-3de0-454d-933d-05a74da11f1a
To: /content/MultiBench/mosi_raw.pkl
100% 357M/357M [00:06<00:00, 55.3MB/s]


In [4]:
import torch
import sys
import os
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score
import numpy as np


In [5]:
# Import the associated dataloader for affect datasets, which MOSI is a part of.
from datasets.affect.get_data import get_dataloader

# Create the training, validation, and test-set dataloaders.
traindata, validdata, testdata = get_dataloader(
    'mosi_raw.pkl', robust_test=False, max_pad=True, data_type='mosi', max_seq_len=50)

----------------------------------------------------------------------------------------------------------------------------------------------------

In [6]:
import torch
import torch.nn as nn

class SubnetModel(nn.Module):
    def __init__(self, input_size, num_utterances, fc1_size, fc2_size, fc3_size, dropout=0.15):
        super(SubnetModel, self).__init__()

        # Dropout layer
        self.drop = nn.Dropout(p=dropout)

        # Fully connected layers
        self.fc1 = nn.Linear(input_size, fc1_size)  # First fully connected layer
        self.fc2 = nn.Linear(fc1_size, fc2_size)    # Second fully connected layer
        self.fc3 = nn.Linear(fc2_size, fc3_size)    # Third fully connected layer

        # Activation functions
        self.relu = nn.ReLU()  # ReLU activation function

    def forward(self, x):
        # Compute mean along the sequence dimension
        x = torch.mean(x, dim=1)

        # Replace NaN values with zeros
        nan_mask = torch.isnan(x)
        x[nan_mask] = 0

        # Pass through fully connected layers with ReLU activation
        fc1_out = self.relu(self.fc1(x))
        fc2_out = self.relu(self.fc2(fc1_out))
        drop_out = self.drop(fc2_out)

        # Final fully connected layer output
        fc3_out = self.relu(self.fc3(drop_out))

        return fc3_out


In [7]:
class TextModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, fc1_size, fc2_size):
        super(TextModel, self).__init__()

        # LSTM layer (stacked LSTM)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        # Fully connected layers
        self.fc1 = nn.Linear(hidden_size, fc1_size)  # First fully connected layer
        self.fc2 = nn.Linear(fc1_size, fc2_size)     # Second fully connected layer

        # Dropout layer
        self.dropout = nn.Dropout(p=0.15)

        # Activation functions
        self.relu = nn.ReLU()  # ReLU activation function

    def forward(self, x):
        # LSTM layer
        lstm_out, (hidden_states, cell_states) = self.lstm(x)

        # Pass the hidden states through fully connected layers with ReLU activation
        fc1_out = self.relu(self.fc1(hidden_states.squeeze()))
        drop_out = self.dropout(fc1_out)
        fc2_out = self.relu(self.fc2(drop_out))

        return fc2_out


In [67]:
import torch
import torch.nn as nn

class TFN(nn.Module):
    def __init__(self, audio_params, video_params, text_params, SIN_params):
        super(TFN, self).__init__()

        self.output_range = nn.Parameter(torch.FloatTensor([5]), requires_grad=False)
        self.output_shift = nn.Parameter(torch.FloatTensor([0]), requires_grad=False)

        self.audio_params = audio_params
        self.video_params = video_params
        self.text_params = text_params

        # Unimodal models
        self.audio_subnet = SubnetModel(audio_params[0], audio_params[1], audio_params[2], audio_params[3], audio_params[4])
        self.video_subnet = SubnetModel(video_params[0], video_params[1], video_params[2], video_params[3], video_params[4])
        self.text_subnet = TextModel(text_params[0], text_params[1], text_params[2], text_params[3], text_params[4])

        # Fully connected layers
        self.fc1 = nn.Linear(((audio_params[2] + 1) * (video_params[2] + 1) * (text_params[3] + 1)), SIN_params[0])
        self.fc2 = nn.Linear(SIN_params[0], SIN_params[1])

        # Output layer
        self.output_layer = nn.Linear(SIN_params[1], 5)

        # Activation functions
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        batch_size = x[0].shape[0]

        # Unimodal outputs
        audio_out = self.audio_subnet(x[0])
        video_out = self.video_subnet(x[1])
        text_out = self.text_subnet(x[2])

        # Adding 1 to increase the dimension value
        audio_out = torch.cat((torch.ones(batch_size, 1).to(x[0].device), audio_out), dim=1)
        video_out = torch.cat((torch.ones(batch_size, 1).to(x[1].device), video_out), dim=1)
        text_out = torch.cat((torch.ones(batch_size, 1).to(x[2].device), text_out), dim=1)

        # Tensor fusion operation
        fusion_tensor = torch.bmm(audio_out.unsqueeze(2), video_out.unsqueeze(1))
        fusion_tensor = fusion_tensor.view(-1, (self.audio_params[2] + 1) * (self.video_params[2] + 1), 1)
        fusion_tensor = torch.bmm(fusion_tensor, text_out.unsqueeze(1)).view(batch_size, -1)

        # Sentiment inference network (SIN)
        fc1_out = self.relu(self.fc1(fusion_tensor))
        fc2_out = self.relu(self.fc2(fc1_out))

        # Output layer with Sigmoid activation
        output = self.softmax(self.output_layer(fc2_out))

        # Get output between -3 and +3
        # output = output * self.output_range + self.output_shift

        output = torch.argmax(output, dim=1)

        return output


**-------------------------------------------------------------------------------------------------------------------------------------**

In [68]:
# Audio parameters: (feature_length, max_seq_len, fc1_size, fc2_size, fc3_size)
audio_params = (35, 50, 32, 32, 32)

# Video parameters: (feature_length, max_seq_len, fc1_size, fc2_size, fc3_size)
video_params = (74, 50, 32, 32, 32)

# Text parameters: (feature_length, LSTM_hidden_size, num_LSTM_layers, fc1_size, fc2_size)
text_params = (300, 128, 1, 128, 128)

# Sentiment Inference Network (SIN) parameters: (fc1_size, fc2_size)
SIN_params = (128, 128)

# Instantiate TFN model with the specified parameters
final_model = TFN(audio_params, video_params, text_params, SIN_params)

In [69]:
# Loss function: CrossEntropyLoss for classification tasks
Loss = torch.nn.CrossEntropyLoss()

# Optimizer: Adam optimizer for training the model parameters
optimizer = torch.optim.Adam(list(final_model.parameters())[2:], lr=0.0005, weight_decay=0.01)

# Number of epochs for training
num_epochs = 100

In [70]:
# Loop through each epoch for training
for epoch in range(num_epochs):

    print("EPOCH : ", epoch + 1)

    # Training
    total_train_loss = 0.0
    num_sequences = 0
    final_model.train()  # Set the model to training mode

    # Iterate over training data batches
    for batch in traindata:
        optimizer.zero_grad()  # Zero the gradients
        outputs = final_model(batch[:-1])
        expected_output = batch[-1]

        # Clamp the expected output between -2 and 2
        expected_output = torch.clamp(expected_output, min=-2, max=2)
        expected_output += 2
        expected_output = torch.round(expected_output)

        # Calculate the loss
        loss = Loss(outputs, expected_output.long())
        total_train_loss += loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        num_sequences += 1

    # Calculate average training loss
    average_train_loss = total_train_loss / num_sequences
    print("-------------Training----------------")
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_train_loss:.4f}')

    # Validation
    final_model.eval()  # Set the model to evaluation mode
    total_val_loss = 0.0
    num_sequences = 0
    val_all_predictions = []
    val_all_targets = []

    # Iterate over validation data batches
    with torch.no_grad():
        for batch in validdata:
            val_outputs = final_model(batch[:-1])
            val_targets = batch[-1]

            # Clamp the validation targets between -2 and 2
            val_targets = torch.clamp(val_targets, min=-2, max=2)
            val_targets += 2
            val_targets = torch.round(val_targets)

            # Calculate validation loss
            val_loss = Loss(val_outputs, val_targets)
            total_val_loss += val_loss.item()

            # Collect predictions and targets for accuracy calculation
            val_targets_np = val_targets.cpu().numpy()
            val_outputs_np = val_outputs.cpu().numpy()

            val_all_predictions.extend(val_outputs_np)
            val_all_targets.extend(val_targets_np)
            num_sequences += 1

    # Calculate average validation loss and accuracy
    average_val_loss = total_val_loss / num_sequences
    accuracy = accuracy_score(val_all_targets, val_all_predictions)

    print("--------------Validation----------")
    print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {average_val_loss:.4f}, Accuracy Score: {accuracy:.4f}')
    print("\n \n")


EPOCH :  1


  self.pid = os.fork()
  self.pid = os.fork()


RuntimeError: "log_softmax_lastdim_kernel_impl" not implemented for 'Long'

In [None]:
outputs

In [None]:
# Importing necessary metrics from sklearn
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error

# Set the final model to evaluation mode
final_model.eval()

# Initialize lists to store predictions and targets
all_predictions = []
all_targets = []

# Iterate over test data batches
with torch.no_grad():
    for batch in testdata:
        test_targets = batch[-1]
        test_outputs = final_model(batch[:-1])

        # Convert predictions and targets to numpy arrays
        predictions = test_outputs.numpy().flatten()
        targets = test_targets.numpy().flatten()

        # Extend the lists with current batch predictions and targets
        all_predictions.extend(predictions)
        all_targets.extend(targets)

    # Calculate Mean Absolute Error and R-squared score
    mae = mean_absolute_error(all_targets, all_predictions)
    r_squared = r2_score(all_targets, all_predictions)

    # Print the results
    print(f'Test MAE: {mae:.4f}, R-squared: {r_squared:.4f}')
