### Imports

In [1]:
import os
import sys
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torcheval.metrics import BinaryPrecision, BinaryRecall, BinaryF1Score
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.model_selection import train_test_split, KFold

sys.path.append('../')

from modules.classifier import DenseLayer, BCELoss
from modules.dataloader import load_npy_files
from modules.linear_transformation import LinearTransformations

### Data Loading

In [2]:
class MultimodalDataset(Dataset):
    """Dataset joining pre-extracted text, audio and video features by IMDB id.

    Rows of ``id_label_df`` that lack a feature entry in any of the three
    modalities are dropped at construction time; their ids are recorded in
    ``self.missing_files`` and printed.

    Args:
        id_label_df: DataFrame with an ``'IMDBid'`` column and a ``'Label'``
            column whose values are keys of ``LABEL_MAP``.
        text_features: Iterable of ``(file_path, tensor)`` pairs. The lookup
            key is the file name up to its first ``'.'``.
        audio_features: Iterable of ``(file_path, tensor)`` pairs. The lookup
            key is the second ``'_'``-separated token of the file name, with
            everything from the first ``'.'`` removed (e.g.
            ``feature_tt1741243.npy`` -> ``tt1741243``).
        video_features: Iterable of ``(file_path, tensor)`` pairs. The lookup
            key is the file-name part before the first ``'_'`` (e.g.
            ``tt0443453_features.npy`` -> ``tt0443453``).
    """

    # Binary class encoding of the 'Label' column. Built once here instead of
    # on every __getitem__ call.
    LABEL_MAP = {'red': 1, 'green': 0}

    def __init__(self, id_label_df, text_features, audio_features, video_features):
        self.id_label_df = id_label_df

        # Convert feature lists to dictionaries for fast lookup by IMDB id
        self.text_features = {os.path.basename(file).split('.')[0]: tensor for file, tensor in text_features}
        self.audio_features = {os.path.basename(file).split('_')[1].split('.')[0]: tensor for file, tensor in audio_features}
        self.video_features = {os.path.basename(file).split('_')[0]: tensor for file, tensor in video_features}

        # List to store missing files (filled by _filter_valid_files)
        self.missing_files = []

        # Positional row indices of id_label_df that have all three modalities
        self.valid_files = self._filter_valid_files()

    def _filter_valid_files(self):
        """Return positional row indices whose IMDB id has all three modalities.

        Ids missing from at least one modality are appended to
        ``self.missing_files`` as ``{'IMDBid': <id>}`` and reported on stdout.
        """
        valid_files = []
        # Read the id column once instead of materialising a row per index.
        for idx, imdbid in enumerate(self.id_label_df['IMDBid'].tolist()):
            if imdbid in self.text_features and imdbid in self.audio_features and imdbid in self.video_features:
                valid_files.append(idx)
            else:
                self.missing_files.append({'IMDBid': imdbid})

        # Print missing files after checking all
        if self.missing_files:
            print("Missing files:")
            for item in self.missing_files:
                print(f"IMDBid: {item['IMDBid']}")
            print(f"Total IMDB IDs with missing files: {len(self.missing_files)}")
        else:
            print("No missing files.")

        return valid_files

    def __len__(self):
        """Number of rows that have features for all three modalities."""
        return len(self.valid_files)

    def __getitem__(self, idx):
        """Return ``(text, audio, video, label)`` for the ``idx``-th valid row.

        The label is a float32 tensor of shape ``(1,)`` holding
        ``LABEL_MAP[label]``.

        Raises:
            KeyError: If the row's ``'Label'`` value is not in ``LABEL_MAP``.
        """
        # Map the position in the filtered view back to the DataFrame row
        row = self.id_label_df.iloc[self.valid_files[idx]]
        imdbid = row['IMDBid']
        label = row['Label']

        # Every id listed in valid_files was checked against all three
        # dictionaries at construction time, so plain indexing is enough and
        # no zero-filled fallback tensors have to be allocated per item.
        text_data = self.text_features[imdbid]
        audio_data = self.audio_features[imdbid]
        video_data = self.video_features[imdbid]

        if label not in self.LABEL_MAP:
            raise KeyError(
                f"Label {label!r} for IMDBid {imdbid!r} not found in label_map; "
                f"expected one of {sorted(self.LABEL_MAP)}."
            )

        # Float (not integer) target with shape (1,)
        label_data = torch.tensor([self.LABEL_MAP[label]], dtype=torch.float32)

        return text_data, audio_data, video_data, label_data


In [3]:
import torch.nn.functional as F

def collate_fn(batch):
    """Collate ``(text, audio, video, label)`` samples into batched tensors.

    Text, audio and label tensors are stacked along a new leading batch
    dimension. Video tensors may differ in their first dimension, so each one
    is zero-padded at the end up to the longest clip in the batch before being
    stacked.

    Args:
        batch: Sequence of 4-tuples of tensors, one tuple per sample.

    Returns:
        Tuple ``(text, audio, padded_video, labels)`` of stacked tensors.
    """
    texts, audios, videos, labels = zip(*batch)

    text_batch = torch.stack(texts)
    audio_batch = torch.stack(audios)

    # Longest clip (size of dim 0) in this batch sets the padded length
    longest = max(clip.size(0) for clip in videos)

    padded_clips = []
    for clip in videos:
        missing_rows = longest - clip.size(0)
        # F.pad pairs run from the last dimension backwards: (0, 0) leaves the
        # last dimension untouched, (0, missing_rows) appends zeros to the
        # second-to-last one (dim 0 for 2-D [frames, features] clips).
        padded_clips.append(F.pad(clip, (0, 0, 0, missing_rows), "constant", 0))
    video_batch = torch.stack(padded_clips)

    # Each label is already a tensor; stacking yields one row per sample
    label_batch = torch.stack(labels)

    return text_batch, audio_batch, video_batch, label_batch


In [5]:
# Labels spreadsheet (the 'IMDBid' and 'Label' columns are read by MultimodalDataset)
id_label_df = pd.read_excel('../../misc/MM-Trailer_dataset.xlsx')

# One directory of pre-extracted feature files per modality
text_features_dir, audio_features_dir, video_features_dir = (
    '../../misc/text_features',
    '../../misc/audio_features',
    '../../misc/video_features',
)

# Load every modality's feature vectors (text first, then audio, then video)
text_features, audio_features, video_features = (
    load_npy_files(directory)
    for directory in (text_features_dir, audio_features_dir, video_features_dir)
)

# 70 % train; the remaining 30 % is halved into validation and test (15 % each)
train_df, val_test_df = train_test_split(id_label_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(val_test_df, test_size=0.5, random_state=42)

# One dataset per split; each constructor prints the ids it had to drop
train_dataset, val_dataset, test_dataset = (
    MultimodalDataset(split_df, text_features, audio_features, video_features)
    for split_df in (train_df, val_df, test_df)
)

# DataLoaders share everything except shuffling, which is on for training only
_loader_kwargs = dict(batch_size=8, num_workers=0, collate_fn=collate_fn)
train_dataloader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_dataloader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_dataloader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

# The unsplit dataset is what K-fold cross-validation operates on
full_dataset = MultimodalDataset(id_label_df, text_features, audio_features, video_features)



Missing files:
IMDBid: tt2494280
IMDBid: tt1724962
IMDBid: tt1152836
IMDBid: tt0389790
IMDBid: tt3053228
IMDBid: tt1045778
IMDBid: tt1758795
IMDBid: tt0099385
IMDBid: tt2917484
IMDBid: tt4769836
IMDBid: tt0089652
IMDBid: tt0465494
IMDBid: tt3675748
IMDBid: tt2126362
IMDBid: tt0988083
IMDBid: tt2101341
IMDBid: tt0401997
IMDBid: tt1661461
IMDBid: tt1313139
IMDBid: tt1094661
IMDBid: tt5162658
IMDBid: tt0104839
IMDBid: tt1288558
IMDBid: tt5962210
IMDBid: tt2937696
IMDBid: tt0284363
IMDBid: tt5580390
IMDBid: tt2293750
IMDBid: tt2980472
IMDBid: tt0082186
IMDBid: tt0924129
IMDBid: tt0988595
IMDBid: tt1349482
IMDBid: tt4158096
IMDBid: tt1403241
IMDBid: tt2713642
IMDBid: tt1682940
IMDBid: tt10327354
IMDBid: tt1087842
IMDBid: tt1800302
IMDBid: tt0113855
IMDBid: tt2504022
IMDBid: tt7248248
IMDBid: tt1720164
IMDBid: tt1336621
IMDBid: tt0266987
IMDBid: tt0859635
Total IMDB IDs with missing files: 47
Missing files:
IMDBid: tt2437712
IMDBid: tt0099371
IMDBid: tt2935564
IMDBid: tt0140336
IMDBid: tt468

### Test Dataloader (for debugging)

In [6]:
# Smoke test: walk the whole training DataLoader once. One '---' line is
# printed per batch, so an exception here points at a sample that fails to
# load or collate. Uncomment the prints below to see each batch's shapes.
for text, audio, video, labels in train_dataloader:
    # print(f"Text Shape: {text.shape}")
    # print(f"Audio Shape: {audio.shape}")
    # print(f"Video Shape: {video.shape}")
    # print(f"Labels Shape: {labels.shape}")
    print('---')


---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---


In [7]:
# Inspect the shapes of the first collated training batch.
# The loop variables are deliberately NOT called text_features /
# audio_features / video_features: at notebook scope those names hold the
# feature lists returned by load_npy_files, and reusing them here would
# silently replace the lists with a single batch of tensors.
for batch_text, batch_audio, batch_video, batch_targets in train_dataloader:
    print("Text Features Shape:", batch_text.shape)
    print("Audio Features Shape:", batch_audio.shape)
    print("Video Features Shape:", batch_video.shape)
    print("Labels shape:", batch_targets.shape)
    break


Text Features Shape: torch.Size([8, 1024])
Audio Features Shape: torch.Size([8, 1, 197, 768])
Video Features Shape: torch.Size([8, 185, 768])
Labels shape: torch.Size([8, 1])


In [8]:
# Function to print a sample from the dataset
def print_sample(dataset, index):
    """Print the tensor shapes and the label of ``dataset[index]``.

    Args:
        dataset: Indexable whose items unpack into
            ``(text, audio, video, label)`` tensors.
        index: Position of the sample to show.
    """
    text_data, audio_data, video_data, label_data = dataset[index]
    rule = "-" * 30

    print(f"Sample {index}:")
    print(rule)
    # Shapes only; the raw tensors are too large to be worth printing
    shape_rows = (
        ("Text Data Shape:", text_data),
        ("Audio Data Shape:", audio_data),
        ("Video Data Shape:", video_data),
    )
    for caption, tensor in shape_rows:
        print(caption, tensor.shape)
    print("Label:", label_data)
    print(rule)

# Print one sample from the training dataset; the validation and test
# equivalents are kept below, commented out.
print("Training Dataset Sample:")
print_sample(train_dataset, 1)  # Change the index to view a different sample

# print("Validation Dataset Sample:")
# print_sample(val_dataset, 0)  # Change the index to view a different sample

# print("Test Dataset Sample:")
# print_sample(test_dataset, 0)  # Change the index to view a different sample

Training Dataset Sample:
Sample 1:
------------------------------
Text Data Shape: torch.Size([1024])
Audio Data Shape: torch.Size([1, 197, 768])
Video Data Shape: torch.Size([89, 768])
Label: tensor([1.])
------------------------------


In [9]:
def print_dataloader_samples(dataloader, num_batches=1):
    """Print the tensor shapes of the first ``num_batches`` batches.

    Args:
        dataloader: Iterable of batches, each unpacking into
            ``(text, audio, video, labels)`` tensors.
        num_batches: How many leading batches to report.
    """
    rule = "-" * 30

    for batch_no, batch in enumerate(dataloader):
        # Stop once the requested number of batches has been shown
        if not (batch_no < num_batches):
            break

        text_data, audio_data, video_data, labels_data = batch

        print(f"Batch {batch_no}:")
        shape_rows = (
            ("Text Data Shape:", text_data),
            ("Audio Data Shape:", audio_data),
            ("Video Data Shape:", video_data),
            ("Labels:", labels_data),
        )
        for caption, tensor in shape_rows:
            print(caption, tensor.shape)
        print(rule)

# Print the first two batches from the training DataLoader
print("Training DataLoader Samples:")
print_dataloader_samples(train_dataloader, num_batches=2)

# # Print a few batches from the validation DataLoader
# print("Validation DataLoader Samples:")
# print_dataloader_samples(val_dataloader, num_batches=5)

# # Print a few batches from the test DataLoader
# print("Test DataLoader Samples:")
# print_dataloader_samples(test_dataloader, num_batches=5)

Training DataLoader Samples:
Batch 0:
Text Data Shape: torch.Size([8, 1024])
Audio Data Shape: torch.Size([8, 1, 197, 768])
Video Data Shape: torch.Size([8, 146, 768])
Labels: torch.Size([8, 1])
------------------------------
Batch 1:
Text Data Shape: torch.Size([8, 1024])
Audio Data Shape: torch.Size([8, 1, 197, 768])
Video Data Shape: torch.Size([8, 144, 768])
Labels: torch.Size([8, 1])
------------------------------


### GMU Class

In [10]:
# Class for Gated Multimodal Unit of Arevalo et al. (2017)
class GatedMultimodalUnit(torch.nn.Module):
    """Fuse text, audio and video features into a single gated representation.

    Every modality is first projected to ``output_dim``. For each projected
    modality ``x`` the unit computes a candidate ``tanh(x @ W)`` and a gate
    ``sigmoid(x @ Y)`` from that same modality; the output is the sum of the
    three gate-weighted candidates.

    Args:
        text_dim: Feature size of the text input.
        audio_dim: Channel (feature) size of the audio input.
        video_dim: Channel (feature) size of the video input.
        output_dim: Size of the fused output vector.
    """

    def __init__(self, text_dim, audio_dim, video_dim, output_dim):
        super().__init__()

        # Projection of the text vector (LinearTransformations is a project
        # module; presumably maps [batch, text_dim] -> [batch, output_dim])
        self.text_linear = LinearTransformations(text_dim, output_dim)

        # Kernel-size-1 convolutions: a per-timestep projection of the audio
        # and video sequences to output_dim channels
        self.audio_conv = nn.Conv1d(audio_dim, output_dim, kernel_size=1)
        self.video_conv = nn.Conv1d(video_dim, output_dim, kernel_size=1)

        self.output_dim = output_dim

        self.activation = nn.Tanh()
        self.gate_activation = nn.Sigmoid()

        # W1..W3 are the candidate weight matrices, Y1..Y3 the gating matrices
        # (index 1 = text, 2 = audio, 3 = video). Registered in this order.
        for param_name in ("W1", "W2", "W3", "Y1", "Y2", "Y3"):
            setattr(self, param_name, nn.Parameter(torch.Tensor(output_dim, output_dim)))

        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-uniform initialise the six square matrices, W before Y."""
        for matrix in (self.W1, self.W2, self.W3, self.Y1, self.Y2, self.Y3):
            init.xavier_uniform_(matrix)

    def forward(self, text_features, audio_features, video_features):
        """Return the fused representation, expected shape [batch, output_dim].

        Args:
            text_features: Text batch fed to ``text_linear``.
            audio_features: Audio batch with a singleton dim 1, i.e.
                [batch, 1, sequence_length, audio_dim].
            video_features: Video batch [batch, sequence_length, video_dim].
        """
        x_t = self.text_linear(text_features)

        # Drop the singleton dim and move channels in front of the sequence
        # axis, as Conv1d expects [batch, channels, sequence_length]
        audio_channels_first = audio_features.squeeze(1).permute(0, 2, 1)
        x_a = self.audio_conv(audio_channels_first).mean(dim=-1)

        # NOTE(review): the mean runs over the whole sequence axis, so any
        # zero-padded frames added when batching are averaged in as well.
        video_channels_first = video_features.permute(0, 2, 1)
        x_v = self.video_conv(video_channels_first).mean(dim=-1)

        per_modality = (
            (x_t, self.W1, self.Y1),
            (x_a, self.W2, self.Y2),
            (x_v, self.W3, self.Y3),
        )

        # Candidates first, then gates, each [batch, output_dim]
        candidates = [self.activation(x @ weight) for x, weight, _ in per_modality]
        gates = [self.gate_activation(x @ gate) for x, _, gate in per_modality]

        # Gate-weighted sum of the three modalities
        return gates[0] * candidates[0] + gates[1] * candidates[1] + gates[2] * candidates[2]


### Test Model (for debugging)

In [11]:
# Smoke-test the GMU on one collated batch from the DataLoader

# Feature sizes per modality; audio/video values are the channel counts
text_dim, audio_dim, video_dim = 1024, 768, 768
output_dim = 512  # size of the fused vector; free to choose

# Instantiate the GMU model
gmu = GatedMultimodalUnit(text_dim, audio_dim, video_dim, output_dim)

# Only the first batch is needed; any DataLoader (train/val/test) would do
batch = next(iter(train_dataloader), None)
if batch is not None:
    text_data, audio_data, video_data, labels = batch

    # Forward pass only, so gradient tracking is switched off
    with torch.no_grad():
        output = gmu(text_data, audio_data, video_data)

    print('-'*50)
    print("GMU Output Shape:", output.shape)
    print("GMU Output: ", output)


--------------------------------------------------
GMU Output Shape: torch.Size([8, 512])
GMU Output:  tensor([[-0.0038,  0.1910,  0.1111,  ...,  0.0087,  0.1819,  0.1141],
        [-0.0044,  0.0162,  0.2544,  ...,  0.2737,  0.0663,  0.2049],
        [ 0.0756,  0.0695,  0.1365,  ...,  0.1385, -0.0628,  0.1549],
        ...,
        [ 0.1110,  0.1560,  0.0137,  ...,  0.0556, -0.0376,  0.3730],
        [-0.0155, -0.0034,  0.1696,  ...,  0.2945, -0.0514,  0.2381],
        [-0.1074, -0.0131,  0.1898,  ...,  0.0238, -0.0554,  0.2082]])


In [15]:
from modules.dataloader import load_npy_files

# Define the directories
text_features_dir = '../../misc/text_features'
audio_features_dir = '../../misc/audio_features'
video_features_dir = '../../misc/video_features'

# Load the feature vectors from each directory
text_features = load_npy_files(text_features_dir)
audio_features = load_npy_files(audio_features_dir)
video_features = load_npy_files(video_features_dir)

# Select the first file from each modality directory (for testing); change
# the index to pick another file.
# The selected tensors get their own *_sample names so the feature lists above
# stay intact and this cell can be re-run without reloading.
# NOTE: index 0 of each list is not guaranteed to be the same IMDB id (see the
# file names printed below), so this checks shapes, not a real trailer.
text_file_name, text_sample = text_features[0]
audio_file_name, audio_sample = audio_features[0]
video_file_name, video_sample = video_features[0]

print("Selected File:")
print("Text file:", os.path.basename(text_file_name))
print("Audio file:", os.path.basename(audio_file_name))
print("Video file:", os.path.basename(video_file_name))
print("-"*50)


# Define dimensions (make sure these match your model's expected input sizes)
text_dim = 1024
audio_dim = 768  # Number of channels in audio data
video_dim = 768  # Number of channels in video data
output_dim = 768  # You can set this to any value, depending on your requirements

# Initialize the GMU model
model = GatedMultimodalUnit(text_dim, audio_dim, video_dim, output_dim)

# Move model to the same device as the data (GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Move the selected samples to the device (no conversion happens here)
text_sample = text_sample.to(device)
audio_sample = audio_sample.to(device)
video_sample = video_sample.to(device)

print("Text Feature Shape:", text_sample.shape)
print("Audio Feature Shape:", audio_sample.shape)
print("Video Feature Shape", video_sample.shape)
print("-"*50)

# Pass the data through the GMU model
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # No need to compute gradients
    # Text and video get a leading batch dimension of 1. The audio sample is
    # passed as-is: forward() squeezes dim 1, so its leading dim already acts
    # as the batch dimension.
    output = model(text_sample.unsqueeze(0), audio_sample, video_sample.unsqueeze(0))

# Print the output shape
print("Model output shape:", output.shape, "###[batch_size, output_dim]")
print("-"*50)
print("Model output:", output)


Selected File:
Text file: tt2381335.npy
Audio file: feature_tt1741243.npy
Video file: tt0443453_features.npy
--------------------------------------------------
Text Feature Shape: torch.Size([1024])
Audio Feature Shape: torch.Size([1, 197, 768])
Video Feature Shape torch.Size([79, 768])
--------------------------------------------------
Model output shape: torch.Size([1, 768]) ###[batch_size, output_dim]
--------------------------------------------------
Model output: tensor([[-2.5238e-01,  7.9541e-02,  3.7040e-01,  1.0298e-01,  8.7948e-02,
         -8.1107e-02, -1.4414e-01, -1.4601e-01, -8.6355e-02,  1.6799e-01,
          2.7540e-01,  4.0523e-01, -1.4785e-01,  1.0707e-01,  1.3213e-01,
         -1.1718e-01,  7.8220e-03, -1.0956e-01, -4.6226e-02, -2.1644e-02,
         -1.5270e-01,  3.1718e-01, -2.6788e-01,  2.0977e-01,  2.0672e-01,
         -1.3995e-01,  7.3574e-02, -3.1293e-01,  1.9473e-02, -3.2938e-03,
          3.3725e-01, -2.1112e-01, -1.5969e-01,  5.2099e-02,  2.2123e-01,
         

### Model Training

In [16]:
def get_optimizer(parameters, lr=1e-3):
    """Build an Adam optimizer over ``parameters``.

    Args:
        parameters: Iterable of tensors (or parameter groups) to optimise.
        lr: Learning rate handed to Adam.

    Returns:
        The configured ``optim.Adam`` instance.
    """
    adam = optim.Adam(parameters, lr=lr)
    return adam

In [17]:
def train_model(model, dense_layer, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and report the mean loss.

    Args:
        model: Fusion module called as ``model(text, audio, video)``.
        dense_layer: Module turning the fused output into predictions.
        dataloader: Iterable of ``(text, audio, video, targets)`` batches that
            also supports ``len()``.
        criterion: Loss called as ``criterion(predictions, targets)``.
        optimizer: Optimizer that steps the parameters being trained.
        device: Device every batch tensor is moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    for module in (model, dense_layer):
        module.train()

    running_loss = 0
    for text_features, audio_features, video_features, targets in dataloader:
        text_features = text_features.to(device)
        audio_features = audio_features.to(device)
        video_features = video_features.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Fusion module first, then the classification head
        fused = model(text_features, audio_features, video_features)
        predictions = dense_layer(fused)

        batch_loss = criterion(predictions, targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Mean over batches, not over samples
    average_loss = running_loss / len(dataloader)
    print(f"Training Loss: {average_loss:.4f}")
    return average_loss

In [18]:
def evaluate_model(model, dense_layer, dataloader, criterion, device):
    """Evaluate the model on ``dataloader`` and report loss and binary metrics.

    Args:
        model: Fusion module called as ``model(text, audio, video)``.
        dense_layer: Module turning the fused output into predictions.
            Predictions are thresholded at 0.5, which assumes they are
            probabilities — TODO confirm against ``DenseLayer``.
        dataloader: Iterable of ``(text, audio, video, targets)`` batches that
            also supports ``len()``.
        criterion: Loss called as ``criterion(predictions, targets)``.
        device: Device batches and metrics are moved to.

    Returns:
        Tuple ``(average_loss, precision, recall, f1_score)``. The loss is the
        mean over batches; the metrics are accumulated over all samples.

    Note:
        An empty dataloader is not handled: the final division by
        ``len(dataloader)`` fails.
    """
    model.eval()
    dense_layer.eval()
    total_loss = 0

    # Initialize the metrics for binary classification
    precision_metric = BinaryPrecision().to(device)
    recall_metric = BinaryRecall().to(device)
    f1_metric = BinaryF1Score().to(device)

    with torch.no_grad():
        for text_features, audio_features, video_features, targets in dataloader:
            text_features = text_features.to(device)
            audio_features = audio_features.to(device)
            video_features = video_features.to(device)
            # Flatten [batch, 1] -> [batch]. reshape(-1) is used instead of a
            # bare squeeze() so that a final batch holding a single sample
            # stays 1-D; squeeze() would collapse it to a 0-dim tensor, which
            # the torcheval binary metrics reject.
            targets = targets.to(device).reshape(-1)

            # Pass inputs through GMU model
            outputs = model(text_features, audio_features, video_features)

            # Pass the GMU outputs through the dense layer to get final predictions
            predictions = dense_layer(outputs).reshape(-1)

            # Compute loss
            loss = criterion(predictions, targets)
            total_loss += loss.item()

            # Threshold once and convert once; all three metrics share the
            # same integer class tensors
            predicted_classes = (predictions > 0.5).long()
            target_classes = targets.long()

            precision_metric.update(predicted_classes, target_classes)
            recall_metric.update(predicted_classes, target_classes)
            f1_metric.update(predicted_classes, target_classes)

    # Compute precision, recall, and F1 score
    precision = precision_metric.compute().item()
    recall = recall_metric.compute().item()
    f1_score = f1_metric.compute().item()

    average_loss = total_loss / len(dataloader)

    print(f"Evaluation Loss: {average_loss:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1_score:.4f}")

    return average_loss, precision, recall, f1_score

In [19]:
def cross_validate_model(text_dim, audio_dim, video_dim, output_dim, model_class,  dense_layer_class, dataset, criterion, optimizer_class, device, n_splits, collate_fn, batch_size=8, num_epochs=1, random_state=None):
    """K-fold cross-validate a fusion model plus classification head.

    A fresh model, head and optimizer are created for every fold, trained on
    the fold's training subset and evaluated on its validation subset.

    Args:
        text_dim, audio_dim, video_dim, output_dim: Passed by keyword to
            ``model_class``; ``output_dim`` is also the head's ``input_size``.
        model_class: Callable building the fusion model.
        dense_layer_class: Callable building the head from ``input_size``.
        dataset: Dataset that is split with ``KFold`` and wrapped in ``Subset``.
        criterion: Loss shared by training and evaluation.
        optimizer_class: Factory called with the combined parameter list.
        device: Device the modules and batches are moved to.
        n_splits: Number of folds.
        collate_fn: Collate function for both DataLoaders.
        batch_size: Batch size of both DataLoaders (default 8, the value that
            was previously hard-coded).
        num_epochs: Training passes per fold (default 1, the previous
            behaviour).
        random_state: Seed for the shuffled fold assignment. ``None`` (the
            default) keeps the previous unseeded behaviour; pass an int to
            get the same folds on every run.

    Returns:
        Tuple ``(loss, precision, recall, f1)``, each averaged over the folds.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    total_loss = 0
    total_precision = 0
    total_recall = 0
    total_f1 = 0

    for fold, (train_index, val_index) in enumerate(kf.split(dataset), 1):
        print("-"*50)
        print(f"Fold {fold}/{n_splits}")

        # Create subsets for training and validation
        train_subset = Subset(dataset, train_index)
        val_subset = Subset(dataset, val_index)

        # Only the training loader shuffles
        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=collate_fn)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn)

        # Initialize the model and dense layer for the current fold
        model = model_class(text_dim=text_dim, audio_dim=audio_dim, video_dim=video_dim, output_dim=output_dim).to(device)
        dense_layer = dense_layer_class(input_size=output_dim).to(device)

        # One optimizer over the parameters of both modules
        optimizer = optimizer_class(list(model.parameters()) + list(dense_layer.parameters()))

        print(f"Training model for fold {fold}")
        for _ in range(num_epochs):
            train_model(model, dense_layer, train_loader, criterion, optimizer, device)

        print(f"Evaluating model for fold {fold}")
        val_loss, precision, recall, f1_score = evaluate_model(model, dense_layer, val_loader, criterion, device)

        total_loss += val_loss
        total_precision += precision
        total_recall += recall
        total_f1 += f1_score

    # Unweighted mean of the per-fold validation results
    average_cv_loss = total_loss / n_splits
    average_cv_precision = total_precision / n_splits
    average_cv_recall = total_recall / n_splits
    average_cv_f1 = total_f1 / n_splits

    print(f"Average Cross-Validation Loss: {average_cv_loss:.4f}")
    print(f"Average Cross-Validation Precision: {average_cv_precision:.4f}")
    print(f"Average Cross-Validation Recall: {average_cv_recall:.4f}")
    print(f"Average Cross-Validation F1 Score: {average_cv_f1:.4f}")

    return average_cv_loss, average_cv_precision, average_cv_recall, average_cv_f1


### Test Run

In [20]:
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the dimensions
text_dim = 1024
audio_dim = 768
video_dim = 768
output_dim = 512

# Cross-validation.
# cross_validate_model returns four fold-averaged values (loss, precision,
# recall, F1), so all four are unpacked here rather than binding the whole
# tuple to a name that suggests it is only the loss.
(
    average_cv_loss,
    average_cv_precision,
    average_cv_recall,
    average_cv_f1,
) = cross_validate_model(
    text_dim=text_dim,
    audio_dim=audio_dim,
    video_dim=video_dim,
    output_dim=output_dim,
    model_class=GatedMultimodalUnit,
    dense_layer_class=DenseLayer,
    dataset=full_dataset,  # Use the complete dataset for cross-validation
    criterion=BCELoss(),
    optimizer_class=get_optimizer,  # Factory function; called with each fold's parameters
    device=device,
    n_splits=50,
    collate_fn=collate_fn
)


--------------------------------------------------
Fold 1/50
Training model for fold 1
Training Loss: 0.4277
Evaluating model for fold 1
Evaluation Loss: 0.3460
Precision: 0.8182
Recall: 0.9000
F1 Score: 0.8571
--------------------------------------------------
Fold 2/50
Training model for fold 2
Training Loss: 0.4470
Evaluating model for fold 2
Evaluation Loss: 0.2563
Precision: 0.8889
Recall: 0.8889
F1 Score: 0.8889
--------------------------------------------------
Fold 3/50
Training model for fold 3
Training Loss: 0.4719
Evaluating model for fold 3
Evaluation Loss: 0.3358
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
--------------------------------------------------
Fold 4/50
Training model for fold 4
Training Loss: 0.4669
Evaluating model for fold 4
Evaluation Loss: 0.3792
Precision: 0.8000
Recall: 0.5714
F1 Score: 0.6667
--------------------------------------------------
Fold 5/50
Training model for fold 5
Training Loss: 0.4713
Evaluating model for fold 5
Evaluation Loss: 0.