In [10]:
import sys, os
sys.path.append(os.path.abspath(os.path.join('..')))
from processing.preprocessing import prepare_graph_data, prepare_dataloaders
from models.MultiHeadAttention import MultiheadAttention
from models.CrossModalNet import CrossmodalNet
from models.Transformers import TransformerEncoder
from models.CombineModel import CombinedModel
from models.GraphModel import GraphModel, GraphModel_CFusion
from trainning.trainning import train_model as train
from trainning.trainning import evaluate_model as evaluate
import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
from torch_geometric.nn import GATConv, RGCNConv
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix


from data.loaddata import load_data
import torch

data = torch.load('../outputs/embeddings/loaders_datasets.pt')


In [11]:
data

{'train': {'audio': <torch.utils.data.dataset.TensorDataset at 0x237511b46a0>,
  'text': <torch.utils.data.dataset.TensorDataset at 0x237511ebd60>,
  'video': <torch.utils.data.dataset.TensorDataset at 0x237511ebfd0>,
  'labels': tensor([0, 0, 0,  ..., 6, 0, 1])},
 'val': {'audio': <torch.utils.data.dataset.TensorDataset at 0x237a79c2670>,
  'text': <torch.utils.data.dataset.TensorDataset at 0x237a79c2550>,
  'video': <torch.utils.data.dataset.TensorDataset at 0x237a79c2130>,
  'labels': tensor([4, 6, 0,  ..., 4, 4, 4])},
 'test': {'audio': <torch.utils.data.dataset.TensorDataset at 0x237a79c2610>,
  'text': <torch.utils.data.dataset.TensorDataset at 0x237a79c2310>,
  'video': <torch.utils.data.dataset.TensorDataset at 0x23751218d00>,
  'labels': tensor([6, 2, 0,  ..., 0, 0, 0])}}

In [24]:
class DataPreparation:
    def __init__(self, data, args):
        self.data = data
        self.args = args

    def prepare_graph_data(self, dataset_type):
        """
        Prepare graph data for the model.
        Args:
            dataset_type (str): 'train', 'val', or 'test'.
        Returns:
            features, labels, lengths
        """
        dataset = self.data[dataset_type]
        video_features = torch.mean(dataset['video'].tensors[0], dim=1)  # Average over time dimension for video
        features = torch.cat([
            dataset['audio'].tensors[0],
            dataset['text'].tensors[0],
            video_features
        ], dim=0)

        labels = torch.cat([
            dataset['audio'].tensors[1],
            dataset['text'].tensors[1],
            dataset['video'].tensors[1]
        ], dim=0)

        lengths = [
            dataset['audio'].tensors[0].size(0),  # Audio samples
            dataset['text'].tensors[0].size(0),   # Text samples
            video_features.size(0)               # Video samples
        ]

        return features, labels, lengths

    def prepare_dataloaders(self, features, labels, lengths, batch_size=32, shuffle=True):
        """
        Create DataLoader for the GraphDataset.
        Args:
            features (torch.Tensor): Combined features.
            labels (torch.Tensor): Corresponding labels.
            lengths (list): Number of samples for each modality.
            batch_size (int): Batch size.
            shuffle (bool): Whether to shuffle the data.
        Returns:
            DataLoader
        """
        dataset = GraphDataset(features, labels, lengths)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
        return loader

# Dataset class
class GraphDataset(Dataset):
    def __init__(self, features, labels, lengths, graph_features, edge_index):
        self.features = features
        self.labels = labels
        self.lengths = lengths
        self.graph_features = graph_features
        self.edge_index = edge_index

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        audio_start = 0
        text_start = self.lengths[0]
        video_start = text_start + self.lengths[1]

        return {
            "audio": self.features[idx, audio_start:text_start],
            "text": self.features[idx, text_start:video_start],
            "video": self.features[idx, video_start:],
            "graph_features": self.graph_features[idx],
            "edge_index": self.edge_index,
            "labels": self.labels[idx]
        }


#---------------------------------------------------------------Prepare the data---------------------------------------------------------------
args = {
    'modalities': ['audio', 'text', 'video'],
    'edge_type': ['temp', 'multi'],
    'wp': 2,
    'wf': 2,
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu')
}
data_preparation = DataPreparation(data, args)

# Prepare data
train_features, train_labels, train_lengths = data_preparation.prepare_graph_data('train')
val_features, val_labels, val_lengths = data_preparation.prepare_graph_data('val')
test_features, test_labels, test_lengths = data_preparation.prepare_graph_data('test')

# Create DataLoaders
train_loader = data_preparation.prepare_dataloaders(train_features, train_labels, train_lengths, batch_size=32, shuffle=True)
val_loader = data_preparation.prepare_dataloaders(val_features, val_labels, val_lengths, batch_size=32, shuffle=False)
test_loader = data_preparation.prepare_dataloaders(test_features, test_labels, test_lengths, batch_size=32, shuffle=False)

In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GATConv, RGCNConv
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

# Define models for all frameworks
class AudioTextVideoGraphAttention(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_classes):
        super(AudioTextVideoGraphAttention, self).__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, heads=num_heads, dropout=0.3)
        self.gat2 = GATConv(hidden_dim * num_heads, output_dim, heads=1, concat=False, dropout=0.3)
        self.fc = nn.Linear(output_dim, num_classes)

    def forward(self, x, edge_index):
        x = self.gat1(x, edge_index).relu()
        x = self.gat2(x, edge_index)
        return self.fc(x)


class GraphAttentionModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads):
        super(GraphAttentionModel, self).__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, heads=num_heads, dropout=0.3)
        self.gat2 = GATConv(hidden_dim * num_heads, output_dim, heads=1, concat=False, dropout=0.3)

    def forward(self, x, edge_index):
        x = self.gat1(x, edge_index).relu()
        x = self.gat2(x, edge_index)
        return x


class ModalityCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ModalityCNN, self).__init__()
        self.conv1 = nn.Conv1d(input_dim, hidden_dim, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(hidden_dim, output_dim, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveAvgPool1d(1)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.pool(x).squeeze(-1)  # Global pooling
        return x


class FusionLayer(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FusionLayer, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, *features):
        combined = torch.cat(features, dim=-1)  # Concatenate features
        x = torch.relu(self.fc1(combined))
        x = self.fc2(x)
        return x

class TemporalModule(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(TemporalModule, self).__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, batch_first=True)

    def forward(self, x):
        _, h = self.gru(x)  # Return hidden state
        return h.squeeze(0)


class AudioTextVideoGraphAttentionCNN(nn.Module):
    def __init__(self, audio_dim, text_dim, video_dim, graph_dim, hidden_dim, output_dim, num_heads, num_classes):
        super(AudioTextVideoGraphAttentionCNN, self).__init__()
        self.audio_cnn = ModalityCNN(audio_dim, hidden_dim, hidden_dim)
        self.text_cnn = ModalityCNN(text_dim, hidden_dim, hidden_dim)
        self.video_cnn = ModalityCNN(video_dim, hidden_dim, hidden_dim)

        self.graph_attention = GraphAttentionModel(graph_dim, hidden_dim, hidden_dim, num_heads)

        self.fusion = FusionLayer(hidden_dim * 4, hidden_dim, output_dim)
        self.fc = nn.Linear(output_dim, num_classes)

    def forward(self, audio, text, video, graph_features, edge_index):
        audio_feat = self.audio_cnn(audio)
        text_feat = self.text_cnn(text)
        video_feat = self.video_cnn(video)

        graph_feat = self.graph_attention(graph_features, edge_index)
        combined_features = self.fusion(audio_feat, text_feat, video_feat, graph_feat)

        return self.fc(combined_features)

class EmotionRelationAwareGNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_relations):
        super(EmotionRelationAwareGNN, self).__init__()
        self.rgcn = RGCNConv(input_dim, hidden_dim, num_relations)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index, edge_type):
        x = self.rgcn(x, edge_index, edge_type)
        x = torch.relu(x)
        return self.fc(x)

class HierarchicalCrossModalGNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_classes):
        super(HierarchicalCrossModalGNN, self).__init__()
        self.level1_gnn = GraphAttentionModel(input_dim, hidden_dim, hidden_dim, num_heads)
        self.level2_gnn = GraphAttentionModel(hidden_dim, hidden_dim, output_dim, num_heads)
        self.fc = nn.Linear(output_dim, num_classes)

    def forward(self, x, edge_index):
        x = self.level1_gnn(x, edge_index)
        x = self.level2_gnn(x, edge_index)
        return self.fc(x)


class CNNGraphAttention(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_classes):
        super(CNNGraphAttention, self).__init__()
        self.conv1 = nn.Conv1d(input_dim, hidden_dim, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(hidden_dim, output_dim, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.gat = GATConv(output_dim, output_dim, heads=num_heads, dropout=0.3)
        self.fc = nn.Linear(output_dim, num_classes)

    def forward(self, x, edge_index):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.pool(x).squeeze(-1)  # Pool across sequence length
        x = self.gat(x, edge_index).relu()
        return self.fc(x)

class EmotionRelationAwareGNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_relations, num_classes):
        super(EmotionRelationAwareGNN, self).__init__()
        self.rgcn = RGCNConv(input_dim, hidden_dim, num_relations=num_relations)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, edge_index, edge_type):
        x = self.rgcn(x, edge_index, edge_type).relu()
        return self.fc(x)

class HierarchicalCrossModalGAT(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_classes):
        super(HierarchicalCrossModalGAT, self).__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, heads=num_heads, dropout=0.3)
        self.gat2 = GATConv(hidden_dim * num_heads, output_dim, heads=1, concat=False, dropout=0.3)
        self.fc = nn.Linear(output_dim, num_classes)

    def forward(self, x, edge_index):
        x = self.gat1(x, edge_index).relu()
        x = self.gat2(x, edge_index)
        return self.fc(x)

class DilatedEmotionalPropagation(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_classes):
        super(DilatedEmotionalPropagation, self).__init__()
        self.dilated_conv = nn.Conv1d(input_dim, hidden_dim, kernel_size=3, dilation=2, padding=2)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = torch.relu(self.dilated_conv(x))
        x = x.mean(dim=-1)  # Global average pooling
        return self.fc(x)

class ContextAwareGraphAttention(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_classes):
        super(ContextAwareGraphAttention, self).__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, heads=num_heads, dropout=0.3)
        self.context_fc = nn.Linear(hidden_dim * num_heads, output_dim)
        self.fc = nn.Linear(output_dim, num_classes)

    def forward(self, x, edge_index):
        x = self.gat1(x, edge_index).relu()
        x = self.context_fc(x)
        return self.fc(x)

class AdaptiveLateFusion(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super(AdaptiveLateFusion, self).__init__()
        self.audio_fc = nn.Linear(input_dim, hidden_dim)
        self.text_fc = nn.Linear(input_dim, hidden_dim)
        self.video_fc = nn.Linear(input_dim, hidden_dim)
        self.fusion_fc = nn.Linear(hidden_dim * 3, num_classes)

    def forward(self, audio, text, video):
        audio_feat = torch.relu(self.audio_fc(audio))
        text_feat = torch.relu(self.text_fc(text))
        video_feat = torch.relu(self.video_fc(video))
        fused_feat = torch.cat([audio_feat, text_feat, video_feat], dim=-1)
        return self.fusion_fc(fused_feat)

class EmotionGraphTemporalCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_classes):
        super(EmotionGraphTemporalCNN, self).__init__()
        self.temporal_conv = nn.Conv1d(input_dim, hidden_dim, kernel_size=3, padding=1)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = torch.relu(self.temporal_conv(x))
        x = x.mean(dim=-1)  # Global average pooling
        return self.fc(x)

class HierarchicalAttentionFusion(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_classes):
        super(HierarchicalAttentionFusion, self).__init__()
        self.attention_fc1 = nn.Linear(input_dim, hidden_dim)
        self.attention_fc2 = nn.Linear(hidden_dim, output_dim)
        self.fc = nn.Linear(output_dim, num_classes)

    def forward(self, x):
        x = torch.relu(self.attention_fc1(x))
        x = torch.relu(self.attention_fc2(x))
        return self.fc(x)


In [67]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch_geometric.nn import GATConv, RGCNConv
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import numpy as np

# Graph preparation class

# Graph preparation class
class GraphPreparation:
    def __init__(self, args):
        self.args = args
        self.edge_type_to_idx = {}
        self.n_modals = len(args['modalities'])
        self.wp = args['wp']
        self.wf = args['wf']
        self.edge_temp = "temp" in args['edge_type']
        self.edge_multi = "multi" in args['edge_type']

        self._initialize_edge_type_mapping()
        
    def _initialize_edge_type_mapping(self):
        if self.edge_temp:
            temporal = [-1, 1, 0]
            for j in temporal:
                for k in range(self.n_modals):
                    for l in range(self.n_modals):  # Include all cross-modality connections
                        self.edge_type_to_idx[f"{j}{k}{l}"] = len(self.edge_type_to_idx)
        else:
            for j in range(self.n_modals):
                for k in range(self.n_modals):
                    self.edge_type_to_idx[f"0{j}{k}"] = len(self.edge_type_to_idx)

        # Debug: Log the initialized edge types
        print("Edge Type Mapping Initialized:", self.edge_type_to_idx)


    def compute_graph_features(self, data):
        video_features = torch.mean(data['video'].tensors[0], dim=1)
        node_features = torch.cat([
            data['audio'].tensors[0],
            data['text'].tensors[0],
            video_features
        ], dim=0)

        labels = torch.cat([
            data['audio'].tensors[1],
            data['text'].tensors[1],
            data['video'].tensors[1]
        ], dim=0)

        lengths = [
            data['audio'].tensors[0].size(0),
            data['text'].tensors[0].size(0),
            video_features.size(0)
        ]

        edge_index, edge_type = self._compute_edges(lengths, node_features.size(0))

        return node_features, labels, edge_index, edge_type, lengths

    def _compute_edges(self, lengths, total_nodes):
        edge_index = []
        edge_type = []
        sum_length = 0

        for j in range(len(lengths)):
            cur_len = lengths[j]
            perms = self._compute_edge_perms(cur_len, total_nodes)

            for src, dst in perms:
                edge_index.append([src + sum_length, dst + sum_length])

                # Ensure temporal_type is within defined keys
                if src % total_nodes > dst % total_nodes:
                    temporal_type = 1
                elif src % total_nodes < dst % total_nodes:
                    temporal_type = -1
                else:
                    temporal_type = 0

                # Generate the key based on modality indices
                src_mod = (src + sum_length) % self.n_modals
                dst_mod = (dst + sum_length) % self.n_modals
                key = f"{temporal_type}{src_mod}{dst_mod}"

                if key not in self.edge_type_to_idx:
                    print(f"Invalid key: {key}, src: {src}, dst: {dst}, total_nodes: {total_nodes}")
                    print(f"Available keys: {self.edge_type_to_idx}")
                    raise KeyError(f"Key {key} not found in edge_type_to_idx. Verify edge_type initialization.")

                edge_type.append(self.edge_type_to_idx[key])

            sum_length += cur_len

        edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
        edge_type = torch.tensor(edge_type, dtype=torch.long)

        return edge_index, edge_type



    def _compute_edge_perms(self, length, total_nodes):
        all_perms = set()
        array = np.arange(length)

        for j in range(length):
            if self.wp == -1 and self.wf == -1:
                eff_array = array
            elif self.wp == -1:
                eff_array = array[: min(length, j + self.wf)]
            elif self.wf == -1:
                eff_array = array[max(0, j - self.wp) :]
            else:
                eff_array = array[max(0, j - self.wp) : min(length, j + self.wf)]

            for k in range(self.n_modals):
                node_index = j + k * total_nodes
                if self.edge_temp:
                    for item in eff_array:
                        all_perms.add((node_index, item + k * total_nodes))
                else:
                    all_perms.add((node_index, node_index))
                if self.edge_multi:
                    for l in range(self.n_modals):
                        if l != k:
                            all_perms.add((node_index, j + l * total_nodes))

        perms = [(src, dst) for src, dst in all_perms if 0 <= src < total_nodes and 0 <= dst < total_nodes]
        print(f"Computed permutations: {perms}")
        return perms
# Dataset class
class GraphDataset(Dataset):
    def __init__(self, node_features, labels, edge_index, edge_type, lengths):
        self.node_features = node_features
        self.labels = labels
        self.edge_index = edge_index
        self.edge_type = edge_type
        self.lengths = lengths


    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            "node_features": self.node_features[idx],
            "edge_index": self.edge_index, 
            "edge_type": self.edge_type,   
            "labels": self.labels[idx],
            # audio, text, video
            "audio": self.node_features[idx, :self.lengths[0]],
            "text": self.node_features[idx, self.lengths[0]:self.lengths[1]],
            "video": self.node_features[idx, self.lengths[1]:]
        }

# Prepare DataLoader

def prepare_dataloader(node_features, labels, edge_index, edge_type, lengths ,batch_size=32, shuffle=True):
    dataset = GraphDataset(node_features, labels, edge_index, edge_type, lengths)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


In [70]:
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs):
    best_val_loss = float('inf')
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1} Training"):
            # audio, text, video, graph_features, edge_index, labels = (
            #     batch['audio'].to(device),
            #     batch['text'].to(device),
            #     batch['video'].to(device),
            #     batch['graph_features'].to(device),
            #     batch['edge_index'].to(device),
            #     batch['labels'].to(device)
            # )

            batch = {key: value.to(device) for key, value in batch.items()}
            labels = batch.pop("labels")
            optimizer.zero_grad()

            if isinstance(model, AudioTextVideoGraphAttentionCNN):
                outputs = model(batch["audio"], batch["text"], batch["video"], batch["node_features"], batch["edge_index"])
            elif isinstance(model, AudioTextVideoGraphAttention):
                outputs = model(batch["node_features"], batch["edge_index"])
            elif isinstance(model, GraphAttentionModel):
                outputs = model(batch["node_features"], batch["edge_index"])
            elif isinstance(model, ModalityCNN):
                outputs = model(batch["audio"])
            elif isinstance(model, FusionLayer):
                outputs = model(batch["audio"], batch["text"], batch["video"])
            else:
                raise ValueError(f"Unsupported model type: {type(model)}")


            # outputs = model(audio, text, video, graph_features, edge_index) 
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        val_loss = validate_model(model, val_loader, criterion, device)
        scheduler.step(val_loss)

        print(f"Epoch {epoch + 1}, Train Loss: {train_loss / len(train_loader):.4f}, Val Loss: {val_loss / len(val_loader):.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f"best_model_{model.__class__.__name__}.pth")

def validate_model(model, val_loader, criterion, device):
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = {key: value.to(device) for key, value in batch.items()}
            labels = batch.pop("labels")

            if isinstance(model, AudioTextVideoGraphAttentionCNN):
                outputs = model(batch["audio"], batch["text"], batch["video"], batch["node_features"], batch["edge_index"])
            elif isinstance(model, AudioTextVideoGraphAttention):
                outputs = model(batch["node_features"], batch["edge_index"])
            elif isinstance(model, GraphAttentionModel):
                outputs = model(batch["node_features"], batch["edge_index"])
            elif isinstance(model, ModalityCNN):
                outputs = model(batch["audio"])
            elif isinstance(model, FusionLayer):
                outputs = model(batch["audio"], batch["text"], batch["video"])
            else:
                raise ValueError(f"Unsupported model type: {type(model)}")

            val_loss += criterion(outputs, labels).item()

    return val_loss / len(val_loader)


def test_model(model, test_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing"):
            batch = {key: value.to(device) for key, value in batch.items()}
            labels = batch.pop("labels")

            if isinstance(model, AudioTextVideoGraphAttentionCNN):
                outputs = model(batch["audio"], batch["text"], batch["video"], batch["node_features"], batch["edge_index"])
            elif isinstance(model, AudioTextVideoGraphAttention):
                outputs = model(batch["node_features"], batch["edge_index"])
            elif isinstance(model, GraphAttentionModel):
                outputs = model(batch["node_features"], batch["edge_index"])
            elif isinstance(model, ModalityCNN):
                outputs = model(batch["audio"])
            elif isinstance(model, FusionLayer):
                outputs = model(batch["audio"], batch["text"], batch["video"])
            else:
                raise ValueError(f"Unsupported model type: {type(model)}")

            preds = outputs.argmax(dim=1)
            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())

    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    report = classification_report(all_labels, all_preds, output_dict=True)
    confusion = confusion_matrix(all_labels, all_preds)

    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"Test F1 Score: {f1 * 100:.2f}%")
    print("Classification Report:\n", classification_report(all_labels, all_preds))
    print("Confusion Matrix:\n", confusion)

    return {
        'accuracy': accuracy,
        'f1_score': f1,
        'classification_report': report,
        'confusion_matrix': confusion
    }



def initialize_models():
    return {
        "AudioTextVideoGraphAttention": AudioTextVideoGraphAttention(768, 128, 64, 4, 7),
        "CNNGraphAttention": CNNGraphAttention(768, 128, 64, 4, 7),
        "EmotionRelationAwareGNN": EmotionRelationAwareGNN(768, 128, 64, 3, 7),
        "HierarchicalCrossModalGNN": HierarchicalCrossModalGNN(768, 128, 64, 4, 7),
        "DilatedEmotionalPropagation": DilatedEmotionalPropagation(768, 128, 64, 7),
        "ContextAwareGraphAttention": ContextAwareGraphAttention(768, 128, 64, 4, 7),
        "AdaptiveLateFusion": AdaptiveLateFusion(768, 128, 7),
        "EmotionGraphTemporalCNN": EmotionGraphTemporalCNN(768, 128, 64, 7),
        "HierarchicalAttentionFusion": HierarchicalAttentionFusion(768, 128, 64, 7)
    }

def pipeline(models, train_loader, val_loader, test_loader, device, epochs=10):
    criterion = nn.CrossEntropyLoss()

    for model_name, model in models.items():
        print(f"\nTraining and Testing {model_name}...")
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

        # Train the model
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs)

        # Test the model
        test_results = test_model(model, test_loader, device)

        # Save results
        with open(f"{model_name}_results.txt", "w") as f:
            f.write(f"Accuracy: {test_results['accuracy'] * 100:.2f}%\n")
            f.write(f"F1 Score: {test_results['f1_score'] * 100:.2f}%\n")
            f.write(f"Confusion Matrix:\n{test_results['confusion_matrix']}\n")
            f.write("Classification Report:\n")
            for class_label, metrics in test_results['classification_report'].items():
                f.write(f"{class_label}: {metrics}\n")



In [71]:
args = {
    'modalities': ['audio', 'text', 'video'],
    'edge_type': ['temp', 'multi'],
    'wp': 2,
    'wf': 2,
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu')
}

# Load data and initialize GraphPreparation
graph_prep = GraphPreparation(args)

# Prepare graph data for train, val, and test
train_node_features, train_labels, train_edge_index, train_edge_type, train_lengths = graph_prep.compute_graph_features(data['train'])
val_node_features, val_labels, val_edge_index, val_edge_type,val_lengths = graph_prep.compute_graph_features(data['val'])
test_node_features, test_labels, test_edge_index, test_edge_type, test_lengths = graph_prep.compute_graph_features(data['test'])

# Create DataLoaders
train_loader = prepare_dataloader(train_node_features, train_labels, train_edge_index, train_edge_type, train_lengths, batch_size=32, shuffle=True)
val_loader = prepare_dataloader(val_node_features, val_labels, val_edge_index, val_edge_type, val_lengths, batch_size=32, shuffle=False)
test_loader = prepare_dataloader(test_node_features, test_labels, test_edge_index, test_edge_type, test_lengths, batch_size=32, shuffle=False)

print("Data preparation completed.")



Edge Type Mapping Initialized: {'-100': 0, '-101': 1, '-102': 2, '-110': 3, '-111': 4, '-112': 5, '-120': 6, '-121': 7, '-122': 8, '100': 9, '101': 10, '102': 11, '110': 12, '111': 13, '112': 14, '120': 15, '121': 16, '122': 17, '000': 18, '001': 19, '002': 20, '010': 21, '011': 22, '012': 23, '020': 24, '021': 25, '022': 26}
Computed permutations: [(5859, 5860), (8625, 8624), (5871, 5870), (9762, 9763), (9522, 9522), (3652, 3652), (1764, 1763), (7303, 7304), (9534, 9532), (7315, 7314), (5427, 5425), (442, 443), (2673, 2671), (3208, 3207), (454, 453), (9330, 9328), (5993, 5994), (4105, 4105), (1886, 1887), (4117, 4115), (8008, 8008), (1898, 1897), (10, 8), (5789, 5790), (4810, 4809), (8701, 8702), (2591, 2591), (8713, 8712), (8473, 8471), (6254, 6253), (1612, 1610), (4932, 4933), (9166, 9165), (3296, 3295), (3056, 3054), (6947, 6947), (837, 836), (4728, 4729), (6959, 6957), (5968, 5966), (5637, 5637), (3749, 3748), (9288, 9289), (1530, 1530), (9871, 9869), (7652, 7651), (9300, 9299), (

In [72]:
def pipeline(models, train_loader, val_loader, test_loader, device, epochs=10):
    criterion = nn.CrossEntropyLoss()
    for model_name, model in models.items():
        print(f"\nTraining and Testing {model_name}...")
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs)
        test_results = test_model(model, test_loader, device)
        
        with open(f"{model_name}_results.txt", "w") as f:
            f.write(f"Accuracy: {test_results['accuracy'] * 100:.2f}%\n")
            f.write(f"F1 Score: {test_results['f1_score'] * 100:.2f}%\n")
            f.write(f"Confusion Matrix:\n{test_results['confusion_matrix']}\n")
            f.write("Classification Report:\n")
            for class_label, metrics in test_results['classification_report'].items():
                f.write(f"{class_label}: {metrics}\n")

# Run the pipeline
device = args['device']
models = initialize_models()
pipeline(models, train_loader, val_loader, test_loader, device, epochs=10)



Training and Testing AudioTextVideoGraphAttention...


Epoch 1 Training:   0%|          | 0/937 [00:00<?, ?it/s]

Epoch 1 Training:   0%|          | 0/937 [00:03<?, ?it/s]


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 32 but got size 2 for tensor number 1 in the list.