In [3]:
import pickle
import torch
import os
import sys


# Directory holding the precomputed embedding datasets, relative to the notebook.
data_dir = os.path.join('..', "outputs", "embeddings")
loaders_path = os.path.join(data_dir, "loaders_datasets.pkl")

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that were produced by this project.
with open(loaders_path, 'rb') as f:
    data = pickle.load(f)

In [4]:
# Replace every per-modality dataset object by a plain dict
# {'features': ..., 'labels': ...} built from its first two tensors.
for split in data.keys():
    for modal in ['audio', 'text', 'video']:
        entry = data[split][modal]
        # Entries that are already dicts were converted by a previous run of
        # this cell; skipping them keeps the cell re-runnable (a dict has no
        # `.tensors` attribute, so a second run used to raise AttributeError).
        if isinstance(entry, dict):
            continue
        modal_tensors = entry.tensors
        data[split][modal] = {
            'features': modal_tensors[0],
            'labels': modal_tensors[1]
        }

    if 'text' in data[split]:
        text_features = data[split]['text']['features']
        # Number of non-zero entries along dim 1 of the text features.
        text_len_tensor = torch.sum((text_features != 0).long(), dim=1)
        data[split]['text']['text_len_tensor'] = text_len_tensor


# Sanity check: report the tensor shapes of every split.
for split in data.keys():
    print(f"{split} audio features shape: {data[split]['audio']['features'].shape}")
    print(f"{split} text features shape: {data[split]['text']['features'].shape}")
    print(f"{split} video features shape: {data[split]['video']['features'].shape}")
    print(f"{split} text length tensor shape: {data[split]['text']['text_len_tensor'].shape}")


train audio features shape: torch.Size([9988, 768])
train text features shape: torch.Size([9988, 768])
train video features shape: torch.Size([9988, 16, 768])
train text length tensor shape: torch.Size([9988])
val audio features shape: torch.Size([1108, 768])
val text features shape: torch.Size([1108, 768])
val video features shape: torch.Size([1108, 16, 768])
val text length tensor shape: torch.Size([1108])
test audio features shape: torch.Size([2610, 768])
test text features shape: torch.Size([2610, 768])
test video features shape: torch.Size([2610, 16, 768])
test text length tensor shape: torch.Size([2610])


In [35]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, GATConv, RGCNConv
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt

# -------- Préparer les données --------
def prepare_graph_data(data, split):
    """Build one multi-relational graph for a dataset split.

    The nodes are the audio rows, then the text rows, then the video rows
    (video features are averaged over dim 1 first), stacked along dim 0.
    Labels are stacked in the same order.

    Relation ids:
        0/1/2   audio:  [i, i-1], [i, i], [i, i+1]
        3/4/5   text:   same pattern
        6/7/8   video:  same pattern
        9       audio[i] -> video[i]
        10      text[i]  -> video[i]
        11      audio[i] -> text[i]

    Args:
        data: mapping ``split -> modality -> {'features', 'labels'}``.
        split: key of the split to convert.

    Returns:
        A ``Data`` object with ``x``, ``y``, ``edge_index`` and ``edge_type``.
    """
    split_data = data[split]
    audio_x = split_data['audio']['features']
    text_x = split_data['text']['features']
    # Average the video features over dim 1 so every sample is a single vector.
    video_x = split_data['video']['features'].mean(dim=1)

    features = torch.cat([audio_x, text_x, video_x], dim=0)
    labels = torch.cat(
        [split_data[modality]['labels'] for modality in ('audio', 'text', 'video')],
        dim=0,
    )

    # Collected as (first node, second node, relation id) triples.
    edges = []

    def add_temporal_edges(offset, num_nodes, relation_type):
        """Link each node of one modality to its predecessor, itself and its successor."""
        for pos in range(num_nodes):
            node = offset + pos
            if pos > 0:
                edges.append((node, node - 1, relation_type))      # previous
            edges.append((node, node, relation_type + 1))          # self
            if pos < num_nodes - 1:
                edges.append((node, node + 1, relation_type + 2))  # next

    n_audio = audio_x.size(0)
    n_text = text_x.size(0)
    n_video = video_x.size(0)  # size after the averaging above
    text_start = n_audio
    video_start = n_audio + n_text

    # Intra-modal relations.
    add_temporal_edges(0, n_audio, 0)            # audio: relations 0, 1, 2
    add_temporal_edges(text_start, n_text, 3)    # text:  relations 3, 4, 5
    add_temporal_edges(video_start, n_video, 6)  # video: relations 6, 7, 8

    # Cross-modal relations between rows that share the same index.
    for k in range(min(n_audio, n_text, n_video)):
        edges.append((k, video_start + k, 9))                 # audio -> video
        edges.append((text_start + k, video_start + k, 10))   # text -> video
        edges.append((k, text_start + k, 11))                 # audio -> text

    edge_index = torch.tensor(
        [[first, second] for first, second, _ in edges], dtype=torch.long
    ).t().contiguous()
    edge_type = torch.tensor([relation for _, _, relation in edges], dtype=torch.long)

    print("Unique edge types:", torch.unique(edge_type))  # Debugging
    return Data(x=features, y=labels, edge_index=edge_index, edge_type=edge_type)

# -------- Définir les Modèles --------
class GCN(torch.nn.Module):
    """Two GCNConv layers with ReLU and dropout in between; outputs log-probabilities."""

    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.0):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        # Dropout probability applied after the first layer (training mode only).
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return per-node log-softmax scores over dim 1."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        scores = self.conv2(hidden, edge_index)
        return F.log_softmax(scores, dim=1)

class GAT(torch.nn.Module):
    """Two GATConv layers: a multi-head hidden layer (ELU) and a single-head output layer."""

    def __init__(self, in_channels, hidden_channels, out_channels, heads=4):
        super().__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads)
        # The second layer is sized for hidden_channels * heads input features.
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1)

    def forward(self, x, edge_index):
        """Return per-node log-softmax scores over dim 1."""
        hidden = F.elu(self.conv1(x, edge_index))
        scores = self.conv2(hidden, edge_index)
        return F.log_softmax(scores, dim=1)

class RGCN(torch.nn.Module):
    """Two RGCNConv layers with a ReLU in between; outputs log-probabilities."""

    def __init__(self, in_channels, hidden_channels, out_channels, num_relations):
        super().__init__()
        self.conv1 = RGCNConv(in_channels, hidden_channels, num_relations)
        self.conv2 = RGCNConv(hidden_channels, out_channels, num_relations)

    def forward(self, x, edge_index, edge_type):
        """Return per-node log-softmax scores; ``edge_type`` holds one relation id per edge."""
        hidden = F.relu(self.conv1(x, edge_index, edge_type))
        scores = self.conv2(hidden, edge_index, edge_type)
        return F.log_softmax(scores, dim=1)

# -------- Entraîner et Tester --------
def train_and_evaluate(model, train_data, val_data, test_data, optimizer, criterion, epochs=100):
    """Train ``model`` full-batch on the training graph and score it on the test graph.

    Each epoch performs one optimizer step on the whole training graph and
    prints the training and validation losses. The validation loss is only
    reported; it does not influence training.

    Args:
        model: graph model; ``RGCN`` instances are also given ``edge_type``.
        train_data, val_data, test_data: graphs exposing ``x``, ``y``,
            ``edge_index`` and ``edge_type``.
        optimizer: optimizer bound to ``model``'s parameters.
        criterion: loss called as ``criterion(output, target)``.
        epochs: number of training epochs.

    Returns:
        ``(test_accuracy, test_macro_f1)``.
    """
    wants_edge_type = isinstance(model, RGCN)

    def run_model(graph):
        # Only the relational model consumes the per-edge relation ids.
        if wants_edge_type:
            return model(graph.x, graph.edge_index, graph.edge_type)
        return model(graph.x, graph.edge_index)

    for epoch in range(epochs):
        # Training step
        model.train()
        optimizer.zero_grad()
        loss = criterion(run_model(train_data), train_data.y)
        loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_loss = criterion(run_model(val_data), val_data.y)
        print(f"Epoch {epoch + 1}, Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

    # Test
    model.eval()
    with torch.no_grad():
        test_pred = run_model(test_data).argmax(dim=1)
        y_true = test_data.y.cpu()
        y_pred = test_pred.cpu()
        test_acc = accuracy_score(y_true, y_pred)
        test_f1 = f1_score(y_true, y_pred, average='macro')

    return test_acc, test_f1

# -------- Pipeline Principal --------
def main_pipeline(data):
    """Build the three split graphs, train the baseline models and print a benchmark table.

    Args:
        data: mapping ``split -> modality -> {'features', 'labels'}`` with
            the splits ``'train'``, ``'val'`` and ``'test'``.
    """
    # Graphs are built in the order train, val, test.
    train_data, val_data, test_data = (
        prepare_graph_data(data, split_name) for split_name in ('train', 'val', 'test')
    )

    in_dim = train_data.x.size(1)
    # Output size = number of distinct labels seen in the training graph.
    n_classes = len(torch.unique(train_data.y))

    # Model configurations (all instantiated before any training starts).
    architectures = [
        ("GCN (2 layers)", GCN(in_dim, 64, n_classes)),
        ("GAT (4 heads)", GAT(in_dim, 64, n_classes, heads=4)),
        ("RGCN", RGCN(in_dim, 64, n_classes, num_relations=12)),
    ]

    # Train and evaluate every model with a fresh optimizer and loss.
    results = []
    for name, model in architectures:
        print(f"Training {name}...")
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        criterion = torch.nn.CrossEntropyLoss()
        acc, f1 = train_and_evaluate(
            model, train_data, val_data, test_data, optimizer, criterion
        )
        # Scores are stored as percentages.
        results.append((name, acc * 100, f1 * 100))

    # Report
    print("\nBenchmark Results:")
    print("Model\t\t\tTest Accuracy\tTest F1-Score")
    for name, acc, f1 in results:
        print(f"{name:20}\t{acc:.2f}\t\t{f1:.2f}")



In [None]:
# Run the benchmark pipeline on the loaded splits.
main_pipeline(data)

## **Improvement**

**1. Normalisation des Données**

In [74]:
from sklearn.preprocessing import StandardScaler

def prepare_graph_data(data, split, graph_augmentation=False):
    """Build a standardised multi-relational graph for one dataset split.

    Features of each modality are standardised with a ``StandardScaler``
    fitted on that modality of the requested split. The input ``data`` is
    left untouched: the standardised tensors are kept in local variables
    instead of being written back into ``data[split]``.

    Nodes are the audio rows, then the text rows, then the video rows
    (video features are averaged over dim 1 after scaling).

    Relation ids (``base`` is 0 for audio, 5 for text, 10 for video):
        base + 0 / 1 / 2   [i, i-1], [i, i], [i, i+1]
        base + 3 / 4       [i, i-2], [i, i+2]
        15 / 16 / 17       audio->video, text->video, audio->text (same index)
        18                 random edges (only with ``graph_augmentation``)

    Args:
        data: mapping ``split -> modality -> {'features', 'labels'}``.
        split: key of the split to convert.
        graph_augmentation: when True, append 100 random edges of relation 18.

    Returns:
        A ``Data`` object with ``x``, ``y``, ``edge_index`` and ``edge_type``.
    """
    # NOTE(review): a new scaler is fitted on every split, so val/test are not
    # scaled with the training statistics. Changing this needs a way to share
    # fitted scalers across calls; left as-is to keep the interface unchanged.
    def standardize(tensor):
        # StandardScaler expects a 2-D array: flatten all leading dimensions,
        # scale every feature column, then restore the original shape.
        shape = tensor.shape
        scaled = StandardScaler().fit_transform(tensor.reshape(-1, shape[-1]))
        return torch.tensor(scaled, dtype=torch.float).reshape(shape)

    split_data = data[split]
    audio_features = standardize(split_data['audio']['features'])
    text_features = standardize(split_data['text']['features'])
    # Scale the video features, then average them over dim 1.
    video_features = torch.mean(standardize(split_data['video']['features']), dim=1)

    # Stack the node features and labels: audio, text, video.
    features = torch.cat([audio_features, text_features, video_features], dim=0)
    labels = torch.cat([
        split_data['audio']['labels'],
        split_data['text']['labels'],
        split_data['video']['labels']
    ], dim=0)

    edge_index = []
    edge_type = []

    # -------- Intra-modal temporal relations --------
    def add_temporal_edges(offset, num_nodes, relation_type):
        for i in range(num_nodes):
            if i > 1:
                edge_index.append([offset + i, offset + i - 2])  # two steps back
                edge_type.append(relation_type + 3)
            if i < num_nodes - 2:
                edge_index.append([offset + i, offset + i + 2])  # two steps ahead
                edge_type.append(relation_type + 4)

            # One step back, self, one step ahead.
            if i > 0:
                edge_index.append([offset + i, offset + i - 1])
                edge_type.append(relation_type)
            edge_index.append([offset + i, offset + i])
            edge_type.append(relation_type + 1)
            if i < num_nodes - 1:
                edge_index.append([offset + i, offset + i + 1])
                edge_type.append(relation_type + 2)

    num_audio = audio_features.size(0)
    num_text = text_features.size(0)
    num_video = video_features.size(0)  # size after the averaging above

    add_temporal_edges(0, num_audio, 0)                      # audio: relations 0-4
    add_temporal_edges(num_audio, num_text, 5)               # text:  relations 5-9
    add_temporal_edges(num_audio + num_text, num_video, 10)  # video: relations 10-14

    # -------- Cross-modal relations between rows sharing the same index --------
    for i in range(min(num_audio, num_text, num_video)):
        edge_index.append([i, num_audio + num_text + i])  # audio -> video
        edge_type.append(15)
        edge_index.append([num_audio + i, num_audio + num_text + i])  # text -> video
        edge_type.append(16)
        edge_index.append([i, num_audio + i])  # audio -> text
        edge_type.append(17)

    # Graph augmentation: add random edges when enabled.
    # NOTE(review): uses torch's global RNG; seed it for reproducible graphs.
    if graph_augmentation:
        for _ in range(100):
            src, dst = torch.randint(0, features.size(0), (2,))
            edge_index.append([src.item(), dst.item()])
            edge_type.append(18)

    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    edge_type = torch.tensor(edge_type, dtype=torch.long)

    print("Unique edge types:", torch.unique(edge_type))  # Debugging
    return Data(x=features, y=labels, edge_index=edge_index, edge_type=edge_type)


b. Équilibrage des classes

In [75]:
from torch.nn import CrossEntropyLoss

def compute_class_weights(labels, num_classes=None):
    """Return normalised inverse-frequency class weights indexed by class id.

    The weight of class ``c`` is ``1 / count(c)``, rescaled so that all
    weights sum to 1. The result has one entry per class id in
    ``0 .. num_classes - 1``, so it lines up with the ``weight`` argument of
    ``CrossEntropyLoss`` even when some class never occurs in ``labels``;
    such classes receive weight 0. (Indexing by position in
    ``torch.unique`` would produce a shorter, misaligned vector in that case.)

    Args:
        labels: integer class ids (non-negative).
        num_classes: total number of classes; defaults to ``labels.max() + 1``.

    Returns:
        A float tensor of shape ``(num_classes,)``.
    """
    labels = torch.as_tensor(labels).long().flatten()
    if num_classes is None:
        num_classes = int(labels.max().item()) + 1 if labels.numel() > 0 else 0

    counts = torch.bincount(labels, minlength=num_classes).float()

    # Invert only the non-zero counts so absent classes do not become inf.
    weights = torch.zeros_like(counts)
    present = counts > 0
    weights[present] = 1.0 / counts[present]

    total = weights.sum()
    if total > 0:
        weights = weights / total
    return weights

c. Augmentation des données


In [76]:
# graph_augmentation = True
# train_data = prepare_graph_data(data, 'train', graph_augmentation=graph_augmentation)


**2. Architecture des modèles** (Ajout de Couches Supplémentaires)

In [102]:
import torch.nn as nn

class DeepGCN(torch.nn.Module):
    """Stack of GCNConv + BatchNorm1d hidden blocks followed by a GCNConv output layer.

    ``num_layers`` is the number of hidden blocks; the output layer comes on
    top of them. Every hidden block applies conv -> batch norm -> ReLU -> dropout.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=3, dropout=0.3):
        super().__init__()
        self.layers = torch.nn.ModuleList()
        self.batch_norms = torch.nn.ModuleList()

        # The first block maps the input width to the hidden width; the rest keep it.
        block_inputs = [in_channels] + [hidden_channels] * (num_layers - 1)
        for width in block_inputs:
            self.layers.append(GCNConv(width, hidden_channels))
            self.batch_norms.append(torch.nn.BatchNorm1d(hidden_channels))

        self.final_layer = GCNConv(hidden_channels, out_channels)
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return per-node log-softmax scores over dim 1."""
        hidden = x
        for conv, norm in zip(self.layers, self.batch_norms):
            hidden = F.relu(norm(conv(hidden, edge_index)))
            hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        scores = self.final_layer(hidden, edge_index)
        return F.log_softmax(scores, dim=1)

#----------------------------------------------------------------------------------------------------------
class DeepGAT(torch.nn.Module):
    """Stack of multi-head GATConv + BatchNorm1d hidden blocks and a single-head output layer.

    ``num_layers`` is the number of hidden blocks. Every hidden block applies
    attention -> batch norm -> ELU -> dropout and emits
    ``hidden_channels * heads`` features.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=4, num_layers=3, dropout=0.5):
        super().__init__()
        block_width = hidden_channels * heads  # output width of every hidden block
        self.layers = torch.nn.ModuleList()
        self.batch_norms = torch.nn.ModuleList()

        block_inputs = [in_channels] + [block_width] * (num_layers - 1)
        for width in block_inputs:
            self.layers.append(GATConv(width, hidden_channels, heads=heads))
            self.batch_norms.append(torch.nn.BatchNorm1d(block_width))

        self.final_layer = GATConv(block_width, out_channels, heads=1)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """Return per-node log-softmax scores over dim 1."""
        hidden = x
        for attention, norm in zip(self.layers, self.batch_norms):
            hidden = F.elu(norm(attention(hidden, edge_index)))
            hidden = self.dropout(hidden)
        scores = self.final_layer(hidden, edge_index)
        return F.log_softmax(scores, dim=1)
#----------------------------------------------------------------------------------------------------------
class CombinedGCN_GAT_RGCN(torch.nn.Module):
    """Pipeline GCNConv -> GATConv (4 heads) -> RGCNConv -> GCNConv output layer.

    Each of the first three layers is followed by ReLU and dropout. Only the
    RGCN layer consumes ``edge_type``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_relations, dropout=0.5):
        super().__init__()
        self.gcn = GCNConv(in_channels, hidden_channels)
        self.gat = GATConv(hidden_channels, hidden_channels, heads=4)
        # The RGCN layer is sized for the 4 * hidden_channels features emitted by the GAT layer.
        self.rgcn = RGCNConv(hidden_channels * 4, hidden_channels, num_relations)
        self.final_layer = GCNConv(hidden_channels, out_channels)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index, edge_type):
        """Return per-node log-softmax scores over dim 1."""
        hidden = self.dropout(F.relu(self.gcn(x, edge_index)))
        hidden = self.dropout(F.relu(self.gat(hidden, edge_index)))
        hidden = self.dropout(F.relu(self.rgcn(hidden, edge_index, edge_type)))
        scores = self.final_layer(hidden, edge_index)
        return F.log_softmax(scores, dim=1)
#----------------------------------------------------------------------------------------------------
from torch_geometric.nn import GATConv

class TemporalGAT(torch.nn.Module):
    """Three multi-head GATConv layers (no added self-loops) plus a single-head output layer.

    Dropout is applied to the input of every hidden layer. No activation is
    applied between the layers.

    NOTE(review): ``num_relations`` is stored and ``edge_type`` is required by
    ``forward``, but neither is used by the GAT layers; the relation ids
    currently have no effect on the output.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_relations, heads=4, dropout=0.5):
        super(TemporalGAT, self).__init__()
        self.gat_layers = torch.nn.ModuleList()
        self.gat_layers.append(GATConv(in_channels, hidden_channels, heads=heads, add_self_loops=False))
        for _ in range(2):  # two more hidden layers, three in total
            self.gat_layers.append(GATConv(hidden_channels * heads, hidden_channels, heads=heads, add_self_loops=False))
        self.final_layer = GATConv(hidden_channels * heads, out_channels, heads=1, add_self_loops=False)
        self.num_relations = num_relations
        self.dropout = torch.nn.Dropout(p=dropout)

    def forward(self, x, edge_index, edge_type=None):
        """Return per-node log-softmax scores over dim 1.

        Raises:
            ValueError: if ``edge_type`` is not provided.
        """
        if edge_type is None:
            raise ValueError("edge_type is required for TemporalGAT")

        for gat_layer in self.gat_layers:
            # self.dropout is an nn.Dropout module, so it must be called.
            # Passing it as `p=` to F.dropout raises a TypeError. The module
            # follows self.training on its own.
            x = self.dropout(x)
            x = gat_layer(x, edge_index)
        x = self.final_layer(x, edge_index)
        return F.log_softmax(x, dim=1)
#----------------------------------------------------------------------------------------------------
from torch_geometric.nn import GATv2Conv

class TemporalGATv2(torch.nn.Module):
    """Three multi-head GATv2Conv layers (no added self-loops) plus a single-head output layer.

    Functional dropout with probability ``dropout`` is applied to the input of
    every hidden layer. No activation is applied between the layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=4, dropout=0.5):
        super().__init__()
        block_width = hidden_channels * heads  # output width of every hidden layer
        self.gat_layers = torch.nn.ModuleList()
        for width in (in_channels, block_width, block_width):
            self.gat_layers.append(
                GATv2Conv(width, hidden_channels, heads=heads, add_self_loops=False)
            )
        self.final_layer = GATv2Conv(block_width, out_channels, heads=1, add_self_loops=False)
        self.dropout = dropout
        # Separate attribute name so it does not clash with the float above;
        # forward() does not use this module.
        self.dropout_layer = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """Return per-node log-softmax scores over dim 1."""
        hidden = x
        for attention in self.gat_layers:
            hidden = F.dropout(hidden, p=self.dropout, training=self.training)
            hidden = attention(hidden, edge_index)

        scores = self.final_layer(hidden, edge_index)
        return F.log_softmax(scores, dim=1)


5. Relations dans le graphe

a) Relations cross-modales

Introduisez plus de connexions cross-modales dans edge_index, par exemple en liant chaque nœud audio à plusieurs nœuds vidéo proches au lieu d'un seul.

b) Graph Augmentation

Ajoutez du bruit aux connexions existantes, ou utilisez des méthodes comme DropEdge (suppression aléatoire d'arêtes à chaque itération) pour améliorer la robustesse.

7. Autres Modèles


a) Graph Attention Networks (GATv2)


In [103]:
class CombinedGCN_GAT(torch.nn.Module):
    """Pipeline GCNConv (ReLU) -> GATConv with 4 heads (ELU) -> GCNConv output layer."""

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.gcn = GCNConv(in_channels, hidden_channels)
        self.gat = GATConv(hidden_channels, hidden_channels, heads=4)
        # The output layer is sized for the 4 * hidden_channels features emitted by the GAT layer.
        self.final_layer = GCNConv(hidden_channels * 4, out_channels)

    def forward(self, x, edge_index):
        """Return per-node log-softmax scores over dim 1."""
        hidden = self.gcn(x, edge_index)
        hidden = F.relu(hidden)
        hidden = self.gat(hidden, edge_index)
        hidden = F.elu(hidden)
        scores = self.final_layer(hidden, edge_index)
        return F.log_softmax(scores, dim=1)


In [104]:
def train_and_evaluate(model, train_data, val_data, test_data, optimizer, criterion, epochs=50):
    """Train ``model`` full-batch with a class-weighted loss and score it on the test graph.

    Each epoch performs one optimizer step on the whole training graph and
    prints the training and validation losses. The validation loss is only
    reported; it does not influence training.

    Args:
        model: graph model. It is called with ``edge_type`` as third argument
            whenever its ``forward`` declares an ``edge_type`` parameter.
        train_data, val_data, test_data: graphs exposing ``x``, ``y``,
            ``edge_index`` and ``edge_type``.
        optimizer: optimizer bound to ``model``'s parameters.
        criterion: ignored; kept for interface compatibility. The loss is
            always a ``CrossEntropyLoss`` weighted by the inverse class
            frequencies of ``train_data.y``.
        epochs: number of training epochs.

    Returns:
        ``(test_accuracy, test_macro_f1)``.
    """
    import inspect

    weights = compute_class_weights(train_data.y)
    criterion = CrossEntropyLoss(weight=weights)

    # Decide once, from the forward signature, whether the model takes the
    # relation ids. The train, validation and test steps previously used
    # different conditions, and a plain RGCN (whose forward requires
    # edge_type) was never given it.
    uses_edge_type = 'edge_type' in inspect.signature(model.forward).parameters

    def run_model(graph):
        if uses_edge_type:
            return model(graph.x, graph.edge_index, graph.edge_type)
        return model(graph.x, graph.edge_index)

    for epoch in range(epochs):
        # Training step
        model.train()
        optimizer.zero_grad()
        out = run_model(train_data)
        loss = criterion(out, train_data.y)
        loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_out = run_model(val_data)
            val_loss = criterion(val_out, val_data.y)
        print(f"Epoch {epoch + 1}, Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

    # Test
    model.eval()
    with torch.no_grad():
        test_out = run_model(test_data)
        test_pred = test_out.argmax(dim=1)
        test_acc = accuracy_score(test_data.y.cpu(), test_pred.cpu())
        test_f1 = f1_score(test_data.y.cpu(), test_pred.cpu(), average='macro')

    return test_acc, test_f1


**Pipeline**

In [105]:
def main_pipeline(data):
    """Build the three split graphs, train the selected models and print a benchmark table.

    Args:
        data: mapping ``split -> modality -> {'features', 'labels'}`` with
            the splits ``'train'``, ``'val'`` and ``'test'``.
    """
    # Graphs are built in the order train, val, test, without random extra edges.
    train_data, val_data, test_data = (
        prepare_graph_data(data, split_name, graph_augmentation=False)
        for split_name in ('train', 'val', 'test')
    )

    in_dim = train_data.x.size(1)
    # Output size = number of distinct labels seen in the training graph.
    n_classes = len(torch.unique(train_data.y))

    # Model configurations; uncomment an entry to include it in the benchmark.
    architectures = [
        # ("GCN (3 layers)", DeepGCN(in_dim, 64, n_classes, num_layers=3)),
        # ("GCN (5 layers)", DeepGCN(in_dim, 64, n_classes, num_layers=5)),
        # ("GAT (3 layers)", DeepGAT(in_dim, 64, n_classes, num_layers=3)),
        # ("GAT (5 layers)", DeepGAT(in_dim, 64, n_classes, num_layers=5)),
        # ("RGCN (5 relations)", RGCN(in_dim, 64, n_classes, num_relations=20)),
        # ("Temporal GAT", TemporalGAT(in_dim, 64, n_classes, num_relations=20)),
        # ("Temporal GATv2", TemporalGATv2(in_dim, 64, n_classes)),
        # ("Combined GCN+GAT", CombinedGCN_GAT(in_dim, 64, n_classes)),
        ("GAT + BatchNorm", DeepGAT(in_dim, 64, n_classes, num_layers=3)),
        ("GCN+GAT+RGCN", CombinedGCN_GAT_RGCN(in_dim, 64, n_classes, num_relations=20)),
        ("GCN + Dropout", DeepGCN(in_dim, 64, n_classes, num_layers=3, dropout=0.5)),
    ]

    # Train and evaluate every model with a fresh optimizer and loss.
    results = []
    for name, model in architectures:
        print(f"Training {name}...")
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
        criterion = CrossEntropyLoss()
        acc, f1 = train_and_evaluate(
            model, train_data, val_data, test_data, optimizer, criterion
        )
        # Scores are stored as percentages.
        results.append((name, acc * 100, f1 * 100))

    # Report
    print("\nBenchmark Results:")
    print("Model\t\t\tTest Accuracy\tTest F1-Score")
    for name, acc, f1 in results:
        print(f"{name:20}\t{acc:.2f}\t\t{f1:.2f}")


In [106]:
# Run the benchmark pipeline on the loaded splits.
main_pipeline(data)

Unique edge types: tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17])
Unique edge types: tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17])
Unique edge types: tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17])
Training GAT + BatchNorm...
Epoch 1, Train Loss: 2.6780, Val Loss: 5.0456
Epoch 2, Train Loss: 2.6737, Val Loss: 6.6066
Epoch 3, Train Loss: 3.0211, Val Loss: 5.5473
Epoch 4, Train Loss: 2.3706, Val Loss: 9.4584
Epoch 5, Train Loss: 2.2559, Val Loss: 10.1987
Epoch 6, Train Loss: 2.2097, Val Loss: 8.3322
Epoch 7, Train Loss: 2.1262, Val Loss: 6.2396
Epoch 8, Train Loss: 2.0561, Val Loss: 4.7038
Epoch 9, Train Loss: 2.0492, Val Loss: 4.0332
Epoch 10, Train Loss: 2.0188, Val Loss: 3.7398
Epoch 11, Train Loss: 1.9948, Val Loss: 3.4640
Epoch 12, Train Loss: 1.9643, Val Loss: 3.1707
Epoch 13, Train Loss: 1.9704, Val Loss: 2.8102
Epoch 14, Train Loss: 1.9397, Val Loss: 2.4389
Epoch 15, Train Loss: 