# LSTM

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import sys
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
import copy
from sklearn.metrics import f1_score, recall_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
from collections import defaultdict
import itertools
import random
import math

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also switches cuDNN to deterministic mode (benchmark autotuning off) and
    exports ``PYTHONHASHSEED`` with the same value.

    Args:
        seed: integer seed passed to every generator.
    """
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment presumably only affects child processes - confirm intent.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Seed used for the whole notebook cell.
GLOBAL_SEED = 42
set_seed(GLOBAL_SEED)

def l2_normalize(x):
    """Divide every row of ``x`` by its L2 norm (plus a small epsilon).

    Args:
        x: tensor expected to have shape (seq_length, input_size); the norm is
            taken along ``dim=1``.

    Returns:
        Tensor of the same shape as ``x``.
    """
    row_norms = x.norm(p=2, dim=1, keepdim=True)
    # The epsilon keeps all-zero rows from dividing by zero.
    return x / (row_norms + 1e-8)

class EmotionDataset(Dataset):
    """Dataset of pre-computed embeddings paired with emotion labels.

    Only file ids present in both the embeddings file and the label CSV are
    kept. The ids are stored in sorted order so that sample indices are the
    same on every run (plain ``set`` iteration order depends on the
    interpreter's hash seed).
    """

    def __init__(self, embeddings_file, labels_csv, emotion_mapping, csv_type="csv1", transform=None):
        """
        embeddings_file: .npy file holding a pickled dict {file_id: embedding_array}
        labels_csv: CSV file with the labels
        emotion_mapping: dict translating a label string into a class index
        csv_type: "csv1" (columns ``name``/``emotion``) or "csv2"
            (columns ``Dialogue_ID``/``Utterance_ID``/``Emotion``)
        transform: optional callable applied to every embedding tensor
            (for example L2 normalisation)

        Raises ValueError when ``csv_type`` is neither "csv1" nor "csv2".
        """
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
        # embedding files from a trusted source.
        self.embeddings = np.load(embeddings_file, allow_pickle=True).item()
        df = pd.read_csv(labels_csv)
        if csv_type == "csv1":
            self.labels = dict(zip(df['name'], df['emotion']))
        elif csv_type == "csv2":
            dialogue_ids = df['Dialogue_ID'].astype(str)
            utterance_ids = df['Utterance_ID'].astype(str)
            file_ids = "dia" + dialogue_ids + "_utt" + utterance_ids
            self.labels = dict(zip(file_ids, df['Emotion']))
        else:
            raise ValueError("csv_type должен быть 'csv1' или 'csv2'")

        # Sorted for a reproducible sample order; key=str tolerates mixed key types.
        self.ids = sorted(self.embeddings.keys() & self.labels.keys(), key=str)
        missing = self.embeddings.keys() - self.labels.keys()
        if missing:
            # Embeddings without a label are dropped; report them instead of
            # discarding them silently.
            print(f"Предупреждение: отсутствуют метки для {missing}")

        self.emotion_mapping = emotion_mapping
        self.transform = transform

    def __len__(self):
        """Return the number of samples that have both an embedding and a label."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``(embedding, label, file_id)`` for the sample at ``index``.

        Raises ValueError when the sample's label is missing from
        ``emotion_mapping``.
        """
        file_id = self.ids[index]
        # Stored shape (1, seq_length, input_size) -> (seq_length, input_size)
        emb = self.embeddings[file_id][0]
        emb_tensor = torch.tensor(emb, dtype=torch.float32)
        # Apply the optional transform (for example L2 normalisation)
        if self.transform is not None:
            emb_tensor = self.transform(emb_tensor)
        emotion_str = self.labels[file_id]
        if emotion_str not in self.emotion_mapping:
            raise ValueError(f"Эмоция {emotion_str} не найдена в emotion_mapping для {file_id}")
        label = self.emotion_mapping[emotion_str]
        label_tensor = torch.tensor(label, dtype=torch.long)
        return emb_tensor, label_tensor, file_id
    
def collate_fn(batch):
    """Combine ``(embedding, label, file_id)`` samples into one padded batch.

    Returns:
        Tuple ``(padded_embeddings, lengths, labels, file_ids)`` where the
        embeddings are zero-padded to the longest sequence (batch first),
        ``lengths`` holds the original sequence lengths as ``torch.long``,
        ``labels`` is the stacked label tensor and ``file_ids`` is a list.
    """
    sequences, targets, ids = zip(*batch)
    seq_lengths = torch.tensor([seq.shape[0] for seq in sequences], dtype=torch.long)
    padded = pad_sequence(sequences, batch_first=True)
    return padded, seq_lengths, torch.stack(targets), list(ids)


class LSTMClassifier(nn.Module):
    """Sequence classifier: (bi)LSTM over padded inputs plus a linear head.

    The final hidden state of the last LSTM layer (both directions concatenated
    when bidirectional) is fed to a fully connected layer producing the logits.
    """

    def __init__(self, input_size=1024, hidden_size=256, num_layers=2, num_classes=7,
                 bidirectional=True, dropout=0.3):
        """
        input_size: feature dimension of every time step
        hidden_size: LSTM hidden state size per direction
        num_layers: number of stacked LSTM layers
        num_classes: number of output logits
        bidirectional: whether to run the LSTM in both directions
        dropout: dropout between stacked LSTM layers (ignored for one layer)
        """
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers; with a single
        # layer a non-zero value does nothing and triggers a UserWarning.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            bidirectional=bidirectional,
                            dropout=inter_layer_dropout)
        fc_in_features = hidden_size * 2 if bidirectional else hidden_size
        self.fc = nn.Linear(fc_in_features, num_classes)

    def forward(self, x, lengths):
        """Return logits of shape (batch, num_classes).

        x: padded batch of shape (batch, seq_len, input_size)
        lengths: true (unpadded) length of every sequence in the batch
        """
        # pack_padded_sequence needs the lengths on the CPU.
        cpu_lengths = torch.as_tensor(lengths).cpu()
        packed = pack_padded_sequence(x, cpu_lengths, batch_first=True, enforce_sorted=False)
        _, (hn, _) = self.lstm(packed)
        if self.lstm.bidirectional:
            # hn[-2] / hn[-1]: forward and backward states of the last layer.
            last_hidden = torch.cat((hn[-2], hn[-1]), dim=1)
        else:
            last_hidden = hn[-1]
        return self.fc(last_hidden)

    
def get_model(variant, input_size, hidden_size, num_layers, num_classes, bidirectional, dropout):
    """Build the model selected by ``variant``.

    Args:
        variant: model family name; only "LSTM" is supported and the
            comparison is case-insensitive.
        input_size, hidden_size, num_layers, num_classes, bidirectional,
        dropout: forwarded positionally to ``LSTMClassifier``.

    Returns:
        A freshly constructed ``LSTMClassifier``.

    Raises:
        ValueError: if ``variant`` is not a supported model name (previously
            the function silently returned ``None``).
    """
    if str(variant).upper() == "LSTM":
        return LSTMClassifier(input_size, hidden_size, num_layers, num_classes, bidirectional, dropout)
    raise ValueError(f"Неизвестный вариант модели: {variant!r}")

def train_model(model, dataloader, criterion, optimizer, device):
    """Run one training epoch and return the average loss per sample.

    Args:
        model: module called as ``model(inputs, lengths)``.
        dataloader: yields ``(inputs, lengths, labels, file_ids)`` batches and
            exposes ``dataset`` for the sample count.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping the model parameters.
        device: device the batch tensors are moved to.

    Returns:
        Sum of ``batch_loss * batch_size`` divided by ``len(dataloader.dataset)``.
    """
    model.train()
    loss_sum = 0.0
    for inputs, lengths, labels, _ in dataloader:
        inputs, lengths, labels = (t.to(device) for t in (inputs, lengths, labels))
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs, lengths), labels)
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size (assumes the criterion averages over the batch).
        loss_sum += batch_loss.item() * inputs.size(0)
    return loss_sum / len(dataloader.dataset)

def evaluate_model(model, dataloader, criterion, device):
    """Evaluate the model without gradients; return ``(mean_loss, accuracy)``.

    Args:
        model: module called as ``model(inputs, lengths)``.
        dataloader: yields ``(inputs, lengths, labels, file_ids)`` batches and
            exposes ``dataset`` for the sample count.
        criterion: loss function called as ``criterion(outputs, labels)``.
        device: device the batch tensors are moved to.

    Returns:
        Tuple of the per-sample average loss and the fraction of correct
        arg-max predictions.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, lengths, labels, _ in dataloader:
            inputs, lengths, labels = (t.to(device) for t in (inputs, lengths, labels))
            logits = model(inputs, lengths)
            loss_sum += criterion(logits, labels).item() * inputs.size(0)
            predicted = torch.max(logits, 1).indices
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)
    mean_loss = loss_sum / len(dataloader.dataset)
    return mean_loss, hits / seen


def evaluate_metrics(model, dataloader, device):
    """Collect labels, arg-max predictions and file ids over a data loader.

    Args:
        model: module called as ``model(inputs, lengths)``.
        dataloader: yields ``(inputs, lengths, labels, file_ids)`` batches.
        device: device the batch tensors are moved to.

    Returns:
        Tuple ``(y_true, y_pred, file_ids)``: two NumPy arrays and a list, all
        in loader order.
    """
    model.eval()
    true_labels, predictions, ids = [], [], []
    with torch.no_grad():
        for inputs, lengths, labels, file_ids in dataloader:
            inputs, lengths, labels = (t.to(device) for t in (inputs, lengths, labels))
            logits = model(inputs, lengths)
            predicted = torch.max(logits, 1).indices
            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            ids.extend(file_ids)
    return np.array(true_labels), np.array(predictions), ids

def compute_metrics(y_true, y_pred):
    """Return ``(UAR, MF1, WAR, WF1)`` for the given labels and predictions.

    UAR / WAR are the macro- and weighted-average recall, MF1 / WF1 the macro-
    and weighted-average F1 score, all computed with scikit-learn.
    """
    scores = {
        averaging: (
            recall_score(y_true, y_pred, average=averaging),
            f1_score(y_true, y_pred, average=averaging),
        )
        for averaging in ('macro', 'weighted')
    }
    unweighted_recall, macro_f1 = scores['macro']
    weighted_recall, weighted_f1 = scores['weighted']
    return unweighted_recall, macro_f1, weighted_recall, weighted_f1


# Label string -> class index. Synonymous labels share one index
# ('joy'/'happiness' -> 3, 'surprise'/'enthusiasm' -> 6), giving 7 classes.
emotion_mapping = {
    'anger': 0,
    'disgust': 1,
    'fear': 2,
    'joy': 3, 'happiness': 3,
    'neutral': 4,
    'sadness': 5,
    'surprise': 6, 'enthusiasm': 6,
}

# Class index -> display name.
label_to_emotion = dict(enumerate([
    'anger',
    'disgust',
    'fear',
    'joy/happiness',
    'neutral',
    'sadness',
    'surprise/enthusiasm',
]))


# Training data: embeddings file plus label CSV for each of the two corpora.
train_embeddings_file1, train_labels_csv1 = "train_resd_embeddings.npy", "train.csv"
train_embeddings_file2, train_labels_csv2 = "train_meld_embeddings.npy", "train_sent_emo.csv"

# Test data.
test_embeddings_file1, test_labels_csv1 = "test_resd_embeddings.npy", "test.csv"
# NOTE(review): this file name contains "wav2vec" while the other MELD files do
# not - confirm it holds the same kind of embeddings.
test_embeddings_file2, test_labels_csv2 = "test_meld_wav2vec_embeddings.npy", "test_sent_emo.csv"

# Validation data.
val_embeddings_file, val_labels_csv = "val_meld_embeddings.npy", "dev_sent_emo.csv"



def run_experiment(exp_params, num_epochs=100, patience=3):
    """Train and evaluate one LSTM configuration with early stopping.

    Loads the train/validation/test datasets, builds the model described by
    ``exp_params`` and trains for at most ``num_epochs`` epochs. Training stops
    early once the validation loss has failed to improve for ``patience``
    consecutive epochs. The weights with the lowest validation loss are
    restored before the final evaluation.

    Args:
        exp_params: dict with the required key ``name`` and the optional keys
            ``normalize``, ``variant``, ``hidden_size``, ``num_layers``,
            ``bidirectional``, ``dropout``, ``loss``, ``optimizer``, ``lr``.
        num_epochs: maximum number of training epochs.
        patience: number of non-improving epochs tolerated before stopping.

    Returns:
        Dict with ``exp_name`` and, for each of ``val``, ``test1`` and
        ``test2``, a dict holding ``UAR``, ``MF1``, ``WAR`` and ``WF1``.

    Raises:
        ValueError: if the requested loss or optimizer is not supported.
    """
    print(f"\nЗапуск эксперимента: {exp_params['name']}")

    transform = l2_normalize if exp_params.get("normalize", False) else None

    def _dataset(embeddings_file, labels_csv, csv_type):
        # All datasets share the label mapping and the optional transform.
        return EmotionDataset(embeddings_file, labels_csv, emotion_mapping,
                              csv_type=csv_type, transform=transform)

    train_dataset1 = _dataset(train_embeddings_file1, train_labels_csv1, "csv1")
    train_dataset2 = _dataset(train_embeddings_file2, train_labels_csv2, "csv2")
    merged_train_dataset = ConcatDataset([train_dataset1, train_dataset2])
    val_dataset = _dataset(val_embeddings_file, val_labels_csv, "csv2")
    test_dataset1 = _dataset(test_embeddings_file1, test_labels_csv1, "csv1")
    test_dataset2 = _dataset(test_embeddings_file2, test_labels_csv2, "csv2")

    batch_size = 16

    def _loader(dataset, shuffle=False):
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)

    train_loader_local = _loader(merged_train_dataset, shuffle=True)
    val_loader_local = _loader(val_dataset)
    test_loader1_local = _loader(test_dataset1)
    test_loader2_local = _loader(test_dataset2)

    model = get_model(
        # Default spelled "LSTM": the lower-case "lstm" used before matched no
        # variant, so get_model returned None and .to(device) failed.
        variant=exp_params.get("variant", "LSTM"),
        input_size=1024,
        hidden_size=exp_params.get("hidden_size", 128),
        num_layers=exp_params.get("num_layers", 3),
        num_classes=7,
        bidirectional=exp_params.get("bidirectional", False),
        dropout=exp_params.get("dropout", 0.4)
    ).to(device)

    loss_type = exp_params.get("loss", "crossentropy")
    if loss_type == "crossentropy":
        criterion = nn.CrossEntropyLoss()
    else:
        raise ValueError("Неизвестный тип loss")

    opt_type = exp_params.get("optimizer", "adam")
    lr = exp_params.get("lr", 1e-4)
    if opt_type == "adam":
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        # Without this branch an unknown optimizer left `optimizer` unbound and
        # surfaced later as a NameError.
        raise ValueError("Неизвестный тип оптимизатора")

    def _metrics_for(loader):
        # (UAR, MF1, WAR, WF1) of the current model on one loader.
        y_true, y_pred, _ = evaluate_metrics(model, loader, device)
        return compute_metrics(y_true, y_pred)

    def _as_dict(metrics):
        return {"UAR": metrics[0], "MF1": metrics[1], "WAR": metrics[2], "WF1": metrics[3]}

    best_val_loss = float('inf')
    counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader_local, criterion, optimizer, device)
        val_loss, _ = evaluate_model(model, val_loader_local, criterion, device)

        train_metrics = _metrics_for(train_loader_local)
        val_metrics = _metrics_for(val_loader_local)
        test1_metrics = _metrics_for(test_loader1_local)
        test2_metrics = _metrics_for(test_loader2_local)

        print(f"\nЭпоха {epoch+1}/{num_epochs}:")
        print(f"  Train Loss: {train_loss:.4f}, WAR: {train_metrics[2]:.4f}, UAR: {train_metrics[0]:.4f}, MF1: {train_metrics[1]:.4f}, WF1: {train_metrics[3]:.4f}")
        print(f"  Val   Loss: {val_loss:.4f}, WAR: {val_metrics[2]:.4f}, UAR: {val_metrics[0]:.4f}, MF1: {val_metrics[1]:.4f}, WF1: {val_metrics[3]:.4f}")
        print(f"  Test1:               WAR: {test1_metrics[2]:.4f}, UAR: {test1_metrics[0]:.4f}, MF1: {test1_metrics[1]:.4f}, WF1: {test1_metrics[3]:.4f}")
        print(f"  Test2:               WAR: {test2_metrics[2]:.4f}, UAR: {test2_metrics[0]:.4f}, MF1: {test2_metrics[1]:.4f}, WF1: {test2_metrics[3]:.4f}")

        if val_loss < best_val_loss:
            # New best epoch: snapshot the weights and reset the patience counter.
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f"Ранняя остановка на эпохе {epoch+1}")
                break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final scores are computed with the best-validation-loss weights.
    val_metrics = _metrics_for(val_loader_local)
    test1_metrics = _metrics_for(test_loader1_local)
    test2_metrics = _metrics_for(test_loader2_local)

    return {
        "exp_name": exp_params["name"],
        "val": _as_dict(val_metrics),
        "test1": _as_dict(test1_metrics),
        "test2": _as_dict(test2_metrics)
    }


    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Random search over LSTM hyperparameters. Every successful trial is stored in
# `all_results`; the best trial is chosen by validation MF1 (not test scores).
n_trials = 5
best_mf1 = -1
best_params = None
all_results = []

for trial in range(n_trials):
    variant = random.choice(["LSTM"])
    hidden_size = random.choice([128, 256, 512])
    num_layers = random.choice([2, 3, 4])
    bidirectional = random.choice([False, True])
    dropout = random.choice([0.1, 0.2, 0.3, 0.4])
    optimizer_choice = random.choice(["adam"])
    loss_choice = random.choice(["crossentropy"])
    # Learning rate sampled log-uniformly from [1e-4, 1e-3].
    lr = 10 ** random.uniform(math.log10(1e-4), math.log10(1e-3))
    normalize = random.choice([False, True])

    exp_params = {
        "name": f"Trial {trial+1}: {variant}, hs={hidden_size}, layers={num_layers}, bidir={bidirectional}, dropout={dropout}, opt={optimizer_choice}, loss={loss_choice}, lr={lr:.1e}, norm={normalize}",
        "variant": variant,
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "bidirectional": bidirectional,
        "dropout": dropout,
        "optimizer": optimizer_choice,
        "loss": loss_choice,
        "lr": lr,
        "normalize": normalize
    }

    print(f"\nЗапуск испытания {trial+1}/{n_trials} с параметрами:")
    print(exp_params)

    try:
        result = run_experiment(exp_params, num_epochs=30, patience=10)
    except Exception as e:
        # Best effort: a failing trial is reported and the search continues.
        print(f"Испытание {trial+1} завершилось ошибкой: {e}")
        continue

    # Previously the result was dropped, so the bookkeeping variables above
    # were never updated.
    all_results.append(result)
    trial_mf1 = result["val"]["MF1"]
    if trial_mf1 > best_mf1:
        best_mf1 = trial_mf1
        best_params = exp_params

if best_params is not None:
    print(f"\nЛучшее испытание по Val MF1 ({best_mf1:.4f}): {best_params['name']}")

# Transformer

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
import copy
from sklearn.metrics import f1_score, recall_score
import matplotlib.pyplot as plt
from collections import defaultdict
import itertools
import random
import math
import pandas as pd

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also switches cuDNN to deterministic mode (benchmark autotuning off) and
    exports ``PYTHONHASHSEED`` with the same value.

    Args:
        seed: integer seed passed to every generator.
    """
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment presumably only affects child processes - confirm intent.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Seed used for the whole notebook cell.
GLOBAL_SEED = 42
set_seed(GLOBAL_SEED)

def l2_normalize(x):
    """Divide every row of ``x`` by its L2 norm (plus a small epsilon).

    Args:
        x: tensor expected to have shape (seq_length, input_size); the norm is
            taken along ``dim=1``.

    Returns:
        Tensor of the same shape as ``x``.
    """
    row_norms = x.norm(p=2, dim=1, keepdim=True)
    # The epsilon keeps all-zero rows from dividing by zero.
    return x / (row_norms + 1e-8)


class EmotionDataset(Dataset):
    """Dataset of pre-computed embeddings paired with emotion labels.

    Only file ids present in both the embeddings file and the label CSV are
    kept. The ids are stored in sorted order so that sample indices are the
    same on every run (plain ``set`` iteration order depends on the
    interpreter's hash seed).
    """

    def __init__(self, embeddings_file, labels_csv, emotion_mapping, csv_type="csv1", transform=None):
        """
        embeddings_file: .npy file holding a pickled dict {file_id: embedding_array}
        labels_csv: CSV file with the labels
        emotion_mapping: dict translating a label string into a class index
        csv_type: "csv1" (columns ``name``/``emotion``) or "csv2"
            (columns ``Dialogue_ID``/``Utterance_ID``/``Emotion``)
        transform: optional callable applied to every embedding tensor

        Raises ValueError when ``csv_type`` is neither "csv1" nor "csv2".
        """
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
        # embedding files from a trusted source.
        self.embeddings = np.load(embeddings_file, allow_pickle=True).item()
        df = pd.read_csv(labels_csv)
        if csv_type == "csv1":
            self.labels = dict(zip(df['name'], df['emotion']))
        elif csv_type == "csv2":
            dialogue_ids = df['Dialogue_ID'].astype(str)
            utterance_ids = df['Utterance_ID'].astype(str)
            file_ids = "dia" + dialogue_ids + "_utt" + utterance_ids
            self.labels = dict(zip(file_ids, df['Emotion']))
        else:
            raise ValueError("csv_type должен быть 'csv1' или 'csv2'")

        # Sorted for a reproducible sample order; key=str tolerates mixed key types.
        self.ids = sorted(self.embeddings.keys() & self.labels.keys(), key=str)
        missing = self.embeddings.keys() - self.labels.keys()
        if missing:
            print(f"Предупреждение: отсутствуют метки для {missing}")
        self.emotion_mapping = emotion_mapping
        self.transform = transform

    def __len__(self):
        """Return the number of samples that have both an embedding and a label."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``(embedding, label, file_id)`` for the sample at ``index``.

        Raises ValueError when the sample's label is missing from
        ``emotion_mapping``.
        """
        file_id = self.ids[index]
        # Stored shape (1, seq_length, input_size) -> (seq_length, input_size)
        emb = self.embeddings[file_id][0]
        emb_tensor = torch.tensor(emb, dtype=torch.float32)
        if self.transform is not None:
            emb_tensor = self.transform(emb_tensor)
        emotion_str = self.labels[file_id]
        if emotion_str not in self.emotion_mapping:
            raise ValueError(f"Эмоция {emotion_str} не найдена в emotion_mapping для {file_id}")
        label = self.emotion_mapping[emotion_str]
        label_tensor = torch.tensor(label, dtype=torch.long)
        return emb_tensor, label_tensor, file_id

def collate_fn(batch):
    """Combine ``(embedding, label, file_id)`` samples into one padded batch.

    Returns:
        Tuple ``(padded_embeddings, lengths, labels, file_ids)`` where the
        embeddings are zero-padded to the longest sequence (batch first),
        ``lengths`` holds the original sequence lengths as ``torch.long``,
        ``labels`` is the stacked label tensor and ``file_ids`` is a list.
    """
    sequences, targets, ids = zip(*batch)
    seq_lengths = torch.tensor([seq.shape[0] for seq in sequences], dtype=torch.long)
    padded = pad_sequence(sequences, batch_first=True)
    return padded, seq_lengths, torch.stack(targets), list(ids)

class PositionalEncoding(nn.Module):
    """Add positional information to a batch-first sequence, then apply dropout.

    With ``learnable=False`` a fixed sinusoidal table is stored as the buffer
    ``pe``; with ``learnable=True`` ``pe`` is a zero-initialised parameter.
    """

    def __init__(self, d_model, dropout=0.1, max_len=15500, learnable=False):
        """
        d_model: feature dimension of the input (odd values are supported)
        dropout: dropout probability applied after adding the encoding
        max_len: longest sequence length that can be encoded
        learnable: use a trainable table instead of the sinusoidal one
        """
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.learnable = learnable
        if not learnable:
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
            angles = position * div_term  # (max_len, ceil(d_model / 2))
            pe = torch.zeros(max_len, d_model)
            pe[:, 0::2] = torch.sin(angles)
            # For odd d_model there is one cosine column fewer than sine
            # columns, so trim the angles to avoid a shape mismatch.
            pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
            pe = pe.unsqueeze(0)  # (1, max_len, d_model)
            self.register_buffer('pe', pe)
        else:
            self.pe = nn.Parameter(torch.zeros(1, max_len, d_model))

    def forward(self, x):
        """Return ``dropout(x + pe[:, :seq_len])``.

        x: tensor of shape (batch_size, seq_len, d_model)

        Raises ValueError when ``seq_len`` exceeds the table length.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(1):
            # Fail with a clear message instead of an opaque broadcast error.
            raise ValueError(
                f"Длина последовательности ({seq_len}) превышает max_len ({self.pe.size(1)})"
            )
        x = x + self.pe[:, :seq_len]
        return self.dropout(x)


class TransformerClassifier(nn.Module):
    """Transformer-encoder sequence classifier with CLS or mean pooling.

    Inputs are projected to ``d_model``, optionally prefixed with a trainable
    CLS token, enriched with positional encodings and passed through a stack
    of ``nn.TransformerEncoderLayer``. Padded positions are excluded through
    ``src_key_padding_mask``.
    """

    def __init__(self, input_size=1024, d_model=256, num_layers=2, num_heads=4,
                 dim_feedforward=512, dropout=0.1, num_classes=7,
                 pooling='cls', pos_encoding_type='fixed', learnable_pos=False,
                 max_len=15500):
        """
        input_size: feature dimension of the raw inputs
        d_model: model width after the input projection
        num_layers: number of encoder layers
        num_heads: attention heads per layer
        dim_feedforward: hidden size of the feed-forward sub-layer
        dropout: dropout used in the positional encoding and the encoder
        num_classes: number of output logits
        pooling: 'cls' to read the CLS token; any other value uses the mean
            over the valid time steps
        pos_encoding_type: 'learnable' selects a trainable positional table
        learnable_pos: alternative flag that also selects the trainable table
        max_len: longest encodable sequence (including the CLS token)
        """
        super().__init__()
        self.input_proj = nn.Linear(input_size, d_model)
        self.pooling = pooling
        if pooling == 'cls':
            self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
        self.pos_encoder = PositionalEncoding(d_model, dropout, max_len=max_len, learnable=(pos_encoding_type=='learnable' or learnable_pos))
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads,
                                                   dim_feedforward=dim_feedforward, dropout=dropout, activation='relu')
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x, lengths):
        """Return logits of shape (batch_size, num_classes).

        x: padded batch of shape (batch_size, seq_len, input_size)
        lengths: true (unpadded) length of every sequence in the batch
        """
        x = self.input_proj(x)  # (batch_size, seq_len, d_model)
        batch_size = x.size(0)
        use_cls = self.pooling == 'cls'
        if use_cls:
            cls_tokens = self.cls_token.expand(batch_size, -1, -1)  # (batch_size, 1, d_model)
            x = torch.cat((cls_tokens, x), dim=1)  # (batch_size, seq_len+1, d_model)
        x = self.pos_encoder(x)
        total_len = x.size(1)

        # Vectorised padding mask: True marks positions the encoder must
        # ignore. The CLS token shifts every valid range by one position.
        lengths = torch.as_tensor(lengths, device=x.device)
        valid_len = lengths + 1 if use_cls else lengths
        positions = torch.arange(total_len, device=x.device)
        mask = positions.unsqueeze(0) >= valid_len.unsqueeze(1)  # (batch_size, total_len)

        # The encoder is built without batch_first, so it expects
        # (seq_len, batch_size, d_model).
        x = self.transformer_encoder(x.transpose(0, 1), src_key_padding_mask=mask)
        x = x.transpose(0, 1)  # (batch_size, total_len, d_model)

        if use_cls:
            out = x[:, 0, :]
        else:
            # Mean over the valid steps only; zero-length sequences give zeros.
            summed = x.masked_fill(mask.unsqueeze(-1), 0.0).sum(dim=1)
            out = summed / lengths.clamp(min=1).unsqueeze(1).to(summed.dtype)
        return self.fc(out)

def get_transformer_model(params):
    """Build a ``TransformerClassifier`` from a parameter dict.

    Every constructor argument is read from ``params`` with ``.get``; missing
    keys fall back to the defaults listed below.
    """
    fallbacks = (
        ("input_size", 1024),
        ("d_model", 256),
        ("num_layers", 2),
        ("num_heads", 4),
        ("dim_feedforward", 512),
        ("dropout", 0.1),
        ("num_classes", 7),
        ("pooling", "cls"),
        ("pos_encoding_type", "fixed"),
        ("learnable_pos", False),
    )
    kwargs = {key: params.get(key, default) for key, default in fallbacks}
    return TransformerClassifier(**kwargs)


def train_model(model, dataloader, criterion, optimizer, device):
    """Run one training epoch and return the average loss per sample.

    Args:
        model: module called as ``model(inputs, lengths)``.
        dataloader: yields ``(inputs, lengths, labels, file_ids)`` batches and
            exposes ``dataset`` for the sample count.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping the model parameters.
        device: device the batch tensors are moved to.

    Returns:
        Sum of ``batch_loss * batch_size`` divided by ``len(dataloader.dataset)``.
    """
    model.train()
    loss_sum = 0.0
    for inputs, lengths, labels, _ in dataloader:
        inputs, lengths, labels = (t.to(device) for t in (inputs, lengths, labels))
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs, lengths), labels)
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size (assumes the criterion averages over the batch).
        loss_sum += batch_loss.item() * inputs.size(0)
    return loss_sum / len(dataloader.dataset)

def evaluate_model(model, dataloader, criterion, device):
    """Evaluate the model without gradients; return ``(mean_loss, accuracy)``.

    Args:
        model: module called as ``model(inputs, lengths)``.
        dataloader: yields ``(inputs, lengths, labels, file_ids)`` batches and
            exposes ``dataset`` for the sample count.
        criterion: loss function called as ``criterion(outputs, labels)``.
        device: device the batch tensors are moved to.

    Returns:
        Tuple of the per-sample average loss and the fraction of correct
        arg-max predictions.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, lengths, labels, _ in dataloader:
            inputs, lengths, labels = (t.to(device) for t in (inputs, lengths, labels))
            logits = model(inputs, lengths)
            loss_sum += criterion(logits, labels).item() * inputs.size(0)
            predicted = torch.max(logits, 1).indices
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)
    mean_loss = loss_sum / len(dataloader.dataset)
    return mean_loss, hits / seen

def evaluate_metrics(model, dataloader, device):
    """Collect labels, arg-max predictions and file ids over a data loader.

    Args:
        model: module called as ``model(inputs, lengths)``.
        dataloader: yields ``(inputs, lengths, labels, file_ids)`` batches.
        device: device the batch tensors are moved to.

    Returns:
        Tuple ``(y_true, y_pred, file_ids)``: two NumPy arrays and a list, all
        in loader order.
    """
    model.eval()
    true_labels, predictions, ids = [], [], []
    with torch.no_grad():
        for inputs, lengths, labels, file_ids in dataloader:
            inputs, lengths, labels = (t.to(device) for t in (inputs, lengths, labels))
            logits = model(inputs, lengths)
            predicted = torch.max(logits, 1).indices
            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            ids.extend(file_ids)
    return np.array(true_labels), np.array(predictions), ids

def compute_metrics(y_true, y_pred):
    """Return ``(UAR, MF1, WAR, WF1)`` for the given labels and predictions.

    UAR / WAR are the macro- and weighted-average recall, MF1 / WF1 the macro-
    and weighted-average F1 score, all computed with scikit-learn.
    """
    scores = {
        averaging: (
            recall_score(y_true, y_pred, average=averaging),
            f1_score(y_true, y_pred, average=averaging),
        )
        for averaging in ('macro', 'weighted')
    }
    unweighted_recall, macro_f1 = scores['macro']
    weighted_recall, weighted_f1 = scores['weighted']
    return unweighted_recall, macro_f1, weighted_recall, weighted_f1


# Label string -> class index. Synonymous labels share one index
# ('joy'/'happiness' -> 3, 'surprise'/'enthusiasm' -> 6), giving 7 classes.
emotion_mapping = {
    'anger': 0,
    'disgust': 1,
    'fear': 2,
    'joy': 3, 'happiness': 3,
    'neutral': 4,
    'sadness': 5,
    'surprise': 6, 'enthusiasm': 6,
}

# Class index -> display name.
label_to_emotion = dict(enumerate([
    'anger',
    'disgust',
    'fear',
    'joy/happiness',
    'neutral',
    'sadness',
    'surprise/enthusiasm',
]))


# Training data: embeddings file plus label CSV for each of the two corpora.
train_embeddings_file1, train_labels_csv1 = "train_resd_embeddings.npy", "train.csv"
train_embeddings_file2, train_labels_csv2 = "train_meld_embeddings.npy", "train_sent_emo.csv"

# Test data.
test_embeddings_file1, test_labels_csv1 = "test_resd_embeddings.npy", "test.csv"
# NOTE(review): this file name contains "wav2vec" while the other MELD files do
# not - confirm it holds the same kind of embeddings.
test_embeddings_file2, test_labels_csv2 = "test_meld_wav2vec_embeddings.npy", "test_sent_emo.csv"

# Validation data.
val_embeddings_file, val_labels_csv = "val_meld_embeddings.npy", "dev_sent_emo.csv"

def run_experiment(exp_params, num_epochs=100, patience=3):
    """Train and evaluate one Transformer configuration with early stopping.

    Loads the train/validation/test datasets, builds the model described by
    ``exp_params`` and trains for at most ``num_epochs`` epochs. Training stops
    early once the validation loss has failed to improve for ``patience``
    consecutive epochs. The weights with the lowest validation loss are
    restored before the final evaluation.

    Args:
        exp_params: dict with the required key ``name`` and the optional keys
            ``normalize``, ``d_model``, ``num_layers``, ``num_heads``,
            ``dim_feedforward``, ``dropout``, ``pooling``,
            ``pos_encoding_type``, ``learnable_pos``, ``loss``, ``optimizer``
            and ``lr``.
        num_epochs: maximum number of training epochs.
        patience: number of non-improving epochs tolerated before stopping.

    Returns:
        Dict with ``exp_name`` and, for each of ``val``, ``test1`` and
        ``test2``, a dict holding ``UAR``, ``MF1``, ``WAR`` and ``WF1``.

    Raises:
        ValueError: if the requested loss or optimizer is not supported.
    """
    print(f"\nЗапуск эксперимента: {exp_params['name']}")
    transform = l2_normalize if exp_params.get("normalize", False) else None

    def _dataset(embeddings_file, labels_csv, csv_type):
        # All datasets share the label mapping and the optional transform.
        return EmotionDataset(embeddings_file, labels_csv, emotion_mapping,
                              csv_type=csv_type, transform=transform)

    train_dataset1 = _dataset(train_embeddings_file1, train_labels_csv1, "csv1")
    train_dataset2 = _dataset(train_embeddings_file2, train_labels_csv2, "csv2")
    merged_train_dataset = ConcatDataset([train_dataset1, train_dataset2])
    val_dataset = _dataset(val_embeddings_file, val_labels_csv, "csv2")
    test_dataset1 = _dataset(test_embeddings_file1, test_labels_csv1, "csv1")
    test_dataset2 = _dataset(test_embeddings_file2, test_labels_csv2, "csv2")

    batch_size = 16

    def _loader(dataset, shuffle=False):
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)

    train_loader_local = _loader(merged_train_dataset, shuffle=True)
    val_loader_local = _loader(val_dataset)
    test_loader1_local = _loader(test_dataset1)
    test_loader2_local = _loader(test_dataset2)

    # Fixed settings first, then the tunable ones with their fallbacks.
    model_params = {"input_size": 1024, "num_classes": 7}
    tunable_defaults = (
        ("d_model", 256),
        ("num_layers", 2),
        ("num_heads", 4),
        ("dim_feedforward", 512),
        ("dropout", 0.1),
        ("pooling", "cls"),
        ("pos_encoding_type", "fixed"),
        ("learnable_pos", False),
    )
    for key, fallback in tunable_defaults:
        model_params[key] = exp_params.get(key, fallback)
    model = get_transformer_model(model_params).to(device)

    if exp_params.get("loss", "crossentropy") != "crossentropy":
        raise ValueError("Неизвестный тип loss")
    criterion = nn.CrossEntropyLoss()

    opt_type = exp_params.get("optimizer", "adam")
    lr = exp_params.get("lr", 1e-4)
    if opt_type != "adam":
        raise ValueError("Неизвестный тип оптимизатора")
    optimizer = optim.Adam(model.parameters(), lr=lr)

    def _metrics_for(loader):
        # (UAR, MF1, WAR, WF1) of the current model on one loader.
        y_true, y_pred, _ = evaluate_metrics(model, loader, device)
        return compute_metrics(y_true, y_pred)

    def _as_dict(metrics):
        return {"UAR": metrics[0], "MF1": metrics[1], "WAR": metrics[2], "WF1": metrics[3]}

    best_val_loss = float('inf')
    counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader_local, criterion, optimizer, device)
        val_loss, val_acc = evaluate_model(model, val_loader_local, criterion, device)

        # Metrics on train/val/test1/test2 after this epoch
        train_metrics = _metrics_for(train_loader_local)
        val_metrics = _metrics_for(val_loader_local)
        test1_metrics = _metrics_for(test_loader1_local)
        test2_metrics = _metrics_for(test_loader2_local)

        print(f"\nЭпоха {epoch+1}/{num_epochs}:")
        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        print(f"[Train ] UAR: {train_metrics[0]:.4f}, MF1: {train_metrics[1]:.4f}, WAR: {train_metrics[2]:.4f}, WF1: {train_metrics[3]:.4f}")
        print(f"[Val   ] UAR: {val_metrics[0]:.4f}, MF1: {val_metrics[1]:.4f}, WAR: {val_metrics[2]:.4f}, WF1: {val_metrics[3]:.4f}")
        print(f"[Test1 ] UAR: {test1_metrics[0]:.4f}, MF1: {test1_metrics[1]:.4f}, WAR: {test1_metrics[2]:.4f}, WF1: {test1_metrics[3]:.4f}")
        print(f"[Test2 ] UAR: {test2_metrics[0]:.4f}, MF1: {test2_metrics[1]:.4f}, WAR: {test2_metrics[2]:.4f}, WF1: {test2_metrics[3]:.4f}")

        if val_loss < best_val_loss:
            # New best epoch: snapshot the weights and reset the patience counter.
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())
            counter = 0
            continue

        counter += 1
        if counter >= patience:
            print(f"Ранняя остановка на эпохе {epoch+1}")
            break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final scores are computed with the best-validation-loss weights.
    val_metrics = _metrics_for(val_loader_local)
    test1_metrics = _metrics_for(test_loader1_local)
    test2_metrics = _metrics_for(test_loader2_local)

    return {
        "exp_name": exp_params["name"],
        "val": _as_dict(val_metrics),
        "test1": _as_dict(test1_metrics),
        "test2": _as_dict(test2_metrics)
    }

# Random search over Transformer hyperparameters. Every successful trial is
# stored in `all_results`; the best trial is chosen by validation MF1 (not
# test scores).
n_trials = 5
best_mf1 = -1
best_params = None
all_results = []

for trial in range(n_trials):
    # Randomly sample the Transformer hyperparameters
    d_model = random.choice([128, 256])
    num_layers = random.choice([2, 3])
    num_heads = random.choice([4, 8])
    dim_feedforward = random.choice([512, 1024])
    dropout = random.choice([0.2, 0.3, 0.4])
    pooling = random.choice(["cls", "mean"])
    pos_encoding_type = random.choice(["fixed", "learnable"])
    optimizer_choice = random.choice(["adam"])
    loss_choice = random.choice(["crossentropy"])

    # Learning rate sampled log-uniformly from [1e-4, 1e-3].
    lr = 10 ** random.uniform(math.log10(1e-4), math.log10(1e-3))
    normalize = random.choice([False, True])

    exp_params = {
        "name": f"Trial {trial+1}: transformer, d_model={d_model}, layers={num_layers}, heads={num_heads}, ff={dim_feedforward}, dropout={dropout}, pooling={pooling}, pos={pos_encoding_type}, opt={optimizer_choice}, loss={loss_choice}, lr={lr:.1e}, norm={normalize}",
        "d_model": d_model,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "dim_feedforward": dim_feedforward,
        "dropout": dropout,
        "pooling": pooling,
        "pos_encoding_type": pos_encoding_type,
        "optimizer": optimizer_choice,
        "loss": loss_choice,
        "lr": lr,
        "normalize": normalize
    }

    print(f"\nЗапуск испытания {trial+1}/{n_trials} с параметрами:")
    print(exp_params)

    try:
        result = run_experiment(exp_params, num_epochs=40, patience=25)
    except Exception as e:
        # Best effort: a failing trial is reported and the search continues.
        print(f"Испытание {trial+1} завершилось ошибкой: {e}")
        continue

    # Previously the result was dropped, so the bookkeeping variables above
    # were never updated.
    all_results.append(result)
    trial_mf1 = result["val"]["MF1"]
    if trial_mf1 > best_mf1:
        best_mf1 = trial_mf1
        best_params = exp_params

if best_params is not None:
    print(f"\nЛучшее испытание по Val MF1 ({best_mf1:.4f}): {best_params['name']}")

## Mamba v1

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.nn.utils.rnn import pad_sequence
import copy
from sklearn.metrics import f1_score, recall_score, confusion_matrix
import matplotlib.pyplot as plt
from collections import defaultdict
import random
import math
import itertools

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also switches cuDNN to deterministic mode with benchmarking disabled and
    exports ``PYTHONHASHSEED``.

    NOTE: Python reads ``PYTHONHASHSEED`` at interpreter start-up, so the
    assignment here only affects processes started afterwards.

    Args:
        seed: integer seed shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)

# Deterministic-only algorithms are explicitly switched off.
torch.use_deterministic_algorithms(False)

# Seed applied once when this cell is executed.
GLOBAL_SEED = 42
set_seed(GLOBAL_SEED)

def l2_normalize(x):
    """Scale every row of ``x`` to unit L2 norm.

    The norm is computed along ``dim=1``; for an input of shape
    (seq_length, input_size) each time step is normalized independently.
    A small epsilon is added to the norm so all-zero rows do not divide by zero.
    """
    eps = 1e-8
    denominator = torch.norm(x, p=2, dim=1, keepdim=True) + eps
    return x / denominator

class EmotionDataset(Dataset):
    """Precomputed utterance embeddings paired with integer emotion labels.

    Only ids that appear both in the embeddings file and in the labels CSV are
    kept. The ids are stored in sorted order so the sample order is identical
    between interpreter runs (plain set iteration over strings is not, because
    string hashing is randomized per process).
    """

    def __init__(self, embeddings_file, labels_csv, emotion_mapping, csv_type="csv1", transform=None):
        """
        Args:
            embeddings_file: .npy file holding a dict {file_id: embedding_array}.
            labels_csv: CSV file with the labels.
            emotion_mapping: dict mapping an emotion name to its class index.
            csv_type: "csv1" (columns ``name`` / ``emotion``) or "csv2"
                (columns ``Dialogue_ID`` / ``Utterance_ID`` / ``Emotion``).
            transform: optional callable applied to every embedding tensor.

        Raises:
            ValueError: if ``csv_type`` is neither "csv1" nor "csv2".
        """
        # The file stores a pickled dict inside a 0-d object array, hence .item().
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted files.
        self.embeddings = np.load(embeddings_file, allow_pickle=True).item()
        df = pd.read_csv(labels_csv)
        if csv_type == "csv1":
            self.labels = dict(zip(df['name'], df['emotion']))
        elif csv_type == "csv2":
            # Rebuild the file id used as embedding key: "dia<Dialogue_ID>_utt<Utterance_ID>".
            df['Dialogue_ID'] = df['Dialogue_ID'].astype(str)
            df['Utterance_ID'] = df['Utterance_ID'].astype(str)
            df['file_id'] = "dia" + df['Dialogue_ID'] + "_utt" + df['Utterance_ID']
            self.labels = dict(zip(df['file_id'], df['Emotion']))
        else:
            raise ValueError("csv_type должен быть 'csv1' или 'csv2'")

        embedding_ids = set(self.embeddings.keys())
        # Sorted (by string form) to get a reproducible sample order.
        self.ids = sorted(embedding_ids.intersection(self.labels.keys()), key=str)
        missing = embedding_ids - set(self.ids)
        if missing:
            print(f"Предупреждение: отсутствуют метки для {missing}")
        self.emotion_mapping = emotion_mapping
        self.transform = transform

    def __len__(self):
        """Return the number of samples that have both an embedding and a label."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``(embedding_tensor, label_tensor, file_id)`` for one sample.

        Raises:
            ValueError: if the sample's emotion is not present in ``emotion_mapping``.
        """
        file_id = self.ids[index]
        # Drop the leading axis: assumed (1, seq_length, input_size) -> (seq_length, input_size).
        emb = self.embeddings[file_id][0]
        emb_tensor = torch.tensor(emb, dtype=torch.float32)
        if self.transform is not None:
            emb_tensor = self.transform(emb_tensor)
        emotion_str = self.labels[file_id]
        if emotion_str not in self.emotion_mapping:
            raise ValueError(f"Эмоция {emotion_str} не найдена в emotion_mapping для {file_id}")
        label = self.emotion_mapping[emotion_str]
        label_tensor = torch.tensor(label, dtype=torch.long)
        return emb_tensor, label_tensor, file_id

def collate_fn(batch):
    """Merge ``(embedding, label, file_id)`` samples into one padded batch.

    Returns:
        A tuple ``(padded_embeddings, lengths, labels, file_ids)``: the
        embeddings zero-padded to the longest sequence (batch first), the
        original sequence lengths as a long tensor, the stacked labels and the
        file ids as a list.
    """
    sequences, label_tensors, ids = zip(*batch)
    seq_lengths = torch.tensor([seq.shape[0] for seq in sequences], dtype=torch.long)
    return (
        pad_sequence(sequences, batch_first=True),
        seq_lengths,
        torch.stack(label_tensors),
        list(ids),
    )

class MambaBlock(nn.Module):
    """Sequence block: Conv1d over time -> LayerNorm -> ReLU -> Dropout.

    The convolution is padded with ``kernel_size // 2`` on both sides.
    """

    def __init__(self, d_model, kernel_size=3, dropout=0.1):
        super().__init__()
        half_kernel = kernel_size // 2
        self.conv = nn.Conv1d(d_model, d_model, kernel_size, padding=half_kernel)
        self.norm = nn.LayerNorm(d_model)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the block to ``x`` of shape (batch, seq_len, d_model)."""
        # Conv1d expects channels first: (batch, d_model, seq_len).
        channels_first = x.transpose(1, 2)
        # Back to (batch, seq_len, d_model) so LayerNorm acts on the feature axis.
        mixed = self.conv(channels_first).transpose(1, 2)
        return self.dropout(self.activation(self.norm(mixed)))

class MambaClassifier(nn.Module):
    """Classifier: linear input projection, a stack of MambaBlock layers,
    length-aware mean pooling over time and a linear output layer.
    """

    def __init__(self, input_size=1024, d_model=256, num_layers=2, num_classes=7, dropout=0.1, kernel_size=3):
        super().__init__()
        self.input_proj = nn.Linear(input_size, d_model)
        self.mamba_blocks = nn.ModuleList([MambaBlock(d_model, kernel_size, dropout) for _ in range(num_layers)])
        self.fc = nn.Linear(d_model, num_classes)

    @staticmethod
    def _masked_mean(x, lengths):
        """Average ``x`` (batch, seq_len, d_model) over the first ``lengths[i]`` steps.

        Samples with a non-positive length yield a zero vector. Lengths larger
        than ``seq_len`` are treated as ``seq_len``.
        """
        max_len = x.size(1)
        lengths = torch.as_tensor(lengths, device=x.device).clamp(min=0, max=max_len)
        positions = torch.arange(max_len, device=x.device)
        valid = positions.unsqueeze(0) < lengths.unsqueeze(1)  # (batch, seq_len)
        # masked_fill (rather than multiplying by the mask) keeps padded
        # positions from contributing even if they hold non-finite values.
        summed = x.masked_fill(~valid.unsqueeze(-1), 0.0).sum(dim=1)
        # clamp(min=1) avoids dividing by zero for empty sequences (their sum is 0).
        counts = lengths.clamp(min=1).unsqueeze(1).to(x.dtype)
        return summed / counts

    def forward(self, x, lengths):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: padded batch of shape (batch, seq_len, input_size).
            lengths: number of valid time steps of every sample.
        """
        x = self.input_proj(x)  # (batch, seq_len, d_model)
        for block in self.mamba_blocks:
            x = block(x)
        # Vectorized pooling: no per-sample Python loop over (possibly GPU) lengths.
        pooled = self._masked_mean(x, lengths)
        return self.fc(pooled)

def get_model_mamba(params):
    """Build a MambaClassifier from ``params``, using defaults for missing keys."""
    defaults = {
        "input_size": 1024,
        "d_model": 256,
        "num_layers": 2,
        "num_classes": 7,
        "dropout": 0.1,
        "kernel_size": 3,
    }
    model_kwargs = {key: params.get(key, fallback) for key, fallback in defaults.items()}
    return MambaClassifier(**model_kwargs)

def train_model(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch is ``(inputs, lengths, labels, file_ids)``.

    Returns:
        The sum of per-batch losses weighted by batch size, divided by the
        size of ``dataloader.dataset``.
    """
    model.train()
    loss_sum = 0.0
    for inputs, lengths, labels, _ in dataloader:
        inputs, lengths, labels = inputs.to(device), lengths.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs, lengths), labels)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller last batch is averaged correctly.
        loss_sum += loss.item() * inputs.size(0)
    return loss_sum / len(dataloader.dataset)

def evaluate_model(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Returns:
        ``(loss, accuracy)``: the batch-size-weighted loss divided by the size
        of ``dataloader.dataset`` and the fraction of correct predictions.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, lengths, labels, _ in dataloader:
            inputs, lengths, labels = inputs.to(device), lengths.to(device), labels.to(device)
            logits = model(inputs, lengths)
            loss_sum += criterion(logits, labels).item() * inputs.size(0)
            predictions = torch.max(logits, 1).indices
            n_correct += (predictions == labels).sum().item()
            n_seen += labels.size(0)
    mean_loss = loss_sum / len(dataloader.dataset)
    accuracy = n_correct / n_seen
    return mean_loss, accuracy

def evaluate_metrics(model, dataloader, device):
    """Collect true labels, predicted labels and file ids over ``dataloader``.

    Returns:
        ``(y_true, y_pred, file_ids)`` where the first two are NumPy arrays
        and ``file_ids`` is a list in iteration order.
    """
    model.eval()
    true_labels, predicted_labels, collected_ids = [], [], []
    with torch.no_grad():
        for inputs, lengths, labels, file_ids in dataloader:
            inputs, lengths, labels = inputs.to(device), lengths.to(device), labels.to(device)
            logits = model(inputs, lengths)
            predictions = torch.max(logits, 1).indices
            predicted_labels.extend(predictions.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            collected_ids.extend(file_ids)
    return np.array(true_labels), np.array(predicted_labels), collected_ids

def compute_metrics(y_true, y_pred):
    """Return ``(UAR, MF1, WAR, WF1)`` for the given labels and predictions.

    UAR / WAR are the macro / weighted recall; MF1 / WF1 are the macro /
    weighted F1 score.
    """
    scores = [
        scorer(y_true, y_pred, average=averaging)
        for averaging in ('macro', 'weighted')
        for scorer in (recall_score, f1_score)
    ]
    UAR, MF1, WAR, WF1 = scores
    return UAR, MF1, WAR, WF1

def plot_confusion_matrix(cm, classes, title='Confusion Matrix'):
    """Draw ``cm`` as an annotated heat map and show the figure.

    Args:
        cm: 2-D integer confusion matrix (cells are formatted with ``'d'``).
        classes: tick labels used for both axes.
        title: figure title.
    """
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
    tick_positions = np.arange(len(classes))
    plt.xticks(tick_positions, classes, rotation=45)
    plt.yticks(tick_positions, classes)

    # Cells darker than half of the maximum get white text for contrast.
    half_max = cm.max() / 2.0
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            cell = cm[row, col]
            text_color = "white" if cell > half_max else "black"
            plt.text(col, row, format(cell, 'd'),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('Истинная метка')
    plt.xlabel('Предсказанная метка')
    plt.show()

# Emotion name -> class index. Two pairs of names share an index:
# 'joy'/'happiness' (3) and 'surprise'/'enthusiasm' (6), giving 7 classes in total.
emotion_mapping = {
    'anger': 0,
    'disgust': 1,
    'fear': 2,
    'joy': 3,
    'happiness': 3,
    'neutral': 4,
    'sadness': 5,
    'surprise': 6,
    'enthusiasm': 6
}
# Class index -> display name (used as confusion-matrix tick labels).
label_to_emotion = {
    0: 'anger',
    1: 'disgust',
    2: 'fear',
    3: 'joy/happiness',
    4: 'neutral',
    5: 'sadness',
    6: 'surprise/enthusiasm'
}

# Input files. The "1" files are read with csv_type="csv1" and the "2" / val
# files with csv_type="csv2" in run_experiment.
train_embeddings_file1 = "train_resd_embeddings.npy"
train_labels_csv1 = "train.csv"
train_embeddings_file2 = "train_meld_embeddings.npy"
train_labels_csv2 = "train_sent_emo.csv"
test_embeddings_file1 = "test_resd_embeddings.npy"
test_labels_csv1 = "test.csv"
# NOTE(review): this file name has a "wav2vec" infix the other MELD files lack — confirm it is intended.
test_embeddings_file2 = "test_meld_wav2vec_embeddings.npy"
test_labels_csv2 = "test_sent_emo.csv"
val_embeddings_file = "val_meld_embeddings.npy"
val_labels_csv = "dev_sent_emo.csv"

def run_experiment(exp_params, num_epochs=100, patience=5):
    """Train and evaluate one classifier configuration.

    Builds the datasets and loaders from the module-level file paths, trains
    with class-weighted cross-entropy and Adam, stops early when the
    validation loss has not improved for ``patience`` consecutive epochs,
    restores the weights of the best validation epoch and reports metrics on
    the validation set and both test sets.

    Args:
        exp_params: experiment settings. ``"name"`` is required; ``"normalize"``,
            ``"d_model"``, ``"num_layers"``, ``"dropout"``, ``"kernel_size"``,
            ``"loss"``, ``"optimizer"`` and ``"lr"`` are optional.
        num_epochs: maximum number of training epochs.
        patience: number of consecutive epochs without validation-loss
            improvement after which training stops.

    Returns:
        dict with ``"exp_name"`` plus ``"val"``, ``"test1"`` and ``"test2"``,
        each a dict holding ``UAR``, ``MF1``, ``WAR`` and ``WF1``.

    Raises:
        ValueError: if the requested loss or optimizer is not supported.
    """
    print(f"\nЗапуск эксперимента: {exp_params['name']}")
    num_classes = 7
    normalize_flag = exp_params.get("normalize", False)
    transform = l2_normalize if normalize_flag else None

    train_dataset1 = EmotionDataset(train_embeddings_file1, train_labels_csv1, emotion_mapping, csv_type="csv1", transform=transform)
    train_dataset2 = EmotionDataset(train_embeddings_file2, train_labels_csv2, emotion_mapping, csv_type="csv2", transform=transform)
    merged_train_dataset = ConcatDataset([train_dataset1, train_dataset2])
    val_dataset = EmotionDataset(val_embeddings_file, val_labels_csv, emotion_mapping, csv_type="csv2", transform=transform)
    test_dataset1 = EmotionDataset(test_embeddings_file1, test_labels_csv1, emotion_mapping, csv_type="csv1", transform=transform)
    test_dataset2 = EmotionDataset(test_embeddings_file2, test_labels_csv2, emotion_mapping, csv_type="csv2", transform=transform)

    batch_size = 16
    train_loader_local = DataLoader(merged_train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader_local   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader1_local = DataLoader(test_dataset1, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader2_local = DataLoader(test_dataset2, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

    model_params = {
        "input_size": 1024,
        "d_model": exp_params.get("d_model", 256),
        "num_layers": exp_params.get("num_layers", 2),
        "num_classes": num_classes,
        "dropout": exp_params.get("dropout", 0.1),
        "kernel_size": exp_params.get("kernel_size", 3)
    }
    model = get_model_mamba(model_params).to(device)

    # Per-emotion values whose inverse becomes the class weight of the loss.
    # NOTE(review): these look hand-tuned rather than real sample counts — confirm.
    train_counts = {
        'neutral': 3.25,
        'joy': 3.2,
        'surprise': 3,
        'anger': 3.2,
        'sadness': 3,
        'disgust': 3,
        'fear': 3
    }

    # Accumulate the values per class index (several emotion names can share an index).
    label_counts = defaultdict(int)
    for emotion, count in train_counts.items():
        if emotion in emotion_mapping:
            label_counts[emotion_mapping[emotion]] += count
    weights = [
        1.0 / label_counts[label] if label_counts.get(label, 0) > 0 else 1.0
        for label in range(num_classes)
    ]
    weights = torch.tensor(weights, dtype=torch.float)
    # Rescale so the weights sum to the number of classes.
    weights = weights / weights.sum() * num_classes
    weights = weights.to(device)

    loss_type = exp_params.get("loss", "crossentropy")
    if loss_type == "crossentropy":
        criterion = nn.CrossEntropyLoss(weight=weights)
    else:
        raise ValueError("Неизвестный тип loss")

    opt_type = exp_params.get("optimizer", "adam")
    lr = exp_params.get("lr", 1e-4)
    if opt_type == "adam":
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        raise ValueError("Неизвестный тип оптимизатора")

    best_val_loss = float('inf')
    counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader_local, criterion, optimizer, device)

        y_true_train, y_pred_train, _ = evaluate_metrics(model, train_loader_local, device)
        train_metrics = compute_metrics(y_true_train, y_pred_train)

        val_loss, _ = evaluate_model(model, val_loader_local, criterion, device)
        y_true_val, y_pred_val, _ = evaluate_metrics(model, val_loader_local, device)
        val_metrics = compute_metrics(y_true_val, y_pred_val)

        y_true_test1, y_pred_test1, _ = evaluate_metrics(model, test_loader1_local, device)
        test1_metrics = compute_metrics(y_true_test1, y_pred_test1)

        y_true_test2, y_pred_test2, _ = evaluate_metrics(model, test_loader2_local, device)
        test2_metrics = compute_metrics(y_true_test2, y_pred_test2)

        print(f"\nЭпоха {epoch+1}/{num_epochs}:")
        print(f"  Train Loss: {train_loss:.4f}, WAR: {train_metrics[2]:.4f}, UAR: {train_metrics[0]:.4f}, MF1: {train_metrics[1]:.4f}, WF1: {train_metrics[3]:.4f}")
        print(f"  Val   Loss: {val_loss:.4f}, WAR: {val_metrics[2]:.4f}, UAR: {val_metrics[0]:.4f}, MF1: {val_metrics[1]:.4f}, WF1: {val_metrics[3]:.4f}")
        print(f"  Test1:               WAR: {test1_metrics[2]:.4f}, UAR: {test1_metrics[0]:.4f}, MF1: {test1_metrics[1]:.4f}, WF1: {test1_metrics[3]:.4f}")
        print(f"  Test2:               WAR: {test2_metrics[2]:.4f}, UAR: {test2_metrics[0]:.4f}, MF1: {test2_metrics[1]:.4f}, WF1: {test2_metrics[3]:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
            # state_dict() references the live parameters, so snapshot a deep
            # copy; otherwise later epochs would overwrite the "best" weights.
            best_model_state = copy.deepcopy(model.state_dict())
        else:
            counter += 1
            if counter >= patience:
                print(f"Ранняя остановка на эпохе {epoch+1}")
                break

    # Report the final metrics with the best validation-loss weights.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    y_true_val, y_pred_val, _ = evaluate_metrics(model, val_loader_local, device)
    val_metrics = compute_metrics(y_true_val, y_pred_val)
    y_true_test1, y_pred_test1, _ = evaluate_metrics(model, test_loader1_local, device)
    test1_metrics = compute_metrics(y_true_test1, y_pred_test1)
    y_true_test2, y_pred_test2, _ = evaluate_metrics(model, test_loader2_local, device)
    test2_metrics = compute_metrics(y_true_test2, y_pred_test2)

    # Fix the label set so the matrix is always num_classes x num_classes and
    # lines up with the tick labels even when a class is absent from the data.
    class_ids = list(range(num_classes))
    class_names = [label_to_emotion[i] for i in class_ids]

    cm_test1 = confusion_matrix(y_true_test1, y_pred_test1, labels=class_ids)
    plot_confusion_matrix(cm_test1, classes=class_names, title='Test1 Confusion Matrix')

    cm_test2 = confusion_matrix(y_true_test2, y_pred_test2, labels=class_ids)
    plot_confusion_matrix(cm_test2, classes=class_names, title='Test2 Confusion Matrix')

    return {
        "exp_name": exp_params["name"],
        "val": {"UAR": val_metrics[0], "MF1": val_metrics[1], "WAR": val_metrics[2], "WF1": val_metrics[3]},
        "test1": {"UAR": test1_metrics[0], "MF1": test1_metrics[1], "WAR": test1_metrics[2], "WF1": test1_metrics[3]},
        "test2": {"UAR": test2_metrics[0], "MF1": test2_metrics[1], "WAR": test2_metrics[2], "WF1": test2_metrics[3]}
    }

  
n_trials = 1
# Best validation macro-F1 seen so far and the parameters that produced it.
best_mf1 = -1
best_params = None
# Result dicts of every trial that finished without an exception.
all_results = []

# Search space of the random search.
d_models = [256, 512]
num_layers_list = [2, 3, 4]
kernel_sizes = [3, 5, 7]     
dropouts = [0.1, 0.2, 0.3]
optimizers_choices = ["adam"]
losses_choices = ["crossentropy"]
# Bounds of the log-uniform learning-rate range.
lrs = [1e-3, 1e-4]
normalizations_choices = [False, True]

for trial in range(n_trials):
    # NOTE: the order of the random draws defines which configuration a given
    # seed produces, so it is kept stable.
    d_model = random.choice(d_models)
    num_layers = random.choice(num_layers_list)
    kernel_size = random.choice(kernel_sizes)
    dropout = random.choice(dropouts)
    optimizer_choice = random.choice(optimizers_choices)
    loss_choice = random.choice(losses_choices)
    lr = 10 ** random.uniform(math.log10(min(lrs)), math.log10(max(lrs)))
    normalize = random.choice(normalizations_choices)

    exp_params = {
        "name": f"Trial {trial+1}: mamba, d_model={d_model}, layers={num_layers}, kernel={kernel_size}, dropout={dropout}, opt={optimizer_choice}, loss={loss_choice}, lr={lr:.1e}, norm={normalize}",
        "d_model": d_model,
        "num_layers": num_layers,
        "kernel_size": kernel_size,
        "dropout": dropout,
        "optimizer": optimizer_choice,
        "loss": loss_choice,
        "lr": lr,
        "normalize": normalize
    }

    print(f"\nЗапуск испытания {trial+1}/{n_trials} с параметрами:")
    print(exp_params)

    try:
        result = run_experiment(exp_params, num_epochs=15, patience=15)
    except Exception as e:
        # Best-effort search: report the failed trial and continue with the next one.
        print(f"Испытание {trial+1} завершилось ошибкой: {e}")
        continue

    # Keep the outcome and track the best configuration.
    # NOTE(review): selection uses the validation macro-F1 — confirm this is the intended criterion.
    all_results.append(result)
    if result["val"]["MF1"] > best_mf1:
        best_mf1 = result["val"]["MF1"]
        best_params = exp_params


# Mamba v2

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.nn.utils.rnn import pad_sequence
import copy
from sklearn.metrics import f1_score, recall_score, confusion_matrix
import matplotlib.pyplot as plt
from collections import defaultdict
import random
import math
import itertools

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also switches cuDNN to deterministic mode with benchmarking disabled and
    exports ``PYTHONHASHSEED``.

    NOTE: Python reads ``PYTHONHASHSEED`` at interpreter start-up, so the
    assignment here only affects processes started afterwards.

    Args:
        seed: integer seed shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)

# Deterministic-only algorithms are explicitly switched off.
torch.use_deterministic_algorithms(False)

# Seed applied once when this cell is executed.
GLOBAL_SEED = 42
set_seed(GLOBAL_SEED)

def l2_normalize(x):
    """Scale every row of ``x`` to unit L2 norm.

    The norm is computed along ``dim=1``; for an input of shape
    (seq_length, input_size) each time step is normalized independently.
    A small epsilon is added to the norm so all-zero rows do not divide by zero.
    """
    eps = 1e-8
    denominator = torch.norm(x, p=2, dim=1, keepdim=True) + eps
    return x / denominator

class EmotionDataset(Dataset):
    """Precomputed utterance embeddings paired with integer emotion labels.

    Only ids that appear both in the embeddings file and in the labels CSV are
    kept. The ids are stored in sorted order so the sample order is identical
    between interpreter runs (plain set iteration over strings is not, because
    string hashing is randomized per process).
    """

    def __init__(self, embeddings_file, labels_csv, emotion_mapping, csv_type="csv1", transform=None):
        """
        Args:
            embeddings_file: .npy file holding a dict {file_id: embedding_array}.
            labels_csv: CSV file with the labels.
            emotion_mapping: dict mapping an emotion name to its class index.
            csv_type: "csv1" (columns ``name`` / ``emotion``) or "csv2"
                (columns ``Dialogue_ID`` / ``Utterance_ID`` / ``Emotion``).
            transform: optional callable applied to every embedding tensor.

        Raises:
            ValueError: if ``csv_type`` is neither "csv1" nor "csv2".
        """
        # The file stores a pickled dict inside a 0-d object array, hence .item().
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted files.
        self.embeddings = np.load(embeddings_file, allow_pickle=True).item()
        df = pd.read_csv(labels_csv)
        if csv_type == "csv1":
            self.labels = dict(zip(df['name'], df['emotion']))
        elif csv_type == "csv2":
            # Rebuild the file id used as embedding key: "dia<Dialogue_ID>_utt<Utterance_ID>".
            df['Dialogue_ID'] = df['Dialogue_ID'].astype(str)
            df['Utterance_ID'] = df['Utterance_ID'].astype(str)
            df['file_id'] = "dia" + df['Dialogue_ID'] + "_utt" + df['Utterance_ID']
            self.labels = dict(zip(df['file_id'], df['Emotion']))
        else:
            raise ValueError("csv_type должен быть 'csv1' или 'csv2'")

        embedding_ids = set(self.embeddings.keys())
        # Sorted (by string form) to get a reproducible sample order.
        self.ids = sorted(embedding_ids.intersection(self.labels.keys()), key=str)
        missing = embedding_ids - set(self.ids)
        if missing:
            print(f"Предупреждение: отсутствуют метки для {missing}")
        self.emotion_mapping = emotion_mapping
        self.transform = transform

    def __len__(self):
        """Return the number of samples that have both an embedding and a label."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``(embedding_tensor, label_tensor, file_id)`` for one sample.

        Raises:
            ValueError: if the sample's emotion is not present in ``emotion_mapping``.
        """
        file_id = self.ids[index]
        # Drop the leading axis: assumed (1, seq_length, input_size) -> (seq_length, input_size).
        emb = self.embeddings[file_id][0]
        emb_tensor = torch.tensor(emb, dtype=torch.float32)
        if self.transform is not None:
            emb_tensor = self.transform(emb_tensor)
        emotion_str = self.labels[file_id]
        if emotion_str not in self.emotion_mapping:
            raise ValueError(f"Эмоция {emotion_str} не найдена в emotion_mapping для {file_id}")
        label = self.emotion_mapping[emotion_str]
        label_tensor = torch.tensor(label, dtype=torch.long)
        return emb_tensor, label_tensor, file_id

def collate_fn(batch):
    """Merge ``(embedding, label, file_id)`` samples into one padded batch.

    Returns:
        A tuple ``(padded_embeddings, lengths, labels, file_ids)``: the
        embeddings zero-padded to the longest sequence (batch first), the
        original sequence lengths as a long tensor, the stacked labels and the
        file ids as a list.
    """
    sequences, label_tensors, ids = zip(*batch)
    seq_lengths = torch.tensor([seq.shape[0] for seq in sequences], dtype=torch.long)
    return (
        pad_sequence(sequences, batch_first=True),
        seq_lengths,
        torch.stack(label_tensors),
        list(ids),
    )

class CustomMambaBlock(nn.Module):
    """Residual block built from linear layers.

    The input is projected to ``d_model``, two further linear maps of that
    projection are added to it, ReLU is applied, and the result is projected
    back to ``d_input``, passed through dropout, added to the block input and
    layer-normalized.
    """

    def __init__(self, d_input, d_model, dropout=0.1):
        super().__init__()
        self.in_proj = nn.Linear(d_input, d_model)
        self.s_B = nn.Linear(d_model, d_model)
        self.s_C = nn.Linear(d_model, d_model)
        self.out_proj = nn.Linear(d_model, d_input)
        self.norm = nn.LayerNorm(d_input)
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Apply the block; the output has the same shape as ``x``."""
        residual = x  # kept for the skip connection
        hidden = self.in_proj(x)
        hidden = hidden + self.s_B(hidden) + self.s_C(hidden)
        update = self.dropout(self.out_proj(self.activation(hidden)))
        # Residual connection followed by layer normalization.
        return self.norm(update + residual)


class CustomMambaClassifier(nn.Module):
    """Classifier: linear input projection, a stack of CustomMambaBlock layers,
    length-aware mean pooling over time and a linear output layer.
    """

    def __init__(self, input_size=1024, d_model=256, num_layers=2, num_classes=7, dropout=0.1):
        super().__init__()
        self.input_proj = nn.Linear(input_size, d_model)
        self.blocks = nn.ModuleList([
            CustomMambaBlock(d_model, d_model, dropout=dropout)
            for _ in range(num_layers)
        ])
        self.fc = nn.Linear(d_model, num_classes)

    @staticmethod
    def _masked_mean(x, lengths):
        """Average ``x`` (batch, seq_len, d_model) over the first ``lengths[i]`` steps.

        Samples with a non-positive length yield a zero vector. Lengths larger
        than ``seq_len`` are treated as ``seq_len``.
        """
        max_len = x.size(1)
        lengths = torch.as_tensor(lengths, device=x.device).clamp(min=0, max=max_len)
        positions = torch.arange(max_len, device=x.device)
        valid = positions.unsqueeze(0) < lengths.unsqueeze(1)  # (batch, seq_len)
        # masked_fill (rather than multiplying by the mask) keeps padded
        # positions from contributing even if they hold non-finite values.
        summed = x.masked_fill(~valid.unsqueeze(-1), 0.0).sum(dim=1)
        # clamp(min=1) avoids dividing by zero for empty sequences (their sum is 0).
        counts = lengths.clamp(min=1).unsqueeze(1).to(x.dtype)
        return summed / counts

    def forward(self, x, lengths):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: padded batch of shape (batch, seq_len, input_size).
            lengths: number of valid time steps of every sample.
        """
        x = self.input_proj(x)
        for block in self.blocks:
            x = block(x)
        # Vectorized pooling: no per-sample Python loop over (possibly GPU) lengths.
        pooled = self._masked_mean(x, lengths)
        return self.fc(pooled)


def get_model_mamba(params):
    """Build a CustomMambaClassifier from ``params``, using defaults for missing keys.

    Keys other than the five read here (for example ``"kernel_size"``) are ignored.
    """
    defaults = {
        "input_size": 1024,
        "d_model": 256,
        "num_layers": 2,
        "num_classes": 7,
        "dropout": 0.1,
    }
    model_kwargs = {key: params.get(key, fallback) for key, fallback in defaults.items()}
    return CustomMambaClassifier(**model_kwargs)

def train_model(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch is ``(inputs, lengths, labels, file_ids)``.

    Returns:
        The sum of per-batch losses weighted by batch size, divided by the
        size of ``dataloader.dataset``.
    """
    model.train()
    loss_sum = 0.0
    for inputs, lengths, labels, _ in dataloader:
        inputs, lengths, labels = inputs.to(device), lengths.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs, lengths), labels)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller last batch is averaged correctly.
        loss_sum += loss.item() * inputs.size(0)
    return loss_sum / len(dataloader.dataset)

def evaluate_model(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Returns:
        ``(loss, accuracy)``: the batch-size-weighted loss divided by the size
        of ``dataloader.dataset`` and the fraction of correct predictions.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, lengths, labels, _ in dataloader:
            inputs, lengths, labels = inputs.to(device), lengths.to(device), labels.to(device)
            logits = model(inputs, lengths)
            loss_sum += criterion(logits, labels).item() * inputs.size(0)
            predictions = torch.max(logits, 1).indices
            n_correct += (predictions == labels).sum().item()
            n_seen += labels.size(0)
    mean_loss = loss_sum / len(dataloader.dataset)
    accuracy = n_correct / n_seen
    return mean_loss, accuracy

def evaluate_metrics(model, dataloader, device):
    """Collect true labels, predicted labels and file ids over ``dataloader``.

    Returns:
        ``(y_true, y_pred, file_ids)`` where the first two are NumPy arrays
        and ``file_ids`` is a list in iteration order.
    """
    model.eval()
    true_labels, predicted_labels, collected_ids = [], [], []
    with torch.no_grad():
        for inputs, lengths, labels, file_ids in dataloader:
            inputs, lengths, labels = inputs.to(device), lengths.to(device), labels.to(device)
            logits = model(inputs, lengths)
            predictions = torch.max(logits, 1).indices
            predicted_labels.extend(predictions.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            collected_ids.extend(file_ids)
    return np.array(true_labels), np.array(predicted_labels), collected_ids

def compute_metrics(y_true, y_pred):
    """Return ``(UAR, MF1, WAR, WF1)`` for the given labels and predictions.

    UAR / WAR are the macro / weighted recall; MF1 / WF1 are the macro /
    weighted F1 score.
    """
    scores = [
        scorer(y_true, y_pred, average=averaging)
        for averaging in ('macro', 'weighted')
        for scorer in (recall_score, f1_score)
    ]
    UAR, MF1, WAR, WF1 = scores
    return UAR, MF1, WAR, WF1

def plot_confusion_matrix(cm, classes, title='Confusion Matrix'):
    """Draw ``cm`` as an annotated heat map and show the figure.

    Args:
        cm: 2-D integer confusion matrix (cells are formatted with ``'d'``).
        classes: tick labels used for both axes.
        title: figure title.
    """
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
    tick_positions = np.arange(len(classes))
    plt.xticks(tick_positions, classes, rotation=45)
    plt.yticks(tick_positions, classes)

    # Cells darker than half of the maximum get white text for contrast.
    half_max = cm.max() / 2.0
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            cell = cm[row, col]
            text_color = "white" if cell > half_max else "black"
            plt.text(col, row, format(cell, 'd'),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('Истинная метка')
    plt.xlabel('Предсказанная метка')
    plt.show()

# Emotion name -> class index. Two pairs of names share an index:
# 'joy'/'happiness' (3) and 'surprise'/'enthusiasm' (6), giving 7 classes in total.
emotion_mapping = {
    'anger': 0,
    'disgust': 1,
    'fear': 2,
    'joy': 3,
    'happiness': 3,
    'neutral': 4,
    'sadness': 5,
    'surprise': 6,
    'enthusiasm': 6
}
# Class index -> display name (used as confusion-matrix tick labels).
label_to_emotion = {
    0: 'anger',
    1: 'disgust',
    2: 'fear',
    3: 'joy/happiness',
    4: 'neutral',
    5: 'sadness',
    6: 'surprise/enthusiasm'
}

# Input files. The "1" files are read with csv_type="csv1" and the "2" / val
# files with csv_type="csv2" in run_experiment.
train_embeddings_file1 = "train_resd_embeddings.npy"
train_labels_csv1 = "train.csv"
train_embeddings_file2 = "train_meld_embeddings.npy"
train_labels_csv2 = "train_sent_emo.csv"
test_embeddings_file1 = "test_resd_embeddings.npy"
test_labels_csv1 = "test.csv"
# NOTE(review): this file name has a "wav2vec" infix the other MELD files lack — confirm it is intended.
test_embeddings_file2 = "test_meld_wav2vec_embeddings.npy"
test_labels_csv2 = "test_sent_emo.csv"
val_embeddings_file = "val_meld_embeddings.npy"
val_labels_csv = "dev_sent_emo.csv"

def run_experiment(exp_params, num_epochs=100, patience=5):
    """Train and evaluate one classifier configuration and save the model.

    Builds the datasets and loaders from the module-level file paths, trains
    with class-weighted cross-entropy and Adam, stops early when the
    validation loss has not improved for ``patience`` consecutive epochs,
    restores the weights of the best validation epoch, reports metrics on the
    validation set and both test sets and writes the restored weights together
    with ``exp_params`` to ``best_model.pt``.

    Args:
        exp_params: experiment settings. ``"name"`` is required; ``"normalize"``,
            ``"d_model"``, ``"num_layers"``, ``"dropout"``, ``"kernel_size"``,
            ``"loss"``, ``"optimizer"`` and ``"lr"`` are optional.
        num_epochs: maximum number of training epochs.
        patience: number of consecutive epochs without validation-loss
            improvement after which training stops.

    Returns:
        dict with ``"exp_name"`` plus ``"val"``, ``"test1"`` and ``"test2"``,
        each a dict holding ``UAR``, ``MF1``, ``WAR`` and ``WF1``.

    Raises:
        ValueError: if the requested loss or optimizer is not supported.
    """
    print(f"\nЗапуск эксперимента: {exp_params['name']}")
    num_classes = 7
    normalize_flag = exp_params.get("normalize", False)
    transform = l2_normalize if normalize_flag else None

    train_dataset1 = EmotionDataset(train_embeddings_file1, train_labels_csv1, emotion_mapping, csv_type="csv1", transform=transform)
    train_dataset2 = EmotionDataset(train_embeddings_file2, train_labels_csv2, emotion_mapping, csv_type="csv2", transform=transform)
    merged_train_dataset = ConcatDataset([train_dataset1, train_dataset2])
    val_dataset = EmotionDataset(val_embeddings_file, val_labels_csv, emotion_mapping, csv_type="csv2", transform=transform)
    test_dataset1 = EmotionDataset(test_embeddings_file1, test_labels_csv1, emotion_mapping, csv_type="csv1", transform=transform)
    test_dataset2 = EmotionDataset(test_embeddings_file2, test_labels_csv2, emotion_mapping, csv_type="csv2", transform=transform)

    batch_size = 16
    train_loader_local = DataLoader(merged_train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader_local   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader1_local = DataLoader(test_dataset1, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader2_local = DataLoader(test_dataset2, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

    model_params = {
        "input_size": 1024,
        "d_model": exp_params.get("d_model", 256),
        "num_layers": exp_params.get("num_layers", 2),
        "num_classes": num_classes,
        "dropout": exp_params.get("dropout", 0.1),
        "kernel_size": exp_params.get("kernel_size", 3)
    }
    model = get_model_mamba(model_params).to(device)

    # Per-emotion values whose inverse becomes the class weight of the loss.
    # NOTE(review): these look hand-tuned rather than real sample counts — confirm.
    train_counts = {
        'neutral': 3.3,
        'joy': 2.5,
        'surprise': 2.5,
        'anger': 2.5,
        'sadness': 2.2,
        'disgust': 3.95,
        'fear': 3
    }

    # Accumulate the values per class index (several emotion names can share an index).
    label_counts = defaultdict(int)
    for emotion, count in train_counts.items():
        if emotion in emotion_mapping:
            label_counts[emotion_mapping[emotion]] += count
    weights = [
        1.0 / label_counts[label] if label_counts.get(label, 0) > 0 else 1.0
        for label in range(num_classes)
    ]
    weights = torch.tensor(weights, dtype=torch.float)
    # Rescale so the weights sum to the number of classes.
    weights = weights / weights.sum() * num_classes
    weights = weights.to(device)

    loss_type = exp_params.get("loss", "crossentropy")
    if loss_type == "crossentropy":
        criterion = nn.CrossEntropyLoss(weight=weights)
    else:
        raise ValueError("Неизвестный тип loss")

    opt_type = exp_params.get("optimizer", "adam")
    lr = exp_params.get("lr", 1e-4)
    if opt_type == "adam":
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        raise ValueError("Неизвестный тип оптимизатора")

    best_val_loss = float('inf')
    counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader_local, criterion, optimizer, device)

        y_true_train, y_pred_train, _ = evaluate_metrics(model, train_loader_local, device)
        train_metrics = compute_metrics(y_true_train, y_pred_train)

        val_loss, _ = evaluate_model(model, val_loader_local, criterion, device)
        y_true_val, y_pred_val, _ = evaluate_metrics(model, val_loader_local, device)
        val_metrics = compute_metrics(y_true_val, y_pred_val)

        y_true_test1, y_pred_test1, _ = evaluate_metrics(model, test_loader1_local, device)
        test1_metrics = compute_metrics(y_true_test1, y_pred_test1)

        y_true_test2, y_pred_test2, _ = evaluate_metrics(model, test_loader2_local, device)
        test2_metrics = compute_metrics(y_true_test2, y_pred_test2)

        print(f"\nЭпоха {epoch+1}/{num_epochs}:")
        print(f"  Train Loss: {train_loss:.4f}, WAR: {train_metrics[2]:.4f}, UAR: {train_metrics[0]:.4f}, MF1: {train_metrics[1]:.4f}, WF1: {train_metrics[3]:.4f}")
        print(f"  Val   Loss: {val_loss:.4f}, WAR: {val_metrics[2]:.4f}, UAR: {val_metrics[0]:.4f}, MF1: {val_metrics[1]:.4f}, WF1: {val_metrics[3]:.4f}")
        print(f"  Test1:               WAR: {test1_metrics[2]:.4f}, UAR: {test1_metrics[0]:.4f}, MF1: {test1_metrics[1]:.4f}, WF1: {test1_metrics[3]:.4f}")
        print(f"  Test2:               WAR: {test2_metrics[2]:.4f}, UAR: {test2_metrics[0]:.4f}, MF1: {test2_metrics[1]:.4f}, WF1: {test2_metrics[3]:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
            # state_dict() references the live parameters, so snapshot a deep
            # copy; otherwise later epochs would overwrite the "best" weights.
            best_model_state = copy.deepcopy(model.state_dict())
        else:
            counter += 1
            if counter >= patience:
                print(f"Ранняя остановка на эпохе {epoch+1}")
                break

    # Report (and save) the model with the best validation-loss weights.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    y_true_val, y_pred_val, _ = evaluate_metrics(model, val_loader_local, device)
    val_metrics = compute_metrics(y_true_val, y_pred_val)
    y_true_test1, y_pred_test1, _ = evaluate_metrics(model, test_loader1_local, device)
    test1_metrics = compute_metrics(y_true_test1, y_pred_test1)
    y_true_test2, y_pred_test2, _ = evaluate_metrics(model, test_loader2_local, device)
    test2_metrics = compute_metrics(y_true_test2, y_pred_test2)

    # Compute and show the confusion matrices of the test sets. The label set
    # is fixed so the matrix is always num_classes x num_classes and lines up
    # with the tick labels even when a class is absent from the data.
    class_ids = list(range(num_classes))
    class_names = [label_to_emotion[i] for i in class_ids]

    cm_test1 = confusion_matrix(y_true_test1, y_pred_test1, labels=class_ids)
    plot_confusion_matrix(cm_test1, classes=class_names, title='Test1 Confusion Matrix')

    cm_test2 = confusion_matrix(y_true_test2, y_pred_test2, labels=class_ids)
    plot_confusion_matrix(cm_test2, classes=class_names, title='Test2 Confusion Matrix')

    # NOTE: the file name is fixed, so every call overwrites the previous checkpoint.
    save_path = "best_model.pt"

    torch.save({
        'model_state_dict': model.state_dict(),
        'exp_params': exp_params
    }, save_path)
    print(f"Модель и параметры эксперимента сохранены в файл: {save_path}")

    return {
        "exp_name": exp_params["name"],
        "val": {"UAR": val_metrics[0], "MF1": val_metrics[1], "WAR": val_metrics[2], "WF1": val_metrics[3]},
        "test1": {"UAR": test1_metrics[0], "MF1": test1_metrics[1], "WAR": test1_metrics[2], "WF1": test1_metrics[3]},
        "test2": {"UAR": test2_metrics[0], "MF1": test2_metrics[1], "WAR": test2_metrics[2], "WF1": test2_metrics[3]}
    }

  
n_trials = 1
# Best validation macro-F1 seen so far and the parameters that produced it.
best_mf1 = -1
best_params = None
# Result dicts of every trial that finished without an exception.
all_results = []

# Search space of the random search.
d_models = [256, 512]
num_layers_list = [2, 3, 4]
kernel_sizes = [3, 5, 7]     
dropouts = [0.1, 0.2, 0.3]
optimizers_choices = ["adam"]
losses_choices = ["crossentropy"]
# Bounds of the log-uniform learning-rate range.
lrs = [1e-3, 1e-4]
normalizations_choices = [False, True]

for trial in range(n_trials):
    # NOTE: the order of the random draws defines which configuration a given
    # seed produces, so it is kept stable.
    d_model = random.choice(d_models)
    num_layers = random.choice(num_layers_list)
    # NOTE(review): kernel_size is sampled and logged, but get_model_mamba in
    # this cell does not pass it to the model — confirm whether it is still needed.
    kernel_size = random.choice(kernel_sizes)
    dropout = random.choice(dropouts)
    optimizer_choice = random.choice(optimizers_choices)
    loss_choice = random.choice(losses_choices)
    lr = 10 ** random.uniform(math.log10(min(lrs)), math.log10(max(lrs)))
    normalize = random.choice(normalizations_choices)

    exp_params = {
        "name": f"Trial {trial+1}: mamba, d_model={d_model}, layers={num_layers}, kernel={kernel_size}, dropout={dropout}, opt={optimizer_choice}, loss={loss_choice}, lr={lr:.1e}, norm={normalize}",
        "d_model": d_model,
        "num_layers": num_layers,
        "kernel_size": kernel_size,
        "dropout": dropout,
        "optimizer": optimizer_choice,
        "loss": loss_choice,
        "lr": lr,
        "normalize": normalize
    }

    print(f"\nЗапуск испытания {trial+1}/{n_trials} с параметрами:")
    print(exp_params)

    try:
        result = run_experiment(exp_params, num_epochs=11, patience=15)
    except Exception as e:
        # Best-effort search: report the failed trial and continue with the next one.
        print(f"Испытание {trial+1} завершилось ошибкой: {e}")
        continue

    # Keep the outcome and track the best configuration.
    # NOTE(review): selection uses the validation macro-F1 — confirm this is the intended criterion.
    all_results.append(result)
    if result["val"]["MF1"] > best_mf1:
        best_mf1 = result["val"]["MF1"]
        best_params = exp_params


# Mamba v3

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.nn.utils.rnn import pad_sequence
import copy
from sklearn.metrics import f1_score, recall_score, confusion_matrix
import matplotlib.pyplot as plt
from collections import defaultdict
import random
import math
import itertools
from torch.utils.data import WeightedRandomSampler

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def set_seed(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU plus CUDA), switches
    cuDNN to deterministic mode with benchmarking disabled, and exports
    ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # NOTE: hash randomization is fixed at interpreter start-up, so this only
    # influences Python processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)

# Seed shared by every experiment in this section of the notebook.
GLOBAL_SEED = 42

# Strict deterministic-algorithm enforcement is explicitly turned off;
# reproducibility relies on the seeding done by set_seed() instead.
torch.use_deterministic_algorithms(False)
set_seed(GLOBAL_SEED)

def l2_normalize(x):
    """Scale each time-step vector of ``x`` to unit L2 norm.

    Args:
        x: Tensor of shape (seq_length, input_size); the norm is taken along
            dimension 1, so every row is normalized independently.

    Returns:
        Tensor of the same shape as ``x``. A small constant (1e-8) is added
        to each norm so that all-zero rows do not cause a division by zero.
    """
    row_norms = x.norm(p=2, dim=1, keepdim=True)
    return x / (row_norms + 1e-8)

class EmotionDataset(Dataset):
    """Dataset pairing precomputed embeddings with emotion class labels.

    Each item is ``(embedding, label, file_id)`` where ``embedding`` is a
    float32 tensor, ``label`` is a scalar long tensor and ``file_id`` is the
    key shared by the embeddings file and the label CSV.
    """

    def __init__(self, embeddings_file, labels_csv, emotion_mapping, csv_type="csv1", transform=None):
        """Load the embeddings and labels and keep the ids present in both.

        Args:
            embeddings_file: ``.npy`` file holding a pickled dictionary
                ``{file_id: embedding_array}``.
            labels_csv: CSV file with the labels.
            emotion_mapping: Dictionary converting an emotion name into a
                class index.
            csv_type: ``"csv1"`` (columns ``name`` / ``emotion``) or
                ``"csv2"`` (columns ``Dialogue_ID`` / ``Utterance_ID`` /
                ``Emotion``; the id is built as ``dia<D>_utt<U>``).
            transform: Optional callable applied to every embedding tensor.

        Raises:
            ValueError: If ``csv_type`` is neither ``"csv1"`` nor ``"csv2"``.
        """
        # NOTE: allow_pickle=True unpickles the file contents; only load
        # embedding files from a trusted source.
        self.embeddings = np.load(embeddings_file, allow_pickle=True).item()
        df = pd.read_csv(labels_csv)
        if csv_type == "csv1":
            self.labels = dict(zip(df['name'], df['emotion']))
        elif csv_type == "csv2":
            df['Dialogue_ID'] = df['Dialogue_ID'].astype(str)
            df['Utterance_ID'] = df['Utterance_ID'].astype(str)
            df['file_id'] = "dia" + df['Dialogue_ID'] + "_utt" + df['Utterance_ID']
            self.labels = dict(zip(df['file_id'], df['Emotion']))
        else:
            raise ValueError("csv_type должен быть 'csv1' или 'csv2'")

        # Keep the order of the embeddings dictionary. Building the id list
        # from a set made the sample order depend on string hashing, which
        # differs between interpreter runs and so broke reproducibility.
        self.ids = [file_id for file_id in self.embeddings if file_id in self.labels]
        missing = set(self.embeddings) - set(self.labels)
        if missing:
            print(f"Предупреждение: отсутствуют метки для {missing}")
        self.emotion_mapping = emotion_mapping
        self.transform = transform

    def __len__(self):
        """Return the number of samples that have both embedding and label."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``(embedding, label, file_id)`` for the sample at ``index``.

        Raises:
            ValueError: If the sample's emotion is missing from
                ``emotion_mapping``.
        """
        file_id = self.ids[index]
        # Drop the leading axis: (1, seq_length, input_size) -> (seq_length, input_size)
        emb = self.embeddings[file_id][0]
        emb_tensor = torch.tensor(emb, dtype=torch.float32)
        if self.transform is not None:
            emb_tensor = self.transform(emb_tensor)
        emotion_str = self.labels[file_id]
        if emotion_str not in self.emotion_mapping:
            raise ValueError(f"Эмоция {emotion_str} не найдена в emotion_mapping для {file_id}")
        label = self.emotion_mapping[emotion_str]
        label_tensor = torch.tensor(label, dtype=torch.long)
        return emb_tensor, label_tensor, file_id

def collate_fn(batch):
    """Combine ``(embedding, label, file_id)`` samples into one padded batch.

    Args:
        batch: Sequence of samples whose embeddings may differ in length
            along dimension 0.

    Returns:
        Tuple ``(padded_embeddings, lengths, labels, file_ids)``: the
        embeddings padded to a common length with the batch axis first, the
        original lengths as a long tensor, the stacked labels, and the file
        ids as a list.
    """
    sequences, targets, names = map(list, zip(*batch))
    seq_lengths = torch.tensor([seq.shape[0] for seq in sequences], dtype=torch.long)
    padded = pad_sequence(sequences, batch_first=True)
    return padded, seq_lengths, torch.stack(targets), names

class CustomMambaBlock(nn.Module):
    """Residual feed-forward block used by ``CustomMambaClassifier``.

    The block is built only from linear layers, ReLU, dropout and layer
    normalization, all applied over the last dimension of the input.
    """

    def __init__(self, d_input, d_model, dropout=0.1):
        """Create the block's layers.

        Args:
            d_input: Size of the last dimension of the input and output.
            d_model: Width of the hidden projection.
            dropout: Dropout probability applied after the output projection.
        """
        super().__init__()
        # Attribute names and creation order are kept stable so that
        # checkpoints and seeded initialisation stay compatible.
        self.in_proj = nn.Linear(d_input, d_model)
        self.s_B = nn.Linear(d_model, d_model)
        self.s_C = nn.Linear(d_model, d_model)
        self.out_proj = nn.Linear(d_model, d_input)
        self.norm = nn.LayerNorm(d_input)
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Apply the block and return a tensor shaped like ``x``."""
        hidden = self.in_proj(x)
        branch_b = self.s_B(hidden)
        branch_c = self.s_C(hidden)
        hidden = self.activation(hidden + branch_b + branch_c)
        update = self.dropout(self.out_proj(hidden))
        # Residual connection followed by layer normalization.
        return self.norm(update + x)




class CustomMambaClassifier(nn.Module):
    """Sequence classifier: projection, stacked blocks, masked mean pooling.

    The input is projected to ``d_model``, passed through ``num_layers``
    ``CustomMambaBlock`` layers, averaged over the valid time steps of each
    sequence and mapped to class logits.
    """

    def __init__(self, input_size=1024, d_model=256, num_layers=2, num_classes=7, dropout=0.1):
        """Create the classifier's layers.

        Args:
            input_size: Size of each input feature vector.
            d_model: Width used by the projection and by every block.
            num_layers: Number of stacked blocks.
            num_classes: Number of output logits.
            dropout: Dropout probability passed to every block.
        """
        super().__init__()
        self.input_proj = nn.Linear(input_size, d_model)
        self.blocks = nn.ModuleList([
            CustomMambaBlock(d_model, d_model, dropout=dropout)
            for _ in range(num_layers)
        ])
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x, lengths):
        """Return class logits for a padded batch.

        Args:
            x: Padded batch indexed as (batch, time, features).
            lengths: Number of valid time steps of each sequence.

        Returns:
            Tensor with one row of ``num_classes`` logits per sequence.
        """
        hidden = self.input_proj(x)
        for layer in self.blocks:
            hidden = layer(hidden)

        # Average only over the valid (non-padded) steps of every sequence;
        # a zero-length sequence is represented by a zero vector.
        feature_dim = hidden.size(2)
        summaries = [
            hidden[idx, :n_valid, :].mean(dim=0)
            if n_valid > 0
            else torch.zeros(feature_dim, device=hidden.device)
            for idx, n_valid in enumerate(lengths)
        ]
        return self.fc(torch.stack(summaries, dim=0))


def get_model_mamba(params):
    """Build a ``CustomMambaClassifier`` from a parameter dictionary.

    Args:
        params: Dictionary that may contain ``input_size``, ``d_model``,
            ``num_layers``, ``num_classes`` and ``dropout``. Missing keys
            fall back to the defaults listed below; any other key (for
            example ``kernel_size``) is ignored.

    Returns:
        The constructed, untrained classifier.
    """
    defaults = (
        ("input_size", 1024),
        ("d_model", 256),
        ("num_layers", 2),
        ("num_classes", 7),
        ("dropout", 0.1),
    )
    model_kwargs = {key: params.get(key, fallback) for key, fallback in defaults}
    return CustomMambaClassifier(**model_kwargs)

def train_model(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module called as ``model(inputs, lengths)``.
        dataloader: Iterable of ``(inputs, lengths, labels, file_ids)``
            batches.
        criterion: Loss called as ``criterion(outputs, labels)``; the
            averaging below presumes it returns a per-batch mean.
        optimizer: Optimizer updating the model's parameters.
        device: Device the batch tensors are moved to.

    Returns:
        Mean loss per sample over the samples actually processed, or 0.0
        when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    seen = 0
    for inputs, lengths, labels, _ in dataloader:
        inputs = inputs.to(device)
        lengths = lengths.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs, lengths)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        seen += batch_size

    # Divide by the number of samples really drawn: with a sampler (or
    # drop_last) that number can differ from len(dataloader.dataset), which
    # would silently scale the reported loss.
    return running_loss / seen if seen else 0.0

def evaluate_model(model, dataloader, criterion, device):
    """Compute the mean loss and the accuracy of ``model`` on ``dataloader``.

    Args:
        model: Module called as ``model(inputs, lengths)``.
        dataloader: Iterable of ``(inputs, lengths, labels, file_ids)``
            batches.
        criterion: Loss called as ``criterion(outputs, labels)``; the
            averaging below presumes it returns a per-batch mean.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples
        actually processed.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, lengths, labels, _ in dataloader:
            inputs = inputs.to(device)
            lengths = lengths.to(device)
            labels = labels.to(device)

            outputs = model(inputs, lengths)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)

            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # Loss and accuracy share the same denominator (samples seen); the loss
    # used to be divided by len(dataloader.dataset), which disagrees with
    # `total` whenever a sampler or drop_last is in use.
    return running_loss / total, correct / total

def evaluate_metrics(model, dataloader, device):
    """Collect true labels, predicted labels and file ids over a loader.

    Args:
        model: Module called as ``model(inputs, lengths)``.
        dataloader: Iterable of ``(inputs, lengths, labels, file_ids)``
            batches.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(y_true, y_pred, file_ids)``: two NumPy arrays with the true
        and predicted class indices, and the list of file ids in the order
        the samples were visited.
    """
    model.eval()
    true_acc, pred_acc, id_acc = [], [], []
    with torch.no_grad():
        for inputs, lengths, labels, file_ids in dataloader:
            inputs = inputs.to(device)
            lengths = lengths.to(device)
            labels = labels.to(device)
            logits = model(inputs, lengths)
            # The predicted class is the index of the largest logit.
            predicted = logits.max(dim=1).indices
            pred_acc.extend(predicted.cpu().numpy())
            true_acc.extend(labels.cpu().numpy())
            id_acc.extend(file_ids)
    return np.array(true_acc), np.array(pred_acc), id_acc

def compute_metrics(y_true, y_pred):
    """Compute macro- and weighted-averaged recall and F1 scores.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.

    Returns:
        Tuple ``(UAR, MF1, WAR, WF1)``: macro recall, macro F1, weighted
        recall and weighted F1, in that order.
    """
    averaging_modes = ('macro', 'weighted')
    uar, war = (recall_score(y_true, y_pred, average=mode) for mode in averaging_modes)
    mf1, wf1 = (f1_score(y_true, y_pred, average=mode) for mode in averaging_modes)
    return uar, mf1, war, wf1

def plot_confusion_matrix(cm, classes, title='Confusion Matrix'):
    """Draw a confusion matrix as an annotated heat map and show it.

    Args:
        cm: Two-dimensional integer array; rows are true labels and columns
            are predicted labels.
        classes: Tick labels used on both axes.
        title: Title of the figure.
    """
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Cells above half of the maximum count are annotated in white, the
    # remaining ones in black.
    half_max = cm.max() / 2.0
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            count = cm[row, col]
            text_color = "white" if count > half_max else "black"
            plt.text(col, row, format(count, 'd'),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('Истинная метка')
    plt.xlabel('Предсказанная метка')
    plt.show()

# Emotion name -> class index. Names that share an index are merged into a
# single class (joy/happiness and surprise/enthusiasm).
emotion_mapping = dict(
    anger=0,
    disgust=1,
    fear=2,
    joy=3, happiness=3,
    neutral=4,
    sadness=5,
    surprise=6, enthusiasm=6,
)

# Class index -> display name used on the confusion-matrix axes.
label_to_emotion = dict(enumerate((
    'anger',
    'disgust',
    'fear',
    'joy/happiness',
    'neutral',
    'sadness',
    'surprise/enthusiasm',
)))

# Embedding / label file pairs. Suffix 1 files are read with csv_type="csv1"
# and suffix 2 (and validation) files with csv_type="csv2" by run_experiment.
train_embeddings_file1, train_labels_csv1 = "train_resd_embeddings.npy", "train.csv"
train_embeddings_file2, train_labels_csv2 = "train_meld_embeddings.npy", "train_sent_emo.csv"

test_embeddings_file1, test_labels_csv1 = "test_resd_embeddings.npy", "test.csv"
# NOTE(review): this is the only file name containing "wav2vec" - confirm it
# holds the same kind of embeddings as the other files.
test_embeddings_file2, test_labels_csv2 = "test_meld_wav2vec_embeddings.npy", "test_sent_emo.csv"

val_embeddings_file, val_labels_csv = "val_meld_embeddings.npy", "dev_sent_emo.csv"

def run_experiment(exp_params, num_epochs=100, patience=5):
    """Train one model configuration and evaluate it on val and test sets.

    The two training corpora are merged and sampled so that each corpus
    contributes the same total sampling weight. Training uses early stopping
    on the validation loss; the weights with the lowest validation loss are
    restored before the final evaluation and saved to ``new_best_model.pt``.

    Args:
        exp_params: Experiment configuration. ``name`` is required; the
            optional keys are ``normalize``, ``d_model``, ``num_layers``,
            ``dropout``, ``kernel_size``, ``loss``, ``optimizer`` and ``lr``.
        num_epochs: Maximum number of training epochs.
        patience: Number of consecutive epochs without validation-loss
            improvement after which training stops.

    Returns:
        Dictionary with ``exp_name`` and, for each of ``val``, ``test1`` and
        ``test2``, a dictionary with the keys ``UAR``, ``MF1``, ``WAR`` and
        ``WF1``.

    Raises:
        ValueError: If the loss or optimizer type is not supported.
    """
    print(f"\nЗапуск эксперимента: {exp_params['name']}")
    normalize_flag = exp_params.get("normalize", False)
    transform = l2_normalize if normalize_flag else None
    num_classes = 7

    train_dataset1 = EmotionDataset(train_embeddings_file1, train_labels_csv1, emotion_mapping, csv_type="csv1", transform=transform)
    train_dataset2 = EmotionDataset(train_embeddings_file2, train_labels_csv2, emotion_mapping, csv_type="csv2", transform=transform)

    len1 = len(train_dataset1)
    len2 = len(train_dataset2)
    total = len1 + len2

    # Every sample is weighted by 1 / (size of its corpus), so both corpora
    # are drawn with equal overall probability regardless of their sizes.
    sample_weights = [1.0 / len1] * len1 + [1.0 / len2] * len2
    merged_train_dataset = ConcatDataset([train_dataset1, train_dataset2])

    val_dataset = EmotionDataset(val_embeddings_file, val_labels_csv, emotion_mapping, csv_type="csv2", transform=transform)
    test_dataset1 = EmotionDataset(test_embeddings_file1, test_labels_csv1, emotion_mapping, csv_type="csv1", transform=transform)
    test_dataset2 = EmotionDataset(test_embeddings_file2, test_labels_csv2, emotion_mapping, csv_type="csv2", transform=transform)

    batch_size = 16
    sampler = WeightedRandomSampler(sample_weights, num_samples=total, replacement=True)
    train_loader_local = DataLoader(merged_train_dataset, batch_size=batch_size, sampler=sampler, collate_fn=collate_fn)
    val_loader_local   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader1_local = DataLoader(test_dataset1, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader2_local = DataLoader(test_dataset2, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

    model_params = {
        "input_size": 1024,
        "d_model": exp_params.get("d_model", 256),
        "num_layers": exp_params.get("num_layers", 2),
        "num_classes": num_classes,
        "dropout": exp_params.get("dropout", 0.1),
        # Forwarded for completeness; get_model_mamba does not read this key.
        "kernel_size": exp_params.get("kernel_size", 3)
    }
    model = get_model_mamba(model_params).to(device)

    # Hand-set per-emotion values; each class weight of the loss is the
    # inverse of the value accumulated for that class.
    train_counts = {
        'neutral': 3.3,
        'joy': 2.5,
        'surprise': 2.5,
        'anger': 2.5,
        'sadness': 2.2,
        'disgust': 4,
        'fear': 4.5
    }

    label_counts = defaultdict(int)
    for emotion, count in train_counts.items():
        if emotion in emotion_mapping:
            label = emotion_mapping[emotion]
            label_counts[label] += count
    class_weights = []
    for label in range(num_classes):
        if label in label_counts and label_counts[label] > 0:
            class_weights.append(1.0 / label_counts[label])
        else:
            class_weights.append(1.0)
    class_weights = torch.tensor(class_weights, dtype=torch.float)
    # Rescale so that the weights sum to the number of classes.
    class_weights = class_weights / class_weights.sum() * num_classes
    class_weights = class_weights.to(device)

    loss_type = exp_params.get("loss", "crossentropy")
    if loss_type == "crossentropy":
        criterion = nn.CrossEntropyLoss(weight=class_weights)
    else:
        raise ValueError("Неизвестный тип loss")

    opt_type = exp_params.get("optimizer", "adam")
    lr = exp_params.get("lr", 1e-4)
    if opt_type == "adam":
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        raise ValueError("Неизвестный тип оптимизатора")

    def _split_metrics(loader):
        """Return (y_true, y_pred, metrics) of the current model on loader."""
        y_true, y_pred, _ = evaluate_metrics(model, loader, device)
        return y_true, y_pred, compute_metrics(y_true, y_pred)

    best_val_loss = float('inf')
    counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader_local, criterion, optimizer, device)

        _, _, train_metrics = _split_metrics(train_loader_local)

        val_loss, val_acc = evaluate_model(model, val_loader_local, criterion, device)
        _, _, val_metrics = _split_metrics(val_loader_local)
        _, _, test1_metrics = _split_metrics(test_loader1_local)
        _, _, test2_metrics = _split_metrics(test_loader2_local)

        print(f"\nЭпоха {epoch+1}/{num_epochs}:")
        print(f"  Train Loss: {train_loss:.4f}, WAR: {train_metrics[2]:.4f}, UAR: {train_metrics[0]:.4f}, MF1: {train_metrics[1]:.4f}, WF1: {train_metrics[3]:.4f}")
        print(f"  Val   Loss: {val_loss:.4f}, WAR: {val_metrics[2]:.4f}, UAR: {val_metrics[0]:.4f}, MF1: {val_metrics[1]:.4f}, WF1: {val_metrics[3]:.4f}")
        print(f"  Test1:               WAR: {test1_metrics[2]:.4f}, UAR: {test1_metrics[0]:.4f}, MF1: {test1_metrics[1]:.4f}, WF1: {test1_metrics[3]:.4f}")
        print(f"  Test2:               WAR: {test2_metrics[2]:.4f}, UAR: {test2_metrics[0]:.4f}, MF1: {test2_metrics[1]:.4f}, WF1: {test2_metrics[3]:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
            # Snapshot the weights at the moment of improvement. The snapshot
            # used to be taken after the loop, so the "best" checkpoint was
            # really the last epoch's weights.
            best_model_state = copy.deepcopy(model.state_dict())
        else:
            counter += 1
            if counter >= patience:
                print(f"Ранняя остановка на эпохе {epoch+1}")
                break

    if best_model_state is None:
        # No epoch improved the validation loss (e.g. num_epochs == 0 or a
        # NaN loss): fall back to the current weights.
        best_model_state = copy.deepcopy(model.state_dict())
    else:
        model.load_state_dict(best_model_state)
    torch.save(best_model_state, "new_best_model.pt")

    # Final evaluation with the restored best weights.
    y_true_val, y_pred_val, val_metrics = _split_metrics(val_loader_local)
    y_true_test1, y_pred_test1, test1_metrics = _split_metrics(test_loader1_local)
    y_true_test2, y_pred_test2, test2_metrics = _split_metrics(test_loader2_local)

    # Fix the label set so the matrix is always num_classes x num_classes and
    # lines up with the tick labels even when a class never occurs.
    class_ids = list(range(num_classes))
    class_names = [label_to_emotion[i] for i in class_ids]

    cm_test1 = confusion_matrix(y_true_test1, y_pred_test1, labels=class_ids)
    plot_confusion_matrix(cm_test1, classes=class_names, title='Test1 Confusion Matrix')

    cm_test2 = confusion_matrix(y_true_test2, y_pred_test2, labels=class_ids)
    plot_confusion_matrix(cm_test2, classes=class_names, title='Test2 Confusion Matrix')

    return {
        "exp_name": exp_params["name"],
        "val": {"UAR": val_metrics[0], "MF1": val_metrics[1], "WAR": val_metrics[2], "WF1": val_metrics[3]},
        "test1": {"UAR": test1_metrics[0], "MF1": test1_metrics[1], "WAR": test1_metrics[2], "WF1": test1_metrics[3]},
        "test2": {"UAR": test2_metrics[0], "MF1": test2_metrics[1], "WAR": test2_metrics[2], "WF1": test2_metrics[3]}
    }

  
# Random hyperparameter search: every trial draws one configuration and
# trains/evaluates it through run_experiment().
n_trials = 1
best_mf1 = -1        # best validation MF1 seen so far
best_params = None   # configuration that produced best_mf1
all_results = []     # result dict of every trial that finished without error

# Candidate values for each hyperparameter.
d_models = [256, 512]
num_layers_list = [2, 3, 4]
kernel_sizes = [3, 5, 7]
dropouts = [0.1, 0.2, 0.3]
optimizers_choices = ["adam"]
losses_choices = ["crossentropy"]
lrs = [1e-3, 1e-4]  # not sampled from directly: lr is drawn log-uniformly below
normalizations_choices = [False, True]

for trial in range(n_trials):
    # The order of the random draws is kept fixed so that a given seed keeps
    # producing the same sequence of configurations.
    d_model = random.choice(d_models)
    num_layers = random.choice(num_layers_list)
    kernel_size = random.choice(kernel_sizes)
    dropout = random.choice(dropouts)
    optimizer_choice = random.choice(optimizers_choices)
    loss_choice = random.choice(losses_choices)
    # Log-uniform sample between 1e-4 and 1e-3.
    lr = 10 ** random.uniform(math.log10(1e-4), math.log10(1e-3))
    normalize = random.choice(normalizations_choices)

    exp_params = {
        "name": f"Trial {trial+1}: mamba, d_model={d_model}, layers={num_layers}, kernel={kernel_size}, dropout={dropout}, opt={optimizer_choice}, loss={loss_choice}, lr={lr:.1e}, norm={normalize}",
        "d_model": d_model,
        "num_layers": num_layers,
        "kernel_size": kernel_size,
        "dropout": dropout,
        "optimizer": optimizer_choice,
        "loss": loss_choice,
        "lr": lr,
        "normalize": normalize
    }

    print(f"\nЗапуск испытания {trial+1}/{n_trials} с параметрами:")
    print(exp_params)

    try:
        result = run_experiment(exp_params, num_epochs=13, patience=70)
    except Exception as e:
        # A failed trial is reported and skipped so the search can continue.
        print(f"Испытание {trial+1} завершилось ошибкой: {e}")
        continue

    # Previously the result was dropped, leaving all_results empty and
    # best_mf1 / best_params at their initial values. Record every finished
    # trial and keep the configuration with the highest validation MF1.
    all_results.append(result)
    trial_mf1 = result["val"]["MF1"]
    if trial_mf1 > best_mf1:
        best_mf1 = trial_mf1
        best_params = exp_params


v1 — первая реализация.
v2 — вторая реализация с более высокими результатами.
v3 — третья реализация с балансировкой по языкам (в таблице указаны метрики именно этой модели).