In [None]:
# =========================
# Dynamic Runner: Hybrid (Embeddings + Multi-Graph-GNN + Text + Ensemble)
# Runs multiple datasets in one execution
# Leakage-safe vs TEST: graphs built using TRAIN+VAL only, and TEST connects only to TRAIN+VAL
# Multi-graph (4 views) edges are MERGED into one weighted graph for the existing GCN
# Views:
#  1) Semantic similarity (SentenceTransformer cosine KNN)
#  2) Keyword graph (TF-IDF cosine KNN)
#  3) Negation graph (same negation signature)
#  4) Intensity graph (same intensity signature)
# =========================

!pip -q install torch-geometric faiss-cpu sentence-transformers transformers accelerate scikit-learn

import os, random, gc, re, math
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

# -------------------------
# 0) Reproducibility
# -------------------------
SEED = 42
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# -------------------------
# 1) GLOBAL CONFIG
# -------------------------
TEXT_COL  = "Sentence"
SPLIT_COL = "Split"     # 0=train, 1=val, 2=test

# Multi-graph hyperparams (tune these)
K_SEM   = 10
THR_SEM = 0.60

K_TFIDF   = 25
THR_TFIDF = 0.05

MAX_SIG_NEI = 20   # for negation/intensity group connections (prevents clique explosion)

# View weights (how much each view contributes when merging edges)
W_SEM   = 1.00
W_TFIDF = 0.80
W_NEG   = 0.50
W_INT   = 0.50

# Models
SENT_MODEL      = "pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb"
BASE_TEXT_MODEL = "dmis-lab/biobert-base-cased-v1.1"

# Training hyperparams
GNN_HID      = 256
GNN_DROPOUT  = 0.5
GNN_LR       = 5e-4
GNN_WD       = 5e-4
GNN_EPOCHS   = 300
GNN_PATIENCE = 25

TEXT_EPOCHS  = 4
TEXT_BS      = 16
TEXT_LR      = 3e-5
TEXT_WD      = 0.01
MAX_LEN      = 128

# -------------------------
# 2) DATASET LIST (EDIT THIS)
# -------------------------
DATASETS = [
    {"name": "Sadness", "path": "/content/sample_data/Sadness_anon.csv", "label": "Sadness"},
    {"name": "Joy",     "path": "/content/sample_data/Joy_anon.csv",     "label": "Joy"},
    {"name": "Fear",    "path": "/content/sample_data/Fear_anon.csv",    "label": "Fear"},
    {"name": "Anger",   "path": "/content/sample_data/Anger_anon.csv",   "label": "Anger"},
    {"name": "Surprise","path": "/content/sample_data/Surprise_anon.csv","label": "Surprise"},
    {"name": "Disgust", "path": "/content/sample_data/Disgust_anon.csv", "label": "Disgust"},
    {"name": "Trust",   "path": "/content/sample_data/Trust_anon.csv",   "label": "Trust"},
    {"name": "Anticip", "path": "/content/sample_data/Anticipation_anon.csv", "label": "Anticipation"},
]

# -------------------------
# 3) Helpers
# -------------------------
def cleanup_cuda():
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

def acc_from_probs(p, ytrue):
    pred = p.argmax(axis=1)
    return float((pred == ytrue).mean())

def metrics_from_probs(p, ytrue):
    pred = p.argmax(axis=1)
    return {
        "acc": float(accuracy_score(ytrue, pred)),
        "f1": float(f1_score(ytrue, pred, zero_division=0)),
        "precision": float(precision_score(ytrue, pred, zero_division=0)),
        "recall": float(recall_score(ytrue, pred, zero_division=0)),
    }

def make_class_weights(y, train_mask):
    y_train = np.array(y)[train_mask]
    pos = int((y_train == 1).sum())
    neg = int((y_train == 0).sum())
    if pos == 0 or neg == 0:
        return torch.tensor([1.0, 1.0], dtype=torch.float, device=device)
    w0 = (pos + neg) / (2.0 * neg)
    w1 = (pos + neg) / (2.0 * pos)
    return torch.tensor([w0, w1], dtype=torch.float, device=device)

class WeightedGCN(nn.Module):
    def __init__(self, in_dim, hid=256, num_classes=2, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.c1 = GCNConv(in_dim, hid)
        self.c2 = GCNConv(hid, num_classes)

    def forward(self, data):
        x = F.dropout(data.x, p=self.dropout, training=self.training)
        x = self.c1(x, data.edge_index, data.edge_weight)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.c2(x, data.edge_index, data.edge_weight)
        return x

class SimpleDataset(torch.utils.data.Dataset):
    def __init__(self, enc, y):
        self.enc = enc
        self.y = y
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.enc.items()}
        item["labels"] = torch.tensor(int(self.y[idx]))
        return item

def comp_metrics_hf(eval_pred):
    logits, labels_ = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_score(labels_, preds),
        "f1": f1_score(labels_, preds, zero_division=0),
        "precision": precision_score(labels_, preds, zero_division=0),
        "recall": recall_score(labels_, preds, zero_division=0),
    }

# -------------------------
# 3.1) Multi-graph edge builders (leakage-safe)
# -------------------------
NEG_WORDS = {
    "no","not","never","none","nothing","nowhere","neither","nor",
    "can't","cannot","dont","don't","doesnt","doesn't","didnt","didn't",
    "won't","wouldn't","isn't","aren't","wasn't","weren't","haven't","hasn't","hadn't",
    "without","lack","lacking"
}
INTENSITY_WORDS = {
    "very","extremely","really","so","too","highly","deeply","totally","completely",
    "terribly","horribly","absolutely","severely","incredibly"
}
SLEEP_PHRASES = {"cannot sleep","can't sleep","no sleep","sleepless","insomnia"}

def negation_signature(sent: str):
    s = sent.lower()
    tokens = re.findall(r"[a-z']+", s)
    neg_pos = [i for i,t in enumerate(tokens) if t in NEG_WORDS]
    cnt = len(neg_pos)
    bucket = "none" if cnt==0 else ("one" if cnt==1 else "multi")
    early = 0
    if cnt > 0:
        early = 1 if neg_pos[0] <= 4 else 2
    return f"{bucket}|{early}"

def intensity_signature(sent: str):
    s = sent.lower()
    tokens = re.findall(r"[a-z']+", s)
    has_int = any(t in INTENSITY_WORDS for t in tokens)
    has_sleep = any(p in s for p in SLEEP_PHRASES)
    exclam = 1 if "!" in sent else 0
    caps = 1 if sum(1 for c in sent if c.isupper()) >= 5 else 0
    return f"int{int(has_int)}|slp{int(has_sleep)}|ex{exclam}|cp{caps}"

def _add_edge(edge_map, u, v, w):
    # accumulate weights if edge repeats from multiple views
    key = (int(u), int(v))
    edge_map[key] = edge_map.get(key, 0.0) + float(w)

def build_semantic_edges(X_np, idx_pool, idx_test, k=10, thr=0.6, w_view=1.0):
    """
    X_np: L2-normalized dense embeddings (N,D)
    idx_pool: indices used to build index (TRAIN+VAL)
    idx_test: indices of TEST to connect to pool (TEST->POOL)
    returns: dict edge_map (directed)
    """
    edge_map = {}
    d = X_np.shape[1]
    index = faiss.IndexFlatIP(d)
    index.add(X_np[idx_pool])

    # pool->pool edges
    sims, nbrs = index.search(X_np[idx_pool], k+1)
    for local_i, i in enumerate(idx_pool):
        for j in range(1, k+1):
            nb_local = int(nbrs[local_i, j])
            nb = int(idx_pool[nb_local])
            s = float(sims[local_i, j])
            if s >= thr:
                _add_edge(edge_map, i, nb, w_view * s)

    # test->pool edges
    if len(idx_test) > 0:
        sims_t, nbrs_t = index.search(X_np[idx_test], k)
        for local_i, i in enumerate(idx_test):
            for j in range(k):
                nb_local = int(nbrs_t[local_i, j])
                nb = int(idx_pool[nb_local])
                s = float(sims_t[local_i, j])
                if s >= thr:
                    _add_edge(edge_map, i, nb, w_view * s)

    return edge_map

def build_tfidf_edges(texts, idx_pool, idx_test, k=25, thr=0.05, w_view=0.8,
                      max_features=30000, ngram_range=(1,2), min_df=2):
    """
    TF-IDF cosine neighbors (leakage-safe to TEST):
      - fit vectorizer on pool only
      - pool->pool kNN and test->pool kNN
    """
    edge_map = {}
    vec = TfidfVectorizer(
        max_features=max_features,
        ngram_range=ngram_range,
        min_df=min_df,
        lowercase=True
    )
    X_pool = vec.fit_transform([texts[i] for i in idx_pool])

    # cosine distance in NearestNeighbors: smaller is closer
    nn_pool = NearestNeighbors(n_neighbors=min(k+1, X_pool.shape[0]), metric="cosine", algorithm="brute")
    nn_pool.fit(X_pool)

    # pool->pool
    dist, nbrs = nn_pool.kneighbors(X_pool, return_distance=True)
    for local_i, i in enumerate(idx_pool):
        for j in range(1, nbrs.shape[1]):
            nb_local = int(nbrs[local_i, j])
            nb = int(idx_pool[nb_local])
            sim = 1.0 - float(dist[local_i, j])
            if sim >= thr:
                _add_edge(edge_map, i, nb, w_view * sim)

    # test->pool
    if len(idx_test) > 0:
        X_test = vec.transform([texts[i] for i in idx_test])
        dist_t, nbrs_t = nn_pool.kneighbors(X_test, n_neighbors=min(k, X_pool.shape[0]), return_distance=True)
        for local_i, i in enumerate(idx_test):
            for j in range(nbrs_t.shape[1]):
                nb_local = int(nbrs_t[local_i, j])
                nb = int(idx_pool[nb_local])
                sim = 1.0 - float(dist_t[local_i, j])
                if sim >= thr:
                    _add_edge(edge_map, i, nb, w_view * sim)

    return edge_map

def build_signature_edges(texts, idx_pool, idx_test, signature_fn, max_nei=20, w_view=0.5):
    """
    Connect nodes that share the same signature.
    To avoid huge cliques:
      - inside each group, connect each node to at most max_nei others (deterministic)
    Also connects test->pool for same signature.
    """
    edge_map = {}
    sig_pool = {}
    for i in idx_pool:
        sg = signature_fn(texts[i])
        sig_pool.setdefault(sg, []).append(i)

    # pool->pool (limited)
    for sg, nodes in sig_pool.items():
        if len(nodes) < 2:
            continue
        nodes_sorted = sorted(nodes)
        for idx, u in enumerate(nodes_sorted):
            # connect to next max_nei nodes (wrap-free, deterministic)
            for v in nodes_sorted[idx+1: idx+1+max_nei]:
                _add_edge(edge_map, u, v, w_view * 1.0)

    # test->pool
    if len(idx_test) > 0:
        for i in idx_test:
            sg = signature_fn(texts[i])
            pool_nodes = sig_pool.get(sg, [])
            if not pool_nodes:
                continue
            # connect to up to max_nei pool nodes
            for nb in pool_nodes[:max_nei]:
                _add_edge(edge_map, i, nb, w_view * 1.0)

    return edge_map

def merge_edge_maps(*maps):
    merged = {}
    for mp in maps:
        for (u,v), w in mp.items():
            merged[(u,v)] = merged.get((u,v), 0.0) + float(w)
    return merged

def edge_map_to_undirected_tensors(edge_map, n_nodes):
    # add reverse edges and build tensors
    src, dst, wts = [], [], []
    for (u,v), w in edge_map.items():
        if u < 0 or v < 0 or u >= n_nodes or v >= n_nodes:
            continue
        src.append(u); dst.append(v); wts.append(w)
        src.append(v); dst.append(u); wts.append(w)

    if len(src) == 0:
        # avoid empty graph
        src = [0]; dst = [0]; wts = [1.0]

    edge_index = torch.tensor([src, dst], dtype=torch.long)
    edge_weight = torch.tensor(wts, dtype=torch.float)

    # (optional) stabilize weights: squash into [0,1] using tanh
    edge_weight = torch.tanh(edge_weight)

    return edge_index, edge_weight

# -------------------------
# 4) Load shared models ONCE
# -------------------------
sent_model = SentenceTransformer(SENT_MODEL, device=device)
tok = AutoTokenizer.from_pretrained(BASE_TEXT_MODEL)

def tokenize_list(text_list, max_len=128):
    return tok(text_list, truncation=True, padding=True, max_length=max_len)

# -------------------------
# 5) Core runner for ONE dataset
# -------------------------
def run_one_dataset(name, csv_path, label_col):
    print("\n" + "="*70)
    print(f"DATASET: {name}")
    print("="*70)

    df = pd.read_csv(csv_path)
    df = df[[TEXT_COL, label_col, SPLIT_COL]].dropna().reset_index(drop=True)

    texts  = df[TEXT_COL].astype(str).tolist()
    labels = df[label_col].astype(int).tolist()
    splits = df[SPLIT_COL].astype(int).tolist()

    splits_np = np.array(splits)
    train_mask = splits_np == 0
    val_mask   = splits_np == 1
    test_mask  = splits_np == 2

    print("Total:", len(df))
    print("Train/Val/Test:", int(train_mask.sum()), int(val_mask.sum()), int(test_mask.sum()))
    print("Train label counts:", df[df[SPLIT_COL]==0][label_col].value_counts().to_dict())

    # -------------------------
    # A) Sentence embeddings (semantic features for GNN)
    # -------------------------
    X = sent_model.encode(
        texts,
        batch_size=64,
        convert_to_tensor=True,
        normalize_embeddings=True
    )
    X_np = X.detach().cpu().numpy().astype("float32")  # already L2-normalized

    # -------------------------
    # B) Multi-Graph (4 views) edges (leakage-safe to TEST)
    #    pool = TRAIN+VAL, test connects only to pool
    # -------------------------
    idx_pool = np.where(splits_np != 2)[0]  # train+val
    idx_test = np.where(splits_np == 2)[0]

    # View-1 semantic KNN
    em_sem = build_semantic_edges(
        X_np, idx_pool, idx_test,
        k=K_SEM, thr=THR_SEM, w_view=W_SEM
    )

    # View-2 TF-IDF keyword graph
    em_tfidf = build_tfidf_edges(
        texts, idx_pool, idx_test,
        k=K_TFIDF, thr=THR_TFIDF, w_view=W_TFIDF
    )

    # View-3 negation graph
    em_neg = build_signature_edges(
        texts, idx_pool, idx_test,
        signature_fn=negation_signature,
        max_nei=MAX_SIG_NEI, w_view=W_NEG
    )

    # View-4 intensity graph
    em_int = build_signature_edges(
        texts, idx_pool, idx_test,
        signature_fn=intensity_signature,
        max_nei=MAX_SIG_NEI, w_view=W_INT
    )

    # Merge all views
    edge_map = merge_edge_maps(em_sem, em_tfidf, em_neg, em_int)

    edge_index, edge_weight = edge_map_to_undirected_tensors(edge_map, n_nodes=len(df))

    print("Undirected edges:", int(edge_index.shape[1]))

    # -------------------------
    # C) PyG data + weighted loss
    # -------------------------
    y = torch.tensor(labels, dtype=torch.long)

    data = Data(
        x=X.detach().cpu(),
        edge_index=edge_index,
        edge_weight=edge_weight,
        y=y,
        train_mask=torch.tensor(train_mask),
        val_mask=torch.tensor(val_mask),
        test_mask=torch.tensor(test_mask),
    ).to(device)

    class_weights = make_class_weights(labels, train_mask)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    print("Class weights:", class_weights.detach().cpu().tolist())

    # -------------------------
    # D) Train GNN
    # -------------------------
    gnn = WeightedGCN(in_dim=data.x.size(1), hid=GNN_HID, num_classes=2, dropout=GNN_DROPOUT).to(device)
    opt = torch.optim.AdamW(gnn.parameters(), lr=GNN_LR, weight_decay=GNN_WD)

    @torch.no_grad()
    def gnn_probs(mask):
        gnn.eval()
        logits = gnn(data)
        probs = torch.softmax(logits, dim=1)
        return probs[mask].detach().cpu().numpy()

    def gnn_train(max_epochs=300, patience=25):
        best_val = -1
        best_state = None
        bad = 0

        for epoch in range(1, max_epochs+1):
            gnn.train()
            opt.zero_grad()
            logits = gnn(data)
            loss = criterion(logits[data.train_mask], data.y[data.train_mask])
            loss.backward()
            torch.nn.utils.clip_grad_norm_(gnn.parameters(), 1.0)
            opt.step()

            if epoch % 10 == 0 or epoch == 1:
                gnn.eval()
                pred = logits.argmax(dim=1)
                tr = (pred[data.train_mask] == data.y[data.train_mask]).float().mean().item()
                va = (pred[data.val_mask]   == data.y[data.val_mask]).float().mean().item()
                te = (pred[data.test_mask]  == data.y[data.test_mask]).float().mean().item()
                print(f"GNN Epoch {epoch:03d} | loss {loss.item():.4f} | train {tr:.3f} | val {va:.3f} | test {te:.3f}")

                if va > best_val:
                    best_val = va
                    best_state = {k: v.detach().cpu().clone() for k, v in gnn.state_dict().items()}
                    bad = 0
                else:
                    bad += 1
                    if bad >= patience:
                        print(f"GNN Early stop at epoch {epoch} (best val={best_val:.3f})")
                        break

        if best_state is not None:
            gnn.load_state_dict(best_state)
        return float(best_val)

    best_val_gnn = gnn_train(max_epochs=GNN_EPOCHS, patience=GNN_PATIENCE)

    # -------------------------
    # E) Train Text model (BioBERT)
    # -------------------------
    X_train = df[train_mask][TEXT_COL].tolist()
    y_train = df[train_mask][label_col].astype(int).tolist()
    X_val   = df[val_mask][TEXT_COL].tolist()
    y_val   = df[val_mask][label_col].astype(int).tolist()
    X_test  = df[test_mask][TEXT_COL].tolist()
    y_test  = df[test_mask][label_col].astype(int).tolist()

    train_ds = SimpleDataset(tokenize_list(X_train, MAX_LEN), y_train)
    val_ds   = SimpleDataset(tokenize_list(X_val,   MAX_LEN), y_val)
    test_ds  = SimpleDataset(tokenize_list(X_test,  MAX_LEN), y_test)

    text_model = AutoModelForSequenceClassification.from_pretrained(BASE_TEXT_MODEL, num_labels=2)

    out_dir = f"./tmp_text_{name}"
    args = TrainingArguments(
        output_dir=out_dir,
        num_train_epochs=TEXT_EPOCHS,
        per_device_train_batch_size=TEXT_BS,
        per_device_eval_batch_size=TEXT_BS,
        learning_rate=TEXT_LR,
        weight_decay=TEXT_WD,
        eval_strategy="epoch",
        save_strategy="no",
        logging_steps=50,
        report_to="none",
        fp16=torch.cuda.is_available(),
        seed=SEED,
    )

    trainer = Trainer(
        model=text_model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        compute_metrics=comp_metrics_hf,
    )
    trainer.train()

    @torch.no_grad()
    def text_probs(dataset):
        preds = trainer.predict(dataset)
        logits = preds.predictions
        probs = torch.softmax(torch.tensor(logits), dim=1).cpu().numpy()
        return probs

    p_text_val  = text_probs(val_ds)
    p_text_test = text_probs(test_ds)

    # -------------------------
    # F) Ensemble (tune alpha on val; report on test)
    # -------------------------
    p_gnn_val  = gnn_probs(data.val_mask)
    p_gnn_test = gnn_probs(data.test_mask)

    y_val_np  = np.array(y_val)
    y_test_np = np.array(y_test)

    best_alpha = None
    best_val_acc = -1
    best_test_metrics = None
    best_val_metrics  = None

    alphas = [i/10 for i in range(11)]
    for alpha in alphas:
        p_ens_val  = alpha * p_text_val  + (1 - alpha) * p_gnn_val
        val_acc = acc_from_probs(p_ens_val, y_val_np)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_alpha = alpha

            p_ens_test = alpha * p_text_test + (1 - alpha) * p_gnn_test
            best_test_metrics = metrics_from_probs(p_ens_test, y_test_np)
            best_val_metrics  = metrics_from_probs(p_ens_val,  y_val_np)

    text_val_metrics  = metrics_from_probs(p_text_val,  y_val_np)
    text_test_metrics = metrics_from_probs(p_text_test, y_test_np)

    gnn_val_metrics   = metrics_from_probs(p_gnn_val,  y_val_np)
    gnn_test_metrics  = metrics_from_probs(p_gnn_test, y_test_np)

    print("\n---------------- RESULTS ----------------")
    print(f"GNN best val acc: {best_val_gnn:.3f}")
    print("Standalone GNN  (val):", gnn_val_metrics)
    print("Standalone GNN (test):", gnn_test_metrics)
    print("Standalone Text (val):", text_val_metrics)
    print("Standalone Text(test):", text_test_metrics)
    print(f"Ensemble best alpha: {best_alpha}")
    print("Ensemble (val):", best_val_metrics)
    print("Ensemble(test):", best_test_metrics)

    result = {
        "dataset": name,
        "csv": csv_path,
        "label_col": label_col,
        "n_total": int(len(df)),
        "n_train": int(train_mask.sum()),
        "n_val": int(val_mask.sum()),
        "n_test": int(test_mask.sum()),

        "K_SEM": K_SEM,
        "THR_SEM": THR_SEM,
        "K_TFIDF": K_TFIDF,
        "THR_TFIDF": THR_TFIDF,
        "MAX_SIG_NEI": MAX_SIG_NEI,

        "gnn_best_val_acc": float(best_val_gnn),

        "gnn_val_acc": gnn_val_metrics["acc"],
        "gnn_val_f1": gnn_val_metrics["f1"],
        "gnn_test_acc": gnn_test_metrics["acc"],
        "gnn_test_f1": gnn_test_metrics["f1"],

        "text_val_acc": text_val_metrics["acc"],
        "text_val_f1": text_val_metrics["f1"],
        "text_test_acc": text_test_metrics["acc"],
        "text_test_f1": text_test_metrics["f1"],

        "ens_alpha": float(best_alpha),
        "ens_val_acc": best_val_metrics["acc"],
        "ens_val_f1": best_val_metrics["f1"],
        "ens_test_acc": best_test_metrics["acc"],
        "ens_test_f1": best_test_metrics["f1"],
    }
    return result

# -------------------------
# 6) Run ALL datasets
# -------------------------
all_results = []
for ds in DATASETS:
    try:
        res = run_one_dataset(ds["name"], ds["path"], ds["label"])
        all_results.append(res)
    except Exception as e:
        print(f"\n[ERROR] Dataset {ds.get('name')} failed:", repr(e))
    finally:
        cleanup_cuda()

# -------------------------
# 7) Summary table + save
# -------------------------
res_df = pd.DataFrame(all_results)
if len(res_df) > 0:
    res_df = res_df.sort_values(by="ens_test_acc", ascending=False).reset_index(drop=True)
    print("\n\n================== FINAL SUMMARY (sorted by Ensemble test acc) ==================")
    display(res_df[[
        "dataset",
        "n_train","n_val","n_test",
        "gnn_test_acc","text_test_acc","ens_test_acc",
        "gnn_test_f1","text_test_f1","ens_test_f1",
        "ens_alpha",
        "K_SEM","THR_SEM","K_TFIDF","THR_TFIDF","MAX_SIG_NEI"
    ]])

    out_csv = "hybrid_all8_results_multigraph.csv"
    res_df.to_csv(out_csv, index=False)
    print("\nSaved:", out_csv)
else:
    print("No results were produced.")


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hDevice: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/691 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/433M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]


DATASET: Sadness
Total: 3606
Train/Val/Test: 2884 361 361
Train label counts: {0: 1457, 1: 1427}
Undirected edges: 414450
Class weights: [0.9897048473358154, 1.0105115175247192]
GNN Epoch 001 | loss 0.6948 | train 0.505 | val 0.457 | test 0.501
GNN Epoch 010 | loss 0.6212 | train 0.990 | val 0.900 | test 0.640
GNN Epoch 020 | loss 0.5319 | train 0.990 | val 0.928 | test 0.643
GNN Epoch 030 | loss 0.4426 | train 0.989 | val 0.942 | test 0.618
GNN Epoch 040 | loss 0.3506 | train 0.990 | val 0.922 | test 0.623
GNN Epoch 050 | loss 0.2788 | train 0.990 | val 0.917 | test 0.615
GNN Epoch 060 | loss 0.2201 | train 0.991 | val 0.939 | test 0.632
GNN Epoch 070 | loss 0.1710 | train 0.992 | val 0.922 | test 0.626
GNN Epoch 080 | loss 0.1378 | train 0.991 | val 0.934 | test 0.612
GNN Epoch 090 | loss 0.1170 | train 0.991 | val 0.925 | test 0.612
GNN Epoch 100 | loss 0.0979 | train 0.992 | val 0.922 | test 0.640
GNN Epoch 110 | loss 0.0832 | train 0.992 | val 0.922 | test 0.632
GNN Epoch 120 | l

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5874,0.517725,0.756233,0.789474,0.743243,0.841837
2,0.463,0.519372,0.759003,0.784119,0.763285,0.806122
3,0.2714,0.637171,0.731302,0.738544,0.782857,0.69898
4,0.186,0.847681,0.734072,0.757576,0.75,0.765306


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]


---------------- RESULTS ----------------
GNN best val acc: 0.942
Standalone GNN  (val): {'acc': 0.925207756232687, 'f1': 0.9346246973365617, 'precision': 0.8894009216589862, 'recall': 0.9846938775510204}
Standalone GNN (test): {'acc': 0.6343490304709142, 'f1': 0.611764705882353, 'precision': 0.65, 'recall': 0.5777777777777777}
Standalone Text (val): {'acc': 0.7340720221606648, 'f1': 0.7575757575757576, 'precision': 0.75, 'recall': 0.7653061224489796}
Standalone Text(test): {'acc': 0.6869806094182825, 'f1': 0.7002652519893899, 'precision': 0.6700507614213198, 'recall': 0.7333333333333333}
Ensemble best alpha: 0.0
Ensemble (val): {'acc': 0.925207756232687, 'f1': 0.9346246973365617, 'precision': 0.8894009216589862, 'recall': 0.9846938775510204}
Ensemble(test): {'acc': 0.6343490304709142, 'f1': 0.611764705882353, 'precision': 0.65, 'recall': 0.5777777777777777}

DATASET: Joy
Total: 6043
Train/Val/Test: 4834 604 605
Train label counts: {0: 2424, 1: 2410}
Undirected edges: 716976
Class wei

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4524,0.416943,0.817881,0.819672,0.83612,0.803859
2,0.3186,0.482703,0.806291,0.802698,0.843972,0.765273
3,0.2253,0.59842,0.796358,0.797364,0.817568,0.778135
4,0.1114,0.813929,0.80298,0.807754,0.811688,0.803859



---------------- RESULTS ----------------
GNN best val acc: 0.977
Standalone GNN  (val): {'acc': 0.9735099337748344, 'f1': 0.9746031746031746, 'precision': 0.9623824451410659, 'recall': 0.9871382636655949}
Standalone GNN (test): {'acc': 0.7454545454545455, 'f1': 0.6932270916334662, 'precision': 0.8656716417910447, 'recall': 0.5780730897009967}
Standalone Text (val): {'acc': 0.8029801324503312, 'f1': 0.8077544426494345, 'precision': 0.8116883116883117, 'recall': 0.8038585209003215}
Standalone Text(test): {'acc': 0.8, 'f1': 0.7959527824620574, 'precision': 0.8082191780821918, 'recall': 0.7840531561461794}
Ensemble best alpha: 0.2
Ensemble (val): {'acc': 0.9784768211920529, 'f1': 0.9793322734499205, 'precision': 0.9685534591194969, 'recall': 0.9903536977491961}
Ensemble(test): {'acc': 0.7884297520661157, 'f1': 0.7584905660377359, 'precision': 0.8777292576419214, 'recall': 0.6677740863787376}

DATASET: Fear
Total: 5388
Train/Val/Test: 4310 539 539
Train label counts: {0: 2162, 1: 2148}
Un

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5767,0.571643,0.703154,0.745223,0.63587,0.9
2,0.4805,0.597285,0.70872,0.727903,0.662461,0.807692
3,0.327,0.714441,0.688312,0.72,0.635294,0.830769
4,0.2339,0.885844,0.70872,0.716094,0.675768,0.761538



---------------- RESULTS ----------------
GNN best val acc: 0.946
Standalone GNN  (val): {'acc': 0.9424860853432282, 'f1': 0.9426987060998152, 'precision': 0.9074733096085409, 'recall': 0.9807692307692307}
Standalone GNN (test): {'acc': 0.6178107606679035, 'f1': 0.6484641638225256, 'precision': 0.6333333333333333, 'recall': 0.6643356643356644}
Standalone Text (val): {'acc': 0.7087198515769945, 'f1': 0.7160940325497287, 'precision': 0.6757679180887372, 'recall': 0.7615384615384615}
Standalone Text(test): {'acc': 0.7068645640074211, 'f1': 0.7285223367697594, 'precision': 0.7162162162162162, 'recall': 0.7412587412587412}
Ensemble best alpha: 0.0
Ensemble (val): {'acc': 0.9424860853432282, 'f1': 0.9426987060998152, 'precision': 0.9074733096085409, 'recall': 0.9807692307692307}
Ensemble(test): {'acc': 0.6178107606679035, 'f1': 0.6484641638225256, 'precision': 0.6333333333333333, 'recall': 0.6643356643356644}

DATASET: Anger
Total: 837
Train/Val/Test: 669 84 84
Train label counts: {1: 344, 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.678417,0.583333,0.631579,0.555556,0.731707
2,0.700500,0.662274,0.666667,0.666667,0.651163,0.682927
3,0.529800,0.726195,0.666667,0.631579,0.685714,0.585366
4,0.323400,0.797343,0.642857,0.625,0.641026,0.609756



---------------- RESULTS ----------------
GNN best val acc: 0.524
Standalone GNN  (val): {'acc': 0.42857142857142855, 'f1': 0.5384615384615384, 'precision': 0.4444444444444444, 'recall': 0.6829268292682927}
Standalone GNN (test): {'acc': 0.5952380952380952, 'f1': 0.15, 'precision': 0.5, 'recall': 0.08823529411764706}
Standalone Text (val): {'acc': 0.6428571428571429, 'f1': 0.625, 'precision': 0.6410256410256411, 'recall': 0.6097560975609756}
Standalone Text(test): {'acc': 0.6904761904761905, 'f1': 0.6578947368421053, 'precision': 0.5952380952380952, 'recall': 0.7352941176470589}
Ensemble best alpha: 0.1
Ensemble (val): {'acc': 0.6428571428571429, 'f1': 0.625, 'precision': 0.6410256410256411, 'recall': 0.6097560975609756}
Ensemble(test): {'acc': 0.6904761904761905, 'f1': 0.6486486486486487, 'precision': 0.6, 'recall': 0.7058823529411765}

DATASET: Surprise
Total: 826
Train/Val/Test: 614 102 110
Train label counts: {0: 307, 1: 307}
Undirected edges: 90876
Class weights: [1.0, 1.0]
GNN E

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.637312,0.627451,0.424242,0.933333,0.27451
2,0.636500,0.53527,0.754902,0.766355,0.732143,0.803922
3,0.360600,0.580461,0.764706,0.76,0.77551,0.745098
4,0.192300,0.630902,0.745098,0.74,0.755102,0.72549



---------------- RESULTS ----------------
GNN best val acc: 0.608
Standalone GNN  (val): {'acc': 0.5490196078431373, 'f1': 0.6891891891891891, 'precision': 0.5257731958762887, 'recall': 1.0}
Standalone GNN (test): {'acc': 0.5, 'f1': 0.15384615384615385, 'precision': 0.5, 'recall': 0.09090909090909091}
Standalone Text (val): {'acc': 0.7450980392156863, 'f1': 0.74, 'precision': 0.7551020408163265, 'recall': 0.7254901960784313}
Standalone Text(test): {'acc': 0.7545454545454545, 'f1': 0.7326732673267327, 'precision': 0.8043478260869565, 'recall': 0.6727272727272727}
Ensemble best alpha: 0.2
Ensemble (val): {'acc': 0.7941176470588235, 'f1': 0.8, 'precision': 0.7777777777777778, 'recall': 0.8235294117647058}
Ensemble(test): {'acc': 0.7454545454545455, 'f1': 0.7142857142857143, 'precision': 0.813953488372093, 'recall': 0.6363636363636364}

DATASET: Disgust
Total: 891
Train/Val/Test: 735 90 66
Train label counts: {1: 369, 0: 366}
Undirected edges: 96588
Class weights: [1.0040984153747559, 0.9

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.642879,0.633333,0.697248,0.59375,0.844444
2,0.682700,0.620338,0.677778,0.707071,0.648148,0.777778
3,0.573100,0.669101,0.622222,0.690909,0.584615,0.844444
4,0.455800,0.704529,0.611111,0.660194,0.586207,0.755556



---------------- RESULTS ----------------
GNN best val acc: 0.700
Standalone GNN  (val): {'acc': 0.7666666666666667, 'f1': 0.8108108108108109, 'precision': 0.6818181818181818, 'recall': 1.0}
Standalone GNN (test): {'acc': 0.5606060606060606, 'f1': 0.40816326530612246, 'precision': 0.625, 'recall': 0.30303030303030304}
Standalone Text (val): {'acc': 0.6111111111111112, 'f1': 0.6601941747572816, 'precision': 0.5862068965517241, 'recall': 0.7555555555555555}
Standalone Text(test): {'acc': 0.6666666666666666, 'f1': 0.6857142857142857, 'precision': 0.6486486486486487, 'recall': 0.7272727272727273}
Ensemble best alpha: 0.0
Ensemble (val): {'acc': 0.7666666666666667, 'f1': 0.8108108108108109, 'precision': 0.6818181818181818, 'recall': 1.0}
Ensemble(test): {'acc': 0.5606060606060606, 'f1': 0.40816326530612246, 'precision': 0.625, 'recall': 0.30303030303030304}

DATASET: Trust
Total: 1887
Train/Val/Test: 1509 189 189
Train label counts: {1: 756, 0: 753}
Undirected edges: 210636
Class weights: 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6856,0.674768,0.619048,0.632653,0.613861,0.652632
2,0.5742,0.706404,0.62963,0.65,0.619048,0.684211
3,0.4008,0.835574,0.624339,0.620321,0.630435,0.610526
4,0.2525,0.897122,0.608466,0.626263,0.601942,0.652632



---------------- RESULTS ----------------
GNN best val acc: 0.815
Standalone GNN  (val): {'acc': 0.7883597883597884, 'f1': 0.8198198198198198, 'precision': 0.7165354330708661, 'recall': 0.9578947368421052}
Standalone GNN (test): {'acc': 0.6031746031746031, 'f1': 0.6031746031746031, 'precision': 0.59375, 'recall': 0.6129032258064516}
Standalone Text (val): {'acc': 0.6084656084656085, 'f1': 0.6262626262626263, 'precision': 0.6019417475728155, 'recall': 0.6526315789473685}
Standalone Text(test): {'acc': 0.6243386243386243, 'f1': 0.6467661691542289, 'precision': 0.6018518518518519, 'recall': 0.6989247311827957}
Ensemble best alpha: 0.0
Ensemble (val): {'acc': 0.7883597883597884, 'f1': 0.8198198198198198, 'precision': 0.7165354330708661, 'recall': 0.9578947368421052}
Ensemble(test): {'acc': 0.6031746031746031, 'f1': 0.6031746031746031, 'precision': 0.59375, 'recall': 0.6129032258064516}

DATASET: Anticip

[ERROR] Dataset Anticip failed: FileNotFoundError(2, 'No such file or directory')




Unnamed: 0,dataset,n_train,n_val,n_test,gnn_test_acc,text_test_acc,ens_test_acc,gnn_test_f1,text_test_f1,ens_test_f1,ens_alpha,K_SEM,THR_SEM,K_TFIDF,THR_TFIDF,MAX_SIG_NEI
0,Joy,4834,604,605,0.745455,0.8,0.78843,0.693227,0.795953,0.758491,0.2,10,0.6,25,0.05,20
1,Surprise,614,102,110,0.5,0.754545,0.745455,0.153846,0.732673,0.714286,0.2,10,0.6,25,0.05,20
2,Anger,669,84,84,0.595238,0.690476,0.690476,0.15,0.657895,0.648649,0.1,10,0.6,25,0.05,20
3,Sadness,2884,361,361,0.634349,0.686981,0.634349,0.611765,0.700265,0.611765,0.0,10,0.6,25,0.05,20
4,Fear,4310,539,539,0.617811,0.706865,0.617811,0.648464,0.728522,0.648464,0.0,10,0.6,25,0.05,20
5,Trust,1509,189,189,0.603175,0.624339,0.603175,0.603175,0.646766,0.603175,0.0,10,0.6,25,0.05,20
6,Disgust,735,90,66,0.560606,0.666667,0.560606,0.408163,0.685714,0.408163,0.0,10,0.6,25,0.05,20



Saved: hybrid_all8_results_multigraph.csv
