In [1]:
!pip install -q transformers librosa miditok symusic scikit-learn tqdm accelerate

import os
from pathlib import Path
import re
import warnings
import pickle
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import librosa

# Fix compatibility
if not hasattr(np.random, 'integers'):
    np.random.integers = np.random.randint

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üîß Device: {device}")

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m159.0/159.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.5/2.5 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m363.4/363.4 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m13.8/13.8 MB[0m [31m94.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m24.6/24.6 MB[0m [31m77.2 MB/s[0m eta [36m0:00:00[0m


In [2]:
BASE = Path("/kaggle/input/multimodal-mirex-emotion-dataset")
DATASET_DIR = BASE / "dataset"
AUDIO_DIR = DATASET_DIR / "Audio"
LYRICS_DIR = DATASET_DIR / "Lyrics"
MIDI_DIR = DATASET_DIR / "MIDIs"
CSV_PATH = DATASET_DIR / "dataset info.csv"
CAT_PATH = DATASET_DIR / "categories.txt"
CLUST_PATH = DATASET_DIR / "clusters.txt"
WORK_DIR = Path("/kaggle/working")
WORK_DIR.mkdir(exist_ok=True)

HYPERPARAMS = dict(
    # Data
    num_classes=5,
    n_splits=5,
    random_state=42,
    
    # Audio (MERT)
    audio_sr=24000,             # MERT expects 24kHz
    audio_duration=30,
    audio_segments=3,
    audio_embed_dim=1024,       # MERT output dim
    
    # Text (MusicBERT)
    text_max_length=256,
    text_embed_dim=768,         # MusicBERT output dim
    
    # MIDI
    midi_max_len=2048,
    midi_embed_dim=512,
    
    # Fusion
    fusion_dim=512,
    fusion_layers=4,
    fusion_heads=8,
    fusion_dropout=0.2,
    classifier_dropout=0.4,
    
    # Training
    batch_size=24,              # MERT is large ‚Üí reduce batch size
    epochs=30,
    lr=1e-4,
    weight_decay=1e-3,
    label_smoothing=0.1,
    patience=10,
    
    # Augmentation
    modality_dropout_lyrics=0.10,
    modality_dropout_midi=0.30,
)


In [3]:
with open(CAT_PATH) as f:
    categories = [line.strip() for line in f.readlines()]
with open(CLUST_PATH) as f:
    clusters = [line.strip() for line in f.readlines()]

track_ids = sorted([f.split(".")[0] for f in os.listdir(AUDIO_DIR)])
df_labels = pd.DataFrame({
    "track_id": track_ids,
    "category": categories[:len(track_ids)],
    "cluster": clusters[:len(track_ids)]
})
df_labels["cluster_id"] = df_labels["cluster"].apply(lambda x: int(x.split(" ")[1]))
emotion_ids = df_labels["cluster_id"].values


In [4]:
from transformers import Wav2Vec2FeatureExtractor, AutoModel

print("Loading MERT v1-330M...")
audio_processor = Wav2Vec2FeatureExtractor.from_pretrained("m-a-p/MERT-v1-330M", trust_remote_code=True)
audio_model = AutoModel.from_pretrained("m-a-p/MERT-v1-330M", trust_remote_code=True).to(device)
audio_model.eval()
for p in audio_model.parameters():
    p.requires_grad = False

def encode_audio_mert(path, sr=HYPERPARAMS['audio_sr'], n_segments=HYPERPARAMS['audio_segments']):
    try:
        y, _ = librosa.load(path, sr=sr, mono=True, duration=HYPERPARAMS['audio_duration'])
        segment_samples = 10 * sr
        embeddings = []
        for i in range(n_segments):
            start = i * segment_samples
            end = start + segment_samples
            if end <= len(y):
                segment = y[start:end]
            else:
                segment = y[start:]
                segment = np.pad(segment, (0, segment_samples - len(segment)))
            inputs = audio_processor(segment, sampling_rate=sr, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = audio_model(**inputs, output_hidden_states=True)
                emb = outputs.hidden_states[-1].mean(dim=1).cpu().numpy().squeeze()
            embeddings.append(emb)
        return np.mean(embeddings, axis=0).astype(np.float32)
    except Exception as e:
        return np.zeros(HYPERPARAMS['audio_embed_dim'], dtype=np.float32)

AUDIO_NPY = WORK_DIR / "X_audio_mert.npy"
if not AUDIO_NPY.exists():
    X_audio = np.vstack([encode_audio_mert(str(AUDIO_DIR / f"{tid}.mp3")) for tid in tqdm(track_ids, desc="MERT Audio")])
    np.save(AUDIO_NPY, X_audio)
else:
    X_audio = np.load(AUDIO_NPY)


Loading MERT v1-330M...


preprocessor_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration_MERT.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/m-a-p/MERT-v1-330M:
- configuration_MERT.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_MERT.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/m-a-p/MERT-v1-330M:
- modeling_MERT.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
2025-12-12 06:51:36.795636: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765522296.982455      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765522297.030539      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'



pytorch_model.bin:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

MERT Audio: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 903/903 [12:49<00:00,  1.17it/s]


In [5]:
# =============================================================================
# 4. TEXT EMBEDDING ‚Äî Emotion-Specific DistilBERT (PUBLIC & WORKING)
# =============================================================================
from transformers import AutoTokenizer, AutoModel

print("Loading Emotion-Specific DistilBERT...")
text_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
text_model = AutoModel.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion").to(device)
text_model.eval()
for p in text_model.parameters():
    p.requires_grad = False

def clean_text(t):
    t = t.lower()
    t = re.sub(r"[^a-z0-9\s]", " ", t)
    return re.sub(r"\s+", " ", t).strip()

TEXT_NPY = WORK_DIR / "X_text_emotionbert.npy"
if not TEXT_NPY.exists():
    X_text = []
    for tid in tqdm(track_ids, desc="EmotionBERT Text"):
        path = LYRICS_DIR / f"{tid}.txt"
        text = clean_text(open(path, encoding="utf-8", errors="ignore").read()) if path.exists() else "[NO_LYRICS]"
        inputs = text_tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=HYPERPARAMS['text_max_length']).to(device)
        with torch.no_grad():
            emb = text_model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy().squeeze()
        X_text.append(emb)
    X_text = np.vstack(X_text).astype(np.float32)
    np.save(TEXT_NPY, X_text)
else:
    X_text = np.load(TEXT_NPY)


Loading Emotion-Specific DistilBERT...


tokenizer_config.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/768 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

EmotionBERT Text: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 903/903 [00:13<00:00, 66.14it/s]


In [6]:
from miditok import REMI, TokenizerConfig
from symusic import Score

tok_cfg = TokenizerConfig(
    pitch_range=(21, 109),
    beat_res={(0, 4): 8, (4, 12): 4},
    num_velocities=32,
    use_chords=True,
    use_tempos=True,
    use_time_signatures=True,
    use_programs=True
)
remi_tok = REMI(tok_cfg)

class MidiEncoder(nn.Module):
    def __init__(self, vocab_size, d=HYPERPARAMS['midi_embed_dim']):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, d, padding_idx=0)
        self.pos_enc = nn.Parameter(torch.randn(1, HYPERPARAMS['midi_max_len'], d) * 0.02)
        layer = nn.TransformerEncoderLayer(d_model=d, nhead=8, dim_feedforward=d*4, batch_first=True, dropout=0.2, activation='gelu')
        self.enc = nn.TransformerEncoder(layer, num_layers=6)  # ‚Üë layer count
        self.proj = nn.Sequential(nn.Linear(d, d), nn.LayerNorm(d), nn.GELU(), nn.Dropout(0.2))
    def forward(self, ids):
        mask = ids == 0
        x = self.emb(ids) + self.pos_enc[:, :ids.size(1), :]
        x = self.enc(x, src_key_padding_mask=mask)
        mask_inv = (~mask).unsqueeze(-1).float()
        pooled = (x * mask_inv).sum(dim=1) / mask_inv.sum(dim=1).clamp(min=1)
        return self.proj(pooled)

# Build MIDI sequence dict
midi_seq_dict = {}
for tid in track_ids:
    path = MIDI_DIR / f"{tid}.mid"
    if path.exists():
        try:
            score = Score(str(path))
            midi_seq_dict[tid] = remi_tok(score).ids
        except:
            midi_seq_dict[tid] = []
    else:
        midi_seq_dict[tid] = []

midi_vocab_size = len(remi_tok.vocab)
midi_encoder = MidiEncoder(midi_vocab_size).to(device)
midi_encoder.eval()

MIDI_EMB_NPY = WORK_DIR / "X_midi_remi.npy"
if not MIDI_EMB_NPY.exists():
    X_midi = []
    with torch.no_grad():
        for tid in tqdm(track_ids, desc="MIDI Embedding"):
            seq = midi_seq_dict[tid]
            if len(seq) == 0:
                X_midi.append(np.zeros(HYPERPARAMS['midi_embed_dim'], dtype=np.float32))
            else:
                if len(seq) > HYPERPARAMS['midi_max_len']:
                    seq = seq[:HYPERPARAMS['midi_max_len']]
                else:
                    seq = seq + [0] * (HYPERPARAMS['midi_max_len'] - len(seq))
                ids = torch.tensor([seq], dtype=torch.long).to(device)
                emb = midi_encoder(ids).cpu().numpy().squeeze()
                X_midi.append(emb)
    X_midi = np.vstack(X_midi).astype(np.float32)
    np.save(MIDI_EMB_NPY, X_midi)
else:
    X_midi = np.load(MIDI_EMB_NPY)


MIDI Embedding: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 903/903 [00:09<00:00, 91.96it/s] 


In [7]:
mask_mod = np.ones((len(track_ids), 3), dtype=np.uint8)
for i, tid in enumerate(track_ids):
    mask_mod[i, 0] = 0  # audio always present
    if (LYRICS_DIR / f"{tid}.txt").exists():
        mask_mod[i, 1] = 0
    if len(midi_seq_dict[tid]) > 0:
        mask_mod[i, 2] = 0

In [8]:
class MDataset(Dataset):
    def __init__(self, A, T, M, mask, y, p_drop_lyrics=0.0, p_drop_midi=0.0):
        self.A = torch.tensor(A).float()
        self.T = torch.tensor(T).float()
        self.M = torch.tensor(M).float()
        self.mask = torch.tensor(mask, dtype=torch.uint8)
        self.y = torch.tensor(y - 1).long()
        self.p_drop_lyrics = p_drop_lyrics
        self.p_drop_midi = p_drop_midi
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        A, T, M, mask, y = self.A[idx], self.T[idx], self.M[idx], self.mask[idx].clone(), self.y[idx]
        if self.p_drop_lyrics > 0 and mask[1] == 0 and torch.rand(1).item() < self.p_drop_lyrics:
            mask[1] = 1
        if self.p_drop_midi > 0 and mask[2] == 0 and torch.rand(1).item() < self.p_drop_midi:
            mask[2] = 1
        return A, T, M, mask, y

class MultimodalTransformerFusion(nn.Module):
    def __init__(self, d=HYPERPARAMS['fusion_dim'], num_classes=HYPERPARAMS['num_classes']):
        super().__init__()
        self.proj_audio = nn.Sequential(nn.Linear(HYPERPARAMS['audio_embed_dim'], d), nn.LayerNorm(d))
        self.proj_text  = nn.Sequential(nn.Linear(HYPERPARAMS['text_embed_dim'], d), nn.LayerNorm(d))
        self.proj_midi  = nn.Sequential(nn.Linear(HYPERPARAMS['midi_embed_dim'], d), nn.LayerNorm(d))
        self.cls_token = nn.Parameter(torch.randn(1, 1, d))
        
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d,
            nhead=HYPERPARAMS['fusion_heads'],
            dim_feedforward=d * 4,
            dropout=HYPERPARAMS['fusion_dropout'],
            activation='gelu',
            batch_first=True,
            norm_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=HYPERPARAMS['fusion_layers'])
        self.classifier = nn.Sequential(
            nn.Linear(d, d),
            nn.LayerNorm(d),
            nn.GELU(),
            nn.Dropout(HYPERPARAMS['classifier_dropout']),
            nn.Linear(d, num_classes)
        )

    def forward(self, A, T, M, mask):
        A = self.proj_audio(A)
        T = self.proj_text(T)
        M = self.proj_midi(M)
        x = torch.stack([A, T, M], dim=1)
        B = x.size(0)
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat([cls_tokens, x], dim=1)
        modality_mask = mask == 1
        cls_mask = torch.zeros(B, 1, dtype=torch.bool, device=mask.device)
        key_padding_mask = torch.cat([cls_mask, modality_mask], dim=1)
        x = self.transformer(x, src_key_padding_mask=key_padding_mask)
        return self.classifier(x[:, 0])

In [9]:
from collections import Counter

def train_kfold(X_audio, X_text, X_midi, mask_mod, emotion_ids):
    skf = StratifiedKFold(n_splits=HYPERPARAMS['n_splits'], shuffle=True, random_state=HYPERPARAMS['random_state'])
    fold_results = []
    
    for fold, (train_idx, val_idx) in enumerate(skf.split(X_audio, emotion_ids)):
        print(f"\n{'='*60}\nFOLD {fold+1}\n{'='*60}")
        
        train_loader = DataLoader(
            MDataset(X_audio[train_idx], X_text[train_idx], X_midi[train_idx], mask_mod[train_idx], emotion_ids[train_idx],
                     p_drop_lyrics=HYPERPARAMS['modality_dropout_lyrics'],
                     p_drop_midi=HYPERPARAMS['modality_dropout_midi']),
            batch_size=HYPERPARAMS['batch_size'], shuffle=True, num_workers=2, pin_memory=True
        )
        val_loader = DataLoader(
            MDataset(X_audio[val_idx], X_text[val_idx], X_midi[val_idx], mask_mod[val_idx], emotion_ids[val_idx]),
            batch_size=HYPERPARAMS['batch_size'], shuffle=False, num_workers=2, pin_memory=True
        )
        
        model = MultimodalTransformerFusion().to(device)
        counts = Counter(emotion_ids[train_idx])
        weights = torch.tensor([len(train_idx) / (HYPERPARAMS['num_classes'] * counts[i]) for i in range(1, HYPERPARAMS['num_classes']+1)], dtype=torch.float32).to(device)
        criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=HYPERPARAMS['label_smoothing'])
        optimizer = torch.optim.AdamW(model.parameters(), lr=HYPERPARAMS['lr'], weight_decay=HYPERPARAMS['weight_decay'])
        scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=HYPERPARAMS['lr'], epochs=HYPERPARAMS['epochs'], steps_per_epoch=len(train_loader))
        
        best_f1 = 0
        wait = 0
        for epoch in range(HYPERPARAMS['epochs']):
            model.train()
            for A, T, M, mask, y in train_loader:
                A, T, M, mask, y = A.to(device), T.to(device), M.to(device), mask.to(device), y.to(device)
                optimizer.zero_grad()
                logits = model(A, T, M, mask)
                loss = criterion(logits, y)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
            
            # Validation
            model.eval()
            preds, truths = [], []
            with torch.no_grad():
                for A, T, M, mask, y in val_loader:
                    A, T, M, mask, y = A.to(device), T.to(device), M.to(device), mask.to(device), y.to(device)
                    logits = model(A, T, M, mask)
                    preds.extend(logits.argmax(1).cpu().numpy())
                    truths.extend(y.cpu().numpy())
            val_f1 = f1_score(truths, preds, average="macro")
            
            if val_f1 > best_f1:
                best_f1 = val_f1
                torch.save(model.state_dict(), WORK_DIR / f"best_fold{fold}.pt")
                wait = 0
            else:
                wait += 1
                if wait >= HYPERPARAMS['patience']:
                    break
        
        # Final eval
        model.load_state_dict(torch.load(WORK_DIR / f"best_fold{fold}.pt"))
        model.eval()
        preds, truths = [], []
        with torch.no_grad():
            for A, T, M, mask, y in val_loader:
                A, T, M, mask, y = A.to(device), T.to(device), M.to(device), mask.to(device), y.to(device)
                logits = model(A, T, M, mask)
                preds.extend(logits.argmax(1).cpu().numpy())
                truths.extend(y.cpu().numpy())
        acc = accuracy_score(truths, preds)
        f1 = f1_score(truths, preds, average="macro")
        fold_results.append({"fold": fold+1, "accuracy": acc, "f1": f1})
        print(f"Fold {fold+1} ‚Üí Acc: {acc:.4f}, F1: {f1:.4f}")
    
    return fold_results

In [10]:
fold_results = train_kfold(X_audio, X_text, X_midi, mask_mod, emotion_ids)

avg_acc = np.mean([r['accuracy'] for r in fold_results])
avg_f1 = np.mean([r['f1'] for r in fold_results])
print(f" FINAL (5-Fold CV): Acc={avg_acc:.4f}, F1={avg_f1:.4f}")



FOLD 1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Fold 1 ‚Üí Acc: 0.5193, F1: 0.5092

FOLD 2


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Fold 2 ‚Üí Acc: 0.5028, F1: 0.4822

FOLD 3


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Fold 3 ‚Üí Acc: 0.4972, F1: 0.4795

FOLD 4


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Fold 4 ‚Üí Acc: 0.4667, F1: 0.4554

FOLD 5


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Fold 5 ‚Üí Acc: 0.5056, F1: 0.4890
 FINAL (5-Fold CV): Acc=0.4983, F1=0.4831


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
