In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from scipy.sparse import hstack as sp_hstack
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
import json

In [2]:
# Input CSVs live one directory above the notebook. Despite the "_dir" suffix
# these names point at files, not directories.
_data_root = Path("..")
train_dir = _data_root / "dataset_train.csv"
test_dir = _data_root / "dataset_test.csv"

In [3]:
# Load the labelled training data, report how many rows it has, and preview
# the first rows (the bare last expression is what the cell displays).
df = pd.read_csv(train_dir)
print(f"Dataset size: {len(df)}")
df.head()

Dataset size: 8475


Unnamed: 0,movie_name,genre,description
0,Silent Hill,"Horror, Mystery","Rose, a desperate mother takes her adopted dau..."
1,Breaking the Waves,"Drama, Romance","In a small and conservative Scottish village, ..."
2,Wind Chill,"Drama, Horror, Thriller",Two college students share a ride home for the...
3,Godmothered,"Family, Fantasy, Comedy",A young and unskilled fairy godmother that ven...
4,Donkey Skin,"Fantasy, Comedy, Music, Romance",A fairy godmother helps a princess disguise he...


In [4]:
# Model input: title and synopsis joined by a literal " [SEP] " marker.
# Missing titles/descriptions are replaced by empty strings so the
# concatenation never produces NaN.
df["text"] = df["movie_name"].fillna("") + " [SEP] " + df["description"].fillna("")

# Parse the comma-separated genre string into a list of stripped, non-empty
# labels per row. A missing genre must map to an empty label list: str(NaN)
# is "nan", which would otherwise be binarized as a spurious "nan" class.
y_list = df["genre"].apply(
    lambda s: [] if pd.isna(s) else [g.strip() for g in str(s).split(",") if g.strip()]
)

In [5]:
# Turn the per-row label lists into a binary indicator matrix (one column per
# genre), then hold out 10% of the rows for validation with a fixed seed.
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(y_list)

_split = train_test_split(
    df["text"],
    Y,
    test_size=0.1,
    random_state=42,
)
X_tr, X_va, y_tr, y_va = _split

print(f"Training samples: {len(X_tr)}, Validation samples: {len(X_va)}")
print(f"Number of labels: {len(mlb.classes_)}")

Training samples: 7627, Validation samples: 848
Number of labels: 18


In [6]:
# Settings shared by both vectorizers: drop terms seen in fewer than 2 or in
# more than 85% of documents, cap the vocabulary, use 1 + log(tf), and fold
# accented characters.
_tfidf_common = dict(
    min_df=2,
    max_features=500_000,
    sublinear_tf=True,
    max_df=0.85,
    strip_accents='unicode',
)

# Word-level view: uni- to tri-grams with English stop words removed.
tfidf_word = TfidfVectorizer(
    ngram_range=(1,3),
    stop_words="english",
    lowercase=True,
    **_tfidf_common,
)

# Character-level view: 3- to 6-grams built inside word boundaries.
tfidf_char = TfidfVectorizer(
    analyzer="char_wb",
    ngram_range=(3,6),
    **_tfidf_common,
)

# Vocabulary and IDF weights are learned on the training split only; the
# validation split is merely transformed.
Xw_tr = tfidf_word.fit_transform(X_tr)
Xc_tr = tfidf_char.fit_transform(X_tr)
Xw_va = tfidf_word.transform(X_va)
Xc_va = tfidf_char.transform(X_va)
print(f"Word TF-IDF shape: {Xw_tr.shape}, Char TF-IDF shape: {Xc_tr.shape}")

Word TF-IDF shape: (7627, 29337), Char TF-IDF shape: (7627, 128989)


In [7]:
# Concatenate the word and character TF-IDF blocks column-wise (word columns
# first) for each split, keeping the result in CSR format.
XTR_tfidf, XVA_tfidf = (
    sp_hstack(list(blocks), format="csr")
    for blocks in ((Xw_tr, Xc_tr), (Xw_va, Xc_va))
)
print(f"Combined TF-IDF features shape: {XTR_tfidf.shape}")

Combined TF-IDF features shape: (7627, 158326)


In [8]:
from sentence_transformers import SentenceTransformer

# Dense sentence embeddings of the same "title [SEP] description" strings used
# for TF-IDF, computed with a pretrained MiniLM encoder.
print("Loading sentence transformer model...")
st_model = SentenceTransformer('all-MiniLM-L6-v2')

_encode_opts = dict(show_progress_bar=True, batch_size=32)

print("Generating embeddings for training set...")
emb_tr = st_model.encode(X_tr.tolist(), **_encode_opts)

print("Generating embeddings for validation set...")
emb_va = st_model.encode(X_va.tolist(), **_encode_opts)

print(f"Embedding shape: {emb_tr.shape}")

  from .autonotebook import tqdm as notebook_tqdm



Loading sentence transformer model...
Loading sentence transformer model...
Generating embeddings for training set...
Generating embeddings for training set...


Batches: 100%|██████████| 239/239 [00:57<00:00,  4.18it/s]



Generating embeddings for validation set...


Batches: 100%|██████████| 27/27 [00:06<00:00,  4.50it/s]

Embedding shape: (7627, 384)





In [9]:
from scipy.sparse import csr_matrix

# Append the dense sentence embeddings (converted to CSR) to the right of the
# sparse TF-IDF block so a single linear model can consume both views.
_emb_tr_sparse = csr_matrix(emb_tr)
_emb_va_sparse = csr_matrix(emb_va)
XTR_combined = sp_hstack([XTR_tfidf, _emb_tr_sparse], format="csr")
XVA_combined = sp_hstack([XVA_tfidf, _emb_va_sparse], format="csr")
print(f"Combined features (TF-IDF + Embeddings) shape: {XTR_combined.shape}")

Combined features (TF-IDF + Embeddings) shape: (7627, 158710)


In [10]:
# One binary logistic regression per genre, trained in parallel on the
# TF-IDF + embedding features. class_weight='balanced' reweights each binary
# problem by inverse class frequency.
_logreg_base = LogisticRegression(
    C=8.0,
    solver="saga",
    max_iter=4000,
    class_weight='balanced',
    random_state=42,
)
clf_logreg = OneVsRestClassifier(_logreg_base, n_jobs=-1)

print("Training LogisticRegression with combined features...")
clf_logreg.fit(XTR_combined, y_tr)
print("Training complete!")

Training LogisticRegression with combined features...
Training complete!
Training complete!


In [11]:
# Per-label decision threshold search for the logistic-regression model.
# For each label, every candidate cut is scored by F1 on the validation split
# and the lowest cut reaching the best F1 is kept; if no cut achieves a
# positive F1 the threshold stays at 0.0.
# Note: thresholds are tuned and then scored on the same validation split, so
# the printed F1 values are optimistic.
logits_logreg = clf_logreg.decision_function(XVA_combined)
_n_labels = logits_logreg.shape[1]
ths_logreg = np.zeros(_n_labels)
_quantile_grid = np.linspace(0.01, 0.99, 50)

print("Calibrating thresholds for LogReg...")
for k in range(_n_labels):
    s = logits_logreg[:, k]
    # Candidates: score quantiles plus a few fixed cuts, sorted and de-duplicated.
    candidates = np.unique(np.concatenate([
        np.quantile(s, _quantile_grid),
        [s.mean(), np.median(s), 0.0, -0.5, 0.5],
    ]))
    f1_by_cut = [
        f1_score(y_va[:, k], (s >= t).astype(int), zero_division=0)
        for t in candidates
    ]
    # argmax returns the first (i.e. lowest) cut among ties.
    top = int(np.argmax(f1_by_cut))
    ths_logreg[k] = candidates[top] if f1_by_cut[top] > 0.0 else 0.0

pred_logreg = (logits_logreg >= ths_logreg).astype(int)
print(f"LogReg - micro-F1: {f1_score(y_va, pred_logreg, average='micro'):.4f}, macro-F1: {f1_score(y_va, pred_logreg, average='macro'):.4f}")

Calibrating thresholds for LogReg...
LogReg - micro-F1: 0.5966, macro-F1: 0.6031
LogReg - micro-F1: 0.5966, macro-F1: 0.6031


In [12]:
from xgboost import XGBClassifier
from sklearn.multioutput import MultiOutputClassifier

# One gradient-boosted tree classifier per genre, trained on the dense
# sentence embeddings only.
_xgb_base = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.1,
    random_state=42,
    n_jobs=-1,
)
clf_xgb = MultiOutputClassifier(_xgb_base)

print("Training XGBoost with embeddings...")
clf_xgb.fit(emb_tr, y_tr)
print("Training complete!")

Training XGBoost with embeddings...
Training complete!
Training complete!


In [13]:
# Per-label decision threshold search for the XGBoost model.
# predict_proba yields one 2-D array per label; column 1 is taken as the
# positive-class probability. Despite its name, logits_xgb therefore holds
# probabilities, not logits.
pred_proba_xgb = clf_xgb.predict_proba(emb_va)
logits_xgb = np.column_stack([p[:, 1] for p in pred_proba_xgb])

# Fallback threshold is 0.5, not 0.0: these scores are probabilities, so a
# threshold of 0.0 would mark EVERY sample positive for a label whose search
# never reaches a positive F1 (e.g. a label with no validation positives).
ths_xgb = np.full(logits_xgb.shape[1], 0.5)

print("Calibrating thresholds for XGBoost...")
for k in range(logits_xgb.shape[1]):
    s = logits_xgb[:, k]
    # Candidates: score quantiles plus a few fixed cuts, sorted and de-duplicated.
    candidates = np.unique(np.concatenate([
        np.quantile(s, np.linspace(0.01, 0.99, 50)),
        [s.mean(), np.median(s), 0.0, 0.3, 0.5, 0.7],
    ]))
    best_f1 = 0.0
    for t in candidates:
        preds_k = (s >= t).astype(int)
        f1 = f1_score(y_va[:, k], preds_k, zero_division=0)
        # Strict '>' keeps the lowest threshold among ties.
        if f1 > best_f1:
            best_f1 = f1
            ths_xgb[k] = t

# Note: thresholds are tuned and scored on the same validation split, so the
# printed F1 values are optimistic.
pred_xgb = (logits_xgb >= ths_xgb).astype(int)
print(f"XGBoost - micro-F1: {f1_score(y_va, pred_xgb, average='micro'):.4f}, macro-F1: {f1_score(y_va, pred_xgb, average='macro'):.4f}")

Calibrating thresholds for XGBoost...
XGBoost - micro-F1: 0.6311, macro-F1: 0.5757
XGBoost - micro-F1: 0.6311, macro-F1: 0.5757


In [14]:
from sklearn.svm import LinearSVC

# One linear SVM per genre, trained in parallel on the sparse TF-IDF features
# only (no embeddings).
_svc_base = LinearSVC(
    C=2.0,
    max_iter=4000,
    class_weight='balanced',
    dual='auto',
    random_state=42,
)
clf_svc = OneVsRestClassifier(_svc_base, n_jobs=-1)

print("Training LinearSVC with TF-IDF features...")
clf_svc.fit(XTR_tfidf, y_tr)
print("Training complete!")

Training LinearSVC with TF-IDF features...
Training complete!
Training complete!


In [15]:
# Per-label decision threshold search for the LinearSVC model, run on its
# decision_function margins. For each label the lowest candidate cut reaching
# the best validation F1 is kept; the threshold stays at 0.0 when no cut
# achieves a positive F1.
logits_svc = clf_svc.decision_function(XVA_tfidf)
_n_labels = logits_svc.shape[1]
ths_svc = np.zeros(_n_labels)
_quantile_grid = np.linspace(0.01, 0.99, 50)

print("Calibrating thresholds for LinearSVC...")
for k in range(_n_labels):
    s = logits_svc[:, k]
    candidates = np.unique(np.concatenate([
        np.quantile(s, _quantile_grid),
        [s.mean(), np.median(s), 0.0, -0.5, 0.5],
    ]))
    f1_by_cut = [
        f1_score(y_va[:, k], (s >= t).astype(int), zero_division=0)
        for t in candidates
    ]
    # argmax returns the first (i.e. lowest) cut among ties.
    top = int(np.argmax(f1_by_cut))
    ths_svc[k] = candidates[top] if f1_by_cut[top] > 0.0 else 0.0

pred_svc = (logits_svc >= ths_svc).astype(int)
print(f"LinearSVC - micro-F1: {f1_score(y_va, pred_svc, average='micro'):.4f}, macro-F1: {f1_score(y_va, pred_svc, average='macro'):.4f}")

Calibrating thresholds for LinearSVC...
LinearSVC - micro-F1: 0.6256, macro-F1: 0.5597
LinearSVC - micro-F1: 0.6256, macro-F1: 0.5597


In [16]:
# Weighted blend of the three classical models, followed by a per-label
# threshold search on the blended score.
# NOTE(review): the blend adds scores on different scales. logits_logreg and
# logits_svc come from decision_function (unbounded margins) while logits_xgb
# holds predict_proba probabilities in [0, 1], so the stated weights do not
# reflect each model's real influence. Any rescaling must be applied
# identically wherever the same blend is recomputed for test data.
_blend_weights = (0.5, 0.35, 0.15)
_w_lr, _w_xgb, _w_svc = _blend_weights
ensemble_logits = _w_lr * logits_logreg + _w_xgb * logits_xgb + _w_svc * logits_svc
_n_labels = ensemble_logits.shape[1]
ths_ensemble = np.zeros(_n_labels)

print("Calibrating thresholds for ensemble...")
for k in range(_n_labels):
    s = ensemble_logits[:, k]
    # Candidates: fine quantile grid, fixed cuts, and an even grid over the
    # observed score range; sorted and de-duplicated.
    candidates = np.unique(np.concatenate([
        np.quantile(s, np.linspace(0.005, 0.995, 60)),
        [s.mean(), np.median(s), 0.0, -1.0, -0.5, 0.5, 1.0],
        np.linspace(s.min(), s.max(), 20)
    ]))
    f1_by_cut = [
        f1_score(y_va[:, k], (s >= t).astype(int), zero_division=0)
        for t in candidates
    ]
    # Lowest cut among ties; stay at 0.0 if nothing scores above zero.
    top = int(np.argmax(f1_by_cut))
    ths_ensemble[k] = candidates[top] if f1_by_cut[top] > 0.0 else 0.0

pred_ensemble = (ensemble_logits >= ths_ensemble).astype(int)
print(f"Ensemble - micro-F1: {f1_score(y_va, pred_ensemble, average='micro'):.4f}, macro-F1: {f1_score(y_va, pred_ensemble, average='macro'):.4f}")

Calibrating thresholds for ensemble...
Ensemble - micro-F1: 0.6012, macro-F1: 0.6073
Ensemble - micro-F1: 0.6012, macro-F1: 0.6073


In [17]:
# Side-by-side validation F1 for the three base models and their blend.
# Each title is left-aligned in a 32-character column so the metrics line up.
_rule = "="*60
_rows = [
    ("1. LogReg (TF-IDF+Embeddings):", pred_logreg),
    ("2. XGBoost (Embeddings):", pred_xgb),
    ("3. LinearSVC (TF-IDF):", pred_svc),
    ("4. ENSEMBLE (All):", pred_ensemble),
]

print(_rule)
print("PERFORMANCE COMPARISON")
print(_rule)
for _title, _preds in _rows:
    _micro = f1_score(y_va, _preds, average='micro')
    _macro = f1_score(y_va, _preds, average='macro')
    print(f"{_title:<32}micro-F1: {_micro:.4f}, macro-F1: {_macro:.4f}")
print(_rule)

PERFORMANCE COMPARISON
1. LogReg (TF-IDF+Embeddings):  micro-F1: 0.5966, macro-F1: 0.6031
2. XGBoost (Embeddings):        micro-F1: 0.6311, macro-F1: 0.5757
3. LinearSVC (TF-IDF):          micro-F1: 0.6256, macro-F1: 0.5597
4. ENSEMBLE (All):              micro-F1: 0.6012, macro-F1: 0.6073


In [18]:
# Persist everything needed to reproduce the classical-model predictions:
# fitted vectorizers, the sentence encoder, the three classifiers, the label
# order, and the tuned thresholds. All files go to the working directory.
# NOTE(review): the SentenceTransformer is pickled via joblib like the
# scikit-learn objects; confirm the consumer loads it the same way.
_pickled = {
    "tfidf_word.joblib": tfidf_word,
    "tfidf_char.joblib": tfidf_char,
    "sentence_transformer.joblib": st_model,
    "clf_logreg.joblib": clf_logreg,
    "clf_xgb.joblib": clf_xgb,
    "clf_svc.joblib": clf_svc,
}
for _filename, _artifact in _pickled.items():
    joblib.dump(_artifact, _filename)

# Label order matches the columns of every prediction/threshold array.
with open("labels.json", "w") as f:
    json.dump(mlb.classes_.tolist(), f)

_thresholds = {
    "thresholds_logreg.npy": ths_logreg,
    "thresholds_xgb.npy": ths_xgb,
    "thresholds_svc.npy": ths_svc,
    "thresholds_ensemble.npy": ths_ensemble,
}
for _filename, _values in _thresholds.items():
    np.save(_filename, _values)

print("All models and artifacts saved successfully!")

All models and artifacts saved successfully!


In [19]:
# Test-set inference with the three-model blend: build the same features as
# for training, score with each model, blend with the same weights used when
# the ensemble thresholds were tuned, and write the predicted genres to CSV.
df_test = pd.read_csv(test_dir)
_titles = df_test["movie_name"].fillna("")
_plots = df_test["description"].fillna("")
df_test["text"] = _titles + " [SEP] " + _plots

# Sparse TF-IDF features (already-fitted vectorizers; transform only).
Xw_test = tfidf_word.transform(df_test["text"])
Xc_test = tfidf_char.transform(df_test["text"])
X_test_tfidf = sp_hstack([Xw_test, Xc_test], format="csr")

# Dense embeddings, then the combined matrix expected by the LogReg model.
emb_test = st_model.encode(df_test["text"].tolist(), show_progress_bar=True, batch_size=32)
X_test_combined = sp_hstack([X_test_tfidf, csr_matrix(emb_test)], format="csr")

# Per-model scores; the XGBoost scores are positive-class probabilities.
logits_logreg_test = clf_logreg.decision_function(X_test_combined)
logits_svc_test = clf_svc.decision_function(X_test_tfidf)
pred_proba_xgb_test = clf_xgb.predict_proba(emb_test)
logits_xgb_test = np.column_stack([p[:, 1] for p in pred_proba_xgb_test])

ensemble_logits_test = 0.5 * logits_logreg_test + 0.35 * logits_xgb_test + 0.15 * logits_svc_test
pred_test = (ensemble_logits_test >= ths_ensemble).astype(int)

# Turn each indicator row back into a ", "-joined genre string; a row with no
# label above threshold becomes an empty string.
pred_labels = []
for row in pred_test:
    chosen = [mlb.classes_[j] for j, flag in enumerate(row) if flag == 1]
    pred_labels.append(", ".join(chosen))

result_df = pd.DataFrame({
    "movie_name": df_test["movie_name"],
    "genre": pred_labels,
    "description": df_test["description"]
})
result_df.to_csv("dataset_test_preds.csv", index=False)
print(f"Test predictions saved! Generated {len(result_df)} predictions.")

Batches: 100%|██████████| 30/30 [00:07<00:00,  4.11it/s]



Test predictions saved! Generated 942 predictions.


In [20]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch

class MovieGenreDataset(Dataset):
    """Map-style dataset pairing raw texts with multi-label target vectors.

    Args:
        texts: Sequence of texts. Objects exposing ``.iloc`` (e.g. a pandas
            Series) are indexed positionally through it; anything else is
            indexed with ``texts[idx]``.
        labels: Per-sample label vectors, indexed with ``labels[idx]``.
        tokenizer: Callable applied to one text at a time with
            ``truncation=True, padding='max_length', return_tensors='pt'``.
        max_length: Length every sample is padded/truncated to.

    Each item is a dict with 1-D ``input_ids`` and ``attention_mask`` tensors
    and a float ``labels`` tensor.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Positional lookup: a Series may carry a shuffled, non-contiguous
        # index, so plain [] indexing would be label-based there.
        if hasattr(self.texts, 'iloc'):
            raw = self.texts.iloc[idx]
        else:
            raw = self.texts[idx]

        encoded = self.tokenizer(
            str(raw),
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )

        # Flatten the tokenizer tensors to 1-D.
        sample = {
            field: encoded[field].flatten()
            for field in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return sample

# Load the pretrained DistilBERT checkpoint with a classification head sized
# to the number of genres and configured for multi-label targets.
print("Loading DistilBERT tokenizer and model...")
_checkpoint = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(_checkpoint)
model = DistilBertForSequenceClassification.from_pretrained(
    _checkpoint,
    num_labels=len(mlb.classes_),
    problem_type="multi_label_classification",
)

# Reuse the same train/validation split as the classical models.
train_dataset, val_dataset = (
    MovieGenreDataset(texts, targets, tokenizer)
    for texts, targets in ((X_tr, y_tr), (X_va, y_va))
)
print(f"Datasets created: {len(train_dataset)} training, {len(val_dataset)} validation")

Loading DistilBERT tokenizer and model...


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this mo

Datasets created: 7627 training, 848 validation


In [22]:
# Fine-tuning configuration, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir='./distilbert_results',
    logging_dir='./logs',
    logging_steps=100,
    # Optimisation schedule.
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
    # Batch sizes.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    # Evaluate and checkpoint once per epoch, then restore the checkpoint
    # with the best validation loss.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

print("Starting DistilBERT fine-tuning...")
trainer.train()
print("Fine-tuning complete!")

Starting DistilBERT fine-tuning...




Epoch,Training Loss,Validation Loss
1,0.2834,0.253061
2,0.2174,0.220534
3,0.1605,0.215089




Fine-tuning complete!


In [23]:
# Score the validation split with the fine-tuned DistilBERT model and tune a
# per-label decision threshold on the resulting probabilities.
#
# Inference runs in mini-batches with inputs moved to the model's device.
# A single forward pass over the whole split needs memory proportional to the
# split size, and tokenizer tensors left on the CPU fail with a device
# mismatch when training left the model on an accelerator.
model.eval()
_device = next(model.parameters()).device
_val_texts = X_va.tolist()
_eval_batch_size = 32

_prob_chunks = []
with torch.no_grad():
    for _start in range(0, len(_val_texts), _eval_batch_size):
        val_inputs = tokenizer(
            _val_texts[_start:_start + _eval_batch_size],
            truncation=True,
            padding=True,
            max_length=128,
            return_tensors='pt',
        )
        val_inputs = {name: tensor.to(_device) for name, tensor in val_inputs.items()}
        outputs = model(**val_inputs)
        _prob_chunks.append(torch.sigmoid(outputs.logits).cpu().numpy())

# Despite the name, these are sigmoid probabilities in [0, 1], not logits.
logits_distilbert = np.concatenate(_prob_chunks, axis=0)

# Fallback threshold is 0.5, not 0.0: with probability scores a threshold of
# 0.0 would mark every sample positive for a label whose search never reaches
# a positive F1 (e.g. a label with no validation positives).
ths_distilbert = np.full(logits_distilbert.shape[1], 0.5)

print("Calibrating thresholds for DistilBERT...")
for k in range(logits_distilbert.shape[1]):
    s = logits_distilbert[:, k]
    # Candidates: score quantiles plus a few fixed cuts, sorted and de-duplicated.
    candidates = np.unique(np.concatenate([
        np.quantile(s, np.linspace(0.01, 0.99, 50)),
        [s.mean(), np.median(s), 0.3, 0.5, 0.7],
    ]))
    best_f1 = 0.0
    for t in candidates:
        preds_k = (s >= t).astype(int)
        f1 = f1_score(y_va[:, k], preds_k, zero_division=0)
        # Strict '>' keeps the lowest threshold among ties.
        if f1 > best_f1:
            best_f1 = f1
            ths_distilbert[k] = t

# Note: thresholds are tuned and scored on the same validation split, so the
# printed F1 values are optimistic.
pred_distilbert = (logits_distilbert >= ths_distilbert).astype(int)
print(f"DistilBERT - micro-F1: {f1_score(y_va, pred_distilbert, average='micro'):.4f}, macro-F1: {f1_score(y_va, pred_distilbert, average='macro'):.4f}")

Calibrating thresholds for DistilBERT...
DistilBERT - micro-F1: 0.6991, macro-F1: 0.6486
DistilBERT - micro-F1: 0.6991, macro-F1: 0.6486


In [24]:
# Weighted blend of all four models, followed by a per-label threshold search
# on the blended score.
# NOTE(review): the blend adds scores on different scales. logits_distilbert
# (sigmoid outputs) and logits_xgb (predict_proba) lie in [0, 1], while
# logits_logreg and logits_svc are unbounded decision_function margins, so the
# stated weights do not reflect each model's real influence. Any rescaling
# must be applied identically wherever the same blend is recomputed for test
# data.
_w_bert, _w_lr, _w_xgb, _w_svc = 0.4, 0.25, 0.2, 0.15
ensemble_final_logits = _w_bert * logits_distilbert + _w_lr * logits_logreg + _w_xgb * logits_xgb + _w_svc * logits_svc
_n_labels = ensemble_final_logits.shape[1]
ths_ensemble_final = np.zeros(_n_labels)

print("Calibrating thresholds for FINAL ensemble (with DistilBERT)...")
for k in range(_n_labels):
    s = ensemble_final_logits[:, k]
    # Candidates: fine quantile grid, fixed cuts, and an even grid over the
    # observed score range; sorted and de-duplicated.
    candidates = np.unique(np.concatenate([
        np.quantile(s, np.linspace(0.005, 0.995, 70)),
        [s.mean(), np.median(s), 0.0, 0.3, 0.5, 0.7],
        np.linspace(s.min(), s.max(), 25)
    ]))
    f1_by_cut = [
        f1_score(y_va[:, k], (s >= t).astype(int), zero_division=0)
        for t in candidates
    ]
    # Lowest cut among ties; stay at 0.0 if nothing scores above zero.
    top = int(np.argmax(f1_by_cut))
    ths_ensemble_final[k] = candidates[top] if f1_by_cut[top] > 0.0 else 0.0

pred_ensemble_final = (ensemble_final_logits >= ths_ensemble_final).astype(int)
print(f"FINAL ENSEMBLE - micro-F1: {f1_score(y_va, pred_ensemble_final, average='micro'):.4f}, macro-F1: {f1_score(y_va, pred_ensemble_final, average='macro'):.4f}")

Calibrating thresholds for FINAL ensemble (with DistilBERT)...
FINAL ENSEMBLE - micro-F1: 0.6178, macro-F1: 0.6248
FINAL ENSEMBLE - micro-F1: 0.6178, macro-F1: 0.6248


In [25]:
# Side-by-side validation F1 for every model and both blends.
# Each title is left-aligned in a 32-character column so the metrics line up.
_rule = "="*70
_rows = [
    ("1. LogReg (TF-IDF+Embed):", pred_logreg),
    ("2. XGBoost (Embeddings):", pred_xgb),
    ("3. LinearSVC (TF-IDF):", pred_svc),
    ("4. DistilBERT (Fine-tuned):", pred_distilbert),
    ("5. Ensemble (no DistilBERT):", pred_ensemble),
    ("6. FINAL ENSEMBLE (All 4):", pred_ensemble_final),
]

print(_rule)
print("FINAL PERFORMANCE COMPARISON - ALL MODELS")
print(_rule)
for _title, _preds in _rows:
    _micro = f1_score(y_va, _preds, average='micro')
    _macro = f1_score(y_va, _preds, average='macro')
    print(f"{_title:<32}micro-F1: {_micro:.4f}, macro-F1: {_macro:.4f}")
print(_rule)

FINAL PERFORMANCE COMPARISON - ALL MODELS
1. LogReg (TF-IDF+Embed):       micro-F1: 0.5966, macro-F1: 0.6031
2. XGBoost (Embeddings):        micro-F1: 0.6311, macro-F1: 0.5757
3. LinearSVC (TF-IDF):          micro-F1: 0.6256, macro-F1: 0.5597
4. DistilBERT (Fine-tuned):     micro-F1: 0.6991, macro-F1: 0.6486
5. Ensemble (no DistilBERT):    micro-F1: 0.6012, macro-F1: 0.6073
6. FINAL ENSEMBLE (All 4):      micro-F1: 0.6178, macro-F1: 0.6248


In [26]:
# Persist the full pipeline: the fine-tuned DistilBERT model and tokenizer,
# the fitted vectorizers, the sentence encoder, the three classical
# classifiers, the label order, and every tuned threshold vector. Files with
# the same names written earlier in the notebook are overwritten.
_bert_dir = "./distilbert_model"
model.save_pretrained(_bert_dir)
tokenizer.save_pretrained(_bert_dir)

# NOTE(review): the SentenceTransformer is pickled via joblib like the
# scikit-learn objects; confirm the consumer loads it the same way.
_pickled = {
    "tfidf_word.joblib": tfidf_word,
    "tfidf_char.joblib": tfidf_char,
    "sentence_transformer.joblib": st_model,
    "clf_logreg.joblib": clf_logreg,
    "clf_xgb.joblib": clf_xgb,
    "clf_svc.joblib": clf_svc,
}
for _filename, _artifact in _pickled.items():
    joblib.dump(_artifact, _filename)

# Label order matches the columns of every prediction/threshold array.
with open("labels.json", "w") as f:
    json.dump(mlb.classes_.tolist(), f)

_thresholds = {
    "thresholds_logreg.npy": ths_logreg,
    "thresholds_xgb.npy": ths_xgb,
    "thresholds_svc.npy": ths_svc,
    "thresholds_distilbert.npy": ths_distilbert,
    "thresholds_ensemble.npy": ths_ensemble,
    "thresholds_ensemble_final.npy": ths_ensemble_final,
}
for _filename, _values in _thresholds.items():
    np.save(_filename, _values)

print("All models and artifacts saved successfully!")

All models and artifacts saved successfully!


In [27]:
# Final test-set inference with the four-model blend: build the same features
# as for training, score with every model, blend with the weights used when
# ths_ensemble_final was tuned, and write the predicted genres to CSV.
df_test = pd.read_csv(test_dir)
df_test["text"] = df_test["movie_name"].fillna("") + " [SEP] " + df_test["description"].fillna("")

# Sparse TF-IDF features (already-fitted vectorizers; transform only).
Xw_test = tfidf_word.transform(df_test["text"])
Xc_test = tfidf_char.transform(df_test["text"])
X_test_tfidf = sp_hstack([Xw_test, Xc_test], format="csr")

# Dense embeddings, then the combined matrix expected by the LogReg model.
emb_test = st_model.encode(df_test["text"].tolist(), show_progress_bar=True, batch_size=32)
X_test_combined = sp_hstack([X_test_tfidf, csr_matrix(emb_test)], format="csr")

# DistilBERT probabilities. Set eval mode here rather than relying on an
# earlier cell, run in mini-batches to bound memory, and move the inputs to
# the model's device so this also works when the model sits on an accelerator.
model.eval()
_device = next(model.parameters()).device
_test_texts = df_test["text"].tolist()
_eval_batch_size = 32

_prob_chunks = []
with torch.no_grad():
    for _start in range(0, len(_test_texts), _eval_batch_size):
        test_inputs = tokenizer(
            _test_texts[_start:_start + _eval_batch_size],
            truncation=True,
            padding=True,
            max_length=128,
            return_tensors='pt',
        )
        test_inputs = {name: tensor.to(_device) for name, tensor in test_inputs.items()}
        outputs = model(**test_inputs)
        _prob_chunks.append(torch.sigmoid(outputs.logits).cpu().numpy())
logits_distilbert_test = np.concatenate(_prob_chunks, axis=0)

# Classical-model scores; the XGBoost scores are positive-class probabilities.
logits_logreg_test = clf_logreg.decision_function(X_test_combined)
pred_proba_xgb_test = clf_xgb.predict_proba(emb_test)
logits_xgb_test = np.column_stack([p[:, 1] for p in pred_proba_xgb_test])
logits_svc_test = clf_svc.decision_function(X_test_tfidf)

# Same weights as the validation-time blend the thresholds were tuned on.
# NOTE(review): this sums probabilities (DistilBERT, XGBoost) with unbounded
# margins (LogReg, LinearSVC); revisit together with the validation-time blend.
ensemble_final_logits_test = 0.4 * logits_distilbert_test + 0.25 * logits_logreg_test + 0.2 * logits_xgb_test + 0.15 * logits_svc_test
pred_test = (ensemble_final_logits_test >= ths_ensemble_final).astype(int)

# Turn each indicator row back into a ", "-joined genre string; a row with no
# label above threshold becomes an empty string.
pred_labels = [", ".join([mlb.classes_[j] for j, v in enumerate(row) if v == 1]) for row in pred_test]
result_df = pd.DataFrame({
    "movie_name": df_test["movie_name"],
    "genre": pred_labels,
    "description": df_test["description"]
})
result_df.to_csv("dataset_test_preds_final.csv", index=False)
print(f"FINAL test predictions saved! Generated {len(result_df)} predictions.")

Batches: 100%|██████████| 30/30 [00:09<00:00,  3.06it/s]



FINAL test predictions saved! Generated 942 predictions.
