In [None]:
"""
UAUM + xQuAD Long-Tail Re-ranking + Popularity Penalty
Full pipeline script: embeddings, sentiment, UAUM, popularity-penalty, xQuAD re-ranking, auto-tune, metrics, save outputs.

Expects: cleaned_dataset.csv with columns: Title, Genre, Description, Director, Cast, Rating, Votes
Outputs: topN_cleaned_imdb_recs.csv, metrics_cleaned_imdb_summary.csv
"""

# Optional installs if using Colab / fresh env:
# !pip install -U sentence-transformers transformers==4.43.3 scikit-learn==1.4.2 pandas numpy tensorflow

import os, re, math, random, sys
from typing import List, Set, Dict, Tuple
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import minmax_scale

# Sentence-BERT embeddings
from sentence_transformers import SentenceTransformer

# Sentiment DistilBERT SST-2 (TF)
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf

# ----------------------------
# Parameters (edit as needed)
# ----------------------------
CSV_PATH = "cleaned_dataset.csv"  # input catalogue, read with pandas.read_csv
SEED_TITLES = ["The Dark Knight", "Inception", "Interstellar"]  # titles that define the user profile
TOP_K = 20  # length of the final recommendation list and the K used by all @K metrics

# Tail control & candidate pool
TAIL_PERCENTILE = 0.2     # bottom 20% votes => Tail (and, symmetrically, top 20% => Head)
TAIL_RATIO = 0.35         # default tau (minimum Tail fraction in Top-K) used when auto-tuning is off
POOL_M = 600              # candidate pool size
EXPLORATION_TAIL = 50     # random tail injections
SEED = 42

# Sentiment
SENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
SENT_BATCH = 64            # number of descriptions scored per forward pass
SENT_POS_THRESHOLD = 0.0   # items are kept only if (P(pos) - P(neg)) exceeds this value

# Auto-tuning grids (tiny by default)
DO_AUTOTUNE = True
GRID_ALPHAS = [0.6, 0.7, 0.8]         # UAUM alpha (quality vs unexpectedness)
GRID_W_TFIDF = [0.4, 0.6, 0.8]        # weight TF-IDF vs BERT for similarity
GRID_LAMBDA_XQ = [0.4, 0.5, 0.6]      # xQuAD lambda (diversity mixing)
GRID_GAMMA = [0.0, 0.3, 0.6]          # popularity penalty multiplier
GRID_TAU = [0.25, 0.35]               # tail ratio (hard enforce) - can match TAIL_RATIO

# MISC
# Seed both NumPy's and Python's global RNGs; the tail-exploration sampling
# in the candidate pool draws from NumPy's global generator.
RANDOM_STATE = SEED
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

# Output files
FINAL_PATH = "topN_cleaned_imdb_recs.csv"          # per-item recommendations with score columns
METRICS_PATH = "metrics_cleaned_imdb_summary.csv"  # single-row metrics + best hyperparameters

# ----------------------------
# Helpers
# ----------------------------
def to_int_votes(x):
    """Coerce a raw vote count to ``int``, returning 0 when it cannot be parsed.

    Missing values give 0. Strings keep only their ASCII digits (so
    ``"1,234"`` becomes ``1234``); a string without any digit gives 0.
    Every other value goes through ``int()``, with 0 as the fallback when
    that conversion fails.
    """
    if pd.isna(x):
        return 0
    if not isinstance(x, str):
        try:
            return int(x)
        except Exception:
            return 0
    # Drop thousands separators, spaces and any other non-digit characters.
    digits_only = re.sub(r"[^0-9]", "", x)
    return int(digits_only) if digits_only else 0

def softmax_rowwise(logits: np.ndarray):
    """Return the softmax of each row of a 2-D array of logits.

    The row maximum is subtracted before exponentiation so that large
    logits do not overflow.
    """
    row_max = logits.max(axis=1, keepdims=True)
    exps = np.exp(logits - row_max)
    row_sum = exps.sum(axis=1, keepdims=True)
    return exps / row_sum

# Metrics
def precision_at_k(recommended: List[int], relevant: Set[int], k: int) -> float:
    """Fraction of the first ``k`` recommendations that are relevant.

    The denominator is always ``k`` (even if fewer than ``k`` items were
    recommended); ``k == 0`` yields 0.0.
    """
    if k == 0:
        return 0.0
    hits = len([item for item in recommended[:k] if item in relevant])
    return hits / k

def recall_at_k(recommended: List[int], relevant: Set[int], k: int) -> float:
    """Share of the relevant set that appears in the first ``k`` recommendations.

    Returns 0.0 when ``relevant`` is empty.
    """
    if not relevant:
        return 0.0
    found = [item for item in recommended[:k] if item in relevant]
    return len(found) / len(relevant)

def dcg_at_k(recommended: List[int], relevant: Set[int], k: int) -> float:
    """Discounted cumulative gain over the first ``k`` items with binary relevance.

    A relevant item at 1-based rank ``r`` contributes ``1 / log2(r + 1)``;
    non-relevant items contribute nothing.
    """
    total = 0.0
    for rank, item in enumerate(recommended[:k], start=1):
        if item in relevant:
            # Binary gain: 2**1 - 1 == 1, discounted by the rank.
            total += 1.0 / math.log2(rank + 1)
    return total

def ndcg_at_k(recommended: List[int], relevant: Set[int], k: int) -> float:
    """DCG@k normalised by the ideal DCG for binary relevance.

    The ideal ranking places ``min(len(relevant), k)`` relevant items at the
    top. Returns 0.0 when that count is zero or the ideal DCG is not positive.
    """
    n_ideal = min(len(relevant), k)
    if n_ideal == 0:
        return 0.0
    idcg = sum((2**1 - 1) / math.log2(pos + 1) for pos in range(1, n_ideal + 1))
    if idcg > 0:
        return dcg_at_k(recommended, relevant, k) / idcg
    return 0.0

def intra_list_diversity(recommended: List[int], X_dense_or_sparse) -> float:
    """Return 1 minus the mean pairwise cosine similarity of the recommended rows.

    ``X_dense_or_sparse`` is indexed by ``recommended`` to pick the item
    vectors. Lists with at most one item have no pairs and yield 0.0.
    """
    n = len(recommended)
    if n <= 1:
        return 0.0
    pairwise = cosine_similarity(X_dense_or_sparse[recommended])
    # Strict upper triangle: every unordered pair once, self-similarity excluded.
    upper = np.triu_indices(n, 1)
    if len(upper[0]):
        mean_sim = float(np.mean(pairwise[upper]))
    else:
        mean_sim = 0.0
    return 1.0 - mean_sim

def novelty_bits(recommended: List[int], popularity: np.ndarray) -> float:
    """Mean self-information, in bits, of the recommended items.

    Each item's probability is its share of the total popularity mass. When
    the total is not positive a divisor of 1.0 is used instead, and
    probabilities are clipped to ``[1e-12, 1]`` before taking ``-log2``.
    """
    pop = popularity.astype(float)
    mass = pop.sum()
    total = mass if mass > 0 else 1.0
    probs = np.clip((pop / total)[recommended], 1e-12, 1.0)
    return float(np.mean(-np.log2(probs)))

def coverage_rate(recommended: List[int], catalog_size: int) -> float:
    """Fraction of the catalogue covered by the distinct recommended items.

    Duplicates in ``recommended`` are counted once. A non-positive
    ``catalog_size`` yields 0.0 rather than raising ``ZeroDivisionError``,
    matching how the other metrics in this file treat degenerate input.
    """
    if catalog_size <= 0:
        return 0.0
    return len(set(recommended)) / catalog_size

def serendipity(recommended: List[int], relevant: Set[int], dissim: np.ndarray) -> float:
    """Mean dissimilarity credited to relevant recommendations.

    A relevant item contributes ``dissim[item]``; a non-relevant item
    contributes 0. The mean is taken over all recommended items, and an
    empty list yields 0.0.
    """
    contributions = []
    for item in recommended:
        hit = 1.0 if item in relevant else 0.0
        contributions.append(hit * float(dissim[item]))
    if not contributions:
        return 0.0
    return float(np.mean(contributions))

# ----------------------------
# Load data
# ----------------------------
# Fail fast with an actionable message when the input file is absent.
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"{CSV_PATH} not found. Place cleaned_dataset.csv next to this script.")

df = pd.read_csv(CSV_PATH)

# These columns are mandatory; Director and Cast are optional and are
# created empty below when absent.
required = ["Title", "Genre", "Description", "Rating", "Votes"]
missing = [c for c in required if c not in df.columns]
if missing:
    raise ValueError(f"Missing columns: {missing}")

for c in ["Director", "Cast"]:
    if c not in df.columns: df[c] = ""

# Integer vote counts (0 for missing/unparseable values).
df["Votes_clean"] = df["Votes"].apply(to_int_votes)

# Concatenated text used for both TF-IDF and Sentence-BERT.
# Missing cells are blanked BEFORE the cast to str: astype(str) would
# otherwise turn NaN into the literal token "nan", which then leaks into the
# vocabulary and the embeddings of every row with a missing field.
df["text"] = (
    df["Title"].fillna("").astype(str) + " \n" +
    df["Genre"].fillna("").astype(str) + " \n" +
    df["Director"].fillna("").astype(str) + " \n" +
    df["Cast"].fillna("").astype(str) + " \n" +
    df["Description"].fillna("").astype(str)
)

N = len(df)
print(f"Loaded {N} items")

# ----------------------------
# Embeddings: TF-IDF + SBERT
# ----------------------------
# Sparse lexical representation: lower-cased, accent-stripped unigrams and
# bigrams, capped at 120k features.
print("Building TF-IDF vectors …")
vectorizer = TfidfVectorizer(lowercase=True, strip_accents='unicode', ngram_range=(1,2), max_features=120_000)
X_tfidf = vectorizer.fit_transform(df["text"].fillna(""))

# Dense semantic representation: one embedding per item, requested as
# L2-normalised NumPy vectors.
print("Encoding Sentence-BERT embeddings (all-MiniLM-L6-v2) …")
bert_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
X_bert = bert_model.encode(df["text"].tolist(), batch_size=128, convert_to_numpy=True, normalize_embeddings=True)
X_bert = np.asarray(X_bert)

# ----------------------------
# User profile from SEED_TITLES
# ----------------------------
# Collect every row whose Title contains a seed title (case-insensitive
# substring match; re.escape makes the seed text match literally).
# NOTE(review): df.index labels are used below as row positions, which
# assumes the default RangeIndex produced by read_csv - confirm if the
# loading code ever sets an index column.
seed_idx = []
for t in SEED_TITLES:
    mask = df["Title"].str.contains(re.escape(t), case=False, na=False)
    seed_idx.extend(df.index[mask].tolist())
seed_idx = sorted(set(seed_idx))
if not seed_idx:
    raise ValueError("None of SEED_TITLES matched titles in the dataset. Please edit SEED_TITLES.")

# profile embeddings
# TF-IDF profile: mean of the seed rows, converted to a plain (1, n_features) ndarray.
profile_tfidf = X_tfidf[seed_idx].mean(axis=0)
profile_tfidf = np.asarray(profile_tfidf).reshape(1, -1)
# BERT profile: mean of the seed embeddings, re-normalised to unit length
# (the epsilon guards against a zero vector).
profile_bert = X_bert[seed_idx].mean(axis=0)
profile_bert = profile_bert / (np.linalg.norm(profile_bert) + 1e-12)
profile_bert = profile_bert.reshape(1, -1)

# ----------------------------
# Similarities: TF-IDF & BERT
# ----------------------------
# Cosine similarity of every item to the user profile, flattened to one
# value per item for each representation.
print("Computing TF-IDF and BERT similarities to profile …")
sim_tfidf = cosine_similarity(X_tfidf, profile_tfidf).ravel()
sim_bert = cosine_similarity(X_bert, profile_bert).ravel()

# ----------------------------
# Sentiment scoring (DistilBERT SST-2)
# ----------------------------
print("Scoring sentiment with DistilBERT … (this may download a model)")
tok = AutoTokenizer.from_pretrained(SENT_MODEL)
# from_pt=True loads the TensorFlow model from the checkpoint's PyTorch weights.
sent_mdl = TFAutoModelForSequenceClassification.from_pretrained(SENT_MODEL, from_pt=True)

# Score each item's Description in batches of SENT_BATCH; inputs longer than
# 160 tokens are truncated.
texts = df["Description"].astype(str).tolist()
sent_scores = []
for i in range(0, len(texts), SENT_BATCH):
    batch = texts[i:i+SENT_BATCH]
    enc = tok(batch, truncation=True, padding=True, max_length=160, return_tensors='tf')
    logits = sent_mdl(enc).logits.numpy()
    probs = softmax_rowwise(logits)
    # NOTE(review): treats class index 1 as positive and 0 as negative -
    # confirm against the checkpoint's id2label mapping.
    pos = probs[:,1]; neg = probs[:,0]
    # Signed sentiment score: P(pos) - P(neg), one value per item.
    sent_scores.extend((pos - neg).tolist())
sent_scores = np.array(sent_scores)

# ----------------------------
# Popularity & aspects
# ----------------------------
votes = df["Votes_clean"].astype(int).values
# Popularity scaled by the maximum vote count (divisor 1 if all votes are 0).
pop_norm = votes / (votes.max() if votes.max() > 0 else 1)
# define Head/Mid/Tail by quantiles (Tail = <= q20, Head >= q80)
q_low = np.quantile(votes, TAIL_PERCENTILE)
q_high = np.quantile(votes, 1.0 - TAIL_PERCENTILE)
def aspect_label(v):
    """Return "Tail", "Head" or "Mid" for a vote count ``v``.

    The Tail test runs first, so a value that satisfies both thresholds
    (possible when q_low == q_high) is labelled "Tail".
    """
    if v <= q_low:
        return "Tail"
    elif v >= q_high:
        return "Head"
    else:
        return "Mid"
# One aspect label per item, aligned with the rows of df.
aspect = np.array([aspect_label(v) for v in votes])

# dissimilarity (hybrid)
# One minus the unweighted average of the two profile similarities.
dissimilarity = 1.0 - 0.5 * (sim_tfidf + sim_bert)

# ----------------------------
# UAUM / Base score (using initial W_TFIDF etc. but these will vary during auto-tune)
# We'll implement functions so we can recompute for different params in grid search.
# ----------------------------
def compute_hybrid_similarity(sim_tfidf_arr, sim_bert_arr, w_tfidf: float):
    """Blend the two similarity signals linearly.

    ``w_tfidf`` weights the TF-IDF similarity and ``1 - w_tfidf`` weights the
    BERT similarity. No rescaling is applied to the result.
    """
    tfidf_part = w_tfidf * sim_tfidf_arr
    bert_part = (1.0 - w_tfidf) * sim_bert_arr
    return tfidf_part + bert_part

def pred_rating_proxy_from_sim(sim_h):
    """Min-max rescale a similarity array onto a 0-5 rating-like scale.

    A small epsilon in the denominator keeps a constant input from dividing
    by zero (such an input maps to all zeros).
    """
    lo = sim_h.min()
    hi = sim_h.max()
    return 5.0 * (sim_h - lo) / (hi - lo + 1e-8)

def popularity_penalty_function(pop_norm_arr, votes_arr, method="log"):
    """Return the per-item popularity penalty that is later scaled by gamma.

    method:
      - "log": ``log(1 + votes)`` min-max scaled over ``votes_arr``
      - "linear" (or any other value): a copy of ``pop_norm_arr``
    """
    if method != "log":
        # "linear" and unrecognised methods share the same fallback.
        return pop_norm_arr.copy()
    log_votes = np.log1p(votes_arr.astype(float))
    lo = log_votes.min()
    # Epsilon avoids 0/0 when every item has the same vote count.
    span = log_votes.max() - lo + 1e-12
    return (log_votes - lo) / span

def compute_uaum_and_base(alpha: float, w_tfidf: float, gamma: float, pop_pen_method="log"):
    """
    Score every item for one hyperparameter setting.

    Reads the module-level arrays sim_tfidf, sim_bert, sent_scores,
    dissimilarity, pop_norm and votes.

    Returns a 6-tuple of arrays:
      - uaum: alpha * quality + (1 - alpha) * unexpectedness
      - base: uaum - gamma * pop_pen
      - pred_proxy: rating proxy derived from the hybrid similarity
      - quality: pred_proxy * (1 + sentiment score)
      - unexpectedness: dissimilarity * (1 - normalised popularity)
      - pop_pen: popularity penalty for the chosen method
    """
    hybrid = compute_hybrid_similarity(sim_tfidf, sim_bert, w_tfidf)
    pred_proxy = pred_rating_proxy_from_sim(hybrid)
    pop_pen = popularity_penalty_function(pop_norm, votes, method=pop_pen_method)

    # Quality: rating proxy with a sentiment uplift.
    quality = pred_proxy * (1.0 + sent_scores)
    # Unexpectedness: far from the profile and not popular.
    unexpectedness = dissimilarity * (1.0 - pop_norm)

    uaum = alpha * quality + (1.0 - alpha) * unexpectedness
    base = uaum - gamma * pop_pen
    return uaum, base, pred_proxy, quality, unexpectedness, pop_pen

# ----------------------------
# Filtering & Candidate generation
# ----------------------------
print("Applying filters: Rating>6 & Positive Sentiment …")
# Non-numeric or missing ratings become 0 and therefore fail the rating filter.
rating_vals = pd.to_numeric(df["Rating"], errors='coerce').fillna(0).values
mask_rating = rating_vals > 6.0
mask_sent = sent_scores > SENT_POS_THRESHOLD
# An item is a base candidate only if it passes both filters.
mask_base = mask_rating & mask_sent

# Remove seed items from candidates
# (True everywhere except at the seed rows.)
mask_seed = np.ones(len(df), dtype=bool)
mask_seed[seed_idx] = False

# we'll use this helper to produce pool and re-rank later given params
def build_candidate_pool(base_scores: np.ndarray, candidates_mask: np.ndarray, pool_m=POOL_M, exploration_tail=EXPLORATION_TAIL):
    """
    Build the candidate pool that is handed to the re-ranker.

    - base_scores: array indexed by item -> base relevance score
    - candidates_mask: boolean array marking items that passed the filters
    - pool_m: maximum pool size (the highest-scoring items are kept)
    - exploration_tail: number of random Tail items added for exploration

    Reads the module-level ``mask_seed`` and ``aspect`` arrays and draws the
    exploration sample from NumPy's global RNG.

    Returns a list of item indices sorted by descending base score. Seed
    items never appear in it: they are excluded from the filtered candidates
    and from the exploration draw alike.
    """
    candidates = np.where(candidates_mask & mask_seed)[0].tolist()

    # inject random tail items for exploration
    # Seeds are masked out here as well; otherwise a seed title labelled
    # "Tail" could re-enter the pool through the random draw and end up
    # recommended back to the user.
    tail_indices = np.where((aspect == "Tail") & mask_seed)[0]
    explore = []
    if len(tail_indices) > 0 and exploration_tail > 0:
        nsel = min(exploration_tail, len(tail_indices))
        explore = np.random.choice(tail_indices, size=nsel, replace=False).tolist()

    cand_union = sorted(set(candidates + explore))
    if not cand_union:
        return []
    order = np.argsort(-base_scores[cand_union])
    return [cand_union[i] for i in order[:pool_m]]

# ----------------------------
# xQuAD re-ranking (aspects = Head/Mid/Tail)
# ----------------------------
def xquad_rerank(pool_items: List[int], base_scores: np.ndarray, k: int, lambda_xq: float,
                 aspect_arr: np.ndarray, P_aspect_user: Dict[str, float] = None):
    """
    Greedily pick up to ``k`` items, trading relevance against aspect coverage.

    - pool_items: candidate item indices (duplicates are ignored)
    - base_scores: array indexed by item -> relevance score
    - k: maximum number of items to return
    - lambda_xq: weight of the coverage term; (1 - lambda_xq) weights relevance
    - aspect_arr: maps each item index to its aspect label
    - P_aspect_user: aspect priors; defaults to Head 0.33 / Mid 0.33 / Tail 0.34

    Each round, an item of aspect ``a`` is scored as
    ``(1 - lambda_xq) * base + lambda_xq * P(a) * (1 - coverage[a])`` where
    ``coverage[a]`` is the fraction of already-selected items with aspect
    ``a``. Items whose aspect has no prior get no coverage boost. Returns the
    selected indices in the order they were picked.
    """
    priors = {"Head": 0.33, "Mid": 0.33, "Tail": 0.34} if P_aspect_user is None else P_aspect_user

    picked = []
    picked_per_aspect = {a: 0 for a in priors}
    covered = {a: 0.0 for a in priors}
    remaining = set(pool_items)
    rel_weight = 1.0 - lambda_xq

    while len(picked) < k and remaining:
        # Aspect membership is one-hot, so the coverage boost depends only on
        # the candidate's own aspect and can be computed once per round.
        boost_for = {a: priors[a] * (1.0 - covered[a]) * 1.0 for a in priors}

        winner, winner_score = None, -1e18
        for cand in remaining:
            boost = boost_for.get(aspect_arr[cand], 0.0)
            cand_score = rel_weight * base_scores[cand] + lambda_xq * boost
            if cand_score > winner_score:
                winner, winner_score = cand, cand_score
        if winner is None:
            break

        picked.append(winner)
        remaining.remove(winner)

        # Refresh coverage as the share of picked items carrying each aspect.
        winner_aspect = aspect_arr[winner]
        if winner_aspect in picked_per_aspect:
            picked_per_aspect[winner_aspect] += 1
        n_picked = len(picked)
        for a in covered:
            covered[a] = picked_per_aspect[a] / n_picked if n_picked > 0 else 0.0
    return picked

# ----------------------------
# Hard tail ratio enforcement
# ----------------------------
def enforce_tail_ratio(selected_list: List[int], base_scores: np.ndarray,
                       pool_items: List[int], min_tail_frac: float):
    """
    Raise the number of Tail items in a selection towards a minimum share.

    - selected_list: current selection (left untouched; a new list is returned)
    - base_scores: array indexed by item -> base score
    - pool_items: candidate pool that replacement Tail items are drawn from
    - min_tail_frac: required Tail fraction; the target count is
      ceil(min_tail_frac * len(selected_list))

    The lowest-scoring Head items are swapped, position for position, with the
    highest-scoring Tail items from the pool that are not already selected.
    Only Head items are ever replaced, so the target may remain unmet when
    there are too few Head items or too few spare Tail items.

    Reads the module-level ``aspect`` array. Returns a new list of the same
    length as ``selected_list``.
    """
    # Work on a copy so the caller's list is never modified behind its back.
    result = list(selected_list)
    K = len(result)
    min_tail = int(math.ceil(min_tail_frac * K))
    n_tail = sum(1 for i in result if aspect[i] == "Tail")
    deficit = min_tail - n_tail
    if deficit <= 0:
        return result  # ok

    # Tail items available as replacements, best base score first.
    already_selected = set(result)  # O(1) membership instead of scanning the list
    pool_tail_sorted = sorted(
        (i for i in pool_items if aspect[i] == "Tail" and i not in already_selected),
        key=lambda idx: -base_scores[idx],
    )

    # Positions of Head items in the selection, worst base score first.
    head_positions_sorted = sorted(
        ((pos, it) for pos, it in enumerate(result) if aspect[it] == "Head"),
        key=lambda tpl: base_scores[tpl[1]],
    )

    # zip stops at the shortest input, so the number of swaps is
    # min(deficit, available Tail items, available Head positions).
    for (pos_to_replace, _), new_item in zip(head_positions_sorted, pool_tail_sorted[:deficit]):
        result[pos_to_replace] = new_item
    return result

# ----------------------------
# Recommender pipeline wrapper (given hyperparams)
# ----------------------------
def recommend_with_params(alpha: float, w_tfidf: float, gamma: float,
                          lambda_xq: float, tau: float,
                          pool_m: int = POOL_M, exploration_tail: int = EXPLORATION_TAIL,
                          pop_pen_method="log"):
    """
    Run the whole recommendation pipeline for one hyperparameter setting.

    Steps: score all items, build the candidate pool, re-rank it with xQuAD
    (TOP_K items, fixed aspect priors), apply the Tail-ratio constraint with
    ``tau``, then top up from the pool if fewer than TOP_K items were chosen.

    Returns: list of recommended item indices (at most TOP_K; empty when the
    candidate pool is empty).
    """
    _, base, *_ = compute_uaum_and_base(alpha, w_tfidf, gamma, pop_pen_method)

    # Candidates are items with rating>6 and positive sentiment; seeds are
    # dropped inside build_candidate_pool.
    pool = build_candidate_pool(base, mask_base.copy(), pool_m, exploration_tail)
    if not pool:
        return []

    # Aspect priors for xQuAD: Mid and Tail are weighted above Head.
    aspect_priors = {"Head": 0.25, "Mid": 0.40, "Tail": 0.35}
    ranked = xquad_rerank(pool, base, TOP_K, lambda_xq=lambda_xq,
                          aspect_arr=aspect, P_aspect_user=aspect_priors)

    # Hard Tail-ratio constraint.
    final = enforce_tail_ratio(ranked, base, pool, min_tail_frac=tau)

    # Top up with the best unused pool items when the list is still short.
    shortfall = TOP_K - len(final)
    if shortfall > 0:
        leftovers = sorted((i for i in pool if i not in final), key=lambda idx: -base[idx])
        final.extend(leftovers[:shortfall])
    return final

# ----------------------------
# Surrogate ground truth (for tuning): top-50 by hybrid sim_h & rating>6
# We'll define relevant indices to compute recall/precision for autotune objective
# ----------------------------
# Use current default weights for initial ground truth sim_h (w=0.5)
# Equal-weight (w=0.5) hybrid similarity, fixed once so that the relevance
# labels do not move with the w_tfidf value being tuned.
sim_h_initial = compute_hybrid_similarity(sim_tfidf, sim_bert, 0.5)
sim_hybrid = sim_h_initial  # save
rel_mask = mask_rating  # rating>6 as relevant flag
# choose top-50 by hybrid sim (and rating>6)
# NOTE(review): seed items are not excluded here, while the recommender never
# returns them - confirm whether they should count as relevant.
sorted_by_sim = np.argsort(-sim_hybrid)
top50 = [i for i in sorted_by_sim if rel_mask[i]][:50]
relevant_indices = set(top50)

# ----------------------------
# Auto-tuning (grid search)
# ----------------------------
# Defaults: used as-is when DO_AUTOTUNE is False, and kept when the grid
# search produces no non-empty recommendation list.
best_params = {"alpha": 0.7, "w_tfidf": 0.6, "gamma": 0.0, "lambda_xq": 0.5, "tau": TAIL_RATIO}
best_obj = -1e18
best_list = []
if DO_AUTOTUNE:
    print("Auto-tuning hyperparameters …")
    # Exhaustive search over the Cartesian product of the five grids.
    for a in GRID_ALPHAS:
        for w in GRID_W_TFIDF:
            for lam in GRID_LAMBDA_XQ:
                for g in GRID_GAMMA:
                    for tau in GRID_TAU:
                        recs = recommend_with_params(alpha=a, w_tfidf=w, gamma=g,
                                                     lambda_xq=lam, tau=tau,
                                                     pool_m=POOL_M, exploration_tail=EXPLORATION_TAIL)
                        if not recs:
                            continue
                        # Only the metrics that enter the objective are
                        # computed here; the full metric set is evaluated
                        # once on the winning list after the search.
                        rec = recall_at_k(recs, relevant_indices, TOP_K)
                        cov = coverage_rate(recs, len(df))
                        # dissimilarity for serendipity (use previously computed dissimilarity)
                        ser = serendipity(recs, relevant_indices, dissimilarity)
                        # objective: recall + 0.25*coverage + 0.25*serendipity
                        obj = rec + 0.25 * cov + 0.25 * ser
                        # Strict ">" keeps the first setting on ties.
                        if obj > best_obj:
                            best_obj = obj
                            best_params = {"alpha": a, "w_tfidf": w, "gamma": g, "lambda_xq": lam, "tau": tau}
                            best_list = recs
    print("Auto-tune finished. Best params:", best_params)
else:
    best_list = recommend_with_params(alpha=best_params["alpha"], w_tfidf=best_params["w_tfidf"],
                                      gamma=best_params["gamma"], lambda_xq=best_params["lambda_xq"],
                                      tau=best_params["tau"], pool_m=POOL_M, exploration_tail=EXPLORATION_TAIL)

# ----------------------------
# Final evaluation using best params
# ----------------------------
# The list produced by the winning configuration is reused directly, so the
# reported metrics describe exactly the list that was scored during tuning.
final_recs = best_list
if not final_recs:
    # fallback: run once with defaults
    final_recs = recommend_with_params(alpha=best_params["alpha"], w_tfidf=best_params["w_tfidf"],
                                       gamma=best_params["gamma"], lambda_xq=best_params["lambda_xq"],
                                       tau=best_params["tau"], pool_m=POOL_M, exploration_tail=EXPLORATION_TAIL)

# Accuracy metrics are measured against the surrogate relevant set;
# ILD is computed on the TF-IDF vectors and novelty on the vote counts.
prec = precision_at_k(final_recs, relevant_indices, TOP_K)
recall = recall_at_k(final_recs, relevant_indices, TOP_K)
ndcg = ndcg_at_k(final_recs, relevant_indices, TOP_K)
ild = intra_list_diversity(final_recs, X_tfidf)
nov = novelty_bits(final_recs, votes)
cov = coverage_rate(final_recs, len(df))
ser = serendipity(final_recs, relevant_indices, dissimilarity)
# Mean and variance of the sentiment scores of the recommended items.
sent_mean = float(np.mean(sent_scores[final_recs])) if final_recs else 0.0
sent_var = float(np.var(sent_scores[final_recs])) if final_recs else 0.0

print("\n=== Recommendation Metrics (Final) ===")
print(f"Precision@{TOP_K}: {prec:.4f}")
print(f"Recall@{TOP_K}: {recall:.4f}")
print(f"NDCG@{TOP_K}: {ndcg:.4f}")
print(f"Intra-list Diversity (ILD): {ild:.4f}")
print(f"Novelty (avg −log2 p): {nov:.4f}")
print(f"Coverage: {cov:.4f}")
print(f"Serendipity: {ser:.4f}")
print(f"Sentiment mean: {sent_mean:.4f}, variance: {sent_var:.6f}")
print("Best params ->", best_params)

# ----------------------------
# Save outputs (final CSV + metrics)
# ----------------------------
# Rows of the recommended items, in recommendation order; the original row
# label is kept as the "item_index" column.
# NOTE(review): df.loc is label-based while final_recs holds row positions;
# the two agree only for a default RangeIndex - confirm.
final_df = df.loc[final_recs].copy()
final_df = final_df.reset_index(drop=False).rename(columns={"index": "item_index"})

# recompute base scores using best params to store
alpha_b = best_params["alpha"]
w_tfidf_b = best_params["w_tfidf"]
gamma_b = best_params.get("gamma", 0.0)
uaum_b, base_b, pred_proxy_b, quality_b, unexp_b, pop_pen_b = compute_uaum_and_base(alpha_b, w_tfidf_b, gamma_b)

# Attach the per-item score components for the recommended items.
final_df["FinalScore"] = base_b[final_recs]
final_df["UAUM"] = uaum_b[final_recs]
final_df["PredRatingProxy"] = pred_proxy_b[final_recs]
final_df["Quality"] = quality_b[final_recs]
final_df["Unexpectedness"] = unexp_b[final_recs]
final_df["SentimentScore"] = sent_scores[final_recs]
final_df["PopularityNorm"] = pop_norm[final_recs]
final_df["Aspect"] = aspect[final_recs]

final_df.to_csv(FINAL_PATH, index=False)

# Single-row summary: evaluation metrics plus the selected hyperparameters.
metrics = {
    "Precision@K": prec,
    "Recall@K": recall,
    "NDCG@K": ndcg,
    "ILD": ild,
    "Novelty": nov,
    "Coverage": cov,
    "Serendipity": ser,
    "SentimentMean": sent_mean,
    "SentimentVar": sent_var,
    "Best_ALPHA": alpha_b,
    "Best_W_TFIDF": w_tfidf_b,
    "Best_GAMMA": gamma_b,
    "Best_LAMBDA_XQ": best_params.get("lambda_xq"),
    "Best_TAU": best_params.get("tau"),
}
pd.DataFrame([metrics]).to_csv(METRICS_PATH, index=False)

print(f"\nSaved recommendations -> {FINAL_PATH}")
print(f"Saved metrics summary -> {METRICS_PATH}")


Loaded 9482 items
Building TF-IDF vectors …
Encoding Sentence-BERT embeddings (all-MiniLM-L6-v2) …
Computing TF-IDF and BERT similarities to profile …
Scoring sentiment with DistilBERT … (this may download a model)


pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


Applying filters: Rating>6 & Positive Sentiment …
Auto-tuning hyperparameters …
Auto-tune finished. Best params: {'alpha': 0.8, 'w_tfidf': 0.4, 'gamma': 0.0, 'lambda_xq': 0.4, 'tau': 0.25}

=== Recommendation Metrics (Final) ===
Precision@20: 0.7000
Recall@20: 0.2800
NDCG@20: 0.7852
Intra-list Diversity (ILD): 0.9835
Novelty (avg −log2 p): 13.1379
Coverage: 0.0021
Serendipity: 0.4568
Sentiment mean: 0.9591, variance: 0.007542
Best params -> {'alpha': 0.8, 'w_tfidf': 0.4, 'gamma': 0.0, 'lambda_xq': 0.4, 'tau': 0.25}

Saved recommendations -> topN_cleaned_imdb_recs.csv
Saved metrics summary -> metrics_cleaned_imdb_summary.csv
