# Functional Testing

In [4]:
# Functional testing for the Content Matching & Cold-Start modules
import pandas as pd
import numpy as np
# --------------------
# Load data
# --------------------
path = "Content_Reco.xlsx"
xls = pd.ExcelFile(path)
user = pd.read_excel(xls, "user")
physio = pd.read_excel(xls, "physio")
gA = pd.read_excel(xls, "GroupA_Rank")
gB = pd.read_excel(xls, "GroupB_Rank")
gC = pd.read_excel(xls, "GroupC_Rank")
top50 = pd.read_excel(xls, "top50_match_rank")
cold10 = pd.read_excel(xls, "cold_start_top10")

# Basic checks
sheets_ok = set(xls.sheet_names) >= {"user","physio","GroupA_Rank","GroupB_Rank","GroupC_Rank","top50_match_rank","cold_start_top10"}

# --------------------
# Helper: split specialties for matching
# --------------------
def specialties_overlap(user_spec, trainer_spec):
    if pd.isna(user_spec) or pd.isna(trainer_spec):
        return 0
    us = [s.strip().lower() for s in str(user_spec).split(",")]
    ts = [s.strip().lower() for s in str(trainer_spec).split(",")]
    return int(len(set(us) & set(ts)) > 0)

# Recompute raw_score for each (user, trainer) pair present in top50
# Join necessary attributes
u_cols = ["user_id","gender","city","preferred_specialties","available_time","age_group"]
p_cols = ["trainer_id","gender","city","specialities","available_time","serve_age_group"]
u = user[u_cols].rename(columns={"gender":"u_gender","city":"u_city","preferred_specialties":"u_specs","available_time":"u_time","age_group":"u_age"})
p = physio[p_cols].rename(columns={"gender":"p_gender","city":"p_city","specialities":"p_specs","available_time":"p_time","serve_age_group":"p_age"})

tp = top50.merge(u, on="user_id", how="left").merge(p, on="trainer_id", how="left")

# Compute component matches
comp = pd.DataFrame({
    "gender_m": (tp["u_gender"] == tp["p_gender"]).astype(int),
    "city_m": (tp["u_city"] == tp["p_city"]).astype(int),
    "time_m": (tp["u_time"] == tp["p_time"]).astype(int),
    "age_m": (tp["u_age"] == tp["p_age"]).astype(int),
})

spec_m = tp.apply(lambda r: specialties_overlap(r["u_specs"], r["p_specs"]), axis=1)
comp["spec_m"] = spec_m.values

# Raw score and match_score recompute
tp["raw_recalc"] = comp.sum(axis=1)
tp["match_recalc"] = tp["raw_recalc"] / 7.0

# --------------------
# 1.1 Flow correctness checks
# --------------------

# (a) Compare recalculated match_score with stored one
tp["abs_diff_match"] = (tp["match_recalc"] - tp["match_score"]).abs()
match_score_agree_ratio = (tp["abs_diff_match"] < 1e-6).mean()

# (b) For each user, confirm top50 size (<=50) and monotonic non-increasing by match_score then rank
grp = tp.sort_values(["user_id","match_score","rank"], ascending=[True, False, True]).groupby("user_id")
size_ok_ratio = (grp.size() <= 50).mean()
# Monotonic check: within each user, match_score should be non-increasing with rank
def is_monotonic(group):
    g = group.sort_values("rank")
    return (g["match_score"].diff().fillna(0) <= 1e-9).all()
monotonic_ratio = grp.apply(is_monotonic).mean()

# (c) Verify that cold_start_top10 trainers have final_score merged from the right group table
# Build trainer->(group, final_score) map
gA["group"] = "A"; gB["group"] = "B"; gC["group"] = "C"
trainer_info = pd.concat([gA[["trainer_id","final_score","group"]],
                          gB[["trainer_id","final_score","group"]],
                          gC[["trainer_id","final_score","group"]]], ignore_index=True)

c10 = cold10.merge(trainer_info, on="trainer_id", how="left", suffixes=("","_from_group"))
# check final_score consistency (allow rounding to 3 decimals per your script)
fs_consistent_ratio = (c10["final_score"].round(3) == c10["final_score_from_group"].round(3)).mean()

# (d) Verify Top10 per user count == 10
c10_counts = cold10.groupby("user_id").size()
top10_count_ok_ratio = (c10_counts == 10).mean()

# (e) Verify group composition rule (A:4, B:3, C:3) where possible; allow fallback when不足
def comp_ok(sub):
    counts = sub["group"].value_counts()
    # accept either exact 4/3/3 or any composition summing to 10 when某组不足
    return (counts.get("A",0), counts.get("B",0), counts.get("C",0))
comp_counts = cold10.groupby("user_id").apply(comp_ok)
comp_counts_df = comp_counts.apply(pd.Series).rename(columns={0:"A",1:"B",2:"C"})
display(comp_counts_df.reset_index())

# --------------------
# 1.2 Fault case diagnostics
# --------------------

# Users with fewer than 50 candidates
user_top50_counts = tp.groupby("user_id").size()
users_lt50 = user_top50_counts[user_top50_counts < 50]
users_lt50_summary = pd.DataFrame({"user_id": users_lt50.index, "top50_count": users_lt50.values}).sort_values("top50_count")
display(users_lt50_summary)

# Pairs with zero match_score in top50 (should be rare)
zero_match_rows = tp[tp["match_score"] <= 1e-9][["user_id","trainer_id","match_score","rank","u_city","p_city","u_specs","p_specs","u_time","p_time","u_age","p_age"]].head(50)
display(zero_match_rows)

# Missing features in user/physio and their impact
missing_user = user.isna().sum()
missing_physio = physio.isna().sum()
missing_summary = pd.DataFrame({"user_missing": missing_user, "physio_missing": missing_physio})
display(missing_summary.reset_index().rename(columns={"index":"field"}))

# Does group dominance happen? check group distribution overall in cold10
group_dist = cold10["group"].value_counts(normalize=True).rename("share")
display(group_dist.reset_index().rename(columns={"index":"group"}))

# --------------------
# 1.3 Example anomaly explanation
# Find top-ranked pairs where component matches are low (<=2/7) but recommended rank is high (<=3)
c10_detail = cold10.merge(u, on="user_id", how="left").merge(p, on="trainer_id", how="left")
def comp_score_row(r):
    return int(r["u_gender"]==r["p_gender"]) + int(r["u_city"]==r["p_city"]) + int(r["u_time"]==r["p_time"]) + \
           specialties_overlap(r["u_specs"], r["p_specs"]) + int(r["u_age"]==r["p_age"])
# compute content-only components for cold start top3 per user
c10_top3 = c10_detail[c10_detail["recommend_rank"]<=3].copy()
c10_top3["content_components"] = c10_top3.apply(comp_score_row, axis=1)
# pick anomalies: high rank (1-3) but content_components <= 2
anomalies = c10_top3[c10_top3["content_components"] <= 2][
    ["user_id","trainer_id","recommend_rank","match_score","final_score","cold_start_score","group",
     "u_city","p_city","u_time","p_time","u_specs","p_specs","u_age","p_age","content_components"]
].head(20)
display(anomalies)

# --------------------
# Pack summary metrics for quick reference
summary_metrics = {
    "sheets_ok": bool(sheets_ok),
    "match_score_agree_ratio": float(match_score_agree_ratio),
    "top50_size_ok_ratio": float(size_ok_ratio),
    "top50_monotonic_ratio": float(monotonic_ratio),
    "final_score_consistency_ratio": float(fs_consistent_ratio),
    "top10_count_ok_ratio": float(top10_count_ok_ratio),
    "num_users_lt50": int(users_lt50.shape[0]),
    "overall_group_share": group_dist.to_dict()
}
summary_metrics


  monotonic_ratio = grp.apply(is_monotonic).mean()
  comp_counts = cold10.groupby("user_id").apply(comp_ok)


Unnamed: 0,user_id,A,B,C
0,U0000,4,3,3
1,U0001,4,3,3
2,U0002,4,3,3
3,U0003,4,3,3
4,U0004,4,3,3
...,...,...,...,...
995,U0995,4,3,3
996,U0996,4,3,3
997,U0997,4,3,3
998,U0998,4,3,3


Unnamed: 0,user_id,top50_count


Unnamed: 0,user_id,trainer_id,match_score,rank,u_city,p_city,u_specs,p_specs,u_time,p_time,u_age,p_age


Unnamed: 0,field,user_missing,physio_missing
0,age_group,0.0,
1,available_time,0.0,0.0
2,city,0.0,0.0
3,gender,0.0,0.0
4,name,,0.0
5,preferred_specialties,0.0,
6,serve_age_group,,0.0
7,specialities,,0.0
8,trainer_id,,0.0
9,user_id,0.0,


Unnamed: 0,group,share
0,A,0.4
1,C,0.3
2,B,0.3


Unnamed: 0,user_id,trainer_id,recommend_rank,match_score,final_score,cold_start_score,group,u_city,p_city,u_time,p_time,u_specs,p_specs,u_age,p_age,content_components
5181,U0518,754,2,0.571,0.795,0.659,B,Bath,Bath,Evening,Morning,"Elderly Fitness, Flexibility, Powerlifting","Elderly Fitness, Powerlifting, Flexibility",elderly,youth,2
7632,U0763,7,3,0.571,0.766,0.636,B,Oxford,Bristol,Morning,Afternoon,"Sports-Specific, Prenatal Fitness, Functional ...","Prenatal Fitness, Functional Training, Sports-...",middle-aged,elderly,2
9201,U0920,700,2,0.571,0.822,0.68,C,Bath,London,Evening,Afternoon,"Pilates, Weight Loss, Prenatal Fitness","Pilates, Weight Loss, Prenatal Fitness",youth,elderly,2


{'sheets_ok': True,
 'match_score_agree_ratio': 0.81898,
 'top50_size_ok_ratio': 1.0,
 'top50_monotonic_ratio': 1.0,
 'final_score_consistency_ratio': 1.0,
 'top10_count_ok_ratio': 1.0,
 'num_users_lt50': 0,
 'overall_group_share': {'A': 0.4, 'C': 0.3, 'B': 0.3}}

# Behaviroual Testing

In [7]:
# Behavioural testing for the Content Matching & Cold-Start modules
import pandas as pd
import numpy as np
from itertools import combinations

# --------------------
# Load data
# --------------------
path = "Content_Reco.xlsx"
xls = pd.ExcelFile(path)
user = pd.read_excel(xls, "user")
physio = pd.read_excel(xls, "physio")
gA = pd.read_excel(xls, "GroupA_Rank")
gB = pd.read_excel(xls, "GroupB_Rank")
gC = pd.read_excel(xls, "GroupC_Rank")
top50 = pd.read_excel(xls, "top50_match_rank")
cold10 = pd.read_excel(xls, "cold_start_top10")

# --------------------
# Helpers
# --------------------
def split_list(x):
    if pd.isna(x):
        return []
    return [s.strip().lower() for s in str(x).split(",") if s.strip()]

def specialties_overlap(a, b):
    sa, sb = set(split_list(a)), set(split_list(b))
    return int(len(sa & sb) > 0)

def content_components(row):
    # five components: gender, city, time, specialities, age group
    c = 0
    c += int(row["u_gender"] == row["p_gender"])
    c += int(row["u_city"] == row["p_city"])
    c += int(row["u_time"] == row["p_time"])
    c += specialties_overlap(row["u_specs"], row["p_specs"])
    c += int(row["u_age"] == row["p_age"])
    return c

def jaccard_distance(a, b):
    sa, sb = set(split_list(a)), set(split_list(b))
    if not sa and not sb:
        return 0.0
    return 1.0 - (len(sa & sb) / max(1, len(sa | sb)))

def intra_list_diversity(df_items, col_specs):
    pairs = list(combinations(df_items.index, 2))
    if not pairs:
        return np.nan
    dists = []
    for i, j in pairs:
        dists.append(jaccard_distance(df_items.loc[i, col_specs], df_items.loc[j, col_specs]))
    return float(np.mean(dists))

def group_entropy(values):
    vc = values.value_counts(normalize=True)
    return float(-(vc * np.log(vc + 1e-12)).sum())

# --------------------
# Prepare merged views
# --------------------
u = user.rename(columns={"gender":"u_gender","city":"u_city","preferred_specialties":"u_specs",
                         "available_time":"u_time","age_group":"u_age"})
p = physio.rename(columns={"gender":"p_gender","city":"p_city","specialities":"p_specs",
                           "available_time":"p_time","serve_age_group":"p_age"})

# Trainer group/final_score map
gA["group"] = "A"; gB["group"] = "B"; gC["group"] = "C"
trainer_group = pd.concat([gA[["trainer_id","final_score","group"]],
                           gB[["trainer_id","final_score","group"]],
                           gC[["trainer_id","final_score","group"]]], ignore_index=True)

# Build content top10 per user from top50 (by match_score desc then rank asc)
top50_sorted = top50.sort_values(["user_id","match_score","rank"], ascending=[True, False, True])
content_top10 = top50_sorted.groupby("user_id").head(10).copy()
content_top10 = content_top10.merge(trainer_group, on="trainer_id", how="left")
content_top10 = content_top10.merge(u[["user_id","u_gender","u_city","u_specs","u_time","u_age"]], on="user_id", how="left")
content_top10 = content_top10.merge(p[["trainer_id","p_gender","p_city","p_specs","p_time","p_age"]], on="trainer_id", how="left")

# Cold-start top10 enriched
cold10_enriched = cold10.merge(u[["user_id","u_gender","u_city","u_specs","u_time","u_age"]], on="user_id", how="left")
cold10_enriched = cold10_enriched.merge(p[["trainer_id","p_gender","p_city","p_specs","p_time","p_age"]], on="trainer_id", how="left")

# --------------------
# 1) Accuracy proxy: Precision@10 using content-components>=3 as "relevant"
# --------------------
content_top10["components"] = content_top10.apply(content_components, axis=1)
cold10_enriched["components"] = cold10_enriched.apply(content_components, axis=1)

# relevant if >=3 of 5 match
content_rel = content_top10.assign(relevant=(content_top10["components"]>=5).astype(int))
cold_rel = cold10_enriched.assign(relevant=(cold10_enriched["components"]>=5).astype(int))

prec_content = content_rel.groupby("user_id")["relevant"].mean().rename("p_at_10_content")
prec_cold = cold_rel.groupby("user_id")["relevant"].mean().rename("p_at_10_cold")

prec_compare = pd.concat([prec_content, prec_cold], axis=1)
prec_compare["delta"] = prec_compare["p_at_10_cold"] - prec_compare["p_at_10_content"]

# --------------------
# 2) Diversity: intra-list diversity by specialties (Jaccard distance), and group-entropy
# --------------------
div_content = content_top10.groupby("user_id").apply(lambda g: intra_list_diversity(g, "p_specs")).rename("ild_content")
div_cold = cold10_enriched.groupby("user_id").apply(lambda g: intra_list_diversity(g, "p_specs")).rename("ild_cold")

ent_content = content_top10.groupby("user_id")["group"].apply(group_entropy).rename("entropy_content")
ent_cold = cold10_enriched.groupby("user_id")["group"].apply(group_entropy).rename("entropy_cold")

div_compare = pd.concat([div_content, div_cold, ent_content, ent_cold], axis=1)
div_compare["ild_delta"] = div_compare["ild_cold"] - div_compare["ild_content"]
div_compare["ent_delta"] = div_compare["entropy_cold"] - div_compare["entropy_content"]

# --------------------
# 3) Fairness: overall and average per-user group share before/after
# --------------------
def group_share(df):
    return df["group"].value_counts(normalize=True)

overall_share_content = group_share(content_top10).rename("content_share")
overall_share_cold = group_share(cold10_enriched).rename("cold_share")
overall_share = pd.concat([overall_share_content, overall_share_cold], axis=1).fillna(0.0)

# per-user average shares
per_user_share_content = content_top10.pivot_table(index="user_id", columns="group", values="trainer_id", aggfunc="count").fillna(0)/10.0
per_user_share_cold = cold10_enriched.pivot_table(index="user_id", columns="group", values="trainer_id", aggfunc="count").fillna(0)/10.0
avg_share = pd.DataFrame({
    "avg_content_A": per_user_share_content.get("A", pd.Series(0,index=per_user_share_content.index)).mean(),
    "avg_content_B": per_user_share_content.get("B", pd.Series(0,index=per_user_share_content.index)).mean(),
    "avg_content_C": per_user_share_content.get("C", pd.Series(0,index=per_user_share_content.index)).mean(),
    "avg_cold_A": per_user_share_cold.get("A", pd.Series(0,index=per_user_share_cold.index)).mean(),
    "avg_cold_B": per_user_share_cold.get("B", pd.Series(0,index=per_user_share_cold.index)).mean(),
    "avg_cold_C": per_user_share_cold.get("C", pd.Series(0,index=per_user_share_cold.index)).mean(),
}, index=[0])

# --------------------
# 4) Swap analysis: how cold-start trades off match_score vs final_score
# --------------------
# For each user, compute the difference in average match_score and average final_score between cold10 and content top10
avg_content = content_top10.groupby("user_id").agg(avg_match=("match_score","mean"), avg_final=("final_score","mean")).rename(columns={"avg_match":"avg_match_content","avg_final":"avg_final_content"})
avg_cold = cold10_enriched.groupby("user_id").agg(avg_match=("match_score","mean"), avg_final=("final_score","mean")).rename(columns={"avg_match":"avg_match_cold","avg_final":"avg_final_cold"})
swap_summary = avg_content.join(avg_cold)
swap_summary["delta_match"] = swap_summary["avg_match_cold"] - swap_summary["avg_match_content"]
swap_summary["delta_final"] = swap_summary["avg_final_cold"] - swap_summary["avg_final_content"]

# pick users with largest positive delta_final and negative delta_match (classic trade-off)
swap_cases = swap_summary.sort_values(["delta_final","delta_match"], ascending=[False, True]).head(20).reset_index()

# --------------------
# 5) Aggregate summaries for the paper
# --------------------
paper_stats = {
    "precision_content_mean": float(prec_compare["p_at_10_content"].mean()),
    "precision_cold_mean": float(prec_compare["p_at_10_cold"].mean()),
    "precision_delta_mean": float(prec_compare["delta"].mean()),
    "ild_content_mean": float(div_compare["ild_content"].mean()),
    "ild_cold_mean": float(div_compare["ild_cold"].mean()),
    "ild_delta_mean": float(div_compare["ild_delta"].mean()),
    "ent_content_mean": float(div_compare["entropy_content"].mean()),
    "ent_cold_mean": float(div_compare["entropy_cold"].mean()),
    "ent_delta_mean": float(div_compare["ent_delta"].mean()),
}

# --------------------
# Display key tables
# --------------------
display(prec_compare.reset_index())
display(div_compare.reset_index())
display(overall_share.reset_index().rename(columns={"index":"group"}))
display(avg_share)
display(swap_cases)

paper_stats


  div_content = content_top10.groupby("user_id").apply(lambda g: intra_list_diversity(g, "p_specs")).rename("ild_content")
  div_cold = cold10_enriched.groupby("user_id").apply(lambda g: intra_list_diversity(g, "p_specs")).rename("ild_cold")


Unnamed: 0,user_id,p_at_10_content,p_at_10_cold,delta
0,U0000,0.1,0.0,-0.1
1,U0001,0.5,0.2,-0.3
2,U0002,0.7,0.2,-0.5
3,U0003,0.3,0.2,-0.1
4,U0004,0.1,0.0,-0.1
...,...,...,...,...
995,U0995,0.5,0.2,-0.3
996,U0996,0.4,0.1,-0.3
997,U0997,0.3,0.2,-0.1
998,U0998,0.3,0.2,-0.1


Unnamed: 0,user_id,ild_content,ild_cold,entropy_content,entropy_cold,ild_delta,ent_delta
0,U0000,0.819630,0.810370,1.054920,1.0889,-0.009259,0.033980
1,U0001,0.847407,0.848148,1.088900,1.0889,0.000741,0.000000
2,U0002,0.810000,0.795926,1.029653,1.0889,-0.014074,0.059247
3,U0003,0.758148,0.826667,0.897946,1.0889,0.068519,0.190954
4,U0004,0.836667,0.862593,1.029653,1.0889,0.025926,0.059247
...,...,...,...,...,...,...,...
995,U0995,0.726296,0.681852,1.029653,1.0889,-0.044444,0.059247
996,U0996,0.683333,0.784074,0.943348,1.0889,0.100741,0.145552
997,U0997,0.880370,0.857037,1.088900,1.0889,-0.023333,0.000000
998,U0998,0.734074,0.798519,1.029653,1.0889,0.064444,0.059247


Unnamed: 0,group,content_share,cold_share
0,A,0.3454,0.4
1,B,0.3375,0.3
2,C,0.3171,0.3


Unnamed: 0,avg_content_A,avg_content_B,avg_content_C,avg_cold_A,avg_cold_B,avg_cold_C
0,0.3454,0.3375,0.3171,0.4,0.3,0.3


Unnamed: 0,user_id,avg_match_content,avg_final_content,avg_match_cold,avg_final_cold,delta_match,delta_final
0,U0259,0.614286,0.5275,0.571,0.7541,-0.043286,0.2266
1,U0835,0.714286,0.5802,0.5996,0.8065,-0.114686,0.2263
2,U0440,0.614286,0.5626,0.571,0.7839,-0.043286,0.2213
3,U0816,0.628571,0.5523,0.5853,0.7717,-0.043271,0.2194
4,U0142,0.628571,0.5858,0.571,0.8022,-0.057571,0.2164
5,U0746,0.628571,0.5858,0.571,0.8022,-0.057571,0.2164
6,U0332,0.6,0.5447,0.5853,0.7598,-0.0147,0.2151
7,U0506,0.614286,0.569,0.5996,0.7721,-0.014686,0.2031
8,U0065,0.7,0.558,0.6139,0.7588,-0.0861,0.2008
9,U0751,0.685714,0.572,0.5996,0.7713,-0.086114,0.1993


{'precision_content_mean': 0.3992,
 'precision_cold_mean': 0.20540000000000003,
 'precision_delta_mean': -0.1938,
 'ild_content_mean': 0.7957896296296296,
 'ild_cold_mean': 0.8302896296296296,
 'ild_delta_mean': 0.03449999999999999,
 'ent_content_mean': 0.9871843796379561,
 'ent_cold_mean': 1.088899975342224,
 'ent_delta_mean': 0.10171559570426773}

# Decision Evaluation

In [2]:
import pandas as pd
import numpy as np
from itertools import combinations
from IPython.display import display

# --------------------
# Load data
# --------------------
path = "Content_Reco.xlsx"
xls = pd.ExcelFile(path)
user = pd.read_excel(xls, "user")
physio = pd.read_excel(xls, "physio")
gA = pd.read_excel(xls, "GroupA_Rank")
gB = pd.read_excel(xls, "GroupB_Rank")
gC = pd.read_excel(xls, "GroupC_Rank")
top50 = pd.read_excel(xls, "top50_match_rank")
cold10 = pd.read_excel(xls, "cold_start_top10")

# --------------------
# Build trainer info: final_score + group + specialities
# --------------------
gA = gA.copy(); gA["group"] = "A"
gB = gB.copy(); gB["group"] = "B"
gC = gC.copy(); gC["group"] = "C"

trainer_group = pd.concat(
    [
        gA[["trainer_id", "final_score", "group"]],
        gB[["trainer_id", "final_score", "group"]],
        gC[["trainer_id", "final_score", "group"]],
    ],
    ignore_index=True
)

trainer_specs = physio[["trainer_id", "specialities"]]
trainer_info = trainer_group.merge(trainer_specs, on="trainer_id", how="left")

# Normalize final_score (for manual weighted composite)
fs_min, fs_max = trainer_info["final_score"].min(), trainer_info["final_score"].max()
trainer_info["final_score_norm"] = (trainer_info["final_score"] - fs_min) / (fs_max - fs_min + 1e-12)

# Merge candidate pool with trainer info
cand = top50.merge(trainer_info, on="trainer_id", how="left")

# --------------------
# Utility functions: harmonize columns & safe metric computation
# --------------------
def harmonize_columns(df):
    """Unify column names and fill missing values; return a copy."""
    out = df.copy()
    # group
    if "group" not in out.columns:
        gx = out.get("group_x"); gy = out.get("group_y")
        if gx is not None or gy is not None:
            out["group"] = (gy if gy is not None else pd.Series(index=out.index)).combine_first(
                gx if gx is not None else pd.Series(index=out.index)
            )
        else:
            out["group"] = "Unknown"
    out["group"] = out["group"].fillna("Unknown")

    # final_score
    if "final_score" not in out.columns:
        fx = out.get("final_score_x"); fy = out.get("final_score_y")
        if fx is not None or fy is not None:
            out["final_score"] = (fx if fx is not None else pd.Series(index=out.index)).combine_first(
                fy if fy is not None else pd.Series(index=out.index)
            )

    # specialities
    if "specialities" not in out.columns:
        out["specialities"] = ""

    # Keep user_id / trainer_id; if match_score is missing, downstream metrics handle it
    return out

THRESH = 4.0/7.0  # ≈ 0.5714

def precision_at_10(df):
    """Compute Precision@10 if match_score exists, else return NaN."""
    if "match_score" not in df.columns:
        return np.nan
    return df.groupby("user_id").apply(lambda g: (g["match_score"] >= THRESH).mean()).mean()

def ild_by_specs(df):
    """Compute ILD based on Jaccard distance of specialities."""
    def ild_one(g):
        specs = g["specialities"].fillna("").astype(str).tolist()
        pairs = list(combinations(range(len(specs)), 2))
        if not pairs:
            return np.nan
        dists = []
        for i, j in pairs:
            sa = set([s.strip().lower() for s in specs[i].split(",") if s.strip()])
            sb = set([s.strip().lower() for s in specs[j].split(",") if s.strip()])
            if not sa and not sb:
                d = 0.0
            else:
                inter = len(sa & sb)
                union = len(sa | sb) if len(sa | sb) > 0 else 1
                d = 1.0 - inter / union
            dists.append(d)
        return float(np.mean(dists))
    return df.groupby("user_id").apply(ild_one).mean()

def fairness_entropy(df):
    """Compute entropy of group distribution for each user's recommendations."""
    if "group" not in df.columns:
        df = harmonize_columns(df)
    def ent_one(g):
        if "group" not in g.columns:
            return np.nan
        p = g["group"].value_counts(normalize=True)
        return float(-(p * np.log(p + 1e-12)).sum())
    return df.groupby("user_id").apply(ent_one).mean()

def overall_group_share(df):
    """Compute overall group share."""
    if "group" not in df.columns:
        df = harmonize_columns(df)
    return df["group"].value_counts(normalize=True).rename("share")

# --------------------
# Define four recommendation schemes
# --------------------
# A) Pure content: top10 by match_score
content_top10 = (
    cand.sort_values(["user_id", "match_score", "rank"], ascending=[True, False, True])
        .groupby("user_id")
        .head(10)
)
content_top10 = harmonize_columns(content_top10)

# B) Manual weighting: 0.6 * match_score + 0.4 * final_score_norm
cand_manual = cand.copy()
if "match_score" in cand_manual.columns:
    cand_manual["manual_score"] = 0.6 * cand_manual["match_score"] + 0.4 * cand_manual["final_score_norm"]
else:
    cand_manual["manual_score"] = cand_manual["final_score_norm"]
manual_top10 = (
    cand_manual.sort_values(["user_id", "manual_score", "rank"], ascending=[True, False, True])
        .groupby("user_id")
        .head(10)
)
manual_top10 = harmonize_columns(manual_top10)

# C) ML proxy: top10 by final_score only
cand_ml = cand.copy()
if "final_score" not in cand_ml.columns:
    cand_ml = harmonize_columns(cand_ml)
ml_top10 = (
    cand_ml.sort_values(["user_id", "final_score", "rank"], ascending=[True, False, True])
        .groupby("user_id")
        .head(10)
)
ml_top10 = harmonize_columns(ml_top10)

# D) Current method: cold_start_top10 merged with trainer_info
cold10_enriched = cold10.merge(trainer_info, on="trainer_id", how="left")
cold10_enriched = harmonize_columns(cold10_enriched)

# --------------------
# (Optional) further cleanup: unify *_x / *_y columns
# --------------------
for col_base in ["group", "final_score"]:
    x_col, y_col = f"{col_base}_x", f"{col_base}_y"
    if x_col in cold10_enriched.columns or y_col in cold10_enriched.columns:
        cold10_enriched[col_base] = cold10_enriched.get(x_col, pd.Series(index=cold10_enriched.index)).where(
            cold10_enriched.get(x_col, pd.Series(index=cold10_enriched.index)).notna(),
            cold10_enriched.get(y_col)
        )
        for c in [x_col, y_col]:
            if c in cold10_enriched.columns:
                cold10_enriched.drop(columns=c, inplace=True)

# --------------------
# Compute metrics for the four schemes
# --------------------
schemes = {
    "Content_Top10": content_top10,
    "Manual_Top10(0.6*match+0.4*final_norm)": manual_top10,
    "ML_Top10(final_score)": ml_top10,
    "ColdStart_Top10(0.5/0.5)": cold10_enriched
}

rows = []
group_shares = []
for name, df_s in schemes.items():
    # Ensure required columns exist
    df_s = harmonize_columns(df_s)

    rows.append({
        "Scheme": name,
        "Precision@10(threshold=4/7)": precision_at_10(df_s),
        "ILD@10(specialities_diversity)": ild_by_specs(df_s),
        "FairnessEntropy(group_balance)": fairness_entropy(df_s)
    })
    gs = overall_group_share(df_s).reset_index().rename(columns={"index": "group", "share": name})
    group_shares.append(gs)

results_df = pd.DataFrame(rows).sort_values("Scheme").reset_index(drop=True)

# Combine group share tables
group_share_df = group_shares[0]
for gs in group_shares[1:]:
    group_share_df = group_share_df.merge(gs, on="group", how="outer")
group_share_df = group_share_df.fillna(0.0)

# --------------------
# Display results
# --------------------
display(results_df)
display(group_share_df)

# Export dictionary form if needed (for logging/export)
_ = results_df.to_dict(orient="records")


  return df.groupby("user_id").apply(lambda g: (g["match_score"] >= THRESH).mean()).mean()
  return df.groupby("user_id").apply(ild_one).mean()
  return df.groupby("user_id").apply(ent_one).mean()
  return df.groupby("user_id").apply(lambda g: (g["match_score"] >= THRESH).mean()).mean()
  return df.groupby("user_id").apply(ild_one).mean()
  return df.groupby("user_id").apply(ent_one).mean()
  return df.groupby("user_id").apply(lambda g: (g["match_score"] >= THRESH).mean()).mean()
  return df.groupby("user_id").apply(ild_one).mean()
  return df.groupby("user_id").apply(ent_one).mean()
  return df.groupby("user_id").apply(lambda g: (g["match_score"] >= THRESH).mean()).mean()
  return df.groupby("user_id").apply(ild_one).mean()
  return df.groupby("user_id").apply(ent_one).mean()


Unnamed: 0,Scheme,Precision@10(threshold=4/7),ILD@10(specialities_diversity),FairnessEntropy(group_balance)
0,ColdStart_Top10(0.5/0.5),0.3311,0.83029,1.0889
1,Content_Top10,0.6518,0.79579,0.987184
2,ML_Top10(final_score),0.1385,0.855446,0.992179
3,Manual_Top10(0.6*match+0.4*final_norm),0.4418,0.818936,0.986562


Unnamed: 0,group,Content_Top10,Manual_Top10(0.6*match+0.4*final_norm),ML_Top10(final_score),ColdStart_Top10(0.5/0.5)
0,A,0.3454,0.3517,0.3296,0.4
1,B,0.3375,0.3365,0.3461,0.3
2,C,0.3171,0.3118,0.3243,0.3
