# In [None]:
# Click-through-rate (CTR) analysis of headline variants.
#
# Reads "click_rates.csv" (columns used: Headline, Click, Rate), builds a
# per-headline clicks / no-clicks table, plots it, and tests whether the
# click rates differ (chi-square overall, then pairwise two-proportion
# z-tests).
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency, norm


def proportions_ztest(count, nobs):
    """Two-sample z-test for equality of two proportions (pooled variance).

    ``scipy.stats`` does not provide ``proportions_ztest``; this local
    implementation keeps the name and the ``(count, nobs)`` call shape the
    script relies on, and computes the usual pooled two-sided test.

    Parameters
    ----------
    count : sequence of two numbers
        Number of successes in each group.
    nobs : sequence of two numbers
        Number of trials in each group.

    Returns
    -------
    tuple of float
        ``(z, p_value)`` where ``p_value`` is two-sided. ``(nan, nan)`` is
        returned when a group has no trials or the pooled standard error is
        zero (pooled proportion of exactly 0 or 1), because the statistic is
        undefined in those cases.
    """
    successes = np.asarray(count, dtype=float)
    trials = np.asarray(nobs, dtype=float)
    if (trials <= 0).any():
        return float("nan"), float("nan")

    pooled = successes.sum() / trials.sum()
    std_err = np.sqrt(pooled * (1.0 - pooled) * (1.0 / trials).sum())
    if std_err == 0:
        return float("nan"), float("nan")

    rates = successes / trials
    z_stat = (rates[0] - rates[1]) / std_err
    return float(z_stat), float(2.0 * norm.sf(abs(z_stat)))


# Load data
df = pd.read_csv("click_rates.csv")

print("Dataset Loaded:")
print(df.head())

# -------------------------------------------------------------------
# 1️⃣ CLEAN + PREPARE THE DATA
# -------------------------------------------------------------------
# Normalise the outcome label once so both outcomes are matched
# case-insensitively; missing labels simply match neither outcome.
click_label = df["Click"].str.lower()
df["is_click"] = click_label.eq("click").astype(int)

# "Rate" is treated as a count: it is summed per headline and outcome.
# The masks are row-aligned with df, so no index re-alignment is needed
# inside the groupby.
counts = pd.DataFrame(
    {
        "Headline": df["Headline"],
        "clicks": df["Rate"].where(df["is_click"] == 1, 0),
        "no_clicks": df["Rate"].where(click_label.eq("no-click"), 0),
    }
)
summary = counts.groupby("Headline")[["clicks", "no_clicks"]].sum()

summary["total"] = summary["clicks"] + summary["no_clicks"]
summary["CTR"] = summary["clicks"] / summary["total"]

print("\n=== CTR SUMMARY ===")
print(summary)

# -------------------------------------------------------------------
# 2️⃣ BAR CHART – CTR per Headline
# -------------------------------------------------------------------
plt.figure(figsize=(8, 5))
sns.barplot(x=summary.index, y=summary["CTR"], palette="Blues")
plt.title("Click-Through Rate (CTR) by Headline")
plt.ylabel("CTR")
plt.xlabel("Headline")
# Leave 30% headroom above the tallest bar for the value labels.
plt.ylim(0, summary["CTR"].max() * 1.3)
for i, v in enumerate(summary["CTR"]):
    plt.text(i, v + 0.005, f"{v:.3f}", ha="center")
plt.show()

# -------------------------------------------------------------------
# 3️⃣ CHI-SQUARE TEST – Are click rates different?
# -------------------------------------------------------------------
# Contingency table: one row per headline, columns = [clicks, no_clicks].
chi_data = summary[["clicks", "no_clicks"]].values
chi2, p, dof, expected = chi2_contingency(chi_data)

print("\n=== CHI-SQUARE TEST ===")
print(f"Chi-square = {chi2:.4f}")
print(f"P-value = {p:.6f}")
print("Conclusion:",
      "SIGNIFICANT difference between headlines" if p < 0.05
      else "No significant difference")

# -------------------------------------------------------------------
# 4️⃣ A/B TEST (Proportion Z-Test) – pairwise comparison
# -------------------------------------------------------------------
print("\n=== PROPORTION Z-TESTS (A/B Tests) ===")
headlines = summary.index.tolist()

# NOTE: each pair is tested at the 0.05 level with no correction for
# multiple comparisons.
for A, B in itertools.combinations(headlines, 2):
    clicks = [summary.loc[A, "clicks"], summary.loc[B, "clicks"]]
    totals = [summary.loc[A, "total"], summary.loc[B, "total"]]

    z, p_val = proportions_ztest(clicks, totals)

    print(f"\n{A} vs {B}")
    print(f"Z = {z:.4f}, p = {p_val:.6f} →",
          "SIGNIFICANT" if p_val < 0.05 else "Not significant")

# -------------------------------------------------------------------
# 5️⃣ HEATMAP – Clicks vs No-clicks
# -------------------------------------------------------------------
plt.figure(figsize=(6, 5))
# fmt="d" requires integer cell values, i.e. an integer "Rate" column.
sns.heatmap(summary[["clicks", "no_clicks"]], annot=True, fmt="d", cmap="YlGnBu")
plt.title("Clicks vs No-clicks Heatmap")
plt.show()
