# In [None]:
# Vehicle-model competitor analysis demo (using model025 as the example)

import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

# --- 関数定義 ---

def load_crosstab(path, sheet=0):
    """Read one sheet of an Excel workbook into a DataFrame.

    Args:
        path: Location of the workbook, passed straight to ``pd.read_excel``.
        sheet: Sheet selector forwarded as ``sheet_name`` (default: ``0``).

    Returns:
        The parsed sheet, with its first column used as the row index.
    """
    crosstab = pd.read_excel(path, index_col=0, sheet_name=sheet)
    return crosstab

def prepare_stats(tbl):
    """Compute the marginals and per-cell expected counts of a crosstab.

    Args:
        tbl: DataFrame of counts.

    Returns:
        A 4-tuple ``(total, row_tot, col_tot, expected)`` where ``total`` is
        the sum of every cell, ``row_tot`` / ``col_tot`` are the per-row and
        per-column sums, and ``expected`` is a DataFrame labelled like
        ``tbl`` whose cells hold ``row total * column total / total``.
    """
    grand_total = tbl.values.sum()
    row_sums = tbl.sum(axis=1)
    col_sums = tbl.sum(axis=0)
    # Outer product of the marginals, scaled by the grand total.
    expected_frame = pd.DataFrame(
        np.outer(row_sums, col_sums) / grand_total,
        index=tbl.index,
        columns=tbl.columns,
    )
    return grand_total, row_sums, col_sums, expected_frame

def one_direction(tbl, A, axis=0, min_cnt=3, min_lift=1.0, alpha=0.1):
    """Find labels whose co-occurrence count with ``A`` passes all filters.

    With ``axis=0`` the column ``A`` is scanned and every row label ``B``
    with a positive count is a candidate (cell ``(B, A)``).  With any other
    ``axis`` the row ``A`` is scanned and every column label ``B`` with a
    positive count is a candidate (cell ``(A, B)``).  For each candidate:

    * ``support`` is the observed count in that cell;
    * ``lift`` is the observed count divided by the expected count of the
      *same* cell (row total * column total / grand total), or ``NaN`` when
      that expected count is 0;
    * ``p_value`` comes from an uncorrected chi-square test on the 2x2 table
      that collapses ``tbl`` into "A vs. rest" and "B vs. rest".

    Args:
        tbl: DataFrame of counts.
        A: Label to analyse; a column label when ``axis == 0``, otherwise a
            row label.
        axis: ``0`` to scan column ``A``; anything else scans row ``A``.
        min_cnt: Minimum ``support`` a candidate must reach.
        min_lift: Minimum ``lift`` a candidate must reach.
        alpha: Candidates are kept only when ``p_value < alpha``.

    Returns:
        DataFrame with columns ``competitor``, ``support``, ``lift`` and
        ``p_value``, sorted by ``lift`` then ``support`` (both descending)
        with a fresh 0-based index.
    """
    total, row_tot, col_tot, expected = prepare_stats(tbl)
    scan_column = axis == 0
    if scan_column:
        counts = tbl[A]
        a_total = col_tot.loc[A]
        b_totals = row_tot
    else:
        counts = tbl.loc[A]
        a_total = row_tot.loc[A]
        b_totals = col_tot
    counts = counts[counts > 0]

    out = []
    for B, cnt in counts.items():
        b_total = b_totals.loc[B]
        # 2x2 collapse: [in A & in B, in A only], [in B only, in neither].
        sub = [
            [cnt, a_total - cnt],
            [b_total - cnt, total - b_total - a_total + cnt],
        ]
        p = chi2_contingency(sub, correction=False)[1]
        # The expected count must come from the same cell as the observed
        # count: (B, A) when scanning a column, (A, B) when scanning a row.
        expected_cnt = expected.loc[B, A] if scan_column else expected.loc[A, B]
        lift = cnt / expected_cnt if expected_cnt else np.nan
        out.append((B, int(cnt), lift, p))

    res = pd.DataFrame(out, columns=['competitor', 'support', 'lift', 'p_value'])
    keep = (res.support >= min_cnt) & (res.lift >= min_lift) & (res.p_value < alpha)
    return res[keep].sort_values(
        ['lift', 'support'], ascending=[False, False]).reset_index(drop=True)

def symmetry_score(tbl, A, B):
    """Return how balanced the two cells linking ``A`` and ``B`` are.

    The score is the smaller of ``tbl.loc[B, A]`` and ``tbl.loc[A, B]``
    divided by the larger one (plus a tiny epsilon).

    Args:
        tbl: DataFrame in which both ``A`` and ``B`` are row and column labels.
        A: First label.
        B: Second label.

    Returns:
        The ratio described above.
    """
    b_row_a_col = tbl.loc[B, A]
    a_row_b_col = tbl.loc[A, B]
    smaller = min(b_row_a_col, a_row_b_col)
    larger = max(b_row_a_col, a_row_b_col)
    # The epsilon keeps the division defined when both cells are 0.
    return smaller / (larger + 1e-6)

def two_hop(tbl, A, tier1, decay=0.7, **kwargs):
    """Score second-hop competitors reached through the labels in ``tier1``.

    For every label ``B`` in ``tier1``, ``one_direction`` is run on ``B``
    along both axes.  Each label it returns (other than ``A``) is scored as
    ``lift * decay``; when a label is reached more than once, the highest
    score is kept.

    Args:
        tbl: DataFrame of counts passed through to ``one_direction``.
        A: Original target label; it is never reported as its own competitor.
        tier1: Iterable of labels to expand from.
        decay: Multiplier applied to each lift.
        **kwargs: Extra keyword arguments forwarded to ``one_direction``
            (e.g. ``min_cnt``, ``min_lift``, ``alpha``).

    Returns:
        DataFrame with columns ``competitor`` and ``score`` sorted by
        ``score`` descending.  When nothing qualifies, an empty frame with
        those two columns is returned.
    """
    # NOTE(review): only ``A`` is skipped; labels already in ``tier1`` can
    # still appear in the result - confirm that this is intended.
    latent = {}
    for B in tier1:
        # Row direction (axis=1) first, then column direction (axis=0).
        for axis in (1, 0):
            hits = one_direction(tbl, B, axis=axis, **kwargs)
            for name, lift in zip(hits['competitor'], hits['lift']):
                if name == A:
                    continue
                latent[name] = max(latent.get(name, 0), lift * decay)

    # Explicit columns keep the sort below valid even when ``latent`` is empty.
    result = pd.DataFrame(list(latent.items()), columns=['competitor', 'score'])
    return result.sort_values('score', ascending=False).reset_index(drop=True)

# --- Run the analysis ---

tbl = load_crosstab("demo_crosstab.xlsx", sheet=0)
target_model = "model025"

# Scan the target's column (axis=0) and the target's row (axis=1) separately.
tier1 = one_direction(tbl, target_model, axis=0)
tier2 = one_direction(tbl, target_model, axis=1)

# Keep only the competitors that passed the filters in both directions and
# attach a symmetry score to each of them.
reverse_names = tier2.competitor.values
mutual = []
for _, row in tier1.iterrows():
    B = row['competitor']
    if B not in reverse_names:
        continue
    mutual.append(
        (B, row['support'], row['lift'], symmetry_score(tbl, target_model, B))
    )
mutual_df = pd.DataFrame(
    mutual, columns=['competitor', 'support', 'lift', 'symmetry']
)

# Expand one more hop outward from the mutual competitors.
latent_df = two_hop(tbl, target_model, mutual_df.competitor.tolist())

# --- Show the results ---
(mutual_df.head(10), latent_df.head(10))
