In [1]:
import pandas as pd
import numpy as np

# ============================================
# 0. Load data
# ============================================
file_path = "problem_data_final.xlsx"

# Open the workbook once and read every sheet from the same handle, instead of
# re-opening and re-parsing the file for each sheet. The context manager closes
# the underlying file as soon as the sheets are loaded.
with pd.ExcelFile(file_path) as workbook:
    reward = pd.read_excel(workbook, sheet_name="Reward", parse_dates=["ts"])
    trade = pd.read_excel(workbook, sheet_name="Trade", parse_dates=["ts"])
    ip_df = pd.read_excel(workbook, sheet_name="IP")

    # The Funding and Spec sheets can be loaded the same way if needed:
    # funding = pd.read_excel(workbook, sheet_name="Funding", parse_dates=["ts"])
    # spec = pd.read_excel(workbook, sheet_name="Spec", parse_dates=["day"])

# ============================================
# 1. Build per-account reward summary metrics
#    - total_reward: sum of reward_amount
#    - reward_count: number of non-null reward_amount entries
#    - first_reward_ts / last_reward_ts: earliest and latest reward timestamp
# ============================================
rewards_by_account = reward.groupby("account_id")
reward_summary = rewards_by_account.agg(
    total_reward=pd.NamedAgg(column="reward_amount", aggfunc="sum"),
    reward_count=pd.NamedAgg(column="reward_amount", aggfunc="count"),
    first_reward_ts=pd.NamedAgg(column="ts", aggfunc="min"),
    last_reward_ts=pd.NamedAgg(column="ts", aggfunc="max"),
)

# ============================================
# 2. Build per-account trade summary metrics
#    - trade_count: number of trade rows with a non-null symbol
#    - first_trade_ts / last_trade_ts: earliest and latest trade timestamp
#    - traded_symbols: number of distinct symbols traded
#    - total_notional: sum of the trade `amount` column
# ============================================
trade_summary = trade.groupby("account_id").agg(
    trade_count=("symbol", "count"),
    first_trade_ts=("ts", "min"),
    last_trade_ts=("ts", "max"),
    # Use the "nunique" string alias (as the IP aggregation in this script
    # does) so groupby dispatches to its built-in implementation instead of
    # calling a Python-level function once per group.
    traded_symbols=("symbol", "nunique"),
    total_notional=("amount", "sum"),
)

# ============================================
# 3. Join the reward and trade summaries on account
#    (only accounts that received a reward are analysed, hence the left join
#    from reward_summary)
# ============================================
summary = reward_summary.join(trade_summary, how="left")

# Accounts with a reward but no trades come out of the left join with NaN in
# every trade column. Give all numeric trade metrics an explicit zero;
# previously traded_symbols was left as NaN while its siblings were filled.
# The first/last trade timestamps are intentionally left missing.
# Counts are cast back to integers because the NaN introduced by the join
# silently turns them into floats.
for count_col in ("trade_count", "traded_symbols"):
    summary[count_col] = summary[count_col].fillna(0).astype("int64")
summary["total_notional"] = summary["total_notional"].fillna(0)

# ============================================
# 4. Build per-account IP statistics
#    - n_ips: number of distinct IPs the account used
#    - max_accounts_sharing_ip: among the IPs the account used, the largest
#      number of distinct accounts seen on any single one of them
# ============================================
# Count how many distinct accounts are attached to each IP.
ip_accounts_per_ip = (
    ip_df.groupby("ip")["account_id"]
    .nunique()
    .rename("accounts_on_ip")
    .reset_index()
)

# Attach that per-IP count to every row of the original IP table.
ip_with_counts = pd.merge(ip_df, ip_accounts_per_ip, how="left", on="ip")

# Collapse to one row per account.
acc_ip_stats = ip_with_counts.groupby("account_id").agg(
    n_ips=pd.NamedAgg(column="ip", aggfunc="nunique"),
    max_accounts_sharing_ip=pd.NamedAgg(column="accounts_on_ip", aggfunc="max"),
)

# Bring the IP statistics into the main summary.
summary = summary.join(acc_ip_stats, how="left")

# Accounts with no IP records at all: zero known IPs, and a sharing level of 1
# (i.e. treated the same as an IP used by a single account).
for ip_col, ip_default in (("n_ips", 0), ("max_accounts_sharing_ip", 1)):
    summary[ip_col] = summary[ip_col].fillna(ip_default)

# ============================================
# 5. Compute derived metrics
#    - reward_per_trade: reward earned per trade
#    - reward_per_notional: reward relative to traded amount
#    Both are set to 0 when the denominator is not usable (no trades /
#    non-positive notional).
# ============================================
trade_counts = summary["trade_count"]
# Blank out zero trade counts so the division yields NaN there, then map those
# undefined ratios to 0.
reward_per_trade = summary["total_reward"] / trade_counts.mask(trade_counts == 0)
summary["reward_per_trade"] = reward_per_trade.fillna(0)

notional = summary["total_notional"]
reward_to_notional = summary["total_reward"] / notional
# Keep the ratio only where notional is strictly positive; everything else is 0.0.
summary["reward_per_notional"] = reward_to_notional.where(notional > 0, 0.0)

# ============================================
# 6. Normalization
#    Scale each metric into the 0..1 range so the metrics can be combined
#    into a single score. Whenever a metric has no usable range, the
#    normalized column is set to 0.0 for every account.
# ============================================

# 6-1. Normalize total_reward by the largest total reward.
max_total_reward = summary["total_reward"].max()
if max_total_reward > 0:
    summary["nr_total_reward"] = summary["total_reward"] / max_total_reward
else:
    summary["nr_total_reward"] = 0.0

# 6-2. Inverse min-max normalization of trade_count
#      (the fewer the trades, the higher the score).
tc_min = summary["trade_count"].min()
tc_max = summary["trade_count"].max()
if tc_max > tc_min:
    summary["nr_low_trade"] = 1 - (summary["trade_count"] - tc_min) / (tc_max - tc_min)
else:
    summary["nr_low_trade"] = 0.0

# 6-3. Normalize reward_per_notional by the largest ratio.
max_rpn = summary["reward_per_notional"].max()
if max_rpn > 0:
    summary["nr_reward_per_notional"] = summary["reward_per_notional"] / max_rpn
else:
    summary["nr_reward_per_notional"] = 0.0

# Accounts that collected a reward without any positive traded notional have
# an undefined (effectively unbounded) reward/notional ratio, which the raw
# metric stores as 0. Left as-is they would score *lower* on this component
# than an otherwise identical account that traded a little, which inverts the
# intent of the metric. Pin them to the maximum normalized value instead.
rewarded_without_volume = ~(summary["total_notional"] > 0) & (summary["total_reward"] > 0)
summary.loc[rewarded_without_volume, "nr_reward_per_notional"] = 1.0

# 6-4. Normalize the degree of IP sharing.
#      max_accounts_sharing_ip == 1 means the IP is used by one account only;
#      larger values mean more accounts share it, so the scale starts at 1.
ip_min = 1
ip_max = summary["max_accounts_sharing_ip"].max()
if ip_max > ip_min:
    summary["nr_ip_shared"] = (summary["max_accounts_sharing_ip"] - ip_min) / (ip_max - ip_min)
else:
    summary["nr_ip_shared"] = 0.0

# ============================================
# 7. Compute the bonus-farming score
#    bonus_farming_score = w1 * reward size
#                        + w2 * low trade activity
#                        + w3 * reward / notional ratio
#                        + w4 * degree of IP sharing
# ============================================

w1, w2, w3, w4 = 0.3, 0.3, 0.25, 0.15

# (normalized column, weight) pairs, listed in the order they are added up.
score_terms = (
    ("nr_total_reward", w1),
    ("nr_low_trade", w2),
    ("nr_reward_per_notional", w3),
    ("nr_ip_shared", w4),
)

summary["bonus_farming_score"] = sum(
    weight * summary[column] for column, weight in score_terms
)

# ============================================
# 8. Sort the results and inspect the top-N most suspicious accounts
# ============================================
summary_sorted = summary.sort_values(by="bonus_farming_score", ascending=False)

# Columns to display, grouped by theme: raw reward/trade totals, IP
# statistics, derived ratios, normalized components, and the final score.
result_cols = (
    ["total_reward", "reward_count", "trade_count", "total_notional"]
    + ["n_ips", "max_accounts_sharing_ip"]
    + ["reward_per_trade", "reward_per_notional"]
    + ["nr_total_reward", "nr_low_trade", "nr_reward_per_notional", "nr_ip_shared"]
    + ["bonus_farming_score"]
)

# Show only the 20 highest-scoring accounts.
summary_sorted.head(20)[result_cols]


Unnamed: 0_level_0,total_reward,reward_count,trade_count,total_notional,n_ips,max_accounts_sharing_ip,reward_per_trade,reward_per_notional,nr_total_reward,nr_low_trade,nr_reward_per_notional,nr_ip_shared,bonus_farming_score
account_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
A_55021b4ae2,534.89877,5,38,159350.0,5,1,14.076283,0.003356755,0.3016,1.0,1.0,0.0,0.64048
A_c91db6cabf,534.9121,5,46,168867.7,8,1,11.628524,0.00316764,0.301608,0.998095,0.943661,0.0,0.625826
A_f96ede8d34,796.028294,22,802,13604940.0,16,2,0.992554,5.851023e-05,0.448837,0.818095,0.017431,1.0,0.534437
A_ebdb869fd3,159.991358,3,1169,130990.1,177,2,0.136862,0.0012214,0.09021,0.730714,0.363863,1.0,0.487243
A_d444580218,336.895867,10,440,10030830.0,4,2,0.765672,3.358606e-05,0.189957,0.904286,0.010006,1.0,0.480774
A_48fbd03d61,106.0,4,170,6671425.0,26,2,0.623529,1.588866e-05,0.059768,0.968571,0.004733,1.0,0.459685
A_cab220ef3c,1773.536296,8,2183,349100200.0,26,1,0.812431,5.080308e-06,1.0,0.489286,0.001513,0.0,0.447164
A_8860be39b3,10.0,1,239,63380270.0,19,2,0.041841,1.577778e-07,0.005638,0.952143,4.7e-05,1.0,0.437346
A_26ffea8fd9,243.54482,8,1036,48695940.0,96,2,0.235082,5.001337e-06,0.137322,0.762381,0.00149,1.0,0.420283
A_e27beda014,13.943506,3,752,3080857.0,24,2,0.018542,4.525853e-06,0.007862,0.83,0.001348,1.0,0.401696
