In [1]:
from pathlib import Path
import pandas as pd
import glob
import numpy as np

import os, glob, json, math
from collections import defaultdict, Counter

import matplotlib.pyplot as plt

# Notebook-wide pandas display limits.
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 140)

# Plotting rules: matplotlib only, one chart per cell, no explicit colours.

BASE = Path("data")                # relative path: works when the notebook is run from the project root
TX_DIR = BASE / "transactions"     # the transactions folder (the one from your screenshot)

# Client profiles; header names are stripped of surrounding whitespace.
clients = pd.read_csv(BASE / "clients.csv")
clients.columns = [c.strip() for c in clients.columns]

# 1) Transactions (purchases): one CSV per client, concatenated into a single frame.
tx_files = sorted(glob.glob(str(TX_DIR / "client_*_transactions_3m.csv")))
tx_frames = [pd.read_csv(p, sep=None, engine="python") for p in tx_files]  # sep=None: delimiter is auto-detected
tx = pd.concat(tx_frames, ignore_index=True) if tx_frames else pd.DataFrame()
tx.columns = [c.strip() for c in tx.columns]

# 2) Transfers (in/out): same layout as above, different file suffix.
tr_files = sorted(glob.glob(str(TX_DIR / "client_*_transfers_3m.csv")))
tr_frames = [pd.read_csv(p, sep=None, engine="python") for p in tr_files]
tr = pd.concat(tr_frames, ignore_index=True) if tr_frames else pd.DataFrame()
tr.columns = [c.strip() for c in tr.columns]

# errors="coerce": unparseable dates become NaT instead of raising.
tx["date"] = pd.to_datetime(tx.get("date"), errors="coerce")
tr["date"] = pd.to_datetime(tr.get("date"), errors="coerce")

# Amounts become numeric (bad values -> 0.0); label columns become plain strings (missing -> "").
for df in (tx, tr):
    if "amount" in df.columns:
        df["amount"] = pd.to_numeric(df["amount"], errors="coerce").fillna(0.0)
    for col in ("category","currency","type","direction"):
        if col in df.columns:
            df[col] = df[col].fillna("").astype(str)

# Exchange rates (offline defaults), expressed as KZT per one unit of the currency.
# When the full project is available these could be read from exchange_rates.json.
rates = {"KZT":1.0, "USD":500.0, "EUR":540.0, "RUB":5.0}
def to_kzt(amount, currency):
    """Convert ``amount`` expressed in ``currency`` to KZT.

    The currency code is matched against the module-level ``rates`` table
    case-insensitively and with surrounding whitespace ignored, so values
    such as ``" usd "`` coming from messy CSV cells still resolve.
    Unknown codes fall back to a rate of 1.0.

    Returns 0.0 when ``amount`` cannot be converted to a float.
    """
    code = str(currency).strip().upper()
    try:
        return float(amount) * float(rates.get(code, 1.0))
    except (TypeError, ValueError, OverflowError):
        # Best-effort conversion: a non-numeric amount contributes nothing.
        return 0.0

def _finalize_ledger(df, text_cols):
    """Coerce one ledger frame (purchases or transfers) in place and return it.

    Guarantees the columns ``date`` (datetime, NaT when unparseable),
    ``amount`` (float, 0.0 when missing or bad), ``amount_kzt`` (float) and
    ``ym`` (monthly period of ``date``). Columns listed in ``text_cols`` that
    are present are converted to strings with missing values as "".

    Unlike a row-wise ``DataFrame.apply``, this also works when the frame is
    empty or lacks ``amount``/``currency`` (e.g. no input files were found).
    A missing ``currency`` column is treated as an empty code, for which
    ``to_kzt`` applies its fallback rate.
    """
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
    else:
        df["date"] = pd.Series(pd.NaT, index=df.index, dtype="datetime64[ns]")

    for col in text_cols:
        if col in df.columns:
            df[col] = df[col].fillna("").astype(str)

    if "amount" in df.columns:
        df["amount"] = pd.to_numeric(df["amount"], errors="coerce").fillna(0.0)
    else:
        df["amount"] = pd.Series(0.0, index=df.index, dtype="float64")

    if "currency" in df.columns:
        currency = df["currency"]
    else:
        currency = pd.Series("", index=df.index, dtype="object")

    # Explicit index/dtype keeps the column float64 even for a zero-row frame.
    df["amount_kzt"] = pd.Series(
        [to_kzt(a, c) for a, c in zip(df["amount"], currency)],
        index=df.index,
        dtype="float64",
    )
    df["ym"] = df["date"].dt.to_period("M")
    return df


# Purchases and transfers go through the same coercion; only the label columns differ.
tx = _finalize_ledger(tx, ("category", "currency", "client_code"))
tr = _finalize_ledger(tr, ("type", "direction", "currency", "client_code"))


In [2]:
# === DROP-IN FIX: нормализация колонок и ключа ===
import re

def normalize_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with snake-style column names.

    Each name is stripped, lower-cased, and every character outside
    ``a-z``, ``0-9`` and ``_`` is replaced by an underscore. The input
    frame itself is not modified.
    """
    unwanted = re.compile(r'[^a-z0-9_]')
    renamed = {}
    for original in df.columns:
        renamed[original] = unwanted.sub('_', original.strip().lower())
    return df.rename(columns=renamed)

def ensure_client_code(df: pd.DataFrame, df_name="df") -> pd.DataFrame:
    """Make sure the frame exposes a ``client_code`` column.

    If the column already exists the frame is returned unchanged. Otherwise
    the first column whose name contains "client" together with "code" or
    "id" is renamed to ``client_code``.

    Raises:
        KeyError: when no suitable column is found; ``df_name`` is used as
            the prefix of the message.
    """
    columns = df.columns
    if "client_code" in columns:
        return df
    # Try to guess which column identifies the client.
    for candidate in columns:
        if "client" in candidate and ("code" in candidate or "id" in candidate):
            return df.rename(columns={candidate: "client_code"})
    raise KeyError(f"{df_name}: не нашли колонку client_code (после нормализации: {list(df.columns)})")

# 1) Normalise the column names of all three frames.
clients = normalize_cols(clients)
tx      = normalize_cols(tx)
tr      = normalize_cols(tr)

# 2) Make sure client_code exists (alternative spellings are picked up).
#    Empty ledgers are skipped so a missing input does not raise here.
clients = ensure_client_code(clients, "clients")
if not tx.empty:
    tx = ensure_client_code(tx, "tx")
if not tr.empty:
    tr = ensure_client_code(tr, "tr")

# 3) Cast the key to str everywhere (avoids int-vs-str mismatches on lookup/merge).
clients["client_code"] = clients["client_code"].astype(str)
if "client_code" in tx.columns:
    tx["client_code"] = tx["client_code"].astype(str)
if "client_code" in tr.columns:
    tr["client_code"] = tr["client_code"].astype(str)

# 4) Just in case, check that the critical columns are present (warn only, do not stop).
for df, name in [(tx,"tx"), (tr,"tr")]:
    if df.empty: 
        continue
    for c in ["date","amount","currency"]:
        if c not in df.columns:
            print(f"[WARN] {name}: нет колонки {c} — проверь входные файлы")

# Quick visual confirmation of the first few column names of each frame.
print("OK cols:", clients.columns.tolist()[:8], "...", " | tx:", list(tx.columns)[:8], " | tr:", list(tr.columns)[:8])


# Per-client sub-frames keyed by the string client_code; empty dicts when a ledger is empty.
tx_groups = {str(k): g for k, g in (tx.groupby("client_code") if not tx.empty else [])}
tr_groups = {str(k): g for k, g in (tr.groupby("client_code") if not tr.empty else [])}

# Purchase categories counted as "online" and "travel" spend in the feature table.
ONLINE = {"Едим дома","Смотрим дома","Играем дома"}
TRAVEL = {"Путешествия","Отели","Такси"}

def monthly_totals(df, col="amount_kzt"):
    """Sum ``col`` per calendar month of ``df["date"]``.

    Returns a dict mapping the month rendered as a string (e.g. "2024-01")
    to the float total. An empty frame yields an empty dict. The input
    frame is not modified.
    """
    if df.empty:
        return {}
    by_month = (
        df.assign(ym=df["date"].dt.to_period("M"))
        .groupby("ym")[col]
        .sum()
    )
    return {str(month): float(total) for month, total in by_month.items()}

def get_num_from_row(row, *aliases, default=0.0):
    """Safely read a number stored under one of several possible column names.

    ``aliases`` are tried in order; the first one that is present in ``row``,
    non-null and parseable wins. Before parsing, everything except digits,
    ``,``, ``.`` and ``-`` is dropped (spaces, non-breaking spaces, currency
    signs, ...).

    Separator handling:
      * only ``,`` or only ``.`` present: it is the decimal separator
        ("1,5" -> 1.5);
      * both present: whichever occurs last is the decimal separator and the
        other is a thousands separator ("1,234.56" and "1.234,56" -> 1234.56).

    Returns ``float(default)`` when no alias yields a number.
    """
    for key in aliases:
        if key in row and pd.notna(row[key]):
            text = re.sub(r'[^0-9,.\-]', '', str(row[key]))
            if ',' in text and '.' in text:
                # Mixed separators: strip the grouping one, keep the decimal one.
                grouping = '.' if text.rfind(',') > text.rfind('.') else ','
                text = text.replace(grouping, '')
            text = text.replace(',', '.')
            try:
                return float(text)
            except ValueError:
                # Not a number under this alias; try the next one.
                continue
    return float(default)

# Build one feature row per client from its purchases (tx) and transfers (tr).
rows = []
for _, p in clients.iterrows():
    code = str(p.get("client_code"))
    # Clients without any rows get an empty frame with the right columns.
    df_tx = tx_groups.get(code, pd.DataFrame(columns=tx.columns))
    df_tr = tr_groups.get(code, pd.DataFrame(columns=tr.columns))

    # Purchases
    if df_tx.empty:
        spend_cat = {}; total_spend = 0.0
        fx_share_tx = 0.0
        months_tx = {}
    else:
        spend_cat = df_tx.groupby("category")["amount_kzt"].sum().to_dict()
        total_spend = sum(spend_cat.values())
        tot = df_tx["amount_kzt"].sum()
        # Share of spend made in any currency other than KZT.
        fx_amt = df_tx.loc[df_tx["currency"].str.upper().ne("KZT"), "amount_kzt"].sum()
        fx_share_tx = (fx_amt/tot) if tot>0 else 0.0
        months_tx = monthly_totals(df_tx)

    # Category shares; guarded against division by zero.
    online_share = (sum(spend_cat.get(c,0.0) for c in ONLINE)/total_spend) if total_spend>0 else 0.0
    travel_share = (sum(spend_cat.get(c,0.0) for c in TRAVEL)/total_spend) if total_spend>0 else 0.0

    # Transfers
    if df_tr.empty:
        in_sum = out_sum = 0.0
        salary_in = stipend_in = cashback_in = refund_in = 0.0
        p2p_out = card_out = atm_withdrawal = utilities_out = 0.0
        loan_payment_out = cc_repayment_out = installment_payment_out = 0.0
        invest_in = invest_out = deposit_topup_out = 0.0
        gold_buy_out = gold_sell_in = 0.0
        months_tr_in = months_tr_out = {}
    else:
        in_sum  = df_tr.loc[df_tr["direction"].eq("in"), "amount_kzt"].sum()
        out_sum = df_tr.loc[df_tr["direction"].eq("out"), "amount_kzt"].sum()
        # Key transfer types: s(tp) is the KZT total of this client's rows with that exact type.
        def s(tp): return df_tr.loc[df_tr["type"].eq(tp), "amount_kzt"].sum()
        salary_in = s("salary_in"); stipend_in = s("stipend_in")
        cashback_in = s("cashback_in"); refund_in = s("refund_in")
        p2p_out = s("p2p_out"); card_out = s("card_out"); atm_withdrawal = s("atm_withdrawal")
        utilities_out = s("utilities_out"); loan_payment_out = s("loan_payment_out")
        cc_repayment_out = s("cc_repayment_out"); installment_payment_out = s("installment_payment_out")
        invest_in = s("invest_in"); invest_out = s("invest_out")
        deposit_topup_out = s("deposit_topup_out")
        gold_buy_out = s("gold_buy_out"); gold_sell_in = s("gold_sell_in")

        # NOTE(review): the monthly in/out totals are computed but not used in the summary row below.
        months_tr_in  = monthly_totals(df_tr.loc[df_tr["direction"].eq("in")])
        months_tr_out = monthly_totals(df_tr.loc[df_tr["direction"].eq("out")])
    
    # Summary row: profile fields, purchase aggregates, then transfer aggregates.
    rows.append({
        "client_code": code,
        "status": str(p.get("status","")),
        "age": float(p.get("age", np.nan)) if pd.notna(p.get("age", np.nan)) else np.nan,
        "avg_balance": float(p.get("avg_monthly_balance_kzt", 0.0) or 0.0),

        "total_spend_3m": float(total_spend),
        "share_online": float(online_share),
        "share_travel": float(travel_share),
        "fx_share_tx": float(fx_share_tx),
        "months_seen_tx": int(len(months_tx)),
        # np.std uses its default (population) normalisation here.
        "month_spend_mean": float(np.mean(list(months_tx.values()))) if months_tx else 0.0,
        "month_spend_std":  float(np.std(list(months_tx.values()))) if months_tx else 0.0,

        "transfers_in_3m": float(in_sum),
        "transfers_out_3m": float(out_sum),
        "net_flow_3m": float(in_sum - out_sum),

        "salary_in": float(salary_in),
        "stipend_in": float(stipend_in),
        "cashback_in": float(cashback_in),
        "refund_in": float(refund_in),

        "p2p_out": float(p2p_out),
        "card_out": float(card_out),
        "atm_withdrawal": float(atm_withdrawal),
        "utilities_out": float(utilities_out),
        "loan_payment_out": float(loan_payment_out),
        "cc_repayment_out": float(cc_repayment_out),
        "installment_payment_out": float(installment_payment_out),

        "invest_in": float(invest_in),
        "invest_out": float(invest_out),
        "deposit_topup_out": float(deposit_topup_out),
        "gold_buy_out": float(gold_buy_out),
        "gold_sell_in": float(gold_sell_in),
    })

# One row per client; the city is attached from the profile table by client_code.
feat = pd.DataFrame(rows)
feat = feat.merge(clients[["client_code","city"]], on="client_code", how="left")
feat.head(10)


OK cols: ['client_code', 'name', 'status', 'age', 'city', 'avg_monthly_balance_kzt'] ...  | tx: ['client_code', 'name', 'product', 'status', 'city', 'date', 'category', 'amount']  | tr: ['client_code', 'name', 'product', 'status', 'city', 'date', 'type', 'direction']


Unnamed: 0,client_code,status,age,avg_balance,total_spend_3m,share_online,share_travel,fx_share_tx,months_seen_tx,month_spend_mean,month_spend_std,transfers_in_3m,transfers_out_3m,net_flow_3m,salary_in,stipend_in,cashback_in,refund_in,p2p_out,card_out,atm_withdrawal,utilities_out,loan_payment_out,cc_repayment_out,installment_payment_out,invest_in,invest_out,deposit_topup_out,gold_buy_out,gold_sell_in,city
0,1,Зарплатный клиент,29.0,92643.0,2626914.27,0.184009,0.244409,0.0,3,875638.1,172513.468114,1875090.64,6722190.4,-4847099.76,1446421.73,0.0,138273.34,114471.5,1274129.26,3829951.76,617946.71,492152.41,508010.26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Алматы
1,2,Премиальный клиент,41.0,1577073.0,2623272.32,0.180276,0.166927,0.0,3,874424.1,89349.195422,1646273.33,6636050.47,-4989777.14,1278082.01,0.0,107537.19,55248.95,1305706.44,3652000.64,775789.21,396920.77,505633.41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Астана
2,3,Студент,22.0,63116.0,2272999.63,0.198575,0.320537,0.280208,3,757666.5,251780.894628,690885.17,3526821.01,-2835935.84,0.0,124207.58,187247.61,60804.2,496377.33,1315353.95,759624.87,395640.68,559824.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Алматы
3,4,Зарплатный клиент,36.0,83351.0,2540993.42,0.177756,0.241687,0.0,3,846997.8,165135.314667,1786681.16,6103937.34,-4317256.18,1372775.52,0.0,107111.63,61478.54,1170717.9,3312986.71,634755.85,453960.22,531516.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Караганда
4,5,Премиальный клиент,45.0,1336536.0,2703671.0,0.199707,0.237067,0.0,3,901223.7,182854.492842,1746521.4,6200126.04,-4453604.64,1262119.01,0.0,126590.18,74928.97,1261804.77,3344845.96,590493.11,454938.05,548044.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Алматы
5,6,Стандартный клиент,34.0,131929.0,2670256.87,0.159337,0.220022,0.0,3,890085.6,131759.369281,1828401.34,6432809.25,-4604407.91,1378299.83,0.0,109520.75,60443.57,1331551.75,3619748.11,629865.51,370190.08,481453.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Шымкент
6,7,Премиальный клиент,48.0,4040997.0,2701821.89,0.178182,0.087017,0.0,3,900607.3,162336.753215,1798855.62,8422453.4,-6623597.78,1197030.77,0.0,182859.58,132023.71,1572706.7,4110840.27,1193728.17,557691.12,987487.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Алматы
7,8,Зарплатный клиент,33.0,1058403.0,3215125.84,0.161322,0.073257,0.0,3,1071709.0,37334.055039,2044952.12,8494299.18,-6449347.06,1349955.04,0.0,266820.85,109128.9,1730533.82,4048995.4,1162883.3,609707.88,942178.78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Астана
8,9,Премиальный клиент,55.0,3084180.0,2823888.9,0.163584,0.079068,0.0,3,941296.3,101118.252239,1783301.11,8565700.19,-6782399.08,1205500.19,0.0,188381.32,92987.82,1825147.95,4138180.77,1115664.13,551803.57,934903.77,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Алматы
9,10,Зарплатный клиент,38.0,1277325.0,2919534.74,0.141674,0.06814,0.0,3,973178.2,38041.966619,1978310.07,8538828.01,-6560517.94,1293311.35,0.0,184539.23,208027.8,1774833.09,4160053.26,1251711.25,562785.97,789444.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Усть-Каменогорск


In [3]:
# Headline benefit rules, as stated in the task specification.
TRAVEL_CATS = {"Путешествия","Отели","Такси"}
PREMIUM_4   = {"Ювелирные украшения","Косметика и Парфюмерия","Кафе и рестораны"}
ONLINE_CATS = {"Едим дома","Смотрим дома","Играем дома"}

def premium_tier(balance):
    """Return the premium-card cashback rate for an average balance.

    Balances of at least 6,000,000 earn 0.04, at least 1,000,000 earn 0.03,
    and anything else earns 0.02.
    """
    # Thresholds are checked from the highest down; the first one met wins.
    for floor, rate in ((6_000_000, 0.04), (1_000_000, 0.03)):
        if balance >= floor:
            return rate
    return 0.02

def teacher_benefits(profile_row, df_tx_client):
    """Rule-based ("teacher") estimate of each product's 3-month benefit in KZT.

    Args:
        profile_row: mapping-like client profile; the balance is read from
            ``avg_monthly_balance_kzt`` or, failing that, ``avg_balance``.
        df_tx_client: this client's purchases with ``category``,
            ``amount_kzt``, ``currency`` and ``date`` columns (may be empty).

    Returns:
        ``(benefits, top)``: ``benefits`` maps product name to estimated
        benefit, and ``top`` is the same data as ``(product, benefit)`` pairs
        sorted from highest to lowest benefit.
    """
    has_tx = not df_tx_client.empty

    # Spend per category.
    spend_cat = df_tx_client.groupby("category")["amount_kzt"].sum().to_dict() if has_tx else {}
    total_spend = sum(spend_cat.values())
    avg_balance = float(profile_row.get("avg_monthly_balance_kzt", 0.0) or profile_row.get("avg_balance", 0.0) or 0.0)

    def spend_in(categories):
        # Total spend over the given categories; absent categories count as 0.
        return sum(spend_cat.get(c, 0.0) for c in categories)

    # Travel card: 4% of travel-category spend.
    b_travel = 0.04 * spend_in(TRAVEL_CATS)

    # Premium card: tier rate on all spend plus a top-up to 4% on the premium
    # categories; cashback cap of 100k per month, i.e. ~300k over 3 months.
    tier = premium_tier(avg_balance)
    base = tier * total_spend
    inc = max(0.0, 0.04 - tier) * spend_in(PREMIUM_4)
    b_premium = min(base + inc, 3*100_000)

    # Credit card: 10% on the three largest categories plus 10% on online
    # categories, counting categories that are in both only once.
    top3 = sorted(spend_cat, key=spend_cat.get, reverse=True)[:3]
    base_top3 = spend_in(top3)
    online_sp = spend_in(ONLINE_CATS)
    overlap = spend_in(set(top3).intersection(ONLINE_CATS))
    b_credit = 0.10 * (base_top3 + online_sp - overlap)

    # FX exchange: 1% saving on the non-KZT amount.
    if has_tx:
        total_kzt = df_tx_client["amount_kzt"].sum()
        is_foreign = df_tx_client["currency"].str.upper().ne("KZT")
        fx_amt = df_tx_client.loc[is_foreign, "amount_kzt"].sum()
    else:
        total_kzt = 0.0
        fx_amt = 0.0
    b_fx = 0.01 * fx_amt

    # Deposits: 3 months of interest on the balance left after the median
    # monthly spend.
    if has_tx:
        with_month = df_tx_client.assign(ym=df_tx_client["date"].dt.to_period("M"))
        med_spend = float(with_month.groupby("ym")["amount_kzt"].sum().median())
    else:
        med_spend = 0.0
    free_bal = max(0.0, avg_balance - med_spend)
    months = 3/12
    b_dep_sav   = 0.165 * free_bal * months
    b_dep_nak   = 0.155 * free_bal * months
    # The multi-currency deposit only counts when more than 10% of spend is foreign.
    b_dep_multi = 0.145 * free_bal * months if (fx_amt > 0.1*total_kzt and total_kzt>0) else 0.0

    benefits = {
        "Карта для путешествий": b_travel,
        "Премиальная карта": b_premium,
        "Кредитная карта": b_credit,
        "Обмен валют": b_fx,
        "Депозит Сберегательный": b_dep_sav,
        "Депозит Накопительный": b_dep_nak,
        "Депозит Мультивалютный": b_dep_multi,
        "Инвестиции": 0.001 * max(0.0, avg_balance),
        "Золотые слитки": 0.0,
        "Кредит наличными": (5_000.0 if avg_balance < 50_000 else 0.0),
    }
    top = sorted(benefits.items(), key=lambda kv: kv[1], reverse=True)
    return benefits, top

# Collect the teacher labels (Top-1 and Top-4) for every client.
tx_groups = {key: frame for key, frame in tx.groupby("client_code")} if not tx.empty else {}
teacher_rows = []
for _, p in clients.iterrows():
    code = str(p["client_code"])
    # Clients with no purchases get an empty frame with the expected columns.
    dfc = tx_groups.get(code, pd.DataFrame(columns=tx.columns))
    ben, top = teacher_benefits(p, dfc)
    record = {"client_code": code}
    record["teacher_best"] = top[0][0] if top else None
    record["teacher_top4"] = [t[0] for t in top[:4]]
    # One extra column per product carrying its estimated benefit.
    record.update({f"teacher_benefit__{k}": v for k, v in ben.items()})
    teacher_rows.append(record)
teacher_df = pd.DataFrame(teacher_rows)
teacher_df.head(3)


Unnamed: 0,client_code,teacher_best,teacher_top4,teacher_benefit__Карта для путешествий,teacher_benefit__Премиальная карта,teacher_benefit__Кредитная карта,teacher_benefit__Обмен валют,teacher_benefit__Депозит Сберегательный,teacher_benefit__Депозит Накопительный,teacher_benefit__Депозит Мультивалютный,teacher_benefit__Инвестиции,teacher_benefit__Золотые слитки,teacher_benefit__Кредит наличными
0,1,Кредитная карта,"[Кредитная карта, Премиальная карта, Карта для...",25681.636,62913.4372,192283.424,0.0,0.0,0.0,0.0,92.643,0.0,0.0
1,2,Кредитная карта,"[Кредитная карта, Премиальная карта, Депозит С...",17515.7608,83841.8703,206182.847,0.0,31355.0523,29454.7461,0.0,1577.073,0.0,0.0
2,3,Кредитная карта,"[Кредитная карта, Премиальная карта, Карта для...",29143.1996,53313.902,171886.796,6369.138,0.0,0.0,0.0,63.116,0.0,0.0


In [4]:

# Target = the best product according to the teacher rules.
# Left merge keeps every feature row; the displayed value_counts shows how the labels are distributed.
data_ml = feat.merge(teacher_df[["client_code","teacher_best","teacher_top4"]], on="client_code", how="left")
data_ml["teacher_best"].value_counts()


teacher_best
Кредитная карта           58
Депозит Сберегательный     2
Name: count, dtype: int64

In [5]:
import numpy as np
import pandas as pd

# Purchase categories used by the benefit rules.
TRAVEL_CATS = {"Путешествия","Отели","Такси"}
PREMIUM_4   = {"Ювелирные украшения","Косметика и Парфюмерия","Кафе и рестораны"}
ONLINE_CATS = {"Едим дома","Смотрим дома","Играем дома"}

# Fee rates charged without the premium card, and the monthly fee-free volume cap.
ATM_FEE = 0.015        # 1.5% without the premium card
P2P_FEE = 0.005        # 0.5% without the premium card
CARD_FEE = 0.005       # 0.5% without the premium card
PREMIUM_FREE_CAP = 3_000_000  # KZT / month

def premium_tier(balance):
    """Map an average balance to the premium cashback rate.

    0.04 from 6,000,000 upwards, 0.03 from 1,000,000 upwards, otherwise 0.02.
    Same thresholds as the ``premium_tier`` defined in an earlier cell; once
    this cell runs, this definition is the one in effect.
    """
    return 0.04 if balance >= 6_000_000 else (0.03 if balance >= 1_000_000 else 0.02)

def monthly_capped_saving(series_amount_kzt, fee_rate, cap):
    """Fee saved over all months, with each month's volume limited to ``cap``.

    ``series_amount_kzt`` holds one KZT total per month. Every value is
    clipped from above at ``cap``, multiplied by ``fee_rate`` and the results
    are summed. An empty series gives 0.0.
    """
    if series_amount_kzt.empty:
        return 0.0
    eligible = series_amount_kzt.clip(upper=cap)
    return float((eligible * fee_rate).sum())

def teacher_benefits_v2(profile_row, df_tx_client, df_tr_client):
    """Estimate each product's 3-month benefit (KZT) from purchases and transfers.

    Args:
        profile_row: mapping-like client profile; the balance is read from
            ``avg_monthly_balance_kzt`` with ``avg_balance`` as fallback.
        df_tx_client: this client's purchases (``category``, ``amount_kzt``,
            ``currency``, ``date``); may be empty.
        df_tr_client: this client's transfers (``type``, ``amount_kzt``,
            ``date``); may be empty.

    Returns:
        ``(benefits, top, ctx)``: ``benefits`` maps product name to benefit,
        ``top`` is the same data sorted from highest to lowest benefit, and
        ``ctx`` carries intermediate values for message generation
        (``dep_choice``, ``saved_fees``, ``free_bal``, ``fx_amt``,
        ``top3_cats``, ``online_sp``).
    """
    has_tx = not df_tx_client.empty
    has_tr = not df_tr_client.empty

    # --- Purchases: spend per category
    if has_tx:
        spend_cat = df_tx_client.groupby("category")["amount_kzt"].sum().to_dict()
    else:
        spend_cat = {}
    total_spend = sum(spend_cat.values())

    # --- Base parameters
    avg_balance = float(profile_row.get("avg_monthly_balance_kzt", profile_row.get("avg_balance", 0.0)) or 0.0)

    # --- Travel 4%
    b_travel = 0.04 * sum(spend_cat.get(c,0.0) for c in TRAVEL_CATS)

    # --- Credit: 10% on top-3 + 10% on online without double counting;
    #     a small boost when installment or credit-card repayments are present.
    top3 = [k for k,_ in sorted(spend_cat.items(), key=lambda kv: kv[1], reverse=True)[:3]]
    base_top3 = sum(spend_cat.get(c,0.0) for c in top3)
    online_sp = sum(spend_cat.get(c,0.0) for c in ONLINE_CATS)
    overlap = sum(spend_cat.get(c,0.0) for c in set(top3).intersection(ONLINE_CATS))
    boost = 1.10 if (has_tr and (
        (df_tr_client["type"].eq("installment_payment_out")).any() or
        (df_tr_client["type"].eq("cc_repayment_out")).any()
    )) else 1.0
    b_credit = 0.10 * (base_top3 + online_sp - overlap) * boost

    # --- FX 1%
    total_kzt = df_tx_client["amount_kzt"].sum() if has_tx else 0.0
    fx_amt = df_tx_client.loc[
        (df_tx_client["currency"].str.upper()!="KZT"), "amount_kzt"
    ].sum() if has_tx else 0.0
    b_fx = 0.01 * fx_amt

    # --- Premium: tier cashback + top-up to 4% on premium categories
    #     + saved ATM/P2P/card fees, each capped per month.
    tier = premium_tier(avg_balance)
    base_cb = tier * total_spend
    inc_cb  = max(0.0, 0.04 - tier) * sum(spend_cat.get(c,0.0) for c in PREMIUM_4)

    saved_fees = 0.0
    if has_tr:
        transfers = df_tr_client.copy()
        transfers["ym"] = transfers["date"].dt.to_period("M")
        for tr_type, fee_rate in (("atm_withdrawal", ATM_FEE), ("p2p_out", P2P_FEE), ("card_out", CARD_FEE)):
            per_month = transfers.loc[transfers["type"].eq(tr_type)].groupby("ym")["amount_kzt"].sum()
            saved_fees += monthly_capped_saving(per_month, fee_rate, PREMIUM_FREE_CAP)

    b_premium = min(base_cb + inc_cb, 3*100_000) + saved_fees  # cashback cap + fee savings

    # --- Deposits: free balance and choice of deposit type
    if has_tx:
        dfc = df_tx_client.copy()
        dfc["ym"] = dfc["date"].dt.to_period("M")
        monthly_spend = dfc.groupby("ym")["amount_kzt"].sum()
        med_spend = float(monthly_spend.median())
        # std() is NaN with fewer than two months. NaN is truthy, so
        # `NaN or 0.0` would stay NaN and make the comparison below always
        # False; map it to 0.0 explicitly.
        spread = monthly_spend.std()
        vol = 0.0 if pd.isna(spread) else float(spread)
    else:
        med_spend = 0.0; vol = 0.0
    free_bal = max(0.0, avg_balance - med_spend)
    months = 3/12
    dep_sav   = 0.165 * free_bal * months
    dep_nak   = 0.155 * free_bal * months
    dep_multi = 0.145 * free_bal * months if (fx_amt > 0.1*total_kzt and total_kzt>0) else 0.0

    # "Recommended type" is only used to pick message text; all three
    # deposits are still put into the benefit table.
    dep_choice = "Депозит Сберегательный" if (free_bal>1_000_000 and vol<0.25*med_spend) else \
                 ("Депозит Мультивалютный" if dep_multi>0 else "Депозит Накопительный")

    benefits = {
        "Карта для путешествий": b_travel,
        "Премиальная карта": b_premium,
        "Кредитная карта": b_credit,
        "Обмен валют": b_fx,
        "Депозит Сберегательный": dep_sav,
        "Депозит Накопительный": dep_nak,
        "Депозит Мультивалютный": dep_multi,
        "Инвестиции": 0.001 * max(0.0, avg_balance),
        "Золотые слитки": 0.0,
        "Кредит наличными": (5_000.0 if (avg_balance < 50_000 and (free_bal < 20_000)) else 0.0),
    }
    top = sorted(benefits.items(), key=lambda kv: kv[1], reverse=True)
    return benefits, top, {"dep_choice": dep_choice, "saved_fees": saved_fees, "free_bal": free_bal,
                           "fx_amt": fx_amt, "top3_cats": top3, "online_sp": online_sp}


In [13]:
# Month number -> Russian month name in the form used after the preposition "в".
MONTHS_GEN = {
    1: "январе", 2: "феврале", 3: "марте", 4: "апреле",
    5: "мае", 6: "июне", 7: "июле", 8: "августе",
    9: "сентябре", 10: "октябре", 11: "ноябре", 12: "декабре",
}
def kzt(amount, decimals=0):
    """Format an amount as tenge text, e.g. ``"1 234 567 ₸"``.

    Thousands are separated by spaces. With ``decimals=0`` the value is
    rounded to an integer; otherwise it is shown with that many decimals and
    a comma as the decimal mark.
    """
    value = float(amount)
    if decimals == 0:
        grouped = format(int(round(value)), ",").replace(",", " ")
    else:
        # Order matters: grouping commas become spaces before the decimal
        # point is turned into a comma.
        grouped = format(value, f",.{decimals}f").replace(",", " ").replace(".", ",")
    return f"{grouped} ₸"

def month_of_max(df, cats):
    """Name the month in which spend on the given categories peaked.

    Args:
        df: purchases with ``date``, ``category`` and ``amount_kzt`` columns.
        cats: collection of category names to consider.

    Returns:
        The month name looked up in ``MONTHS_GEN``, or the generic phrase
        "последние месяцы" when there is nothing to rank: an empty frame, no
        rows in ``cats``, or no matching row with a valid date.
    """
    fallback = "последние месяцы"
    if df.empty:
        return fallback
    picked = df[df["category"].isin(cats)]
    if picked.empty:
        return fallback
    per_month = picked.groupby(picked["date"].dt.to_period("M"))["amount_kzt"].sum()
    # Rows with NaT dates are dropped by groupby, which can leave nothing to
    # rank; idxmax() raises on an empty series.
    if per_month.empty:
        return fallback
    return MONTHS_GEN.get(per_month.idxmax().month, fallback)

def top3_with_amounts(spend_cat):
    """Render the three largest categories as ``"<category> (<amount>)"`` strings.

    ``spend_cat`` maps category name to spend; amounts are formatted with
    ``kzt``. Fewer than three categories give a shorter list.
    """
    leaders = sorted(spend_cat, key=spend_cat.get, reverse=True)[:3]
    return [f"{category} ({kzt(spend_cat[category])})" for category in leaders]

def gen_push_v2(name, product, profile_row, df_tx_client, df_tr_client, ctx):
    """Build the push-notification text for the product chosen for a client.

    Args:
        name: client name used as the greeting.
        product: product name; selects the message template. Unknown names
            get a generic message.
        profile_row: client profile. Not read by any template; kept so the
            call signature stays unchanged.
        df_tx_client: client's purchases (``category``, ``amount_kzt``,
            ``date``), used for category totals and the peak travel month.
        df_tr_client: client's transfers. Not read by any template; kept so
            the call signature stays unchanged.
        ctx: dict of precomputed values; ``saved_fees``, ``fx_amt`` and
            ``free_bal`` are read, each defaulting to 0.0.

    Returns:
        The message string.
    """
    # Category totals feed the travel and credit-card templates.
    if df_tx_client.empty:
        spend_cat = {}
    else:
        spend_cat = df_tx_client.groupby("category")["amount_kzt"].sum().to_dict()

    if product == "Карта для путешествий":
        m = month_of_max(df_tx_client, TRAVEL_CATS)
        benefit = 0.04 * sum(spend_cat.get(c,0.0) for c in TRAVEL_CATS)
        return (f"{name}, в {m} у вас заметные траты на поездки и такси. "
                f"С картой для путешествий вернули бы ≈{kzt(benefit)} за 3 мес. "
                f"Открыть карту.")
    if product == "Премиальная карта":
        saved_fees = ctx.get("saved_fees", 0.0)
        return (f"{name}, у вас стабильный остаток и переводы/снятия. "
                f"Премиальная карта даст до 4% и сэкономит комиссии ≈{kzt(saved_fees)} за 3 мес. "
                f"Оформить сейчас.")
    if product == "Кредитная карта":
        top3 = top3_with_amounts(spend_cat)
        return (f"{name}, ваши топ-категории — {', '.join(top3)}. "
                f"Кредитная карта даст до 10% и рассрочку 3–24 мес без переплат. "
                f"Оформить карту.")
    if product == "Обмен валют":
        fx_amt = ctx.get("fx_amt", 0.0)
        return (f"{name}, платили в валюте на ≈{kzt(fx_amt)} за 3 мес. "
                f"В приложении выгодный обмен и авто-покупка по целевому курсу. "
                f"Настроить обмен.")
    if product in {"Депозит Сберегательный","Депозит Накопительный","Депозит Мультивалютный"}:
        free_bal = ctx.get("free_bal", 0.0)
        # Estimate the income for the specific deposit that was selected.
        rate = {"Депозит Сберегательный":0.165,"Депозит Накопительный":0.155,"Депозит Мультивалютный":0.145}[product]
        income = rate * max(0.0, free_bal) * (3/12)
        return (f"{name}, свободно лежит ≈{kzt(free_bal)}. "
                f"На «{product}» за 3 мес получили бы ≈{kzt(income)}. "
                f"Открыть вклад.")
    if product == "Инвестиции":
        return (f"{name}, попробуйте инвестиции с низким порогом входа и без комиссий на старт. "
                f"Открыть счёт.")
    if product == "Кредит наличными":
        return (f"{name}, если нужен запас на крупные траты — оформите кредит наличными с гибкими выплатами. "
                f"Узнать лимит.")
    return (f"{name}, у вас есть возможность получить выгоду с нашим продуктом. Посмотреть детали. Открыть.")


In [14]:
# Index purchases and transfers by client_code for per-client lookup.
tx_groups = dict(tuple(tx.groupby("client_code"))) if not tx.empty else {}
tr_groups = dict(tuple(tr.groupby("client_code"))) if not tr.empty else {}

# For every client: score all products, take the top one, generate its push text.
rows = []
for _, p in clients.iterrows():
    code = str(p["client_code"])
    name = str(p["name"])
    df_tx = tx_groups.get(code, pd.DataFrame(columns=tx.columns))
    df_tr = tr_groups.get(code, pd.DataFrame(columns=tr.columns))

    ben, top, ctx = teacher_benefits_v2(p, df_tx, df_tr)
    product = top[0][0]
    push = gen_push_v2(name, product, p, df_tx, df_tr, ctx)
    rows.append({"client_code": code, "product": product, "push_notification": push})

result_df = pd.DataFrame(rows)
# to_csv does not create missing directories, so make sure outputs/ exists
# before writing (a fresh checkout may not have it).
result_path = Path("outputs") / "result.csv"
result_path.parent.mkdir(parents=True, exist_ok=True)
result_df.to_csv(result_path, index=False, encoding="utf-8")
result_df.head(10)


Unnamed: 0,client_code,product,push_notification
0,1,Кредитная карта,"Айгерим, ваши топ-категории — Продукты питания..."
1,2,Кредитная карта,"Данияр, ваши топ-категории — Продукты питания ..."
2,3,Кредитная карта,"Сабина, ваши топ-категории — Продукты питания ..."
3,4,Кредитная карта,"Тимур, ваши топ-категории — Продукты питания (..."
4,5,Кредитная карта,"Камилла, ваши топ-категории — Продукты питания..."
5,6,Кредитная карта,"Аян, ваши топ-категории — Продукты питания (81..."
6,7,Кредитная карта,"Руслан, ваши топ-категории — Продукты питания ..."
7,8,Кредитная карта,"Мадина, ваши топ-категории — Продукты питания ..."
8,9,Кредитная карта,"Арман, ваши топ-категории — Продукты питания (..."
9,10,Кредитная карта,"Карина, ваши топ-категории — Продукты питания ..."


In [15]:
# Distribution of the top-1 label in teacher_df (built by the earlier teacher_benefits cell, not by teacher_benefits_v2).
teacher_df["teacher_best"].value_counts().to_frame("count")

Unnamed: 0_level_0,count
teacher_best,Unnamed: 1_level_1
Кредитная карта,58
Депозит Сберегательный,2
