In [73]:
import pandas as pd
import numpy as np
import sqlalchemy as sql
from sqlalchemy import text
import matplotlib.pyplot as plt
import dataframe_image as dfi

In [74]:
accounts = ['fund2', 'fund3']
start_date = '2025-10-01'

In [75]:
def get_account_data(account, db):
    """
    Databases:
    balance -- fund2_balance
    trades -- fund2
    earnings -- fund2_earnings
    transaction_history -- fund2_transaction
    """
    table_name = {
        "balance": f"{account}_balance",
        "trades": f"{account}",
        "earnings": f"{account}_earnings",
        "transaction_history": f"{account}_transaction",
    }
    tb = table_name[f"{db}"]
    conn = sql.create_engine(f'mysql+mysqldb://247team:password@192.168.50.238:3306/{db}')
    query = f"SELECT * FROM {tb};"
    df = pd.read_sql_query(text(query), conn.connect())

    if db == "balance":
        df['datetime'] = pd.to_datetime(df['datetime'])
        df = df[["datetime", "overall_balance"]]
        df.columns = ['datetime', 'dollar']
        df["dollar"] = df["dollar"].astype(float)
        df["type"] = "balance"
        df["account"] = account
    elif db == "trades":
        df["time"] = pd.to_datetime(df["time"])
        df["realizePnl"] = df["realizedPnl"].astype(float) - df["commission"].astype(float)
        df = df[["time", "realizedPnl"]]
        df.columns = ["datetime", "dollar"]
        df["dollar"] = df["dollar"].astype(float)
        df["type"] = "realizedpnl"
        df["account"] = account
    elif db == "transaction_history":
        df["time"] = pd.to_datetime(df["time"])
        df = df[df["incomeType"].isin(["FUNDING_FEE"])]
        df = df[["time", "incomeType", "income"]]
        df.columns = ["datetime", "type", "dollar"]
        df["dollar"] = df["dollar"].astype(float)
        df["account"] = account
    elif db == "earnings":
        df["time"] = pd.to_datetime(df["time"])
        df = df[["time", "rewards"]]
        df.columns = ["datetime", "dollar"]
        df["dollar"] = df["dollar"].astype(float)
        df["type"] = "earnings"
        df["account"] = account
    else:
        df= None
    return df


In [76]:
data = []
initial_balance = {}
for account in accounts:
    for db in ["trades", "earnings", "transaction_history"]:
        temp = get_account_data(account, db)
        data.append(temp)
    balance = get_account_data(account, "balance")
    initial_balance[account] = balance.loc[0, "dollar"]

df = pd.concat(data, axis = 0)
df.sort_values("datetime", inplace=True)
df = df[df["datetime"] >= start_date]
df.reset_index(inplace=True, drop=True)
df.set_index("datetime", inplace = True)
df

Unnamed: 0_level_0,dollar,type,account
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-10-01 00:00:00,0.438838,FUNDING_FEE,fund3
2025-10-01 00:00:00,0.520656,FUNDING_FEE,fund3
2025-10-01 00:00:00,-0.075435,FUNDING_FEE,fund3
2025-10-01 00:00:00,-0.803001,FUNDING_FEE,fund3
2025-10-01 00:00:00,-3.619610,FUNDING_FEE,fund3
...,...,...,...
2025-10-10 06:41:42,2.031078,realizedpnl,fund2
2025-10-10 06:41:44,1.396550,realizedpnl,fund2
2025-10-10 06:41:44,0.493767,realizedpnl,fund2
2025-10-10 06:41:45,0.748757,realizedpnl,fund2


In [77]:
# # DON'T RUN THIS

# df["date"] = df.index.date
# df["dollar"] = df["dollar"].astype(float)

# results = []

# for date, group in df.groupby("date", sort=False):
#     # locate balance rows
#     balance_idx = group.index[group["type"] == "WITHDRAW"].tolist()
    
#     if not balance_idx:
#         # no balance → just one sum
#         results.append({
#             "date": date,
#             "section": "all",
#             "dollar": group["dollar"].sum()
#         })
#     else:
#         # before balance
#         first_balance = balance_idx[0]
#         before_sum = group.loc[:first_balance-1, "dollar"].sum()
#         balance_value = group.loc[first_balance, "dollar"]
#         after_sum = group.loc[first_balance+1:, "dollar"].sum()

#         results.extend([
#             {"date": date, "section": "before", "dollar": before_sum},
#             {"date": date, "section": "transfer", "dollar": balance_value},
#             {"date": date, "section": "after", "dollar": after_sum},
#         ])

# result_df = pd.DataFrame(results)
# print(result_df)


In [78]:
df["dollar"] = df["dollar"].astype(float)
df

Unnamed: 0_level_0,dollar,type,account
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-10-01 00:00:00,0.438838,FUNDING_FEE,fund3
2025-10-01 00:00:00,0.520656,FUNDING_FEE,fund3
2025-10-01 00:00:00,-0.075435,FUNDING_FEE,fund3
2025-10-01 00:00:00,-0.803001,FUNDING_FEE,fund3
2025-10-01 00:00:00,-3.619610,FUNDING_FEE,fund3
...,...,...,...
2025-10-10 06:41:42,2.031078,realizedpnl,fund2
2025-10-10 06:41:44,1.396550,realizedpnl,fund2
2025-10-10 06:41:44,0.493767,realizedpnl,fund2
2025-10-10 06:41:45,0.748757,realizedpnl,fund2


In [79]:
rf = df.groupby(pd.Grouper(freq='1h'))["dollar"].sum().to_frame(name = 'pnl')
rf

Unnamed: 0_level_0,pnl
datetime,Unnamed: 1_level_1
2025-10-01 00:00:00,-296.669794
2025-10-01 01:00:00,0.000000
2025-10-01 02:00:00,0.000000
2025-10-01 03:00:00,0.000000
2025-10-01 04:00:00,0.164380
...,...
2025-10-10 02:00:00,0.000000
2025-10-10 03:00:00,0.000000
2025-10-10 04:00:00,138.924795
2025-10-10 05:00:00,0.000000


In [80]:
rf.loc["2025-09-30 23:59:59"] = {"pnl": 0.0}
rf

Unnamed: 0_level_0,pnl
datetime,Unnamed: 1_level_1
2025-10-01 00:00:00,-296.669794
2025-10-01 01:00:00,0.000000
2025-10-01 02:00:00,0.000000
2025-10-01 03:00:00,0.000000
2025-10-01 04:00:00,0.164380
...,...
2025-10-10 03:00:00,0.000000
2025-10-10 04:00:00,138.924795
2025-10-10 05:00:00,0.000000
2025-10-10 06:00:00,-117.847370


In [81]:
rf.index = pd.to_datetime(rf.index)
rf.sort_index(inplace=True)

In [82]:
rf["running_bal"] = rf["pnl"].cumsum() + 93208.8505
rf

Unnamed: 0_level_0,pnl,running_bal
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-09-30 23:59:59,0.000000,93208.850500
2025-10-01 00:00:00,-296.669794,92912.180706
2025-10-01 01:00:00,0.000000,92912.180706
2025-10-01 02:00:00,0.000000,92912.180706
2025-10-01 03:00:00,0.000000,92912.180706
...,...,...
2025-10-10 02:00:00,0.000000,92782.790726
2025-10-10 03:00:00,0.000000,92782.790726
2025-10-10 04:00:00,138.924795,92921.715521
2025-10-10 05:00:00,0.000000,92921.715521


In [None]:
rf["peak"] = rf["running_bal"].cummax()
rf["dd"] = (rf["running_bal"] - rf["peak"]) / rf["peak"]
rf["dd"].min()      # no upnl

np.float64(-0.01340003496920215)

In [None]:
from __future__ import annotations

import json
import math
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Literal, Tuple

import pandas as pd
from sqlalchemy import text
from sqlalchemy.engine import Engine

# Your config module (exactly as you pasted)
from api.utils import config as cfg

Mode = Literal["realized", "margin"]


@dataclass(frozen=True)
class Inputs:
    accounts: Tuple[str, ...]
    start_date: pd.Timestamp            # inclusive
    freq: str = "1H"                    # resample frequency
    timezone: str | None = None         # set if you want tz-aware handling
    out_dir: Path = Path("out")


# ------------------------
# Baseline helpers
# ------------------------

def _first_existing_path(candidates: Iterable[str]) -> Path | None:
    for p in candidates:
        if p and Path(p).exists():
            return Path(p)
    return None


def _load_json(path: Path) -> dict:
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)


def _extract_per_account_number(
    j: dict,
    account: str,
    *,
    key_candidates: Tuple[str, ...] = ("balance", "overall_balance", "equity", "value", "dollar"),
) -> float | None:
    # top-level mapping
    if account in j and isinstance(j[account], (int, float, str)):
        try:
            return float(j[account])
        except (TypeError, ValueError):
            pass

    # nested dict
    if isinstance(j, dict):
        for _, container in j.items():
            if isinstance(container, dict) and account in container:
                try:
                    return float(container[account])
                except (TypeError, ValueError):
                    continue

    # list of dicts with an id field
    if isinstance(j, list):
        for row in j:
            if not isinstance(row, dict):
                continue
            name = (
                row.get(cfg.ACCOUNT_KEY_FIELD)
                or row.get("account")
                or row.get("name")
                or row.get("key")
            )
            if name == account:
                for k in key_candidates:
                    if k in row and isinstance(row[k], (int, float, str)):
                        try:
                            return float(row[k])
                        except (TypeError, ValueError):
                            continue
    return None


def load_baseline_balances(accounts: Iterable[str]) -> Dict[str, float]:
    path = _first_existing_path(cfg.baseline_balance_candidates())
    if path is None:
        raise FileNotFoundError(
            "No baseline balance JSON found. Checked: " + ", ".join(cfg.baseline_balance_candidates())
        )
    raw = _load_json(path)
    out: Dict[str, float] = {}
    for acc in accounts:
        val = _extract_per_account_number(raw, acc)
        if val is None or not math.isfinite(val):
            raise ValueError(f"Baseline balance missing/invalid for '{acc}' in {path}")
        out[acc] = float(val)
    return out


def load_baseline_unrealized(accounts: Iterable[str]) -> Dict[str, float]:
    path = _first_existing_path(cfg.baseline_unrealized_candidates())
    if path is None:
        # Legit: treat as zero
        return {acc: 0.0 for acc in accounts}
    raw = _load_json(path)
    out: Dict[str, float] = {}
    for acc in accounts:
        val = _extract_per_account_number(
            raw, acc, key_candidates=("unrealized", "upnl", "unrealizedProfit", "value", "dollar")
        )
        out[acc] = float(val) if val is not None and math.isfinite(val) else 0.0
    return out


def baselines_by_mode(accounts: Iterable[str], mode: Mode) -> tuple[Dict[str, float], Dict[str, float]]:
    """
    Returns (initial_equity, upnl_component) per account for the chosen mode.
    upnl_component is 0.0 for realized mode; unrealized.json values for margin mode.
    """
    base = load_baseline_balances(accounts)
    if mode == "realized":
        return base, {acc: 0.0 for acc in base}
    if mode == "margin":
        upnl = load_baseline_unrealized(accounts)
        return {k: base[k] + upnl.get(k, 0.0) for k in base}, upnl
    raise ValueError(f"Unknown mode: {mode}")


# ------------------------
# SQL loaders (schemas taken from your config comment)
# ------------------------

def _read_sql(engine: Engine, sql_text: str) -> pd.DataFrame:
    with engine.begin() as conn:
        return pd.read_sql_query(text(sql_text), conn)


def load_trades(account: str, engine: Engine) -> pd.DataFrame:
    df = _read_sql(
        engine,
        f"""
        SELECT time, realizedPnl, commission
        FROM trades.`{account}`;
        """
    )
    df["datetime"] = pd.to_datetime(df["time"], utc=False)
    df["dollar"] = df["realizedPnl"].astype(float) - df["commission"].astype(float)
    return df.loc[:, ["datetime", "dollar"]]


def load_earnings(account: str, engine: Engine) -> pd.DataFrame:
    df = _read_sql(
        engine,
        f"""
        SELECT time, rewards
        FROM earnings.`{account}_earnings`;
        """
    )
    df["datetime"] = pd.to_datetime(df["time"], utc=False)
    df["dollar"] = df["rewards"].astype(float)
    return df.loc[:, ["datetime", "dollar"]]


def load_funding_fees(account: str, engine: Engine) -> pd.DataFrame:
    df = _read_sql(
        engine,
        f"""
        SELECT time, incomeType, income
        FROM transaction_history.`{account}_transaction`
        WHERE incomeType = 'FUNDING_FEE';
        """
    )
    df["datetime"] = pd.to_datetime(df["time"], utc=False)
    df["dollar"] = df["income"].astype(float)
    return df.loc[:, ["datetime", "dollar"]]


# ------------------------
# PnL → balances → drawdown
# ------------------------

def build_pnl_series(
    account: str,
    engine: Engine,
    start_date: pd.Timestamp,
    freq: str,
    timezone: str | None,
) -> pd.Series:
    parts = [
        load_trades(account, engine),
        load_earnings(account, engine),
        load_funding_fees(account, engine),
    ]
    df = pd.concat(parts, ignore_index=True)
    if timezone:
        df["datetime"] = df["datetime"].dt.tz_localize(timezone, ambiguous="NaT", nonexistent="shift_forward")
    df = df[df["datetime"] >= pd.to_datetime(start_date)]
    df = df.sort_values("datetime")
    ts = (
        df.set_index("datetime")["dollar"]
        .astype(float)
        .resample(freq)
        .sum(min_count=1)
        .fillna(0.0)
    )
    ts.name = "pnl"
    return ts


def running_balance(pnl: pd.Series, initial_equity: float) -> pd.Series:
    out = pnl.cumsum() + float(initial_equity)
    out.name = "running_balance"
    return out


def drawdown_series(balance: pd.Series) -> pd.Series:
    peaks = balance.cummax()
    dd = (balance - peaks) / peaks.replace({0.0: pd.NA})
    dd = dd.fillna(0.0)
    dd.name = "drawdown"
    return dd


@dataclass(frozen=True)
class PerAccount:
    account: str
    mode: Mode
    initial_equity: float
    upnl_in_baseline: float
    latest_balance: float
    min_drawdown: float
    min_drawdown_time: pd.Timestamp | None


def compute_and_print(inputs: Inputs, mode: Mode) -> pd.DataFrame:
    engine = cfg.get_engine()
    initial_equity_map, upnl_map = baselines_by_mode(inputs.accounts, mode)

    rows: list[PerAccount] = []
    dd_wide = pd.DataFrame()

    for acc in inputs.accounts:
        pnl = build_pnl_series(acc, engine, inputs.start_date, inputs.freq, inputs.timezone)
        bal = running_balance(pnl, initial_equity_map[acc])
        dd = drawdown_series(bal)

        min_dd = float(dd.min()) if not dd.empty else 0.0
        min_dd_time = dd.idxmin() if not dd.empty else None
        latest_bal = float(bal.iloc[-1]) if not bal.empty else float(initial_equity_map[acc])

        rows.append(
            PerAccount(
                account=acc,
                mode=mode,
                initial_equity=float(initial_equity_map[acc]),
                upnl_in_baseline=float(upnl_map.get(acc, 0.0)),
                latest_balance=latest_bal,
                min_drawdown=min_dd,
                min_drawdown_time=min_dd_time,
            )
        )
        dd_wide[f"{acc}_dd"] = dd

    # Build tidy summary for printing
    summary = pd.DataFrame(
        [
            {
                "account": r.account,
                "mode": r.mode,
                "initial_equity": r.initial_equity,
                "upnl_in_baseline": r.upnl_in_baseline,
                "latest_balance": r.latest_balance,
                "min_drawdown": r.min_drawdown,
                "min_drawdown_time": r.min_drawdown_time,
            }
            for r in rows
        ]
    ).set_index("account").sort_index()

    # Output to console
    print(f"\n=== {mode.upper()} — per-account summary (start: {inputs.start_date.date()}, freq: {inputs.freq}) ===")
    # Nice formatting without truncation
    with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 180):
        print(summary)

    # Persist artifacts
    inputs.out_dir.mkdir(parents=True, exist_ok=True)
    summary.to_csv(inputs.out_dir / f"summary_{mode}.csv")
    dd_wide.to_csv(inputs.out_dir / f"drawdown_{mode}_wide.csv")

    return summary


# ------------------------
# Entrypoint
# ------------------------

if __name__ == "__main__":
    # Adjust accounts and start_date as needed
    ACCOUNTS: Tuple[str, ...] = ("fund2", "fund3")
    START_DATE = pd.Timestamp("2025-10-01")  # inclusive

    params = Inputs(
        accounts=ACCOUNTS,
        start_date=START_DATE,
        freq="1H",
        timezone=None,       # e.g., "UTC" if your DB times are UTC-naive
        out_dir=Path("out"),
    )

    # Realized: balance.json only (no unrealized)
    compute_and_print(params, mode="realized")

    # Margin: balance.json + unrealized.json
    compute_and_print(params, mode="margin")


: 

In [84]:
df.head(20)

Unnamed: 0_level_0,dollar,type,account
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-10-01 00:00:00,0.438838,FUNDING_FEE,fund3
2025-10-01 00:00:00,0.520656,FUNDING_FEE,fund3
2025-10-01 00:00:00,-0.075435,FUNDING_FEE,fund3
2025-10-01 00:00:00,-0.803001,FUNDING_FEE,fund3
2025-10-01 00:00:00,-3.61961,FUNDING_FEE,fund3
2025-10-01 00:00:00,-0.177376,FUNDING_FEE,fund2
2025-10-01 00:00:00,-0.267448,FUNDING_FEE,fund2
2025-10-01 00:00:00,0.122378,FUNDING_FEE,fund3
2025-10-01 00:00:00,0.285642,FUNDING_FEE,fund2
2025-10-01 00:00:00,-0.28697,FUNDING_FEE,fund2
