# Loss Pause Lookahead Analysis
Den här notebooken analyserar historiska trade-metrics för att jämföra olika lookahead-fönster (val 1) och ta fram fördjupad statistik efter förlustsekvenser (val 3).

## 1. Define Scenario Parameters
Sätter upp konfiguration, globbar CSV-filer i `data/` och delar hjälpfunktioner för båda alternativen.

In [None]:
from __future__ import annotations

import glob
import math
import os
import re
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional

try:
    import pandas as pd
    import numpy as np
except ImportError as exc:
    raise SystemExit("Installera pandas och numpy innan du kör notebooken: pip install pandas numpy") from exc

try:
    import matplotlib.pyplot as plt
except ImportError as exc:
    raise SystemExit("Installera matplotlib: pip install matplotlib") from exc

# Glob pattern for the metric CSV files; used as the default `pattern`
# argument of load_metric_frames.
DATA_GLOB = os.path.join("data", "klines_analysis*.csv")
# NOTE(review): not referenced by any cell visible in this notebook.
DEFAULT_SYMBOL = "BTCUSDT"
# Rows whose mae_pct is >= this fraction are counted as losses by
# summarize_by_lookahead and compute_after_loss_profiles.
LOSS_THRESHOLD = 0.0008  # 0.08 %


def load_metric_frames(pattern: str = DATA_GLOB) -> pd.DataFrame:
    """Load every metrics CSV matching ``pattern`` into one combined frame.

    The lookahead of each file is taken from its file name via
    ``_infer_lookahead``. If that yields nothing, a ``lookahead`` column that
    holds exactly one distinct non-null value is used instead. Files whose
    lookahead cannot be determined are skipped and listed in a printed warning.

    Args:
        pattern: Glob pattern of the CSV files to read.

    Returns:
        The concatenated rows of all accepted files, with a ``source_file``
        column (file base name), a ``lookahead`` column, and ``mfe_pct`` /
        ``mae_pct`` cast to float.

    Raises:
        FileNotFoundError: No file matches ``pattern``.
        KeyError: An accepted file lacks ``mfe_pct`` or ``mae_pct``.
        ValueError: No file yielded a lookahead.
    """
    files = sorted(glob.glob(pattern))
    if not files:
        raise FileNotFoundError(
            f"Hittade inga CSV-filer med mönstret {pattern}. Kör historiska skriptet först."
        )
    required_columns = ("mfe_pct", "mae_pct")
    frames: List[pd.DataFrame] = []
    skipped: List[str] = []
    for path in files:
        name = os.path.basename(path)
        df = pd.read_csv(path)
        df["source_file"] = name
        lookahead = _infer_lookahead(path, default=None)
        if lookahead is None and "lookahead" in df.columns:
            # Fall back to the file content only when it is unambiguous.
            unique_vals = df["lookahead"].dropna().unique()
            lookahead = int(unique_vals[0]) if len(unique_vals) == 1 else None
        if lookahead is None:
            skipped.append(name)
            continue
        # Validate per file: after pd.concat a missing column would turn into
        # silent NaN rows whenever another file does provide that column.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise KeyError(f"Filen {name} saknar kolumnerna: {', '.join(missing)}")
        df["lookahead"] = lookahead
        frames.append(df)
    if not frames:
        raise ValueError(
            "Kunde inte tolka lookahead för någon fil. Kontrollera filnamn enligt *_L<number>.csv."
        )
    if skipped:
        print(f"⚠️ Hoppar över filer utan lookahead: {', '.join(skipped)}")
    combined = pd.concat(frames, ignore_index=True)
    combined["mfe_pct"] = combined["mfe_pct"].astype(float)
    combined["mae_pct"] = combined["mae_pct"].astype(float)
    return combined


def _infer_lookahead(filename: str, default: Optional[int]) -> Optional[int]:
    base = os.path.basename(filename)
    match = re.search(r"_L(\d+)", base)
    if match:
        return int(match.group(1))
    parts = base.split("_")
    for part in parts:
        clean = part.split(".")[0]
        if clean.startswith("L") and clean[1:].isdigit():
            return int(clean[1:])
    digits = re.findall(r"(\d+)", base)
    if len(digits) == 1:
        return int(digits[0])
    return default


# Load all CSVs matching DATA_GLOB once; the later cells reuse metrics_df.
metrics_df = load_metric_frames()
metrics_df.head()

ValueError: Kunde inte tolka lookahead från filnamnet eller filinnehållet. Döp filerna enligt mönstret *_L<number>.csv eller kör skriptet med '--csv data/klines_analysis_{lookahead}.csv'.

In [10]:
import glob, os
# Debugging aid: list the files matched by the same pattern as DATA_GLOB.
sorted(glob.glob(os.path.join('data','klines_analysis*.csv')))

['data\\klines_analysis.csv',
 'data\\klines_analysis__L10.csv',
 'data\\klines_analysis__L20.csv',
 'data\\klines_analysis__L30.csv']

In [11]:
import pandas as pd
# Debugging aid: preview the CSV whose file name carries no lookahead tag.
pd.read_csv('data/klines_analysis.csv').head()

Unnamed: 0,open_time,close,max_future,min_future,mfe_pct,mae_pct,direction
0,1762535640000,101631.76,102736.84,101598.54,0.010873,0.000327,up
1,1762535700000,101760.51,102736.84,101600.0,0.009594,0.001577,up
2,1762535760000,101788.16,102736.84,101600.0,0.00932,0.001849,up
3,1762535820000,101752.82,102736.84,101600.0,0.009671,0.001502,down
4,1762535880000,101682.58,102736.84,101600.0,0.010368,0.000812,down


## 2. Implement Option 1 Logic
Option 1 jämför flera lookahead-fönster genom att sammanställa statistik för varje värde.

In [None]:
from dataclasses import dataclass


@dataclass
class LookaheadStats:
    """Summary statistics for all rows sharing one lookahead value.

    Instances are produced by ``summarize_by_lookahead``; "loss" rows are
    those whose ``mae_pct`` is at or above the loss threshold passed to it.
    """
    lookahead: int  # lookahead value the rows were grouped on
    count: int  # number of rows in the group
    loss_ratio: float  # share of rows classified as losses
    mae_mean: float  # mean of mae_pct over all rows
    mfe_mean: float  # mean of mfe_pct over all rows
    mae_q75: float  # 75th percentile of mae_pct
    mae_q90: float  # 90th percentile of mae_pct
    mae_q95: float  # 95th percentile of mae_pct
    loss_mae_mean: float  # mean of mae_pct over loss rows only
    loss_mfe_mean: float  # mean of mfe_pct over loss rows only


def summarize_by_lookahead(df: pd.DataFrame, loss_threshold: float) -> List[LookaheadStats]:
    """Build one ``LookaheadStats`` per distinct ``lookahead`` value in ``df``.

    A row counts as a loss when its ``mae_pct`` is at or above
    ``loss_threshold``. The result is ordered by ascending lookahead.
    """

    def _describe(key, rows: pd.DataFrame) -> LookaheadStats:
        # Collapse a single lookahead group into its statistics record.
        mae = rows["mae_pct"]
        mfe = rows["mfe_pct"]
        is_loss = mae >= loss_threshold
        return LookaheadStats(
            lookahead=int(key),
            count=len(rows),
            loss_ratio=float(is_loss.mean()),
            mae_mean=float(mae.mean()),
            mfe_mean=float(mfe.mean()),
            mae_q75=float(mae.quantile(0.75)),
            mae_q90=float(mae.quantile(0.90)),
            mae_q95=float(mae.quantile(0.95)),
            loss_mae_mean=float(rows.loc[is_loss, "mae_pct"].mean()),
            loss_mfe_mean=float(rows.loc[is_loss, "mfe_pct"].mean()),
        )

    summaries = [_describe(key, rows) for key, rows in df.groupby("lookahead")]
    summaries.sort(key=lambda item: item.lookahead)
    return summaries


# One LookaheadStats per lookahead value; reused by the comparison and plot cells.
lookahead_summary = summarize_by_lookahead(metrics_df, LOSS_THRESHOLD)
lookahead_summary

## 3. Implement Option 3 Logic
Option 3 tar fram fördjupad efter-förlust-analys per lookahead och riktning för att styra adaptiv paus.

In [None]:
def compute_after_loss_profiles(df: pd.DataFrame, loss_threshold: float) -> pd.DataFrame:
    """Aggregate MAE/MFE statistics over the loss rows of ``df``.

    Loss rows are those with ``mae_pct >= loss_threshold``. They are grouped
    by ``lookahead`` and, when ``df`` has a ``state`` column, also by
    ``state``. Without that column the result gets ``state == "ALL"``.

    Returns:
        One row per group with the columns ``mae_mean``, ``mae_q75``,
        ``mae_q90``, ``mae_q95``, ``mfe_mean``, ``count`` and ``state``.

    Raises:
        ValueError: No row reaches ``loss_threshold``.
    """
    # NOTE(review): the sample CSV previewed in this notebook has a
    # `direction` column and no `state` column, so with that data only the
    # "ALL" branch runs — confirm which column the breakdown should use.
    flagged = df.assign(is_loss=df["mae_pct"] >= loss_threshold)
    losses = flagged[flagged["is_loss"]]
    if losses.empty:
        raise ValueError("Inga rader över förlusttröskeln – sänk loss_threshold.")

    group_keys = ["lookahead"]
    if "state" in losses.columns:
        group_keys.append("state")

    grouped = losses.groupby(group_keys)
    profiles = grouped.agg(
        mae_mean=("mae_pct", "mean"),
        mae_q75=("mae_pct", lambda s: s.quantile(0.75)),
        mae_q90=("mae_pct", lambda s: s.quantile(0.90)),
        mae_q95=("mae_pct", lambda s: s.quantile(0.95)),
        mfe_mean=("mfe_pct", "mean"),
        count=("mae_pct", "size"),
    )
    profiles = profiles.reset_index()

    if "state" not in profiles.columns:
        profiles["state"] = "ALL"
    return profiles


# Loss-row statistics per lookahead (and state, when present); pivoted in the comparison cell.
loss_profiles = compute_after_loss_profiles(metrics_df, LOSS_THRESHOLD)
loss_profiles.head()

## 4. Run Comparative Experiments
Beräknar nyckeltal för båda alternativen och sammanställer i en tabell för vidare beslut.

In [None]:
import pandas as pd

# One row per LookaheadStats, one column per dataclass field.
summary_df = pd.DataFrame([s.__dict__ for s in lookahead_summary])
# Candidate pause_resume_pct: the 75th percentile of MAE over all rows.
summary_df["pause_resume_pct_ideas"] = summary_df["mae_q75"]

# Widen loss_profiles so each state gets its own mae_q75_state_<state> column.
# loss_profiles comes from a groupby on (lookahead, state), so "first" simply
# picks the single value available for each cell.
loss_profile_pivot = (
    loss_profiles.pivot_table(
        index="lookahead",
        columns="state",
        values="mae_q75",
        aggfunc="first",
    )
    .rename(columns=lambda c: f"mae_q75_state_{c}")
    .reset_index()
)

# Left join keeps every lookahead of summary_df even if it has no loss profile.
combined_summary = summary_df.merge(loss_profile_pivot, on="lookahead", how="left")
combined_summary

## 5. Visualize Results
Visualiserar fördelningen av MFE/MAE samt föreslår kandidatvärden för `pause_resume_pct` baserat på 75:e percentilen.

In [None]:
# One row of plots per lookahead: MAE histogram on the left, MFE on the right.
fig, axes = plt.subplots(len(lookahead_summary), 2, figsize=(10, 4 * len(lookahead_summary)))
if len(lookahead_summary) == 1:
    # With a single row plt.subplots returns a 1-D array of axes; wrap it so
    # the axes[idx, col] indexing below still works.
    axes = np.array([axes])

for idx, stats in enumerate(lookahead_summary):
    subset = metrics_df[metrics_df["lookahead"] == stats.lookahead]
    subset["mae_pct"].hist(ax=axes[idx, 0], bins=40, color="#d95f02")
    axes[idx, 0].set_title(f"MAE distribution (lookahead={stats.lookahead})")
    # Dashed line marks the 75th percentile already stored in the summary.
    axes[idx, 0].axvline(stats.mae_q75, color="black", linestyle="--", label="75:e pct")
    axes[idx, 0].legend()

    subset["mfe_pct"].hist(ax=axes[idx, 1], bins=40, color="#1b9e77")
    axes[idx, 1].set_title(f"MFE distribution (lookahead={stats.lookahead})")
    # The summary holds no MFE percentile, so it is computed here.
    axes[idx, 1].axvline(subset["mfe_pct"].quantile(0.75), color="black", linestyle="--", label="75:e pct")
    axes[idx, 1].legend()

plt.tight_layout()
plt.show()

# Candidate values as fractions and, multiplied by 100, as percentages.
recommended = combined_summary[["lookahead", "pause_resume_pct_ideas"]].copy()
recommended["pause_resume_pct_ideas_pct"] = recommended["pause_resume_pct_ideas"] * 100
recommended

### Nästa steg
- Kör historik-analysen för flera lookahead-värden (t.ex. `--lookahead-set 10 20 30 --csv data/klines_analysis_{lookahead}.csv`).
- Uppdatera `pause_resume_pct` i live-skriptet med tabellen ovan som riktmärke.
- Trimma även tidskomponenten (`LOSS_PAUSE_SEC`) baserat på hur snabbt MAE planar ut i `loss_profiles`. 