# Compare OANDA Replay vs. Backtest Data

Quick utility to check whether the opening-range (OR) stats and the 10:22 close differ between:

- OANDA replay fetch (e.g., `data/raw/replay_2020-01-15.csv`)
- Backtest/research feed for the same date (e.g., your historical CSV)

Steps:
1. Set `replay_path`, `backtest_year_path` (the annual backtest file), and `target_date` below so that they all refer to the same date.
2. Run the cell to print OR high/low, 10:22 close, entry/exit presence, and count of rows.
3. Adjust date format/columns if your backtest file uses different column names.

In [None]:
import pandas as pd
import pytz

# EDIT THESE for the same date
# Replay CSV; read by load_replay().
replay_path = "../data/raw/replay_2020-01-15.csv"
# Full-year backtest file; load_backtest_day() extracts the single target_date from it.
backtest_year_path = "../data/raw/DAT_ASCII_NSXUSD_M1_2020.csv"  # annual file
# Day to pull out of the annual file; should be the same day the replay CSV covers.
target_date = "2020-01-15"

# Times of day that stats() matches against bar timestamps. pd.Timestamp is used
# only as a parser here; .time() discards the date part.
ENTRY = pd.Timestamp("10:22").time()
EXIT = pd.Timestamp("12:00").time()
# Bounds of the "OR" window used by stats() (presumably the opening range -- confirm).
OR_START, OR_END = "09:30", "10:00"
# Timezone that load_backtest_day() attaches to the naive backtest timestamps.
NY = pytz.timezone("America/New_York")


def load_replay(path):
    """Load a replay CSV and return it indexed by timestamp, oldest row first.

    The timestamp column is ``time_ny`` when the file has one, otherwise
    ``datetime``. That column is parsed with ``pd.to_datetime`` and becomes
    the index of the returned frame.

    Args:
        path: Location of the replay CSV.

    Returns:
        DataFrame sorted by, and indexed on, the parsed timestamp column.
    """
    frame = pd.read_csv(path)

    # Prefer the explicit New York column; fall back to the generic one.
    if "time_ny" in frame.columns:
        stamp_col = "time_ny"
    else:
        stamp_col = "datetime"

    frame[stamp_col] = pd.to_datetime(frame[stamp_col])
    ordered = frame.sort_values(stamp_col)
    return ordered.set_index(stamp_col)


def load_backtest_day(year_path, date_str, tz=None):
    """Return one day's bars from an annual backtest file, indexed by local time.

    The file is read as a headerless, semicolon-separated CSV with six columns
    (datetime, open, high, low, close, volume) whose timestamps use the format
    ``'%Y%m%d %H%M%S'`` (backtest format per config, local NY).

    Rows are narrowed to the requested day *before* the index is sorted and
    made timezone-aware. Localizing the whole year first would raise on any
    DST-nonexistent or ambiguous timestamp anywhere in the file, even when the
    requested day is unaffected, and would spend time on rows that are thrown
    away.

    Args:
        year_path: Path to the annual backtest file.
        date_str: Day to extract; anything ``pd.to_datetime`` can parse
            (e.g. ``"2020-01-15"``).
        tz: Timezone the naive timestamps are expressed in. Defaults to the
            module-level ``NY`` timezone.

    Returns:
        DataFrame for the requested day, sorted by and indexed on a
        timezone-aware ``datetime`` index.

    Raises:
        ValueError: If the file has no rows for ``date_str``.
    """
    tz = NY if tz is None else tz

    df = pd.read_csv(year_path, sep=";", header=None)
    df.columns = ["datetime", "open", "high", "low", "close", "volume"]
    df["datetime"] = pd.to_datetime(df["datetime"], format="%Y%m%d %H%M%S")

    # Compare on midnight-normalized naive timestamps; this selects the same
    # local calendar day the tz-aware index would report after localization.
    day_start = pd.Timestamp(pd.to_datetime(date_str).date())
    df_day = df.loc[df["datetime"].dt.normalize() == day_start]
    if df_day.empty:
        raise ValueError(f"No rows for {date_str} in {year_path}")

    df_day = df_day.sort_values("datetime").set_index("datetime")
    # DataFrame.tz_localize attaches the timezone to the index.
    return df_day.tz_localize(tz)


def stats(df):
    """Summarize one session's bars for a replay-vs-backtest comparison.

    Uses the module-level OR_START/OR_END window and the ENTRY/EXIT times of
    day. Entry and exit bars are looked up inside the OR_START..12:00 window.

    Args:
        df: Bars indexed by timestamp, with ``high``, ``low`` and ``close``
            columns.

    Returns:
        dict with the total row count, the row counts of the OR window and of
        the OR_START..12:00 window, the OR high and low (None when the OR
        window has no rows), the close of the first bar stamped ENTRY (None
        when there is none), and flags saying whether ENTRY and EXIT bars
        exist.
    """
    opening = df.between_time(OR_START, OR_END, inclusive="both")
    session = df.between_time(OR_START, "12:00", inclusive="both")

    # Time-of-day of every bar in the session window, computed once.
    bar_times = session.index.time
    at_entry = session.loc[bar_times == ENTRY]
    at_exit = session.loc[bar_times == EXIT]

    has_opening = not opening.empty
    has_entry = not at_entry.empty

    summary = {
        "rows_total": len(df),
        "or_rows": len(opening),
        "win_rows": len(session),
    }
    summary["or_high"] = opening["high"].max() if has_opening else None
    summary["or_low"] = opening["low"].min() if has_opening else None
    if has_entry:
        summary["entry_close"] = float(at_entry["close"].iloc[0])
    else:
        summary["entry_close"] = None
    summary["has_entry"] = has_entry
    summary["has_exit"] = not at_exit.empty
    return summary

# Load both feeds for the same date and print their summaries one after the
# other so the numbers can be compared by eye.
replay_df = load_replay(replay_path)
bt_df = load_backtest_day(backtest_year_path, target_date)

print("Replay stats:", stats(replay_df))
print("Backtest stats:", stats(bt_df))

# Optional: show rows around 10:22 for both
# The leading "\n" is an escape inside the literal; a raw line break there
# would be an unterminated string.
print("\nReplay 10:15-10:30:")
print(replay_df.between_time("10:15", "10:30").head(20))
print("\nBacktest 10:15-10:30:")
print(bt_df.between_time("10:15", "10:30").head(20))
