In [3]:

# ==== Enhanced Trading Data Interactive Viewer (Always-on Statistics + 3-Button Toggle) ====
import os, re
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as w
from IPython.display import display, HTML
from datetime import datetime, date
from scipy import stats

# ---------------- Path and rendering ----------------
# Directory scanned for asset sub-folders; each sub-folder is searched for CSV files.
ROOT = Path(r"D:\Trading_Data\glassnode_data2")
# Default Plotly renderer used when figures are displayed in this session.
pio.renderers.default = "plotly_mimetype"  # change to "browser" for external display

# ---------------- Tools: Time/Column parsing ----------------
def _find_time_col(df):
    if "t" in df.columns:
        return "t"
    for c in df.columns:
        if str(c).lower() in ("time","timestamp","date","datetime"):
            return c
    for c in df.columns:
        if re.search(r"(date|time|timestamp)", str(c), re.I):
            return c
    return None

def _find_value_col(df):
    if "v" in df.columns:
        return "v"
    tc = _find_time_col(df)
    for c in df.columns:
        if c == tc:
            continue
        if pd.api.types.is_numeric_dtype(df[c]):
            return c
    raise ValueError("Cannot find numeric column (expecting 'v')")

def _guess_epoch_unit(series):
    if series.dtype == "O" and series.astype(str).str.contains(r"\d{4}-\d{2}-\d{2}", regex=True).any():
        return None
    xx = pd.to_numeric(series, errors="coerce").dropna()
    if xx.empty:
        return None
    m = float(xx.abs().max())
    if m >= 1e15:    # microseconds
        return "us"
    elif m >= 1e12:  # milliseconds
        return "ms"
    else:            # seconds
        return "s"

def _parse_time_to_index(df):
    """Return a copy of *df* indexed by its parsed, UTC-aware timestamps.

    The time column is located with `_find_time_col` and parsed either as
    epoch numbers (unit guessed by `_guess_epoch_unit`) or as datetime
    strings. Rows whose timestamp cannot be parsed are dropped. When several
    rows share a timestamp, the one appearing last in *df* is kept. The
    result is sorted by time.

    Raises:
        ValueError: if no time column can be found.
    """
    tc = _find_time_col(df)
    if tc is None:
        raise ValueError("Cannot find time column (expecting 't')")
    unit = _guess_epoch_unit(df[tc])
    if unit is None:
        idx = pd.to_datetime(df[tc], errors="coerce", utc=True)
    else:
        idx = pd.to_datetime(pd.to_numeric(df[tc], errors="coerce"), unit=unit, errors="coerce", utc=True)

    valid = idx.notna()
    out = df.loc[valid, :].copy()
    out.index = idx[valid]
    # De-duplicate while rows are still in their original order: the default
    # sort is not stable, so sorting first would make keep="last" pick an
    # arbitrary row among equal timestamps.
    out = out[~out.index.duplicated(keep="last")]
    return out.sort_index()

def read_single_series(path):
    """Load one CSV file and return its value column as a time-indexed Series.

    The frame is time-indexed via `_parse_time_to_index`, the value column is
    chosen by `_find_value_col`, values are coerced to numbers and
    non-numeric/missing entries are dropped.
    """
    frame = _parse_time_to_index(pd.read_csv(path))
    value_label = _find_value_col(frame)
    values = pd.to_numeric(frame[value_label], errors="coerce")
    return values.dropna()

# ---------------- File scanning ----------------
def list_assets(root=ROOT):
    """Return the sorted names of the sub-directories of *root*.

    An empty list is returned when *root* does not exist.
    """
    if root.exists():
        return sorted(entry.name for entry in root.iterdir() if entry.is_dir())
    return []

def find_close_file(asset_folder):
    """Return the close-price CSV inside *asset_folder*, or None if absent.

    Files are matched with the glob pattern "*market_price_usd_close_*.csv".
    When several files match, the lexicographically first path is returned so
    the choice does not depend on directory iteration order.
    """
    matches = sorted(asset_folder.glob("*market_price_usd_close_*.csv"))
    return matches[0] if matches else None

def list_factor_files(asset_folder):
    """Return the sorted CSV paths in *asset_folder*, excluding the close-price file."""
    price_file = find_close_file(asset_folder)
    factor_paths = []
    for csv_path in asset_folder.glob("*.csv"):
        if csv_path != price_file:
            factor_paths.append(csv_path)
    factor_paths.sort()
    return factor_paths

# ---------------- Indicators/Normalization/Resampling ----------------
def sma(s, n):
    """Simple moving average over *n* points; NaN until a full window is available."""
    window = s.rolling(n, min_periods=n)
    return window.mean()
def ema(s, n):
    """Exponential moving average with span *n*, computed recursively (adjust=False)."""
    weighted = s.ewm(span=n, adjust=False)
    return weighted.mean()
def mm(s, n):
    """Moving median over *n* points; NaN until a full window is available."""
    window = s.rolling(n, min_periods=n)
    return window.median()

def pct_change_line(s, periods=1):
    """Return the change of *s* over *periods* steps, expressed in percent."""
    fractional = s.pct_change(periods=periods)
    return fractional * 100

def absolute_change_line(s, periods=1):
    """Return the difference between each value of *s* and the one *periods* steps earlier."""
    delta = s.diff(periods=periods)
    return delta

def normalize(s, how):
    """Rescale *s* according to *how*.

    "zscore" subtracts the mean and divides by the population standard
    deviation; "minmax" maps the series onto [0, 1]. A constant series
    (zero spread) becomes all zeros. Any other *how* returns *s* unchanged.
    """
    if how == "zscore":
        center = s.mean()
        spread = s.std(ddof=0)
        if spread and spread != 0:
            return (s - center) / spread
        return s * 0

    if how == "minmax":
        lowest = s.min()
        width = s.max() - lowest
        if width and width != 0:
            return (s - lowest) / width
        return s * 0

    return s

def resample_close(s, freq):
    """Resample a price series to *freq*, keeping the last value of each bin.

    *freq* of None, "" or "raw" returns *s* untouched. Bins without data are
    dropped.
    """
    keep_raw = freq in (None, "", "raw")
    if keep_raw:
        return s
    last_per_bin = s.resample(freq).last()
    return last_per_bin.dropna()

def resample_factor(s, freq, how="mean"):
    """Resample a factor series to *freq* using the requested aggregation.

    Args:
        s: time-indexed pandas Series.
        freq: pandas offset alias; None, "" or "raw" returns *s* untouched.
        how: "sum", "last", or anything else for the mean (the default).

    Returns:
        The aggregated series with bins that contain no observations removed.
    """
    if freq in (None, "", "raw"):
        return s
    bins = s.resample(freq)
    if how == "sum":
        # A plain sum() reports 0 for bins with no observations, which the
        # dropna below cannot remove; min_count=1 makes such bins NaN so gaps
        # are not turned into fake zero readings.
        aggregated = bins.sum(min_count=1)
    elif how == "last":
        aggregated = bins.last()
    else:
        aggregated = bins.mean()
    return aggregated.dropna()

# ---------------- Widgets ----------------
# Discover the asset folders once at start-up; the UI below needs at least one
# entry to populate the asset dropdown, so execution stops here otherwise.
assets = list_assets()
if not assets:
    raise SystemExit(f"⚠ Cannot find asset folders, please check ROOT = {ROOT}")

# Top-row controls: asset folder, resampling frequency, price-axis scale,
# factor aggregation used when resampling, and factor normalization mode.
asset_dd = w.Dropdown(options=assets, value=assets[0], description="Asset", layout=w.Layout(width="220px"))
# NOTE(review): the frequency strings are handed to pandas resample() as-is;
# confirm the upper-case "H" aliases are accepted by the installed pandas version.
freq_dd  = w.Dropdown(
    options=["raw","5min","15min","30min","1H","2H","4H","12H","1D","1W"],
    value="1D", description="Freq", layout=w.Layout(width="160px")
)
scale_dd = w.Dropdown(options=["linear","log"], value="linear", description="Scale", layout=w.Layout(width="140px"))
agg_dd   = w.Dropdown(options=["mean","last","sum"], value="mean", description="Factor Agg", layout=w.Layout(width="160px"))
norm_dd  = w.Dropdown(options=["none","zscore","minmax"], value="none", description="Normalize", layout=w.Layout(width="160px"))

# When ticked, the raw factor traces are not drawn (their derived lines still are).
hide_original_chk = w.Checkbox(value=False, description="Hide Original Factors", layout=w.Layout(width="200px"))

# Indicator switches + parameters: each checkbox enables an overlay and the
# slider next to it sets that overlay's window length.
sma_chk = w.Checkbox(value=True, description="SMA")
sma_i   = w.IntSlider(value=20, min=2, max=400, step=1, description="SMA len", continuous_update=False, layout=w.Layout(width="320px"))
ema_chk = w.Checkbox(value=True, description="EMA")
ema_i   = w.IntSlider(value=50, min=2, max=400, step=1, description="EMA len", continuous_update=False, layout=w.Layout(width="320px"))
mm_chk  = w.Checkbox(value=False, description="MM")
mm_i    = w.IntSlider(value=100, min=2, max=600, step=1, description="MM len",  continuous_update=False, layout=w.Layout(width="320px"))

# Percentage and Absolute Change: the sliders set the number of periods used
# for the change calculation (also read by the statistics panel).
pct_chk = w.Checkbox(value=False, description="Pct Change (%)")
pct_i   = w.IntSlider(value=1, min=1, max=50, step=1, description="Pct Periods", continuous_update=False, layout=w.Layout(width="320px"))
abs_chk = w.Checkbox(value=False, description="Abs Change")
abs_i   = w.IntSlider(value=1, min=1, max=50, step=1, description="Abs Periods", continuous_update=False, layout=w.Layout(width="320px"))

def _update_ma_sliders_state(*_):
    """Disable each parameter slider whose companion checkbox is unticked."""
    toggle_slider_pairs = (
        (sma_chk, sma_i),
        (ema_chk, ema_i),
        (mm_chk, mm_i),
        (pct_chk, pct_i),
        (abs_chk, abs_i),
    )
    for toggle, slider in toggle_slider_pairs:
        slider.disabled = not toggle.value

# Apply the initial enabled/disabled state once, then keep the sliders in sync
# whenever one of the indicator checkboxes changes.
_update_ma_sliders_state()
for wid in [sma_chk, ema_chk, mm_chk, pct_chk, abs_chk]:
    wid.observe(_update_ma_sliders_state, names="value")

# Search + multi-select factors (scrollable)
# Free-text filter and the multi-select it narrows; options are filled in later
# by rebuild_factor_list / _apply_factor_filter.
factor_filter = w.Text(placeholder="type to filter…", description="Search", layout=w.Layout(width="520px"))
factor_ms     = w.SelectMultiple(options=[], value=(), description="Factors",
                                 rows=14, layout=w.Layout(width="520px", height="300px"))

# Manual date input widgets: the button snaps the range slider to the picked dates.
start_date_picker = w.DatePicker(description='Start Date', disabled=False, layout=w.Layout(width="200px"))
end_date_picker = w.DatePicker(description='End Date', disabled=False, layout=w.Layout(width="200px"))
apply_date_btn = w.Button(description="Apply Date Range", button_style='primary', layout=w.Layout(width="150px"))

# Horizontal line drawing - click on plot to add lines
click_instruction = w.HTML(value="<b>Click on the plot to add horizontal lines (factor scale)</b>")
clear_hlines_btn = w.Button(description="Clear H-Lines", button_style='warning', layout=w.Layout(width="120px"))

# Output areas: status messages, the date range slider, and the figure.
info_out  = w.Output()
date_out  = w.Output()
plot_out  = w.Output()

# State shared between the callbacks below.
_all_factor_options = []  # list[(label, path)] for the current asset, before filtering
_horizontal_lines = []    # y-values of user-added horizontal lines
date_slider = None        # SelectionRangeSlider built by rebuild_date_slider, or None

def rebuild_factor_list(*_):
    """Rescan the selected asset's folder and refresh the factor selector.

    Stores (file name, path string) pairs in `_all_factor_options`, then
    re-applies the current search filter.
    """
    global _all_factor_options
    folder = ROOT / asset_dd.value
    _all_factor_options = [
        (csv_path.name, str(csv_path)) for csv_path in list_factor_files(folder)
    ]
    _apply_factor_filter()

def _apply_factor_filter(*_):
    """Show only factors whose file name contains the search text.

    Matching is case-insensitive; an empty search shows everything. Factors
    that were selected before and are still visible stay selected.
    """
    query = factor_filter.value.strip().lower()
    visible = [
        option
        for option in _all_factor_options
        if not query or query in option[0].lower()
    ]

    # Capture the selection before replacing the options.
    previously_selected = set(factor_ms.value)
    kept = tuple(path for _label, path in visible if path in previously_selected)

    factor_ms.options = visible
    try:
        factor_ms.value = kept
    except Exception:
        factor_ms.value = ()

def _sample_datetimes(index, max_points=800):
    idx = pd.to_datetime(index)
    if len(idx) <= 2:
        return list(idx.to_pydatetime())
    step = max(1, len(idx)//max_points)
    return list(idx[::step].to_pydatetime())

def rebuild_date_slider(*_):
    """Recreate the date range slider for the currently selected asset.

    Clears the info and date output areas, reads the asset's close-price
    file, prints the available date span, resets the two date pickers to the
    first/last available dates and displays a new SelectionRangeSlider whose
    changes trigger `render`. `date_slider` is set to None (and a message is
    shown) when the close file is missing, empty, or yields fewer than two
    sampled timestamps.
    """
    global date_slider
    date_out.clear_output()
    info_out.clear_output()

    asset_folder = ROOT / asset_dd.value
    close_file = find_close_file(asset_folder)
    if close_file is None:
        with info_out: print("⚠ Cannot find close file: *_market_price_usd_close_*.csv")
        date_slider = None
        return

    close = read_single_series(close_file)
    if len(close) == 0:
        with info_out: print(f"⚠ Read 0 records: {close_file.name}; please check t/v and time units")
        date_slider = None
        return

    # The slider offers a thinned-out subset of timestamps rather than every point.
    opts = _sample_datetimes(close.index, max_points=800)
    with info_out:
        print(f"Available: {close.index.min().date()} → {close.index.max().date()} ({len(close):,} points)")

    # Set default date picker values
    min_date = close.index.min().date()
    max_date = close.index.max().date()
    start_date_picker.value = min_date
    end_date_picker.value = max_date

    if len(opts) < 2:
        with date_out:
            display(w.HTML("<b>Insufficient data points (< 2) — displaying full range, no date slider.</b>"))
        date_slider = None
        return

    # Detach callbacks from the previous slider before it is replaced.
    if date_slider is not None:
        date_slider.unobserve_all()

    date_slider = w.SelectionRangeSlider(
        options=opts,
        value=(opts[0], opts[-1]),
        description="DateRange",
        layout=w.Layout(width="95%"),
        continuous_update=False
    )
    date_slider.observe(render, names="value")

    with date_out:
        display(date_slider)

def _to_utc_timestamp(value):
    """Coerce a date/datetime-like value to a UTC-aware pandas Timestamp."""
    ts = pd.to_datetime(value)
    if ts.tzinfo is None:
        return ts.tz_localize("UTC")
    return ts.tz_convert("UTC")


def apply_manual_date_range(*_):
    """Move the date range slider to the options nearest the picked dates.

    Does nothing when there is no slider or either date picker is empty. If
    the picked start falls after the picked end, the two are swapped. Any
    failure is reported in the info output area instead of being raised.
    """
    if date_slider is None or start_date_picker.value is None or end_date_picker.value is None:
        return
    try:
        opts = list(date_slider.options)
        # The date pickers yield timezone-naive dates while the slider options
        # come from a UTC-aware index; subtracting one from the other raises
        # TypeError, so both sides are normalized to UTC first.
        opt_times = [_to_utc_timestamp(opt) for opt in opts]
        start_dt = _to_utc_timestamp(start_date_picker.value)
        end_dt = _to_utc_timestamp(end_date_picker.value)

        def nearest_index(target):
            return min(
                range(len(opt_times)),
                key=lambda i: abs((opt_times[i] - target).total_seconds()),
            )

        low, high = sorted((nearest_index(start_dt), nearest_index(end_dt)))
        date_slider.value = (opts[low], opts[high])
    except Exception as e:
        with info_out:
            print(f"Error applying date range: {e}")

def add_horizontal_line_from_click(trace, points, selector):
    """Click callback: remember the clicked y-value as a horizontal line and redraw.

    Clicks that carry no point indices are ignored.
    """
    if not points.point_inds:
        return
    clicked_y = points.ys[0]
    _horizontal_lines.append(clicked_y)
    render()

def clear_horizontal_lines(*_):
    """Forget every user-added horizontal line and redraw the figure."""
    global _horizontal_lines
    _horizontal_lines = list()
    render()

# === Always-on Statistics Panel (Factor / Pct Change / Abs Change) ===
# Three-way selector for which statistics the panel shows; each option pairs a
# button label with the key that render_stats_panel switches on.
stats_toggle = w.ToggleButtons(
    options=[("Factor Statistics","factor"),
             ("Pct Change Statistics","pct"),
             ("Abs Change Statistics","abs")],
    value="factor",
    description="Statistics:",
    layout=w.Layout(width="100%")
)
# Bordered output area that holds the statistics table.
stats_panel_out = w.Output(layout=w.Layout(border="1px solid #d1d5db",
                                           padding="10px", min_height="240px"))

# Factor series the statistics are computed from; None until a factor is plotted.
_last_factor_series = None  # updated in render()

def _summarize_series(s: pd.Series) -> pd.DataFrame:
    s = pd.to_numeric(s, errors="coerce")
    return pd.DataFrame({
        "count": [int(s.count())],
        "missing": [int(s.isna().sum())],
        "mean": [s.mean()],
        "std": [s.std()],
        "min": [s.min()],
        "25%": [s.quantile(0.25)],
        "median": [s.median()],
        "75%": [s.quantile(0.75)],
        "max": [s.max()],
    }).T.rename(columns={0: "value"})

def render_stats_panel(*_):
    """Redraw the statistics panel for the mode chosen in `stats_toggle`.

    Uses `_last_factor_series` as input. Depending on the toggle, the table
    summarizes the factor itself, its percentage change, or its absolute
    change (periods taken from the matching slider). A hint is shown when no
    factor data is available.
    """
    stats_panel_out.clear_output()
    with stats_panel_out:
        series = _last_factor_series
        if series is None or series.dropna().empty:
            display(HTML("<em>Select at least one factor to see statistics.</em>"))
            return

        base = series.dropna()
        mode = stats_toggle.value
        if mode == "pct":
            periods = pct_i.value
            data = pct_change_line(base, periods).dropna()
            title = f"Pct Change Statistics (periods={periods})"
        elif mode == "abs":
            periods = abs_i.value
            data = absolute_change_line(base, periods).dropna()
            title = f"Abs Change Statistics (periods={periods})"
        else:
            data = base
            title = "Factor Statistics"

        display(HTML(f"<h4 style='margin:4px 0 8px 0'>{title}</h4>"))
        content = HTML("<em>No data in current range.</em>") if data.empty else _summarize_series(data)
        display(content)

# Switching the statistics mode only redraws the panel, not the figure.
stats_toggle.observe(render_stats_panel, names="value")

# ---------------- Core render ----------------
def render(*_):
    """Rebuild and display the price/factor figure from the current widget state.

    On every call the close-price CSV and each selected factor CSV are read
    again, clipped to the date slider range (when a slider exists), resampled
    and plotted: price and its moving averages on the primary y-axis, factors
    and their derived lines on the secondary y-axis. The first non-empty
    factor series is stored in `_last_factor_series` and the statistics panel
    is refreshed. Any exception is printed (with traceback) into the plot
    output area rather than propagated.
    """
    global _last_factor_series
    plot_out.clear_output(wait=True)
    with plot_out:
        try:
            asset_folder = ROOT / asset_dd.value
            close_file = find_close_file(asset_folder)
            if close_file is None:
                print("⚠ Cannot find close file"); return

            close_raw = read_single_series(close_file)
            # Clip to the slider range before resampling.
            if date_slider is not None:
                start, end = date_slider.value
                close_raw = close_raw.loc[(close_raw.index >= pd.to_datetime(start)) & (close_raw.index <= pd.to_datetime(end))]

            close = resample_close(close_raw, freq_dd.value)
            if len(close) == 0:
                print("⚠ No data after filtering (date/frequency caused), please relax conditions")
                _last_factor_series = None
                render_stats_panel()
                return

            # Single plot with two y-axes: price on the left, factors on the right.
            fig = make_subplots(rows=1, cols=1, specs=[[{"secondary_y": True}]])

            # Price (primary y)
            fig.add_trace(
                go.Scatter(x=close.index, y=close.values, name="Close", mode="lines",
                           line=dict(color="blue", width=2)),
                secondary_y=False
            )

            # Price MAs: each is drawn only when enabled and when there are at
            # least as many points as the window length.
            if sma_chk.value and len(close) >= sma_i.value:
                s1 = sma(close, sma_i.value).dropna()
                if len(s1) > 0:
                    fig.add_trace(
                        go.Scatter(x=s1.index, y=s1.values, name=f"Close-SMA({sma_i.value})", mode="lines",
                                   line=dict(color="darkblue", dash="dash")),
                        secondary_y=False
                    )
            if ema_chk.value and len(close) >= ema_i.value:
                s2 = ema(close, ema_i.value).dropna()
                if len(s2) > 0:
                    fig.add_trace(
                        go.Scatter(x=s2.index, y=s2.values, name=f"Close-EMA({ema_i.value})", mode="lines",
                                   line=dict(color="navy", dash="dot")),
                        secondary_y=False
                    )
            if mm_chk.value and len(close) >= mm_i.value:
                s3 = mm(close, mm_i.value).dropna()
                if len(s3) > 0:
                    fig.add_trace(
                        go.Scatter(x=s3.index, y=s3.values, name=f"Close-MM({mm_i.value})", mode="lines",
                                   line=dict(color="royalblue", dash="dashdot")),
                        secondary_y=False
                    )

            # --- Factors on secondary y
            selected = list(factor_ms.value)
            _last_factor_series = None  # reset each render

            if selected:
                # Colors are assigned in order and reused cyclically when more
                # factors are plotted than there are colors.
                colors = ['green', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan', 'magenta', 'yellow', 'lime']
                color_idx = 0

                for fpath in selected:
                    name = Path(fpath).name
                    s = read_single_series(Path(fpath))

                    if date_slider is not None:
                        start, end = date_slider.value
                        s = s.loc[(s.index >= pd.to_datetime(start)) & (s.index <= pd.to_datetime(end))]

                    s = resample_factor(s, freq_dd.value, how=agg_dd.value)
                    s = s.reindex(close.index)  # align to price timeline
                    s = normalize(s, norm_dd.value)
                    s_clean = s.dropna()

                    # Set first valid factor series for stats
                    if _last_factor_series is None and len(s_clean) > 0:
                        _last_factor_series = s_clean.copy()

                    # Factors with no data after alignment are skipped entirely
                    # (and do not consume a color).
                    if len(s_clean) > 0:
                        factor_color = colors[color_idx % len(colors)]
                        color_idx += 1

                        if not hide_original_chk.value:
                            fig.add_trace(
                                go.Scatter(x=s_clean.index, y=s_clean.values, name=f"Factor:{name}", mode="lines",
                                           line=dict(color=factor_color)),
                                secondary_y=True
                            )

                        # Factor MAs: same switches and window lengths as the
                        # price MAs, drawn in the factor's color.
                        if sma_chk.value and len(s_clean) >= sma_i.value:
                            f_sma = sma(s_clean, sma_i.value).dropna()
                            if len(f_sma) > 0:
                                fig.add_trace(
                                    go.Scatter(x=f_sma.index, y=f_sma.values, name=f"{name}-SMA({sma_i.value})",
                                               mode="lines", line=dict(color=factor_color, dash='dash')),
                                    secondary_y=True
                                )
                        if ema_chk.value and len(s_clean) >= ema_i.value:
                            f_ema = ema(s_clean, ema_i.value).dropna()
                            if len(f_ema) > 0:
                                fig.add_trace(
                                    go.Scatter(x=f_ema.index, y=f_ema.values, name=f"{name}-EMA({ema_i.value})",
                                               mode="lines", line=dict(color=factor_color, dash='dot')),
                                    secondary_y=True
                                )
                        if mm_chk.value and len(s_clean) >= mm_i.value:
                            f_mm = mm(s_clean, mm_i.value).dropna()
                            if len(f_mm) > 0:
                                fig.add_trace(
                                    go.Scatter(x=f_mm.index, y=f_mm.values, name=f"{name}-MM({mm_i.value})",
                                               mode="lines", line=dict(color=factor_color, dash='dashdot')),
                                    secondary_y=True
                                )

                        # Pct / Abs change of the (already normalized) factor,
                        # drawn on the same secondary axis.
                        if pct_chk.value:
                            pct_factor = pct_change_line(s_clean, pct_i.value).dropna()
                            if len(pct_factor) > 0:
                                fig.add_trace(
                                    go.Scatter(x=pct_factor.index, y=pct_factor.values,
                                               name=f"{name}-PctChange({pct_i.value})", mode="lines",
                                               line=dict(color=factor_color, dash='longdash', width=1.5)),
                                    secondary_y=True
                                )
                        if abs_chk.value:
                            abs_factor = absolute_change_line(s_clean, abs_i.value).dropna()
                            if len(abs_factor) > 0:
                                fig.add_trace(
                                    go.Scatter(x=abs_factor.index, y=abs_factor.values,
                                               name=f"{name}-AbsChange({abs_i.value})", mode="lines",
                                               line=dict(color=factor_color, dash='longdashdot', width=1.5)),
                                    secondary_y=True
                                )

            # Horizontal lines (intended for the factor axis).
            # NOTE(review): add_hline is called without an explicit axis
            # argument — confirm the lines actually land on the secondary y-axis.
            for hline_val in _horizontal_lines:
                fig.add_hline(y=hline_val, line_dash="solid", line_color="red",
                              annotation_text=f"H-Line: {hline_val:.4f}", annotation_position="right")

            # Axes and layout
            fig.update_yaxes(title_text=f"Price (USD) - {scale_dd.value}", type=scale_dd.value, secondary_y=False)
            fig.update_yaxes(title_text=f"Factors ({norm_dd.value})", secondary_y=True)
            fig.update_layout(
                title=f"{asset_dd.value} | Freq={freq_dd.value} | Scale={scale_dd.value} | Normalize={norm_dd.value}",
                width=1200, height=800,
                margin=dict(l=60,r=60,t=60,b=40),
                legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
                hovermode="x unified"
            )

            print(f"Display range: {close.index.min().date()} → {close.index.max().date()} ({len(close):,} points)")
            if _horizontal_lines:
                print(f"Horizontal lines (factor scale): {[f'{x:.4f}' for x in _horizontal_lines]}")

            # Wrap in a FigureWidget so click callbacks can be attached. Only
            # the first trace (the Close line) gets the handler, so clicked
            # y-values come from the price trace.
            fig_widget = go.FigureWidget(fig)
            if len(fig_widget.data) > 0:
                fig_widget.data[0].on_click(add_horizontal_line_from_click)
            display(fig_widget)

            # Refresh stats panel (always visible)
            render_stats_panel()

        except Exception as e:
            # Callback boundary: show the failure inside the plot area instead
            # of letting it escape from a widget callback.
            import traceback
            print("⚠ Plot error:", e)
            traceback.print_exc()

# ---- Event binding ----
def on_asset_change(*_):
    """Refresh everything that depends on the selected asset, then redraw."""
    for refresh in (rebuild_factor_list, rebuild_date_slider, render):
        refresh()

# Changing the asset rebuilds the factor list and date slider before redrawing.
asset_dd.observe(on_asset_change, names="value")
# Every other control simply triggers a redraw when its value changes.
for wid in [freq_dd, scale_dd, agg_dd, norm_dd, sma_i, ema_i, mm_i, pct_i, abs_i,
            sma_chk, ema_chk, mm_chk, pct_chk, abs_chk, hide_original_chk]:
    wid.observe(render, names="value")
factor_ms.observe(render, names="value")
# Typing in the search box only re-filters the factor list.
factor_filter.observe(_apply_factor_filter, names="value")

apply_date_btn.on_click(apply_manual_date_range)
clear_hlines_btn.on_click(clear_horizontal_lines)

# ---- Startup ----
# Populate the factor list and date slider for the initially selected asset.
rebuild_factor_list()
rebuild_date_slider()

# Assemble the layout top to bottom: controls, indicator rows, factor picker,
# date inputs, horizontal-line tools, messages, date slider, statistics, plot.
ui = w.VBox([
    w.HBox([asset_dd, freq_dd, scale_dd, agg_dd, norm_dd]),
    w.HBox([hide_original_chk]),
    w.HBox([sma_chk, sma_i]),
    w.HBox([ema_chk, ema_i]),
    w.HBox([mm_chk, mm_i]),
    w.HBox([pct_chk, pct_i]),
    w.HBox([abs_chk, abs_i]),
    w.VBox([factor_filter, factor_ms]),
    w.HBox([start_date_picker, end_date_picker, apply_date_btn]),
    click_instruction,
    w.HBox([clear_hlines_btn]),
    info_out,
    date_out,
    w.HTML("<h3>Statistics (Always Visible)</h3>"),
    stats_toggle,
    stats_panel_out,
    plot_out
])
# Show the UI first, then draw the initial figure into plot_out.
display(ui)
render()


VBox(children=(HBox(children=(Dropdown(description='Asset', layout=Layout(width='220px'), options=('BTC', 'ETH…