In [None]:
import datetime
import pandas as pd
from scipy.stats import binomtest
import numpy as np
from tqdm import tqdm
import os
from collections import defaultdict
import matplotlib.pyplot as plt
from binance_historical_data import BinanceDataDumper


from collections import Counter


# Download data functions

In [None]:

def download_data_from_binance(symbol, temporalidad, start_year, end_year):
    """Download spot kline dumps for one ticker, one calendar month per request.

    Args:
        symbol: Ticker handed to the dumper (wrapped in a single-element list).
        temporalidad: Kline frequency, forwarded as ``data_frequency``.
        start_year: First year to fetch; the walk starts on January 1st.
        end_year: Last year to fetch; the walk stops after December 31st.

    Returns:
        None. Files are written by the dumper under the ``data`` directory.
    """
    one_day = datetime.timedelta(days=1)
    binance_dumper = BinanceDataDumper(
        path_dir_where_to_dump="data",
        asset_class="spot",
        data_type="klines",
        data_frequency=temporalidad,
    )

    month_start = datetime.date(start_year, 1, 1)
    last_day = datetime.date(end_year, 12, 31)

    while not month_start > last_day:
        # Jumping 32 days from the 1st always lands inside the following
        # month; snapping back to day 1 yields that month's first day.
        following_month = (month_start.replace(day=1) + datetime.timedelta(days=32)).replace(day=1)
        month_end = following_month - one_day
        print(f"Bajando {month_start} a {month_end}")
        binance_dumper.dump_data(
            tickers=[symbol],
            date_start=month_start,
            date_end=month_end,
        )
        month_start = following_month


In [None]:
def parse_binance_files(folder_path):
    """Load the 15-minute Binance kline CSVs of a folder into one DataFrame.

    NOTE: a later cell of this notebook defines ``parse_binance_files`` again
    (with an ``expected_timeframe_seconds`` parameter); once that cell runs it
    replaces this definition.

    Each CSV is read without a header. The epoch columns are decoded first as
    milliseconds and then as microseconds; a file is accepted with the first
    unit whose most frequent spacing between consecutive ``open_time`` values
    is 900 seconds, and skipped (with a printed message) otherwise.

    Args:
        folder_path: Directory whose ``*.csv`` files are loaded.

    Returns:
        A DataFrame sorted chronologically with the raw kline columns plus
        ``date``, ``hora``, ``ema20``, ``tendency``, ``prev_tendency``,
        ``type``, ``size`` and ``engulf``. An empty DataFrame is returned when
        no file is accepted.
    """
    column_names = [
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_volume_base', 'taker_buy_volume_quote', 'ignore'
    ]

    float_cols = ['open', 'high', 'low', 'close', 'volume',
                  'quote_asset_volume', 'taker_buy_volume_base', 'taker_buy_volume_quote']

    dfs = []
    files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".csv")]

    for f in files:
        df = pd.read_csv(f, names=column_names)
        for unit in ['ms', 'us']:
            try:
                # Decode into temporaries so a rejected unit leaves `df` untouched
                # for the next attempt.
                open_dt = pd.to_datetime(df['open_time'], unit=unit)
                close_dt = pd.to_datetime(df['close_time'], unit=unit)
                delta = open_dt.diff().dt.total_seconds().dropna()
                if delta.mode().iloc[0] == 900:
                    df['date'] = open_dt
                    df['close_time'] = close_dt
                    break
            except Exception:
                # Best effort: a unit that cannot be decoded (or a file too
                # short to have a spacing) simply falls through to the next unit.
                continue
        else:
            print(f"Error parseando archivo: {f}")
            continue

        df[float_cols] = df[float_cols].astype(float)
        df = df.drop(columns='ignore')
        dfs.append(df)

    if not dfs:
        # pd.concat raises on an empty list; report and return an empty frame instead.
        print("No se cargaron archivos.")
        return pd.DataFrame()

    # Sort on the decoded timestamp: raw `open_time` integers are not comparable
    # between files stored in milliseconds and files stored in microseconds.
    df = pd.concat(dfs).sort_values('date').reset_index(drop=True)

    # Time-of-day column
    df['hora'] = df['date'].dt.time

    # Trend: +1 when the close is above its 20-span EMA, -1 otherwise
    df['ema20'] = df['close'].ewm(span=20, adjust=False).mean()
    df['tendency'] = np.where(df['close'] > df['ema20'], 1, -1)
    df['prev_tendency'] = df['tendency'].shift(1)

    # Candle direction and body size
    df['type'] = np.where(df['close'] > df['open'], 'up', 'dw')
    df['size'] = abs(df['close'] - df['open'])

    # Engulfing detection on the two candles preceding each row
    o1 = df['open'].shift(1)
    c1 = df['close'].shift(1)
    o2 = df['open'].shift(2)
    c2 = df['close'].shift(2)

    engulf_up = (
        (c1 > o1) & (c2 < o2) &
        (o1 < c2) & (c1 > o2)
    )

    engulf_dw = (
        (c1 < o1) & (c2 > o2) &
        (o1 > c2) & (c1 < o2)
    )

    df['engulf'] = 0
    df.loc[engulf_up, 'engulf'] = 1
    df.loc[engulf_dw, 'engulf'] = -1

    return df


In [None]:
def parse_binance_files(folder_path, expected_timeframe_seconds=None):
    """Load the Binance kline CSVs of a folder into one enriched DataFrame.

    Each CSV is read without a header. The epoch columns are decoded first as
    milliseconds and then as microseconds. A file is accepted with the first
    unit that decodes without error and, when ``expected_timeframe_seconds``
    is given, whose most frequent spacing between consecutive ``open_time``
    values equals it. Files matching neither unit are skipped with a message.

    Args:
        folder_path: Directory whose ``*.csv`` files are loaded.
        expected_timeframe_seconds: Required candle spacing in seconds, or
            ``None`` to accept any spacing.

    Returns:
        A DataFrame sorted chronologically with the raw kline columns plus
        ``date``, ``hora``, ``ema20``, ``tendency``, ``prev_tendency``,
        ``type``, ``size`` and ``engulf``. An empty DataFrame is returned when
        no file is accepted.
    """

    column_names = [
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_volume_base', 'taker_buy_volume_quote', 'ignore'
    ]

    float_cols = ['open', 'high', 'low', 'close', 'volume',
                  'quote_asset_volume', 'taker_buy_volume_base', 'taker_buy_volume_quote']

    dfs = []
    files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".csv")]

    for f in files:
        df = pd.read_csv(f, names=column_names)
        for unit in ['ms', 'us']:
            try:
                # Decode into temporaries so a rejected unit leaves `df` untouched
                # for the next attempt.
                open_dt = pd.to_datetime(df['open_time'], unit=unit)
                close_dt = pd.to_datetime(df['close_time'], unit=unit)
                delta = open_dt.diff().dt.total_seconds().dropna()
                mode_delta = delta.mode().iloc[0]

                if expected_timeframe_seconds is None or mode_delta == expected_timeframe_seconds:
                    df['date'] = open_dt
                    df['close_time'] = close_dt
                    break
            except Exception:
                # Best effort: a unit that cannot be decoded (or a file too
                # short to have a spacing) simply falls through to the next unit.
                continue
        else:
            print(f"Archivo descartado por timeframe: {f}")
            continue

        df[float_cols] = df[float_cols].astype(float)
        df = df.drop(columns='ignore')
        dfs.append(df)

    if not dfs:
        print("No se cargaron archivos.")
        return pd.DataFrame()

    # Sort on the decoded timestamp: raw `open_time` integers are not comparable
    # between files stored in milliseconds and files stored in microseconds.
    df = pd.concat(dfs).sort_values('date').reset_index(drop=True)

    # Time-of-day column
    df['hora'] = df['date'].dt.time

    # Trend: +1 when the close is above its 20-span EMA, -1 otherwise
    df['ema20'] = df['close'].ewm(span=20, adjust=False).mean()
    df['tendency'] = np.where(df['close'] > df['ema20'], 1, -1)
    df['prev_tendency'] = df['tendency'].shift(1)

    # Candle direction and body size
    df['type'] = np.where(df['close'] > df['open'], 'up', 'dw')
    df['size'] = abs(df['close'] - df['open'])

    # Engulfing detection on the two candles preceding each row
    o1 = df['open'].shift(1)
    c1 = df['close'].shift(1)
    o2 = df['open'].shift(2)
    c2 = df['close'].shift(2)

    engulf_up = (
        (c1 > o1) & (c2 < o2) &
        (o1 < c2) & (c1 > o2)
    )

    engulf_dw = (
        (c1 < o1) & (c2 > o2) &
        (o1 > c2) & (c1 < o2)
    )

    df['engulf'] = 0
    df.loc[engulf_up, 'engulf'] = 1
    df.loc[engulf_dw, 'engulf'] = -1

    return df


# N-order matrix, with tendency

In [None]:
def order_n_matrix(df, order=1, time_filter=None, use_tendency=True, use_engulf=False, vis=True):
    """Build the order-N transition matrix of candle directions.

    Every row is labelled with the directions of its ``order`` preceding
    candles (oldest first, joined with ``_``) and, optionally, with
    ``prev_tendency`` and ``engulf``. Rows are then grouped by that label and
    by the row's own direction.

    Args:
        df: Candles with ``open`` and ``close``; also ``prev_tendency``,
            ``engulf`` and ``hora`` when the matching option is used.
        order: Number of preceding candles forming the pattern.
        time_filter: Optional dict with ``"open"`` and ``"close"`` entries,
            each an ``(hour, minute)`` pair; only rows whose ``hora`` lies in
            that inclusive range are kept.
        use_tendency: Add ``prev_tendency`` (as ``tendency``) to the grouping.
        use_engulf: Add ``engulf`` to the grouping.
        vis: Print both matrices.

    Returns:
        ``(transition_matrix, transition_counts, df_valid)``: row-normalised
        probabilities (plus the ``compute_tests`` columns when that function
        exists in the module globals), raw counts, and the labelled rows.
    """
    candles = df.copy()
    candles['type'] = np.where(candles['close'] > candles['open'], 'up', 'dw')

    # One lagged direction column per step back in time.
    lag_cols = []
    for lag in range(1, order + 1):
        lag_name = f'prev_{lag}'
        candles[lag_name] = candles['type'].shift(lag)
        lag_cols.append(lag_name)

    must_be_present = list(lag_cols)
    if use_tendency:
        must_be_present.append('prev_tendency')
    if use_engulf:
        must_be_present.append('engulf')

    df_valid = candles.dropna(subset=must_be_present).copy()

    if order == 1:
        df_valid['pattern'] = df_valid['prev_1']
    else:
        # Reverse the lags so the label reads oldest -> newest.
        df_valid['pattern'] = df_valid[lag_cols[::-1]].agg('_'.join, axis=1)

    group_cols = ['pattern']
    if use_tendency:
        df_valid['tendency'] = df_valid['prev_tendency'].astype(int)
        group_cols.append('tendency')

    if use_engulf:
        df_valid['engulf'] = df_valid['engulf'].astype(int)
        group_cols.append('engulf')
    group_cols.append('type')

    if time_filter:
        session_start = datetime.time(*time_filter["open"])
        session_end = datetime.time(*time_filter["close"])
        in_session = (df_valid['hora'] >= session_start) & (df_valid['hora'] <= session_end)
        df_valid = df_valid[in_session].copy()

    transition_counts = df_valid.groupby(group_cols).size().unstack(fill_value=0)
    row_totals = transition_counts.sum(axis=1)
    transition_matrix = transition_counts.div(row_totals, axis=0)

    # The significance columns are only attached when the helper is defined.
    if 'compute_tests' in globals():
        tests = transition_counts.apply(compute_tests, axis=1)
        transition_matrix = transition_matrix.join(tests)

    if vis:
        print("Matriz de transición (probabilidades):")
        print(transition_matrix.round(4))
        print("\nMatriz de transición (ocurrencias):")
        print(transition_counts.round(4))

    return transition_matrix, transition_counts, df_valid


In [None]:

# Compute the significance tests so they can be attached as extra columns
def compute_tests(row):
    """Two-sided binomial test of the 'up' count against a 50% rate.

    Args:
        row: Mapping-like counts row; missing ``'up'`` / ``'dw'`` entries
            count as 0.

    Returns:
        A Series with a single ``'binom_p'`` entry: the p-value, or NaN when
        the row has no observations.
    """
    n_up = row.get('up', 0)
    n_dw = row.get('dw', 0)
    n_obs = n_up + n_dw

    if n_obs > 0:
        # Is the number of 'up' outcomes significantly different from 50%?
        p_value = binomtest(n_up, n=n_obs, p=0.5, alternative='two-sided').pvalue
    else:
        p_value = np.nan

    return pd.Series({'binom_p': p_value})

In [None]:
def find_significant_probabilities(transition_matrix, transition_counts, p_binom=0.05):
    """Keep only the rows whose ``binom_p`` is below ``p_binom``.

    Args:
        transition_matrix: Probabilities with a ``binom_p`` column.
        transition_counts: Counts sharing the same index.
        p_binom: Significance threshold (strictly below passes).

    Returns:
        ``(significant_probs, significant_counts)`` restricted to the
        significant index entries. Both are also printed.
    """
    # Index entries passing the statistical-significance threshold
    passes = transition_matrix['binom_p'] < p_binom
    significant_idx = transition_matrix[passes].index

    # Apply the same selection to both matrices
    significant_counts = transition_counts.loc[significant_idx]
    significant_probs = transition_matrix.loc[significant_idx]

    print("Matriz de probabilidades (significativas):")
    print(significant_probs.round(4))

    print("\nMatriz de ocurrencias (correspondiente):")
    print(significant_counts)

    return significant_probs, significant_counts


# Estrategia

In [None]:
import numpy as np
import pandas as pd

def compute_max_drawdown(series):
    """Largest peak-to-value drop of a series, absolute and relative.

    Args:
        series: Values convertible to a float ``pd.Series``.

    Returns:
        ``{"mdd": ..., "mdd_pct": ...}`` where ``mdd`` is the largest distance
        below the running maximum and ``mdd_pct`` is that distance divided by
        the running maximum at that point (0.0 when that maximum is 0). Both
        are 0.0 for empty input.
    """
    equity = pd.Series(series, dtype=float)
    if equity.empty:
        return {"mdd": 0.0, "mdd_pct": 0.0}

    running_peak = equity.cummax()
    drawdown = running_peak - equity
    worst_drop = float(drawdown.max())
    worst_pos = int(np.nanargmax(drawdown.values))

    # Without any drawdown, fall back to the first running peak as reference.
    if worst_drop > 0:
        reference_peak = float(running_peak.iloc[worst_pos])
    else:
        reference_peak = float(running_peak.iloc[0])

    if reference_peak != 0:
        worst_drop_pct = float(worst_drop / reference_peak)
    else:
        worst_drop_pct = 0.0

    return {"mdd": worst_drop, "mdd_pct": worst_drop_pct}

def simulate_continuous_strategy(
    df,
    transition_matrix,
    pattern,
    tendency,
    max_adverse_move,
    minimum_range_movement,
    stake=1,
    vis=False,
    comission=0.002,
):
    """Backtest one (pattern, tendency) signal over a candle DataFrame.

    For every position ``i`` the directions of candles ``i-order .. i-1``
    (``order`` = number of ``_``-separated parts of ``pattern``) and the
    ``tendency`` of candle ``i-1`` are compared with the requested signal.
    On a match, a trade is opened at the ``open`` of candle ``i+1`` and closed
    at its ``close``, in the direction with the higher probability in the
    ``transition_matrix`` row for ``(pattern, tendency)``.

    NOTE(review): ``order_n_matrix`` labels a row with the candles that
    immediately precede it, so the matrix describes candle ``i`` while the
    trade here is taken on candle ``i+1``. Confirm this one-bar offset is
    intended.

    Args:
        df: Candles with ``type``, ``tendency``, ``date``, ``open``, ``close``,
            ``high`` and ``low``. The index is reset internally.
        transition_matrix: Frame indexed by ``(pattern, tendency)`` with
            ``up`` and ``dw`` probability columns.
        pattern: Pattern label such as ``"up_dw_up"``.
        tendency: Tendency value to match.
        max_adverse_move: Stop distance in price units, or ``None`` for no
            stop. A stopped trade loses ``stake * max_adverse_move``.
        minimum_range_movement: Minimum open/close range of the pattern
            candles required to trade, or ``None`` to disable the filter.
        stake: Multiplier applied to the price difference.
        vis: Print a short summary.
        comission: Flat amount subtracted from the running capital per trade
            (it is not subtracted from the per-trade ``pnl`` column).

    Returns:
        One row per trade (``index``, ``entry_time``, ``entry_price``,
        ``exit_price``, ``pnl``, ``capital``, ``capital_adjusted``); both
        capital curves start from 0. Summary metrics are stored in
        ``.attrs["metrics"]``.

    Raises:
        KeyError: If ``(pattern, tendency)`` is not in ``transition_matrix``.
    """
    # Label-based (.loc) and positional (.iloc) lookups are mixed below, so
    # normalise to a 0..n-1 index instead of trusting every caller to do it.
    df = df.reset_index(drop=True)

    min_range_before_pattern = minimum_range_movement
    # Take the order from the requested pattern itself rather than from
    # whichever row happens to be first in the matrix.
    orden = len(pattern.split("_"))
    row = transition_matrix.loc[(pattern, tendency)]
    prob_up = row["up"]
    prob_dw = row["dw"]
    expected = "up" if prob_up > prob_dw else "dw"
    prob_expected = prob_up if prob_up > prob_dw else prob_dw
    # The traded side depends only on the matrix row, so it is loop-invariant.
    direction = 1 if expected == "up" else -1

    capital = 0.0
    capital_adjusted = 0.0
    results = []

    for i in range(orden, len(df) - 1):
        actual_tendency = df.loc[i - 1, "tendency"]
        # .loc slicing is inclusive on both ends: exactly `orden` candles.
        pattern_i = "_".join(df.loc[i - orden : i - 1, "type"].values.tolist())

        if pattern_i != pattern or actual_tendency != tendency:
            continue

        if min_range_before_pattern is not None:
            window = df.loc[i - orden : i - 1, ["open", "close"]]
            prior_range = window.max().max() - window.min().min()
            if prior_range < min_range_before_pattern:
                continue

        row_candle = df.iloc[i + 1]
        entry_time = row_candle["date"]
        entry_price = row_candle["open"]
        exit_price = row_candle["close"]
        low = row_candle["low"]
        high = row_candle["high"]

        # The stop is assumed hit whenever the candle's extreme reaches it.
        hit_stop = False
        if max_adverse_move is not None:
            if direction == 1 and low <= entry_price - max_adverse_move:
                hit_stop = True
            elif direction == -1 and high >= entry_price + max_adverse_move:
                hit_stop = True

        if hit_stop:
            pnl = -stake * max_adverse_move
        else:
            price_diff = (exit_price - entry_price) * direction
            pnl = stake * price_diff

        # Probability-weighted variant of the same trade.
        pnl_adjusted = pnl * (0.5 + prob_expected)

        capital = capital + pnl - comission
        capital_adjusted = capital_adjusted + pnl_adjusted - comission

        results.append(
            {
                "index": i,
                "entry_time": entry_time,
                "entry_price": entry_price,
                "exit_price": exit_price,
                "pnl": pnl,
                "capital": capital,
                "capital_adjusted": capital_adjusted,
            }
        )

    df_results = pd.DataFrame(results)

    n_trades = len(df_results)
    if not df_results.empty:
        n_days = df_results["entry_time"].dt.date.nunique()
    else:
        n_days = 0
    if n_days > 0:
        avg_trades_per_day = n_trades / n_days
    else:
        avg_trades_per_day = 0.0

    if vis:
        print(f"Operaciones totales: {n_trades}")
        print(f"Promedio por día: {avg_trades_per_day:.2f}")
        print(f"Capital final: {capital:.2f}")

    # A frame built from an empty list has no columns at all.
    if "capital" in df_results.columns:
        mdd_capital = compute_max_drawdown(df_results["capital"])
    else:
        mdd_capital = {"mdd": 0.0, "mdd_pct": 0.0}
    if "capital_adjusted" in df_results.columns:
        mdd_capital_adj = compute_max_drawdown(df_results["capital_adjusted"])
    else:
        mdd_capital_adj = {"mdd": 0.0, "mdd_pct": 0.0}

    metrics = {
        "mdd_capital": mdd_capital,
        "mdd_capital_adjusted": mdd_capital_adj,
        "n_trades": int(n_trades),
        "avg_trades_per_day": float(avg_trades_per_day),
        "final_capital": float(capital),
        "final_capital_adjusted": float(capital_adjusted),
    }
    df_results.attrs["metrics"] = metrics
    return df_results


In [None]:
def iterate_over_patterns(df, 
                    relevant_transition_matrix, 
                    max_adverse_move,
                    minimum_range_movement,
                    stake=1,
                    vis=True                    
                    ):
    """Simulate every (pattern, tendency) row and keep the promising ones.

    A strategy is kept when the linear fit of its capital curve has a positive
    slope, the curve is above zero in more than half of the trades, and the
    final capital is positive. Kept strategies are plotted.

    Args:
        df: Candle DataFrame passed to ``simulate_continuous_strategy``.
        relevant_transition_matrix: Matrix indexed by ``(pattern, tendency)``.
        max_adverse_move: Stop distance forwarded to the simulator.
        minimum_range_movement: Range filter forwarded to the simulator.
        stake: Stake forwarded to the simulator.
        vis: Verbosity flag forwarded to the simulator.

    Returns:
        List of ``[pattern, tendency, max_adverse_move,
        minimum_range_movement, results_df]`` for the kept strategies.
    """

    survival_strategies = []
    for pattern, tendency in relevant_transition_matrix.index:
        print(f"\nEjecutando para patrón: {pattern}, tendencia: {tendency}")

        resultados = simulate_continuous_strategy(df, 
                                                relevant_transition_matrix,
                                                pattern,
                                                tendency,
                                                max_adverse_move,
                                                minimum_range_movement,
                                                stake,
                                                vis)

        # With no trades there is no last row to read, and a line cannot be
        # fitted through fewer than two points.
        if len(resultados) < 2:
            print("Sin operaciones suficientes para evaluar la curva")
            continue

        capital_final = resultados.iloc[-1]['capital']

        x = np.arange(len(resultados))
        slope, _ = np.polyfit(x, resultados["capital"], 1)
        ratio_above = (resultados["capital"] > 0).mean()

        if slope > 0 and ratio_above > 0.5 and capital_final > 0:
            plot_equity_curve(resultados, pattern, tendency)
            survival_strategies.append([pattern, 
                                        tendency,
                                        max_adverse_move, 
                                        minimum_range_movement,
                                        resultados])
        else:
            print("Pendiente negativa, o mucho tiempo debajo")

    return survival_strategies
        

# Cross validation

### Old version: simple CV split

In [None]:
def cross_validate_strategy(df, target_factor, order=3, stake=1, vis=True,
                            symbol=None, max_adverse_move=None,
                            minimum_range_movement=None):
    """4-fold cross validation of the pattern strategy, then a full-data run.

    The data is cut into four consecutive chunks. For each fold the transition
    matrix is built on the other three chunks, rows whose ``up`` or ``dw``
    probability exceeds ``target_factor`` are selected, and each is simulated
    on the held-out chunk. Patterns that never lose in any fold where they
    traded are then simulated on the whole data set, saved to
    ``patterns_{symbol}.csv`` and plotted as a daily equity curve.

    Args:
        df: Candle DataFrame with a ``date`` column.
        target_factor: Probability threshold for selecting a row.
        order: Pattern order passed to ``order_n_matrix``.
        stake: Stake passed to the simulator.
        vis: Print per-fold details.
        symbol: Label used in the CSV name and plot title. When omitted, a
            module-level ``symbol`` variable is used if one exists.
        max_adverse_move: Stop distance passed to the simulator (``None``
            disables the stop).
        minimum_range_movement: Range filter passed to the simulator
            (``None`` disables it).

    Returns:
        None.
    """
    if symbol is None:
        # Earlier versions read a notebook-level `symbol`; keep honouring it.
        symbol = globals().get('symbol', 'unknown')

    def _run(candles, matrix, pattern, tendency):
        # Keyword arguments: the simulator's 5th and 6th positional slots are
        # the stop and the range filter, not the stake.
        return simulate_continuous_strategy(candles,
                                            matrix,
                                            pattern,
                                            tendency,
                                            max_adverse_move=max_adverse_move,
                                            minimum_range_movement=minimum_range_movement,
                                            stake=stake,
                                            vis=False)

    df = df.sort_values('date').reset_index(drop=True)

    total_len = len(df)
    chunk_size = total_len // 4
    splits = [
        df.iloc[:chunk_size],
        df.iloc[chunk_size:chunk_size*2],
        df.iloc[chunk_size*2:chunk_size*3],
        df.iloc[chunk_size*3:]
    ]

    total_capital = 0
    performance_tracker = defaultdict(list)

    for i in range(4):
        fold_capital = 0
        test_df = splits[i].reset_index(drop=True)
        train_df = pd.concat([splits[j] for j in range(4) if j != i]).reset_index(drop=True)

        transition_matrix, _, _ = order_n_matrix(train_df, order=order, vis=False)
        relevant_transition_matrix = transition_matrix[(transition_matrix["dw"] > 
                                                target_factor) | 
                                                (transition_matrix["up"] >
                                                target_factor)]

        if vis:
            print(f"\n=== Fold {i+1} ===")
            print("Combinaciones patrón-tendencia seleccionadas:")
            for idx in relevant_transition_matrix.index:
                print(f"  {idx}")

        for index in relevant_transition_matrix.index:
            pattern, tendency = index
            resultados = _run(test_df, relevant_transition_matrix, pattern, tendency)
            if resultados.empty:
                # No trades in this fold: no evidence for or against the pattern.
                continue

            # The simulator's capital curve starts at 0, so its last value
            # already is the profit.
            ganancia = resultados.iloc[-1]['capital']

            performance_tracker[index].append(ganancia)

            if ganancia < 0 and vis:
                print(f"  Pérdida con patrón {pattern}, tendencia {tendency}: {ganancia:.2f}")

            fold_capital += ganancia
            total_capital += ganancia

        if vis:
            print(f"Capital generado en el fold {i+1}: {fold_capital:.2f}")

    patrones_sin_perdidas = [k for k, ganancias in performance_tracker.items() if all(g >= 0 for g in ganancias)]

    print("\n=== Ejecutando estrategia completa para patrones sin pérdidas ===")
    print("Combinaciones seleccionadas:")
    transition_matrix, _, _ = order_n_matrix(df, order=order, vis=False)

    combinaciones_finales = []
    total_final_capital = 0
    daily_pnls = defaultdict(float)

    for pattern, tendency in patrones_sin_perdidas:
        row = transition_matrix.loc[(pattern, tendency)]
        expected = 'up' if row['up'] > row['dw'] else 'dw'
        print(f"  ({pattern}, {tendency}, {expected})")
        combinaciones_finales.append((pattern, tendency, expected))

        resultados = _run(df, transition_matrix, pattern, tendency)
        if resultados.empty:
            continue

        resultados['day'] = resultados['entry_time'].dt.date
        pnl_por_dia = resultados.groupby('day')['pnl'].sum()

        for day, pnl in pnl_por_dia.items():
            daily_pnls[day] += pnl

        total_final_capital += resultados.iloc[-1]['capital']

    if combinaciones_finales:
        print(f"\nCapital total acumulado usando solo patrones sin pérdidas: {total_final_capital:.2f}")

        df_patterns = pd.DataFrame(combinaciones_finales, columns=["pattern", "tendency", "expected"])
        df_patterns.to_csv(f'patterns_{symbol}.csv', index=False)

        df_daily = pd.DataFrame(sorted(daily_pnls.items()), columns=['day', 'pnl'])
        # The plotted curve is shown from a nominal starting capital of 100.
        df_daily['capital'] = 100 + df_daily['pnl'].cumsum()

        plt.figure(figsize=(10, 6))
        plt.plot(df_daily['day'], df_daily['capital'], label='Equity Curve')
        plt.title(f'Curva de equity diaria acumulada para {symbol}')
        plt.xlabel('Fecha')
        plt.ylabel('Capital')
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()
    else:
        print(f"No se encontraron patrones sin pérdidas para {symbol}.")
    return 


### Walk forward cross validation

In [None]:
def walk_forward_cross_validation(df, 
                                target_prob=0.53,
                                order=3,
                                stake=1,
                                n_splits=5,
                                min_train_profit=0.0,
                                vis=False,
                                per_split_plots=False, 
                                per_pattern_plots=False,
                                max_adverse_move=None,
                                minimum_range_movement=None):
    """Expanding-window walk-forward validation of the pattern strategy.

    The data is cut into ``n_splits + 1`` equal slices. Split ``s`` trains on
    the first ``s + 1`` slices and tests on the next one. Patterns are chosen
    on the training data by probability (``up`` or ``dw`` above
    ``target_prob``) and then by simulated training profit (above
    ``min_train_profit``); the survivors are traded on the test slice and
    their trade PnLs are accumulated into one global equity curve starting
    at 100.

    Args:
        df: Candle DataFrame with a ``date`` column.
        target_prob: Probability threshold for selecting a matrix row.
        order: Pattern order passed to ``order_n_matrix``.
        stake: Stake passed to the simulator.
        n_splits: Number of train/test splits.
        min_train_profit: Minimum simulated training profit to keep a pattern.
        vis: Plot the global equity curve.
        per_split_plots: Plot one equity curve per split.
        per_pattern_plots: Plot one equity curve per pattern and split.
        max_adverse_move: Stop distance passed to the simulator (``None``
            disables the stop).
        minimum_range_movement: Range filter passed to the simulator
            (``None`` disables it).

    Returns:
        ``(equity_times, equity_vals, patterns_log)``: trade timestamps, the
        running capital after each trade, and a DataFrame describing the
        patterns kept in each split.
    """

    def _run(candles, matrix, pattern, tendency):
        # Keyword arguments: the simulator's 5th and 6th positional slots are
        # the stop and the range filter, not the stake.
        return simulate_continuous_strategy(candles, matrix, pattern, tendency,
                                            max_adverse_move=max_adverse_move,
                                            minimum_range_movement=minimum_range_movement,
                                            stake=stake, vis=False)

    d = df.sort_values('date').reset_index(drop=True).copy()
    d['date'] = pd.to_datetime(d['date'], errors='coerce')
    total = len(d)
    if total < (n_splits + 1):
        print("Data insuficiente para los splits solicitados.")
        return [], [], pd.DataFrame()

    split_size = total // (n_splits + 1)
    equity_times, equity_vals = [], []
    capital = 100.0
    patterns_log_rows = []

    for s in range(n_splits):
        train_end = split_size * (s + 1)
        test_end  = split_size * (s + 2)
        train_df  = d.iloc[:train_end].reset_index(drop=True)
        test_df   = d.iloc[train_end:test_end].reset_index(drop=True)

        transition_matrix, _, _ = order_n_matrix(train_df, order=order, vis=False)
        relevant_tm = transition_matrix[(transition_matrix["dw"] > target_prob) | (transition_matrix["up"] > target_prob)]

        tr0, tr1 = train_df['date'].iloc[0], train_df['date'].iloc[-1]
        te0, te1 = (test_df['date'].iloc[0], test_df['date'].iloc[-1]) if not test_df.empty else (pd.NaT, pd.NaT)

        print(f"\n=== Walk-Forward Split {s+1} ===")
        print(f"Train: {tr0} → {tr1}")
        if not test_df.empty:
            print(f"Test:  {te0} → {te1}")
        print(f"Patrones seleccionados (por prob.): {len(relevant_tm)}")

        kept = []
        for (pattern, tendency) in relevant_tm.index:
            res_train = _run(train_df, relevant_tm, pattern, tendency)
            # The simulator's capital curve starts at 0, so its last value
            # already is the training profit.
            train_gain = 0.0 if res_train.empty else res_train.iloc[-1]['capital']
            if train_gain > min_train_profit:
                kept.append((pattern, tendency))

        print(f"Patrones tras filtro de profit en TRAIN (> {min_train_profit:+.2f}): {len(kept)}")
        for (pattern, tendency) in kept:
            row = relevant_tm.loc[(pattern, tendency)]
            expected = 'up' if row['up'] > row['dw'] else 'dw'
            patterns_log_rows.append({
                'split': s+1,
                'train_start': tr0,
                'train_end': tr1,
                'test_start': te0,
                'test_end': te1,
                'pattern': pattern,
                'tendency': tendency,
                'expected': expected,
                'prob_up': float(row['up']),
                'prob_dw': float(row['dw']),
            })

        trades_list = []
        for (pattern, tendency) in kept:
            res_test = _run(test_df, relevant_tm, pattern, tendency)
            if not res_test.empty:
                trades_list.append(res_test[['entry_time','pnl']])
                if per_pattern_plots:
                    rp = res_test[['entry_time','pnl']].copy()
                    rp['capital'] = 100 + rp['pnl'].cumsum()
                    # Prepend a baseline point so the curve ALWAYS starts at 100
                    baseline = rp.iloc[[0]].copy()
                    baseline['capital'] = 100
                    baseline['pnl'] = 0
                    rp = pd.concat([baseline, rp], ignore_index=True)
                    plt.figure(figsize=(9,4))
                    plt.plot(rp['entry_time'], rp['capital'], linewidth=2)
                    plt.title(f"Patrón {pattern} | Tendencia {tendency} | Split {s+1}")
                    plt.xlabel("Tiempo")
                    plt.ylabel("Capital")
                    plt.grid(True)
                    plt.tight_layout()
                    plt.show()

        if trades_list:
            trades_df = pd.concat(trades_list).sort_values('entry_time')
            for entry_time, pnl in zip(trades_df['entry_time'], trades_df['pnl']):
                capital += pnl
                equity_times.append(entry_time)
                equity_vals.append(capital)
            if per_split_plots:
                tmp = trades_df.copy()
                # Same 100 offset as the prepended baseline point; without it
                # the curve would jump from 100 down to the raw cumulative PnL.
                tmp['capital'] = 100 + tmp['pnl'].cumsum()
                baseline = tmp.iloc[[0]].copy()
                baseline['capital'] = 100
                baseline['pnl'] = 0
                tmp = pd.concat([baseline, tmp], ignore_index=True)
                plt.figure(figsize=(9,4))
                plt.plot(tmp['entry_time'], tmp['capital'], linewidth=2)
                plt.title(f"Split {s+1} | Train: {tr0.date()}→{tr1.date()} | Test: {te0.date()}→{te1.date()}")
                plt.xlabel("Tiempo")
                plt.ylabel("Capital")
                plt.grid(True)
                plt.tight_layout()
                plt.show()

    if vis and equity_vals:
        plt.figure(figsize=(10,5))
        plt.plot(equity_times, equity_vals, linewidth=2, label="Equity (Walk-Forward)")
        plt.title("Curva de equity global (walk-forward, inicio fijo, profit-filter)")
        plt.xlabel("Tiempo")
        plt.ylabel("Capital")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

    patterns_log = pd.DataFrame(patterns_log_rows)
    return equity_times, equity_vals, patterns_log


In [None]:
def filtrar_equity_individual(output, pattern, tendency, min_ratio_above=0.5,
                            initial_capital=100, min_trades=20,
                            min_global_slope=0.0,
                            plot=False):
    """Decide whether a single strategy's equity curve is acceptable.

    The curve is ``initial_capital`` plus the cumulative ``pnl`` of the trades
    ordered by ``entry_time``. It is rejected when there are fewer than
    ``min_trades`` trades, when the share of points above ``initial_capital``
    is below ``min_ratio_above``, or when the slope of a linear fit is below
    ``min_global_slope``.

    Args:
        output: Trades with ``entry_time`` and ``pnl`` columns.
        pattern: Pattern label (plot title only).
        tendency: Tendency label (plot title only).
        min_ratio_above: Minimum share of points above the initial capital.
        initial_capital: Starting capital of the curve.
        min_trades: Minimum number of trades.
        min_global_slope: Minimum slope of the linear fit.
        plot: Plot the accepted curve.

    Returns:
        ``(False, None)`` when rejected; otherwise ``(True, stats)`` with
        ``final_capital``, ``ratio_above`` and ``global_slope``.
    """
    rejected = (False, None)

    if output.empty or len(output) < min_trades:
        return rejected

    trades = output.sort_values("entry_time").copy()
    trades["capital"] = initial_capital + trades["pnl"].cumsum()
    equity = trades["capital"]

    # Share of trades after which the curve sits above the starting capital
    ratio_above = (equity > initial_capital).mean()
    if ratio_above < min_ratio_above:
        return rejected

    # Overall slope of the curve (degree-1 fit against the trade number)
    trade_number = np.arange(len(trades))
    slope, _ = np.polyfit(trade_number, equity, 1)
    if slope < min_global_slope:
        return rejected

    if plot:
        plt.figure(figsize=(8, 4))
        plt.plot(trades["entry_time"], equity, label="Equity")
        plt.axhline(initial_capital, color="gray", linestyle="--")
        plt.title(f"Para {pattern} y tendencia {tendency}")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

    stats = {
        "final_capital": equity.iloc[-1],
        "ratio_above": ratio_above,
        "global_slope": slope,
    }
    return True, stats



### Rolling window cross validation

In [None]:
def rolling_walk_forward(df, target_factor, order=3, stake=1, train_bars=50000, test_bars=5000, vis=True, per_window_plots=False,
                         max_adverse_move=None, minimum_range_movement=None):
    """Rolling-window walk-forward validation of the pattern strategy.

    A fixed-size training window of ``train_bars`` candles is followed by a
    test window of ``test_bars`` candles; both slide forward by ``test_bars``
    until a full test window no longer fits. In each step the transition
    matrix is built on the training window only, rows whose ``up`` or ``dw``
    probability exceeds ``target_factor`` are traded on the test window, and
    trade PnLs are accumulated into one equity curve starting at 100.

    Args:
        df: Candle DataFrame with a ``date`` column.
        target_factor: Probability threshold for selecting a matrix row.
        order: Pattern order passed to ``order_n_matrix``.
        stake: Stake passed to the simulator.
        train_bars: Training window length in rows.
        test_bars: Test window length (and step) in rows.
        vis: Plot the global equity curve.
        per_window_plots: Plot one equity curve per window.
        max_adverse_move: Stop distance passed to the simulator (``None``
            disables the stop).
        minimum_range_movement: Range filter passed to the simulator
            (``None`` disables it).

    Returns:
        ``(equity_times, equity_vals)``: trade timestamps and the running
        capital after each trade.
    """
    d = df.sort_values('date').reset_index(drop=True).copy()
    d['date'] = pd.to_datetime(d['date'], errors='coerce')

    n = len(d)
    start = train_bars
    equity_times, equity_vals = [], []
    capital = 100

    while start < n - test_bars:
        train_df = d.iloc[start-train_bars:start].reset_index(drop=True)
        test_df  = d.iloc[start:start+test_bars].reset_index(drop=True)

        # Matrix built from the training window ONLY
        transition_matrix, _, _ = order_n_matrix(train_df, order=order, vis=False)
        relevant_tm = transition_matrix[(transition_matrix["dw"] > target_factor) | (transition_matrix["up"] > target_factor)]

        # Report the window and the selected patterns
        print(f"\n=== Window ===")
        print(f"Train: {train_df['date'].iloc[0]} → {train_df['date'].iloc[-1]}")
        print(f"Test:  {test_df['date'].iloc[0]} → {test_df['date'].iloc[-1]}")
        for (pattern, tendency) in relevant_tm.index:
            row = relevant_tm.loc[(pattern, tendency)]
            expected = 'up' if row['up'] > row['dw'] else 'dw'
            print(f"  {pattern}, {tendency}, expected={expected}")

        trades = []
        for (pattern, tendency) in relevant_tm.index:
            # Keyword arguments: the simulator's 5th and 6th positional slots
            # are the stop and the range filter, not the stake.
            res = simulate_continuous_strategy(test_df, relevant_tm, pattern, tendency,
                                               max_adverse_move=max_adverse_move,
                                               minimum_range_movement=minimum_range_movement,
                                               stake=stake, vis=False)
            if not res.empty:
                trades.append(res[['entry_time','pnl']])

        if trades:
            trades = pd.concat(trades).sort_values('entry_time')
            for entry_time, pnl in zip(trades['entry_time'], trades['pnl']):
                capital += pnl
                equity_times.append(entry_time)
                equity_vals.append(capital)

            if per_window_plots:
                tmp = trades.copy()
                tmp['capital'] = 100 + tmp['pnl'].cumsum()
                plt.figure(figsize=(9,4))
                plt.plot(tmp['entry_time'], tmp['capital'])
                plt.title(f"Train: {train_df['date'].iloc[0].date()}→{train_df['date'].iloc[-1].date()} | Test: {test_df['date'].iloc[0].date()}→{test_df['date'].iloc[-1].date()}")
                plt.xlabel("Tiempo")
                plt.ylabel("Capital")
                plt.grid(True)
                plt.tight_layout()
                plt.show()

        start += test_bars

    if vis and equity_vals:
        plt.figure(figsize=(10,5))
        plt.plot(equity_times, equity_vals, label="Equity (Rolling WF)", linewidth=2)
        plt.title("Curva de equity global (rolling walk-forward)")
        plt.xlabel("Tiempo")
        plt.ylabel("Capital")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

    return equity_times, equity_vals


# Visualization functions

In [None]:
def plot_equity_curve(df_results,
                    pattern=None,
                    tendency=None,
                    capital_cols=['capital']):
    """Plot one or more capital columns of a results frame against trade number.

    Args:
        df_results: Simulation results; nothing is drawn when it is empty.
        pattern: Optional pattern label for the title.
        tendency: Optional tendency label for the title.
        capital_cols: Names of the columns to draw, one line each.

    Returns:
        None.
    """
    if df_results.empty:
        print("No hay operaciones para graficar.")
        return

    # The suffix is added only for a truthy pattern with a non-None tendency.
    show_signal = pattern and tendency is not None
    suffix = f" - Patrón: {pattern}, Tendencia: {tendency}" if show_signal else ""

    plt.figure(figsize=(10, 5))
    for column_name in capital_cols:
        plt.plot(df_results[column_name], linewidth=2, label=column_name)

    plt.title("Evolución del capital" + suffix)
    plt.xlabel("Trade #")
    plt.ylabel("Capital")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [None]:
def plot_multiple_equity_curves(df_results,
                                label,
                                pattern,
                                tendency):
    """Add one labelled equity curve to the current matplotlib figure.

    No figure is created and ``plt.show()`` is not called here, so repeated
    calls draw onto the same axes.

    Args:
        df_results: Data to draw (converted with ``pd.DataFrame``); nothing is
            drawn when it is empty.
        label: Legend label of the curve.
        pattern: Pattern label for the title.
        tendency: Tendency label for the title.

    Returns:
        None.
    """
    if df_results.empty:
        print("No hay operaciones para graficar.")
        return

    curve = pd.DataFrame(df_results)
    plt.plot(curve, linewidth=2, label=label)

    # The suffix is added only for a truthy pattern with a non-None tendency.
    show_signal = pattern and tendency is not None
    suffix = f" - Patrón: {pattern}, Tendencia: {tendency}" if show_signal else ""

    plt.title("Evolución del capital" + suffix)
    plt.xlabel("Trade #")
    plt.ylabel("Capital")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()


# Auxiliary functions

In [None]:

def simulate_continuous_strategy_dual(df, 
                                    transition_matrix,
                                    pattern,
                                    tendency,
                                    stake=1,
                                    time_filter=None,
                                    vis=True):
    """Compare two entry prices for the same (pattern, tendency) signal.

    For every position ``i`` whose preceding candles ``i-order .. i-1`` spell
    ``pattern`` (``order`` = number of ``_``-separated parts) and whose candle
    ``i-1`` has the requested ``tendency``, two trades are simulated, both
    closed at ``close[i+1]``: one entered at ``close[i]`` and one entered at
    ``open[i+1]``. Each equity curve starts at 100.

    Args:
        df: Candles with ``type``, ``tendency``, ``open`` and ``close``, plus
            ``date`` or ``datetime`` for the time axis. The index is reset
            internally.
        transition_matrix: Frame indexed by ``(pattern, tendency)`` with
            ``up`` and ``dw`` probability columns; the higher one sets the
            traded direction.
        pattern: Pattern label such as ``"up_dw_up"``; any order is accepted.
        tendency: Tendency value to match.
        stake: Multiplier applied to the price difference.
        time_filter: Accepted but currently not used by this function.
        vis: Plot both equity curves.

    Returns:
        ``(eq_close, eq_open)``: running capital after each trade for the
        ``close[i]`` entry and for the ``open[i+1]`` entry.

    Raises:
        KeyError: If ``(pattern, tendency)`` is not in ``transition_matrix``.
    """
    # Rows are addressed by integer label below, so use a fresh 0..n-1 index.
    d = df.reset_index(drop=True)
    if 'date' in d.columns:
        d['date'] = pd.to_datetime(d['date'], errors='coerce')
    elif 'datetime' in d.columns:
        d['date'] = pd.to_datetime(d['datetime'], errors='coerce')
    else:
        d['date'] = pd.NaT

    row = transition_matrix.loc[(pattern, tendency)]
    prob_up = row['up']
    prob_dw = row['dw']
    expected = 'up' if prob_up > prob_dw else 'dw'
    direction = 1 if expected == 'up' else -1

    # Pattern length comes from the pattern itself instead of a fixed 3, so
    # patterns of any order can match.
    order = len(pattern.split("_"))

    capital_close = 100
    capital_open = 100
    eq_close, eq_open = [], []
    times = []

    for i in range(order, len(d)-1):
        # .loc slicing is inclusive on both ends: exactly `order` candles.
        pattern_i = "_".join(d.loc[i-order:i-1, 'type'].values.tolist())
        actual_tendency = d.loc[i-1, 'tendency']

        if pattern_i == pattern and actual_tendency == tendency:
            t = d.loc[i+1, 'date']
            exit_price = d.loc[i+1, 'close']

            # Case 1: entry at close[i]
            entry_price_close = d.loc[i, 'close']
            pnl_close = stake * (exit_price - entry_price_close) * direction
            capital_close += pnl_close
            eq_close.append(capital_close)

            # Case 2: entry at open[i+1]
            entry_price_open = d.loc[i+1, 'open']
            pnl_open = stake * (exit_price - entry_price_open) * direction
            capital_open += pnl_open
            eq_open.append(capital_open)

            times.append(t)

    if vis:
        plt.figure(figsize=(10, 5))
        plt.plot(times, eq_close, label="Entrada close[i]", linewidth=2)
        plt.plot(times, eq_open, label="Entrada open[i+1]", linewidth=2)
        plt.title(f"Evolución del capital - Patrón: {pattern}, Tendencia: {tendency}")
        plt.xlabel("Tiempo")
        plt.ylabel("Capital")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

    return eq_close, eq_open

