In [72]:
import pandas as pd

# --- Cargar datasets ---
df_ind = pd.read_csv('../Data/EURUSD_H4_2019-2025_processed.csv', parse_dates=['datetime'])
df_setups = pd.read_csv('../results/setups_20200301_20251017.csv',
                        parse_dates=['touch_date','entry_date','exit_date'])
df_macd = pd.read_csv('../Data/EURUSD_H4_2019-2025_with_macd.csv', parse_dates=['datetime'])

# --- Merge setups + indicadores (fecha más cercana hacia atrás) ---
df = pd.merge_asof(
    df_setups.sort_values('entry_date'),
    df_ind.sort_values('datetime'),
    left_on='entry_date',
    right_on='datetime',
    direction='backward'
)

# --- Detección flexible de columnas DI ---
plus_col = next((c for c in df.columns if 'plus_di' in c.lower()), None)
minus_col = next((c for c in df.columns if 'minus_di' in c.lower()), None)

if plus_col and minus_col:
    df['di_spread'] = abs(df[plus_col] - df[minus_col])
else:
    df['di_spread'] = None
    print('⚠️ Columnas de DI no encontradas, spread no calculado.')

# --- Añadir MACD ---
df = pd.merge(df,
              df_macd[['datetime','macd','macd_signal','macd_histogram']],
              on='datetime', how='left')

# --- Limpieza y orden final ---
cols_final = [
    'touch_date','entry_date','exit_date','direction','entry_price','sl_price','tp_price_ref',
    'exit_price','sl_pips','tp_pips_estimated','result_pips','rr_ratio_estimated','rr_ratio_real',
    'outcome','candles_away','candles_held','adx',plus_col,minus_col,'rsi','atr','di_spread',
    'datetime','macd','macd_signal','macd_histogram'
]
df = df[[c for c in cols_final if c in df.columns]].drop_duplicates(subset='entry_date').dropna().reset_index(drop=True)

print(f"✅ DataFrame final listo: {len(df)} filas, {len(df.columns)} columnas")
display(df.head())


✅ DataFrame final listo: 294 filas, 23 columnas


Unnamed: 0,touch_date,entry_date,exit_date,direction,entry_price,sl_price,tp_price_ref,exit_price,sl_pips,tp_pips_estimated,result_pips,rr_ratio_estimated,rr_ratio_real,outcome,candles_away,candles_held,plus_di_x,minus_di_x,di_spread,datetime,macd,macd_signal,macd_histogram
0,2020-03-04 16:00:00,2020-03-05 20:00:00,2020-03-06 08:00:00,SHORT,1.12291,1.12608,1.11421,1.12608,31.6,87.0,-31.6,2.75,1.0,LOSS,7,3,33.51,8.94,24.57,2020-03-05 20:00:00,0.004656,0.004604,5.2e-05
1,2020-03-12 12:00:00,2020-03-12 16:00:00,2020-03-12 16:00:00,LONG,1.11381,1.10861,1.1281,1.10861,52.0,142.9,-52.0,2.75,1.0,LOSS,1,0,15.51,31.83,16.32,2020-03-12 16:00:00,-0.001235,0.001273,-0.002509
2,2020-03-17 04:00:00,2020-03-17 12:00:00,2020-03-18 12:00:00,LONG,1.10079,1.09515,1.11628,1.09515,56.3,154.9,-56.3,2.75,1.0,LOSS,2,6,16.7,34.04,17.34,2020-03-17 12:00:00,-0.004047,-0.003225,-0.000822
3,2020-03-25 16:00:00,2020-03-26 16:00:00,2020-03-27 04:00:00,SHORT,1.10235,1.10791,1.08706,1.10791,55.6,152.9,-55.6,2.75,1.0,LOSS,6,3,31.45,11.4,20.05,2020-03-26 16:00:00,0.003631,0.000661,0.00297
4,2020-04-01 08:00:00,2020-04-03 08:00:00,2020-04-07 08:00:00,LONG,1.07932,1.07501,1.09116,1.08407,43.1,118.4,47.5,2.75,1.1,WIN,12,12,10.39,32.4,22.01,2020-04-03 08:00:00,-0.003968,-0.002189,-0.001779


In [74]:
### 9. Aplicación del filtro direccional |+DI − –DI| < 20

# Crear columna si no existe
if 'di_spread' not in df.columns:
    df['di_spread'] = abs(df['plus_di'] - df['minus_di'])

# Sin filtro
total_before = len(df)
winrate_before = (df['outcome'].eq('WIN').mean()) * 100

# Aplicar filtro
df_filtered = df[df['di_spread'] < 20]

# Métricas después del filtro
total_after = len(df_filtered)
winrate_after = (df_filtered['outcome'].eq('WIN').mean()) * 100

print(f"Total de setups antes del filtro: {total_before}")
print(f"Total después del filtro: {total_after}")
print(f"Win rate antes: {winrate_before:.2f}%")
print(f"Win rate después: {winrate_after:.2f}%")
print(f"Setups eliminados: {total_before - total_after} ({(1 - total_after/total_before)*100:.1f}%)")

Total de setups antes del filtro: 294
Total después del filtro: 200
Win rate antes: 34.69%
Win rate después: 41.50%
Setups eliminados: 94 (32.0%)


In [76]:
### 9.1 Métricas comparativas antes y después del filtro direccional

def calcular_metricas(df_in):
    total = len(df_in)
    wins = df_in[df_in['outcome'] == 'WIN']
    losses = df_in[df_in['outcome'] == 'LOSS']
    
    win_rate = len(wins) / total * 100 if total > 0 else 0
    avg_win = wins['result_pips'].mean()
    avg_loss = losses['result_pips'].mean()
    expectancy = (win_rate/100) * avg_win + (1 - win_rate/100) * avg_loss
    profit_factor = abs(avg_win / avg_loss) if avg_loss != 0 else 0
    rr_est = df_in['rr_ratio_estimated'].mean()

    return {
        'Trades': total,
        'WinRate(%)': round(win_rate, 2),
        'AvgWin(pips)': round(avg_win, 2),
        'AvgLoss(pips)': round(avg_loss, 2),
        'Expectancy(pips)': round(expectancy, 2),
        'ProfitFactor': round(profit_factor, 2),
        'RR_est': round(rr_est, 2)
    }

# Calcular métricas antes y después
metrics_before = calcular_metricas(df)
metrics_after = calcular_metricas(df_filtered)

# Combinar en tabla
comparison = pd.DataFrame([metrics_before, metrics_after], index=['Antes filtro', 'Después filtro'])
display(comparison)

Unnamed: 0,Trades,WinRate(%),AvgWin(pips),AvgLoss(pips),Expectancy(pips),ProfitFactor,RR_est
Antes filtro,294,34.69,50.86,-24.02,1.96,2.12,2.75
Después filtro,200,41.5,52.03,-24.18,7.44,2.15,2.75
