# 📊 EDA AMRS BETA1 - Mean Reversion Strategy

**Análisis Exploratorio parametrizable para optimización de filtros**

## 🎯 Objetivos:
- Analizar distribución de |+DI - -DI| vs resultados WIN/LOSS
- Encontrar filtro DI óptimo para maximizar expectancia
- Validar robustez cross-market (múltiples pares/timeframes)
- Generar recomendaciones para config.py

## ⚙️ CONFIGURACIÓN - Cambiar aquí para probar diferentes pares/timeframes

In [None]:
# ============================================================================
# CONFIGURATION - EDIT HERE TO ANALYSE A DIFFERENT PAIR / TIMEFRAME
# ============================================================================

# Instrument and candle timeframe under analysis.
SYMBOL = "GBPUSD"  # e.g. "EURUSD", "GBPUSD", "USDJPY"
TIMEFRAME = "H4"   # e.g. "H1", "H4", "H6", "D1"

# Bin edges used to bucket the DI spread, and the threshold currently in use.
DI_BINS = [0, 12, 15, 18, 20, 23, 25, 30, 50]
CURRENT_FILTER = 20

# Echo the active configuration so every run records what it analysed.
print("\n".join([
    "üéØ Configuraci√≥n:",
    f"   Par: {SYMBOL}",
    f"   Timeframe: {TIMEFRAME}",
    f"   Filtro actual: DI < {CURRENT_FILTER}",
]))

## 📚 Imports y Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy import stats
from datetime import datetime

# Notebook-wide setup: silence warnings, widen pandas output, reset plot style.
warnings.filterwarnings('ignore')

# Show every column and allow wider rows when DataFrames are printed.
pd.options.display.max_columns = None
pd.options.display.width = 150

# Start from matplotlib's default style and use seaborn's 'husl' palette.
plt.style.use('default')
sns.set_palette('husl')

print("‚úÖ Librer√≠as cargadas")

## 📂 Data Loading - Carga simple y directa

In [None]:
# Load the processed candles for the configured pair/timeframe and parse
# their timestamps (adjust the relative path if the data lives elsewhere).
df_market = pd.read_csv(f"../Data/{SYMBOL}_{TIMEFRAME}_2019-2025_processed.csv")
df_market['datetime'] = pd.to_datetime(df_market['datetime'])

# Load the backtest setups and parse their entry timestamps.
df_setups = pd.read_csv("../results/setups_20190101_20251017.csv")
df_setups['entry_date'] = pd.to_datetime(df_setups['entry_date'])

# Summarise what was loaded, including the unfiltered baseline win rate.
market_start = df_market['datetime'].min()
market_end = df_market['datetime'].max()
base_win_rate = df_setups['outcome'].eq('WIN').mean() * 100

print("\n".join([
    "‚úÖ Datos cargados:",
    f"   Market data: {len(df_market)} velas",
    f"   Setups: {len(df_setups)} trades",
    f"   Per√≠odo market: {market_start} - {market_end}",
    f"   Win rate base: {base_win_rate:.1f}%",
]))

## 🔄 Data Merging - Combinación correcta de datos

In [None]:
# Attach the indicator values of the entry candle to every setup. A left join
# keeps setups whose entry timestamp has no matching candle; their indicator
# columns are NaN.
df = df_setups.merge(
    df_market[['datetime', 'plus_di', 'minus_di', 'adx', 'atr', 'rsi', 'ema20']],
    left_on='entry_date',
    right_on='datetime',
    how='left'
)

# Absolute distance between +DI and -DI, the quantity the filter is applied to.
df['di_spread'] = (df['plus_di'] - df['minus_di']).abs()

# Validate the merge. Because of the left join, len(df) is the total number of
# setups, so the "with indicators" figure must count the matched rows instead.
missing_indicators = df['plus_di'].isna().sum()
with_indicators = df['plus_di'].notna().sum()
print(f"üìä Merge completado:")
print(f"   Setups con indicadores: {with_indicators}")
print(f"   Datos faltantes: {missing_indicators}")

# Preview the first merged rows.
display(df[['entry_date', 'direction', 'outcome', 'result_pips', 'plus_di', 'minus_di', 'di_spread']].head())

## 📈 Análisis de DI Spread - Distribución y win rates

In [None]:
# Descriptive statistics of the DI spread over all merged setups.
spread = df['di_spread']
print("üìä Distribuci√≥n DI Spread |+DI - -DI|:")
for stat_label, stat_value in (
    ("Min", spread.min()),
    ("Max", spread.max()),
    ("Media", spread.mean()),
    ("Mediana", spread.median()),
    ("P25", spread.quantile(0.25)),
    ("P75", spread.quantile(0.75)),
):
    print(f"   {stat_label}: {stat_value:.2f}")

# Both panels mark the threshold currently in use with the same legend label.
filter_label = f'Filtro actual: {CURRENT_FILTER}'

plt.figure(figsize=(12, 5))

# Left panel: histogram of the spread.
plt.subplot(1, 2, 1)
plt.hist(spread, bins=30, alpha=0.7, color='skyblue', edgecolor='black')
plt.axvline(CURRENT_FILTER, color='red', linestyle='--', label=filter_label)
plt.title('Distribuci√≥n de DI Spread')
plt.xlabel('|+DI - -DI|')
plt.ylabel('Frecuencia')
plt.legend()
plt.grid(alpha=0.3)

# Right panel: spread distribution split by trade outcome.
plt.subplot(1, 2, 2)
sns.boxplot(x='outcome', y='di_spread', data=df)
plt.axhline(CURRENT_FILTER, color='red', linestyle='--', label=filter_label)
plt.title('DI Spread por Resultado')
plt.legend()
plt.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 🎯 Win Rate por Bins - Análisis de umbrales óptimos

In [None]:
# Bucket every setup by its DI spread using the configured bin edges.
# NOTE: pd.cut assigns NaN to spreads outside the DI_BINS range (and to NaN
# spreads), so those setups are left out of the per-bin table below.
df['di_bin'] = pd.cut(df['di_spread'], bins=DI_BINS, include_lowest=True)

# Per-bin trade count, win count, win rate (%) and pip statistics.
# observed=False is passed explicitly so empty bins stay in the table; relying
# on the implicit default for categorical groupers is deprecated in pandas.
bin_analysis = df.groupby('di_bin', observed=False).agg({
    'outcome': ['count', lambda x: (x == 'WIN').sum(), lambda x: (x == 'WIN').mean() * 100],
    'result_pips': ['mean', 'std']
}).round(2)

# Flatten the two-level column index into readable names.
bin_analysis.columns = ['Total_Trades', 'Wins', 'Win_Rate_%', 'Avg_Pips', 'Std_Pips']
bin_analysis['Losses'] = bin_analysis['Total_Trades'] - bin_analysis['Wins']

# Put the counts first, followed by the rate and pip statistics.
bin_analysis = bin_analysis[['Total_Trades', 'Wins', 'Losses', 'Win_Rate_%', 'Avg_Pips', 'Std_Pips']]

print(f"üìä Win Rate por rangos de DI Spread:")
display(bin_analysis)

In [None]:
# Plot the per-bin win rate and the win/loss composition of each bin.
plt.figure(figsize=(12, 5))

# Bin intervals rendered as strings so they can be used as bar labels.
bin_labels = [str(b) for b in bin_analysis.index]

# Left panel: win rate per bin, with the overall win rate as a reference line.
plt.subplot(1, 2, 1)
plt.bar(bin_labels, bin_analysis['Win_Rate_%'], color='lightgreen', alpha=0.7, edgecolor='black')
plt.axhline(df['outcome'].eq('WIN').mean() * 100, color='red', linestyle='--', label='Win Rate Overall')
plt.xlabel('Rangos DI Spread')
plt.ylabel('Win Rate (%)')
plt.title('Win Rate por Rangos de DI Spread')
plt.xticks(rotation=45)
plt.legend()
plt.grid(alpha=0.3)

# Right panel: stacked bars, losses drawn on top of wins for each bin.
plt.subplot(1, 2, 2)
plt.bar(bin_labels, bin_analysis['Wins'], label='Wins', color='green', alpha=0.7)
plt.bar(bin_labels, bin_analysis['Losses'], bottom=bin_analysis['Wins'], label='Losses', color='red', alpha=0.7)
plt.xlabel('Rangos DI Spread')
plt.ylabel('N√∫mero de Trades')
plt.title('Distribuci√≥n de Wins vs Losses por Bin')
plt.xticks(rotation=45)
plt.legend()
plt.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 🧮 Análisis de Filtros - Comparación de diferentes umbrales

In [None]:
def calculate_metrics(df_input):
    """
    Compute summary trading metrics for a DataFrame of setups.

    Args:
        df_input: DataFrame with an ``outcome`` column (rows equal to
            ``'WIN'`` / ``'LOSS'`` are treated as wins / losses) and a
            numeric ``result_pips`` column.

    Returns:
        dict with the keys ``Trades``, ``Win_Rate_%``, ``Avg_Win_pips``,
        ``Avg_Loss_pips``, ``Expectancy_pips`` and ``Profit_Factor``; all
        values except ``Trades`` are rounded to 2 decimals. Every value is 0
        when ``df_input`` has no rows.

        ``Profit_Factor`` is gross profit divided by gross loss (sum of
        winning pips over the absolute sum of losing pips), so 1.0 is
        breakeven. It is reported as 0 when there are no losing pips, because
        the ratio is undefined in that case.
    """
    if len(df_input) == 0:
        return {
            'Trades': 0, 'Win_Rate_%': 0, 'Avg_Win_pips': 0,
            'Avg_Loss_pips': 0, 'Expectancy_pips': 0, 'Profit_Factor': 0
        }

    wins = df_input[df_input['outcome'] == 'WIN']
    losses = df_input[df_input['outcome'] == 'LOSS']

    total = len(df_input)
    win_rate = len(wins) / total * 100
    avg_win = wins['result_pips'].mean() if len(wins) > 0 else 0
    avg_loss = losses['result_pips'].mean() if len(losses) > 0 else 0

    # Every non-winning row is weighted with the average LOSS result here.
    # NOTE(review): this equals the mean pips per trade only if 'outcome'
    # holds nothing but WIN/LOSS - confirm against the setups file.
    expectancy = (win_rate/100) * avg_win + (1 - win_rate/100) * avg_loss

    # Profit factor uses the totals, not the averages: avg_win / avg_loss is
    # the payoff ratio and ignores how often each outcome occurs.
    gross_profit = wins['result_pips'].sum()
    gross_loss = losses['result_pips'].sum()
    profit_factor = abs(gross_profit / gross_loss) if gross_loss != 0 else 0

    return {
        'Trades': total,
        'Win_Rate_%': round(win_rate, 2),
        'Avg_Win_pips': round(avg_win, 2),
        'Avg_Loss_pips': round(avg_loss, 2),
        'Expectancy_pips': round(expectancy, 2),
        'Profit_Factor': round(profit_factor, 2)
    }

# Candidate DI-spread thresholds to evaluate. CURRENT_FILTER is always part of
# the set so the "vs current filter" comparison in the final cell has a row to
# read even when the configured value is not one of the defaults.
# Thresholds are assumed to be integers (the final cell parses them with int()).
filter_thresholds = sorted({12, 15, 18, 20, 23, 25, 30, CURRENT_FILTER})

# First row: baseline metrics over every setup, with no filter applied.
no_filter = calculate_metrics(df)
no_filter['Filter_Threshold'] = "Sin filtro"
filter_results = [no_filter]

# One row per threshold: metrics of the setups whose spread is strictly below
# it (rows with a NaN spread never pass the comparison).
for threshold in filter_thresholds:
    metrics = calculate_metrics(df[df['di_spread'] < threshold])
    metrics['Filter_Threshold'] = f"< {threshold}"
    filter_results.append(metrics)

# Comparison table with the threshold label as the first column.
comparison_df = pd.DataFrame(filter_results)
comparison_df = comparison_df[['Filter_Threshold', 'Trades', 'Win_Rate_%', 'Avg_Win_pips',
                               'Avg_Loss_pips', 'Expectancy_pips', 'Profit_Factor']]

print(f"üìä Comparaci√≥n de filtros DI:")
display(comparison_df)

In [None]:
# 2x2 dashboard of the comparison table: expectancy, win rate, trade count and
# profit factor, each plotted against the filter threshold label.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
thresholds = comparison_df['Filter_Threshold']

# Line panels: (axis, metric column, marker, colour).
for ax, metric, marker, color in (
    (axes[0, 0], 'Expectancy_pips', 'o', 'blue'),
    (axes[0, 1], 'Win_Rate_%', 's', 'green'),
    (axes[1, 1], 'Profit_Factor', '^', 'purple'),
):
    ax.plot(thresholds, comparison_df[metric],
            marker=marker, linewidth=2, markersize=8, color=color)

# Bar panel: number of trades that survive each filter.
axes[1, 0].bar(thresholds, comparison_df['Trades'],
               alpha=0.7, color='orange', edgecolor='black')

# Reference lines: zero expectancy and profit factor of 1.
axes[0, 0].axhline(0, color='red', linestyle='--', alpha=0.5)
axes[1, 1].axhline(1, color='red', linestyle='--', alpha=0.5, label='Breakeven')

# Shared cosmetics: title, y-label, rotated x tick labels and a light grid.
for ax, title, ylabel in (
    (axes[0, 0], 'Expectancia por Filtro DI', 'Expectancia (pips/trade)'),
    (axes[0, 1], 'Win Rate por Filtro DI', 'Win Rate (%)'),
    (axes[1, 0], 'N√∫mero de Trades por Filtro', 'Trades'),
    (axes[1, 1], 'Profit Factor por Filtro DI', 'Profit Factor'),
):
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)
    ax.grid(alpha=0.3)

# Only the profit-factor panel has a labelled artist to show in a legend.
axes[1, 1].legend()

plt.tight_layout()
plt.show()

## 🏆 Recomendaciones Finales

In [None]:
# Pick the threshold with the highest expectancy among the filtered rows.
# NOTE(review): the choice ignores sample size, so a threshold that keeps only
# a handful of trades can win - check the 'Trades' column before adopting it.
filtered_comparison = comparison_df[comparison_df['Filter_Threshold'] != 'Sin filtro']
best_filter_idx = filtered_comparison['Expectancy_pips'].idxmax()
best_filter = filtered_comparison.loc[best_filter_idx]

# Row for the threshold currently in use (None when it was not evaluated).
current_filter_row = comparison_df[comparison_df['Filter_Threshold'] == f'< {CURRENT_FILTER}']
current_metrics = current_filter_row.iloc[0] if not current_filter_row.empty else None

print(f"üèÜ AN√ÅLISIS DE OPTIMIZACI√ìN - {SYMBOL} {TIMEFRAME}")
print("=" * 60)

print(f"\nüìä FILTRO √ìPTIMO ENCONTRADO:")
print(f"   Threshold: {best_filter['Filter_Threshold']}")
print(f"   Expectancia: {best_filter['Expectancy_pips']:+.2f} pips/trade")
print(f"   Win Rate: {best_filter['Win_Rate_%']:.1f}%")
print(f"   Trades: {best_filter['Trades']:.0f}")
print(f"   Profit Factor: {best_filter['Profit_Factor']:.2f}")

if current_metrics is not None:
    print(f"\nüìà COMPARACI√ìN vs FILTRO ACTUAL (<{CURRENT_FILTER}):")
    current_expectancy = current_metrics['Expectancy_pips']
    # Relative change in expectancy; reported as 0 when the current value is 0
    # because the percentage is undefined in that case.
    if current_expectancy != 0:
        exp_improvement = ((best_filter['Expectancy_pips'] - current_expectancy) /
                           abs(current_expectancy) * 100)
    else:
        exp_improvement = 0
    wr_improvement = best_filter['Win_Rate_%'] - current_metrics['Win_Rate_%']
    trades_change = best_filter['Trades'] - current_metrics['Trades']

    print(f"   Expectancia: {current_metrics['Expectancy_pips']:+.2f} ‚Üí {best_filter['Expectancy_pips']:+.2f} pips/trade ({exp_improvement:+.1f}%)")
    print(f"   Win Rate: {current_metrics['Win_Rate_%']:.1f}% ‚Üí {best_filter['Win_Rate_%']:.1f}% ({wr_improvement:+.1f}pp)")
    print(f"   Trades: {current_metrics['Trades']:.0f} ‚Üí {best_filter['Trades']:.0f} ({trades_change:+.0f})")

print(f"\n‚öôÔ∏è CONFIGURACI√ìN RECOMENDADA para config.py:")
# The label has the form "< N"; recover N as an integer.
optimal_value = int(best_filter['Filter_Threshold'].split('<')[1].strip())
print(f"   DI_SPREAD_MAX = {optimal_value}")

# Annual projection. The setups file spans 2019-01-01 to 2025-10-17 (about 6.8
# years), so trades per year is the observed count divided by that span. The
# count already reflects the analysed timeframe, so no extra per-timeframe
# scaling is applied (scaling H4 by 6/24 under-reported it by a factor of 4).
BACKTEST_YEARS = 6.8
trades_per_year = best_filter['Trades'] / BACKTEST_YEARS

annual_expectancy = best_filter['Expectancy_pips'] * trades_per_year
print(f"\nüí∞ PROYECCI√ìN ANUAL:")
print(f"   ~{trades_per_year:.0f} trades/a√±o")
print(f"   ~{annual_expectancy:+.0f} pips/a√±o")

# Verdict tiers by expectancy of the best filter, in pips per trade.
if best_filter['Expectancy_pips'] > 5:
    print(f"\nüü¢ VEREDICTO: EXCELENTE estrategia")
elif best_filter['Expectancy_pips'] > 2:
    print(f"\nüü° VEREDICTO: PROMETEDORA estrategia")
elif best_filter['Expectancy_pips'] > 0:
    print(f"\n‚ö†Ô∏è VEREDICTO: MARGINAL estrategia")
else:
    print(f"\n‚ùå VEREDICTO: NO VIABLE estrategia")

# Persist the comparison table, tagged with pair, timeframe and run time.
timestamp = datetime.now().strftime("%Y%m%d_%H%M")
output_file = f"../results/eda_analysis_{SYMBOL}_{TIMEFRAME}_{timestamp}.csv"
comparison_df.to_csv(output_file, index=False)
print(f"\nüíæ An√°lisis guardado: {output_file}")