In [18]:
import pandas as pd
import numpy as np
import os

input_dir = "rf_labeled_data/"
output_dir = "signal_data/"
os.makedirs(output_dir, exist_ok=True)

symbols = ['NASDAQ100', 'EURUSD']
timeframes = ['1m', '5m', '15m', '1h']

for symbol in symbols:
    for tf in timeframes:
        file = f"{input_dir}{symbol}_{tf}_rf_labeled.csv"
        if not os.path.exists(file):
            print(f"Missing file: {file}")
            continue

        df = pd.read_csv(file, parse_dates=['timestamp'])
        if 'close' not in df.columns or 'regime_pred' not in df.columns:
            print(f"Skipping {file} — missing columns.")
            continue

        # === VWAP + StdDev Bands ===
        df['vwap'] = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()
        df['std'] = df['close'].rolling(50).std()
        df['upper_band'] = df['vwap'] + df['std']
        df['lower_band'] = df['vwap'] - df['std']

        # === Signal Generation ===
        df['signal'] = 0

        # Range mean reversion
        df.loc[(df['regime_pred'] == 'range') & (df['close'] > df['upper_band']), 'signal'] = -1
        df.loc[(df['regime_pred'] == 'range') & (df['close'] < df['lower_band']), 'signal'] = 1

        # Trend continuation
        df.loc[(df['regime_pred'] == 'trend_up') & (df['close'] <= df['upper_band']), 'signal'] = 1
        df.loc[(df['regime_pred'] == 'trend_down') & (df['close'] >= df['lower_band']), 'signal'] = -1

        # === Optional: smooth noise ===
        # df['signal'] = df['signal'].rolling(3, center=True, min_periods=1).mean().round().fillna(0).astype(int)

        # === Save to new directory ===
        save_path = f"{output_dir}{symbol}_{tf}_signals.csv"
        df.to_csv(save_path, index=False)
        print(f"Saved signal file: {save_path}")

print("✅ All signal files generated and saved.")


Saved signal file: signal_data/NASDAQ100_1m_signals.csv
Saved signal file: signal_data/NASDAQ100_5m_signals.csv
Saved signal file: signal_data/NASDAQ100_15m_signals.csv
Saved signal file: signal_data/NASDAQ100_1h_signals.csv
Saved signal file: signal_data/EURUSD_1m_signals.csv
Saved signal file: signal_data/EURUSD_5m_signals.csv
Saved signal file: signal_data/EURUSD_15m_signals.csv
Saved signal file: signal_data/EURUSD_1h_signals.csv
✅ All signal files generated and saved.


In [19]:
import pandas as pd
import numpy as np
import os

signal_dir = "signal_data/"
output_dir = "backtest_results/"
os.makedirs(output_dir, exist_ok=True)

symbols = ['NASDAQ100', 'EURUSD']
timeframes = ['1m', '5m', '15m', '1h']

def sharpe_ratio(returns, risk_free=0.0):
    return np.sqrt(252) * (returns.mean() - risk_free) / returns.std() if returns.std() != 0 else 0

def max_drawdown(equity_curve):
    cummax = equity_curve.cummax()
    drawdown = equity_curve / cummax - 1
    return drawdown.min()

all_results = []

for symbol in symbols:
    for tf in timeframes:
        file = f"{signal_dir}{symbol}_{tf}_signals.csv"
        if not os.path.exists(file):
            print(f"Missing: {file}")
            continue

        df = pd.read_csv(file, parse_dates=['timestamp'])
        if 'signal' not in df.columns:
            print(f"Skipping {file} — no signal column.")
            continue

        # === Compute returns ===
        df['return'] = df['close'].pct_change()
        df['strategy_return'] = df['signal'].shift(1) * df['return']
        df['equity'] = (1 + df['strategy_return']).cumprod()

        # === Performance Metrics ===
        total_return = df['equity'].iloc[-1] - 1
        sharpe = sharpe_ratio(df['strategy_return'].dropna())
        mdd = max_drawdown(df['equity'])
        win_rate = (df['strategy_return'] > 0).mean()

        # Regime performance (if exists)
        if 'regime_pred' in df.columns:
            regime_perf = df.groupby('regime_pred')['strategy_return'].mean().to_dict()
        else:
            regime_perf = {}

        all_results.append({
            'symbol': symbol,
            'timeframe': tf,
            'total_return': total_return,
            'sharpe': sharpe,
            'max_drawdown': mdd,
            'win_rate': win_rate,
            **{f"{k}_perf": v for k, v in regime_perf.items()}
        })

        print(f"{symbol} - {tf} | Return: {total_return:.2%} | Sharpe: {sharpe:.2f} | MDD: {mdd:.2%} | Win: {win_rate:.2%}")

# === Summary ===
summary = pd.DataFrame(all_results)
summary.to_csv(f"{output_dir}summary_results.csv", index=False)
print("\n✅ Backtest complete. Results saved to backtest_results/summary_results.csv")
print(summary)


NASDAQ100 - 1m | Return: -25.55% | Sharpe: -0.01 | MDD: -41.48% | Win: 38.06%
NASDAQ100 - 5m | Return: -31.92% | Sharpe: -0.04 | MDD: -36.75% | Win: 34.30%
NASDAQ100 - 15m | Return: -27.27% | Sharpe: -0.06 | MDD: -36.23% | Win: 32.12%
NASDAQ100 - 1h | Return: -39.66% | Sharpe: -0.20 | MDD: -46.47% | Win: 29.16%
EURUSD - 1m | Return: 1.55% | Sharpe: 0.00 | MDD: -9.94% | Win: 31.93%
EURUSD - 5m | Return: -0.34% | Sharpe: 0.00 | MDD: -10.17% | Win: 33.61%
EURUSD - 15m | Return: 1.39% | Sharpe: 0.01 | MDD: -10.02% | Win: 34.10%
EURUSD - 1h | Return: -4.04% | Sharpe: -0.04 | MDD: -10.46% | Win: 34.83%

✅ Backtest complete. Results saved to backtest_results/summary_results.csv
      symbol timeframe  total_return    sharpe  max_drawdown  win_rate  \
0  NASDAQ100        1m     -0.255480 -0.013355     -0.414839  0.380582   
1  NASDAQ100        5m     -0.319191 -0.041931     -0.367506  0.342990   
2  NASDAQ100       15m     -0.272687 -0.059299     -0.362291  0.321154   
3  NASDAQ100        1h  