### Import necessary libraries

In [None]:
import asyncio
import os
import time
from datetime import datetime
from datetime import time as time2

import numpy as np
import pandas as pd
import pandas_ta as ta
from ib_insync import *

# --- Configuration: everything a reader might want to tune ---
SYMBOL = 'QQQ'
# Local CSV cache of the 5-minute bars. The name is derived from SYMBOL so that
# changing the ticker cannot silently reuse another instrument's file
# (resolves to '../data/qqq_5min_rth.csv' for QQQ).
DATA_FILE = f'../data/{SYMBOL.lower()}_5min_rth.csv'
DOWNLOAD_NEW_DATA = False  # Set to True to force download from IBKR
STARTING_CAPITAL = 10000

### Data Ingestion

In [18]:
async def download_data_from_ib():
    """Download 5-minute TRADES bars for SYMBOL from TWS / IB Gateway.

    Connects to 127.0.0.1:7497 with clientId=1 and walks backwards in time,
    issuing up to 40 requests of 6 months each (about 20 years), restricted to
    regular trading hours. Every request ends where the previous chunk began.

    Returns:
        pandas.DataFrame: all fetched bars, de-duplicated on 'date' and sorted
        ascending by 'date'; an empty DataFrame when nothing was fetched.
    """
    # Start the connection to TWS or IB Gateway
    ib = IB()
    await ib.connectAsync('127.0.0.1', 7497, clientId=1)

    all_data = []
    try:
        # Define the contract
        contract = Stock(SYMBOL, 'SMART', 'USD')
        end_date = datetime.now()

        # 40 requests x 6 months = up to 20 years of data
        for _ in range(40):
            endDateTime = end_date.strftime('%Y%m%d %H:%M:%S')

            # Use the awaitable request: the blocking variant must not be
            # called from inside a coroutine running on the notebook's loop.
            bars = await ib.reqHistoricalDataAsync(
                contract,
                endDateTime=endDateTime,     # data up to this timestamp
                durationStr='6 M',           # 6 months per request
                barSizeSetting='5 mins',     # timeframe 5min
                whatToShow='TRADES',
                useRTH=True,                 # regular trading hours only
                formatDate=1
            )

            if not bars:
                # Nothing came back for this end date; repeating the identical
                # request would not move the window, so stop paging here.
                break

            # Convert the response to a DataFrame
            chunk = util.df(bars)
            print(f"Fetched {len(chunk)} bars ending on {chunk['date'].iloc[-1]}")
            all_data.append(chunk)
            # The next request ends at the oldest bar just received
            end_date = pd.to_datetime(chunk['date'].iloc[0])

            # Pause between requests without blocking the event loop
            await asyncio.sleep(1)
    finally:
        # Always release the API connection, even if a request failed
        ib.disconnect()

    if not all_data:
        return pd.DataFrame()

    final_df = pd.concat(all_data, ignore_index=True)
    final_df = final_df.drop_duplicates(subset=['date'], keep='first')
    final_df = final_df.sort_values('date').reset_index(drop=True)
    return final_df

# --- Loading Logic ---
if DOWNLOAD_NEW_DATA or not os.path.exists(DATA_FILE):
    df = await download_data_from_ib()
    if not df.empty:
        # Create the 'data' folder if it doesn't exist
        os.makedirs('data', exist_ok=True)
        df.to_csv(DATA_FILE, index=False)
        print(f"Data saved to {DATA_FILE}")
else:
    print(f"Loading local data from {DATA_FILE}...")
    df = pd.read_csv(DATA_FILE)

df['date'] = pd.to_datetime(df['date'], utc=True).dt.tz_convert('America/New_York')


Loading local data from ../data/nas_ny.csv...


### Feature Engineering


In [19]:
# Indicator columns computed with Pandas TA:
#   ATR_14   - 14-period Average True Range
#   SMA_200  - 200-period simple moving average of the close
#   WILLR_10 - 10-period Williams %R
df['ATR_14'] = ta.atr(high=df['high'], low=df['low'], close=df['close'], length=14)
df['SMA_200'] = ta.sma(close=df['close'], length=200)
df['WILLR_10'] = ta.willr(high=df['high'], low=df['low'], close=df['close'], length=10)

# Drop every row that still contains a NaN (the indicator warm-up rows, SMA-200
# being the longest) and renumber the index from zero
df = df.dropna().reset_index(drop=True)

df.head()

Unnamed: 0,date,open,high,low,close,tick_volume,spread,real_volume,ATR_14,SMA_200,WILLR_10
0,2025-01-03 06:40:00-05:00,21052.5,21055.2,21049.2,21054.5,377,200,0,13.865241,21069.933,-63.466667
1,2025-01-03 06:45:00-05:00,21054.6,21055.7,21045.0,21047.6,470,200,0,13.639152,21069.2865,-77.704918
2,2025-01-03 06:50:00-05:00,21047.0,21047.5,21030.2,21031.9,812,230,0,13.907784,21068.4985,-95.072464
3,2025-01-03 06:55:00-05:00,21032.1,21039.2,21026.8,21037.7,482,240,0,13.800085,21067.672,-71.240106
4,2025-01-03 07:00:00-05:00,21037.8,21041.4,21029.9,21036.3,536,230,0,13.635794,21066.9035,-74.934037


### Backtest


In [20]:
def ibkr_commission(shares):
    """Return the commission for one order of `shares` shares at 0.005 per share."""
    return shares * 0.005

def calculate_position_size(entry_price, stop_loss, account_size, risk_per_trade_pct, max_risk_dollars, leverage=4):
    """
    Calculate the number of contracts (or shares) to buy, taking into account:
    - risk per trade as a percentage,
    - leverage,
    - maximum allowed absolute loss in dollars.

    Args:
        entry_price: expected fill price per share.
        stop_loss: stop price; the per-share risk is |entry_price - stop_loss|.
        account_size: current account equity.
        risk_per_trade_pct: fraction of the account to risk (0.02 means 2%).
        max_risk_dollars: absolute cap on the amount risked.
        leverage: multiple of account_size that may be deployed as notional.

    Returns:
        tuple: (position_size, dollar_risk). Always a pair, so callers can
        unpack it unconditionally; (0, 0.0) means "do not trade".
    """

    # Risk per contract
    R = abs(entry_price - stop_loss)
    # A per-share risk under one cent would produce an absurdly large size (or a
    # division by zero); a non-positive price cannot be sized against leverage.
    if R < 0.01 or entry_price <= 0:
        return 0, 0.0

    risk_dollars = account_size * risk_per_trade_pct
    allowed_risk = min(risk_dollars, max_risk_dollars)
    risk_based_size = allowed_risk / R
    leverage_based_size = (account_size * leverage) / entry_price
    # Whole shares only, and never a negative size (e.g. negative equity)
    position_size = max(0, int(min(risk_based_size, leverage_based_size)))

    return position_size, R * position_size

def run_backtest(df, investment, risk_per_trade_pct, atr_multiplier, max_risk_dollars):
    """Simulate the long-only Williams %R / SMA-200 strategy with an ATR trailing stop.

    Rules, evaluated bar by bar (starting at the second bar):
      * Entry signal: WILLR_10 < -80 and close > SMA_200 on a bar whose time of
        day lies in [09:30, 16:00]. The order is filled at the NEXT bar's open.
      * Initial stop: fill price - ATR_14 * atr_multiplier. On every later bar
        the stop is raised (never lowered) to close - ATR_14 * atr_multiplier.
      * Exit: the bar's low touches the stop (filled at the stop price);
        otherwise WILLR_10 > -20 and close < SMA_200 (filled at the close).
        The stop has priority because it would have been hit before the close.
      * A position still open on the last bar is closed at the last close and
        recorded with exit_reason "END_OF_DATA".

    Args:
        df: bars with columns date, open, low, close, ATR_14, SMA_200, WILLR_10;
            'date' values must expose .time().
        investment: starting capital.
        risk_per_trade_pct: fraction of equity risked per trade.
        atr_multiplier: stop distance expressed in ATR multiples.
        max_risk_dollars: absolute cap on the amount risked per trade.

    Returns:
        pandas.DataFrame: one row per closed trade. 'entry_date' is the signal
        bar's timestamp, and the *_at_entry indicators are also taken from the
        signal bar. The columns are present even when no trade was made.
    """
    trade_columns = [
        "entry_date", "exit_date", "entry_price", "exit_price", "pnl", "R:R",
        "shares", "equity", "atr_at_entry", "sma200_at_entry",
        "willr10_at_entry", "exit_reason", "fees",
    ]

    equity = investment
    trades = []

    in_position = False
    entry_price = 0
    entry_date = None
    no_of_shares = 0
    trailing_stop_price = 0
    dollar_risk = 0
    entry_idx = 0
    fees = 0

    market_open = time2(9, 30)
    market_close = time2(16, 0)

    # Pull the columns out once: positional access that does not depend on the
    # frame's index labels and avoids repeated Series lookups inside the loop.
    n_bars = len(df)
    dates = df['date'].tolist()
    opens = df['open'].to_numpy()
    lows = df['low'].to_numpy()
    closes = df['close'].to_numpy()
    atr = df['ATR_14'].to_numpy()
    sma = df['SMA_200'].to_numpy()
    willr = df['WILLR_10'].to_numpy()

    def close_position(bar_idx, exit_price, exit_reason):
        """Record the open trade as closed at `exit_price`; return the new cash balance."""
        pnl = (exit_price - entry_price) * no_of_shares - fees
        new_equity = equity + (no_of_shares * exit_price) - fees
        trades.append({
            "entry_date": entry_date,
            "exit_date": dates[bar_idx],
            "entry_price": entry_price,
            "exit_price": exit_price,
            "pnl": round(pnl, 2),
            "R:R": pnl / dollar_risk,
            "shares": no_of_shares,
            "equity": new_equity,
            "atr_at_entry": atr[entry_idx - 1],
            "sma200_at_entry": sma[entry_idx - 1],
            "willr10_at_entry": willr[entry_idx - 1],
            "exit_reason": exit_reason,
            "fees": fees,
        })
        return new_equity

    for i in range(1, n_bars):

        # --- EXIT LOGIC ---
        if in_position:
            if lows[i] <= trailing_stop_price:
                # NOTE(review): a bar that gaps open below the stop is still
                # filled at the stop price - confirm that is acceptable.
                exit_reason, exit_price = "TRAILING_STOP", trailing_stop_price
            elif willr[i] > -20 and closes[i] < sma[i]:
                exit_reason, exit_price = "WILLR_SMA", closes[i]
            else:
                exit_reason, exit_price = None, None

            if exit_reason is not None:
                equity = close_position(i, exit_price, exit_reason)
                in_position = False
                no_of_shares = 0
                continue  # No re-entry on the bar that closed the position

            # Calculate potential new stop and update only if higher
            potential_stop = round(closes[i] - (atr[i] * atr_multiplier), 2)
            trailing_stop_price = max(trailing_stop_price, potential_stop)
            continue

        # --- ENTRY LOGIC ---
        if i + 1 >= n_bars:
            continue  # A signal on the last bar has no next bar to fill on
        if not (market_open <= dates[i].time() <= market_close):
            continue
        if not (willr[i] < -80 and closes[i] > sma[i]):
            continue

        atr_value = atr[i]
        if atr_value <= 0:
            continue  # Avoid division by zero if ATR is zero

        fill_price = opens[i + 1]
        initial_stop = round(fill_price - atr_value * atr_multiplier, 2)
        if abs(fill_price - initial_stop) < 0.01:
            # Same minimum-risk rule as calculate_position_size; skipping here
            # guarantees the sizing call below returns a (size, risk) pair.
            continue

        # --- POSITION SIZING BASED ON ATR AND RISK ---
        shares, risk = calculate_position_size(
            entry_price=fill_price,
            stop_loss=initial_stop,
            account_size=equity,
            risk_per_trade_pct=risk_per_trade_pct,
            max_risk_dollars=max_risk_dollars,
            leverage=4
        )
        if shares <= 0:
            continue

        # Execute trade: commit the position state and pay for the shares
        in_position = True
        entry_date = dates[i]
        entry_price = fill_price
        entry_idx = i + 1
        trailing_stop_price = initial_stop
        no_of_shares = shares
        dollar_risk = risk
        fees = ibkr_commission(no_of_shares) * 2  # entry + exit, charged at exit
        equity -= no_of_shares * entry_price

    # Close position if still open at the end (commission included, trade
    # recorded so the trade log agrees with the reported earning)
    if in_position:
        equity = close_position(n_bars - 1, closes[-1], "END_OF_DATA")

    earning = round(equity - investment, 2)
    roi = round(earning / investment * 100, 2)

    print(f'EARNING: ${earning} ; ROI: {roi}%')
    return pd.DataFrame(trades, columns=trade_columns)
   
# Run the strategy on the prepared bars, then express the exit timestamps
# in New York time
trades_df = run_backtest(
    df,
    STARTING_CAPITAL,
    risk_per_trade_pct=0.02,
    atr_multiplier=10,
    max_risk_dollars=30000,
)
trades_df['exit_date'] = (
    pd.to_datetime(trades_df['exit_date'], utc=True)
    .dt.tz_convert('America/New_York')
)
trades_df.head()

EARNING: $-1006.2 ; ROI: -10.06%


Unnamed: 0,entry_date,exit_date,entry_price,exit_price,pnl,R:R,shares,equity,atr_at_entry,sma200_at_entry,willr10_at_entry,exit_reason,fees
0,2025-01-03 16:00:00-05:00,2025-01-07 08:30:00-05:00,21311.3,21555.0,243.69,1.344497,1,10243.69,18.124705,21074.879,-96.6,WILLR_SMA,0.01
1,2025-01-17 14:50:00-05:00,2025-01-21 10:50:00-05:00,21473.3,21433.7,-39.61,-0.203316,1,10204.08,19.481922,21265.408,-85.533454,TRAILING_STOP,0.01
2,2025-01-21 15:15:00-05:00,2025-01-23 04:15:00-05:00,21570.8,21754.92,184.11,0.993364,1,10388.19,18.534076,21538.4025,-93.303571,TRAILING_STOP,0.01
3,2025-01-23 15:00:00-05:00,2025-01-24 13:55:00-05:00,21802.1,21811.76,9.65,0.061239,1,10397.84,15.758428,21801.321,-96.354167,TRAILING_STOP,0.01
4,2025-01-29 09:30:00-05:00,2025-01-29 13:00:00-05:00,21450.7,21399.0,-51.71,-0.306303,1,10346.13,16.882216,21387.672,-85.32207,WILLR_SMA,0.01


#### Plot: Strategy vs Benchmark


In [None]:
import matplotlib.pyplot as plt

# Resample Buy & Hold to Daily timeframe for the chart
bh_resampled = df.resample('1D', on='date')['close'].last().dropna()
# Buy & Hold invests the whole starting capital at the first bar's close
shares_bh = STARTING_CAPITAL / df.iloc[0]['close']
bh_equity = bh_resampled * shares_bh

# Chart creation. The style is selected BEFORE the figure is created so that it
# also applies on the first run (figure-level settings are read at creation).
plt.style.use('seaborn-v0_8-darkgrid')
fig, ax = plt.subplots(figsize=(16, 8))

# Plot Buy & Hold
ax.plot(bh_equity.index, bh_equity,
        color='green', alpha=0.6, linewidth=1.5, label='Buy & Hold (Benchmark)')

# Plot Strategy: equity only changes when a trade is closed, hence a step line
ax.step(trades_df['exit_date'], trades_df['equity'],
        where='post', color='#0052CC', linewidth=2, label='Strategy')

# Initial Capital Line
ax.axhline(y=STARTING_CAPITAL, color='red', linestyle=':', alpha=0.5)

# Formatting (the title follows SYMBOL rather than a hard-coded ticker)
ax.set_title(f'Performance: Strategy vs Benchmark ({SYMBOL})',
             fontsize=16, fontweight='bold', pad=15)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Capital ($)', fontsize=12)
ax.legend(fontsize=12, loc='upper left', frameon=True, facecolor='white', framealpha=0.9)
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

fig.tight_layout()
plt.show()


### Stats

In [None]:
import quantstats as qs

# Drop the time zone from the timestamps; tz_convert(None) leaves tz-naive
# values expressed in UTC. Note that this overwrites trades_df['exit_date'].
trades_df['exit_date'] = (
    pd.to_datetime(trades_df['exit_date'], utc=True)
    .dt.tz_convert(None)
)
start_date = df['date'].iloc[0].tz_convert(None)

# Equity after each closed trade, indexed by exit time and seeded with the
# starting capital at the first bar
equity_series = trades_df.set_index('exit_date')['equity']
equity_series[start_date] = STARTING_CAPITAL
equity_series = equity_series.sort_index()

# One value per calendar day (last equity of the day); days without an exit
# carry the previous value forward
equity_daily = equity_series.resample('1D').last().ffill()

# Benchmark series from QuantStats for 'QQQ', cut to the strategy's date span
benchmark = qs.utils.download_returns('QQQ')
benchmark = benchmark.loc[equity_daily.index.min():equity_daily.index.max()]

# Put the strategy on the benchmark's calendar, again carrying values forward
equity_daily_aligned = equity_daily.reindex(benchmark.index).ffill()

# Day-over-day returns; the first day has no predecessor and is set to 0
returns = equity_daily_aligned.pct_change().fillna(0)

# QuantStats performance metrics of the strategy against the benchmark
qs.reports.metrics(returns, benchmark=benchmark)
