In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

class ScalpingBacktest:
    def __init__(self, tickers, start_date, end_date, initial_capital=10000):
        self.tickers = tickers
        self.start_date = start_date
        self.end_date = end_date
        self.initial_capital = initial_capital
        self.trades = []
        
    def calculate_atr(self, df, period=14):
        high_low = df['High'] - df['Low']
        high_close = np.abs(df['High'] - df['Close'].shift())
        low_close = np.abs(df['Low'] - df['Close'].shift())
        ranges = pd.concat([high_low, high_close, low_close], axis=1)
        true_range = np.max(ranges, axis=1)
        return true_range.rolling(period).mean()
    
    def calculate_proper_rsi(self, series, period=14):
        delta = series.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
        rs = gain / loss
        rsi = 100 - (100 / (1 + rs))
        return rsi
    
    def check_entry_signal(self, df, idx):
        """Check if entry conditions are met at given index"""
        if idx < 20:
            return False
        
        try:
            current = df.iloc[idx]
            
            # Calculate indicators up to current point
            df_subset = df.iloc[:idx+1].copy()
            df_subset['EMA9'] = df_subset['Close'].ewm(span=9, adjust=False).mean()
            df_subset['EMA20'] = df_subset['Close'].ewm(span=20, adjust=False).mean()
            df_subset['VWAP'] = (df_subset['Close'] * df_subset['Volume']).cumsum() / df_subset['Volume'].cumsum()
            df_subset['RSI'] = self.calculate_proper_rsi(df_subset['Close'], 14)
            df_subset['ATR'] = self.calculate_atr(df_subset)
            
            latest = df_subset.iloc[-1]
            
            # Extract scalar values
            ema9_val = float(latest['EMA9'])
            ema20_val = float(latest['EMA20'])
            close_val = float(latest['Close'])
            vwap_val = float(latest['VWAP'])
            rsi_val = float(latest['RSI'])
            atr_val = float(latest['ATR'])
            volume_val = float(latest['Volume'])
            
            # Check if we have valid data
            if pd.isna(rsi_val) or pd.isna(atr_val):
                return False
            
            # ‚úÖ RELAXED ENTRY CONDITIONS (More trades)
            
            # 1. Trend confirmation - RELAXED
            ema_bullish = ema9_val > ema20_val
            
            # 2. Price above VWAP - RELAXED (allow near VWAP)
            above_vwap = close_val > (vwap_val * 0.998)  # Within 0.2% of VWAP
            
            # 3. RSI in sweet spot - WIDER RANGE
            rsi_ok = 40 < rsi_val < 75  # Was: 45 < rsi < 70
            
            # 4. Recent momentum - RELAXED (allow small negative)
            if len(df_subset) >= 4:
                momentum_20min = (df_subset['Close'].iloc[-1] - df_subset['Close'].iloc[-4]) / df_subset['Close'].iloc[-4] * 100
                positive_momentum = momentum_20min > -0.5  # Was: > 0 (now allows small pullback)
            else:
                positive_momentum = False
            
            # 5. Volume - SIGNIFICANTLY RELAXED
            avg_vol = df_subset['Volume'].tail(20).mean()
            rel_vol = volume_val / avg_vol if avg_vol > 0 else 0
            volume_surge = rel_vol >= 1.2  # Was: >= 1.5 (30% easier to meet)
            
            # 6. Volatility - RELAXED
            atr_pct = (atr_val / close_val) * 100
            sufficient_volatility = atr_pct > 0.2  # Was: > 0.3
            
            # All conditions must be met
            return (ema_bullish and above_vwap and rsi_ok and positive_momentum 
                    and volume_surge and sufficient_volatility)
        
        except (KeyError, ValueError, TypeError, IndexError):
            return False
    
    def simulate_trade(self, df, entry_idx):
        """Simulate a trade from entry point"""
        try:
            entry_bar = df.iloc[entry_idx]
            
            # Calculate ATR at entry
            df_subset = df.iloc[:entry_idx+1].copy()
            df_subset['ATR'] = self.calculate_atr(df_subset)
            entry_atr = float(df_subset['ATR'].iloc[-1])
            
            entry_price = float(entry_bar['Close'])
            stop_loss = entry_price - (1.5 * entry_atr)
            target = entry_price + (2 * entry_atr)
            
            # Look forward for exit (max 20 minutes = 4 bars)
            exit_idx = None
            exit_price = None
            exit_reason = None
            bars_held = 0
            
            for i in range(entry_idx + 1, min(entry_idx + 5, len(df))):
                bar = df.iloc[i]
                bars_held += 1
                
                bar_low = float(bar['Low'])
                bar_high = float(bar['High'])
                bar_close = float(bar['Close'])
                
                # Check stop loss
                if bar_low <= stop_loss:
                    exit_idx = i
                    exit_price = stop_loss
                    exit_reason = 'STOP'
                    break
                
                # Check target
                if bar_high >= target:
                    exit_idx = i
                    exit_price = target
                    exit_reason = 'TARGET'
                    break
                
                # Time exit after 20 minutes (4 bars)
                if bars_held >= 4:
                    exit_idx = i
                    exit_price = bar_close
                    exit_reason = 'TIME'
                    break
            
            # If no exit found (end of data), exit at last available price
            if exit_idx is None:
                exit_idx = len(df) - 1
                exit_price = float(df.iloc[exit_idx]['Close'])
                exit_reason = 'EOD'
                bars_held = exit_idx - entry_idx
            
            # Calculate trade results
            pnl = exit_price - entry_price
            pnl_pct = (pnl / entry_price) * 100
            
            return {
                'entry_time': df.index[entry_idx],
                'exit_time': df.index[exit_idx],
                'entry_price': entry_price,
                'exit_price': exit_price,
                'stop_loss': stop_loss,
                'target': target,
                'pnl': pnl,
                'pnl_pct': pnl_pct,
                'bars_held': bars_held,
                'exit_reason': exit_reason,
                'atr': entry_atr
            }
        
        except (KeyError, ValueError, TypeError, IndexError):
            return None
    
    def run_backtest(self):
        """Run backtest on all tickers"""
        print(f"\n{'='*100}")
        print(f"üîÑ RUNNING BACKTEST: {self.start_date} to {self.end_date}")
        print(f"{'='*100}")
        
        # Validate dates
        try:
            start_dt = datetime.strptime(self.start_date, '%Y-%m-%d')
            end_dt = datetime.strptime(self.end_date, '%Y-%m-%d')
            
            if end_dt > datetime.now():
                print("\n‚ö†Ô∏è  WARNING: End date is in the future. Using today's date instead.")
                self.end_date = datetime.now().strftime('%Y-%m-%d')
                end_dt = datetime.now()
            
            days_back = (datetime.now() - start_dt).days
            if days_back > 60:
                print(f"\n‚ö†Ô∏è  WARNING: Start date is {days_back} days ago. yfinance 5-min data limited to ~60 days.")
                print("    Consider using a more recent start date for better data availability.\n")
        except ValueError:
            print("\n‚ùå Invalid date format. Use 'YYYY-MM-DD'")
            return None
        
        all_trades = []
        tickers_tested = 0
        tickers_with_data = 0
        
        print(f"Testing {len(self.tickers[:50])} tickers...\n")
        
        for ticker in self.tickers[:50]:
            try:
                tickers_tested += 1
                print(f"[{tickers_tested}/{min(50, len(self.tickers))}] Testing {ticker:6}...", end='\r')
                
                # Download data
                df = yf.download(ticker, start=self.start_date, end=self.end_date, 
                               interval='5m', progress=False)
                
                if df.empty or len(df) < 30:
                    continue
                
                tickers_with_data += 1
                
                # Scan for entry signals
                for idx in range(20, len(df) - 5):
                    if self.check_entry_signal(df, idx):
                        trade = self.simulate_trade(df, idx)
                        if trade:
                            trade['ticker'] = ticker
                            all_trades.append(trade)
                            idx += 5
                
            except Exception as e:
                continue
        
        print(f"\n\n‚úì Tested {tickers_tested} tickers | {tickers_with_data} had sufficient data | {len(all_trades)} trades found\n")
        
        self.trades = all_trades
        return self.analyze_results()
    
    def analyze_results(self):
        """Analyze backtest results"""
        if not self.trades:
            print("\n" + "="*100)
            print("‚ùå NO TRADES GENERATED")
            print("="*100)
            print("\nüîç POSSIBLE REASONS:")
            print("  1. Date range is in the future or too far back (>60 days)")
            print("  2. Market was closed during this period")
            print("  3. No stocks met all entry criteria (too restrictive)")
            print("  4. Insufficient intraday data available")
            print("\nüí° SUGGESTIONS:")
            print("  ‚Ä¢ Use dates within the last 30 days for best data")
            print("  ‚Ä¢ Try loosening filters (lower RSI threshold, reduce volume multiplier)")
            print("  ‚Ä¢ Test during market hours data (9:30 AM - 4:00 PM ET)")
            print("  ‚Ä¢ Increase the number of tickers tested")
            print("="*100)
            return None
        
        df_trades = pd.DataFrame(self.trades)
        
        # Calculate statistics
        total_trades = len(df_trades)
        winning_trades = len(df_trades[df_trades['pnl'] > 0])
        losing_trades = len(df_trades[df_trades['pnl'] <= 0])
        win_rate = (winning_trades / total_trades) * 100
        
        avg_win = df_trades[df_trades['pnl'] > 0]['pnl_pct'].mean() if winning_trades > 0 else 0
        avg_loss = df_trades[df_trades['pnl'] <= 0]['pnl_pct'].mean() if losing_trades > 0 else 0
        
        avg_pnl = df_trades['pnl_pct'].mean()
        total_pnl_pct = df_trades['pnl_pct'].sum()
        
        # Risk metrics
        sharpe_ratio = (df_trades['pnl_pct'].mean() / df_trades['pnl_pct'].std()) * np.sqrt(252) if len(df_trades) > 1 else 0
        max_drawdown = df_trades['pnl_pct'].cumsum().cummax() - df_trades['pnl_pct'].cumsum()
        max_dd = max_drawdown.max()
        
        # Profit factor
        gross_profit = df_trades[df_trades['pnl'] > 0]['pnl_pct'].sum()
        gross_loss = abs(df_trades[df_trades['pnl'] <= 0]['pnl_pct'].sum())
        profit_factor = gross_profit / gross_loss if gross_loss != 0 else 0
        
        # Exit reason breakdown
        exit_reasons = df_trades['exit_reason'].value_counts()
        
        # Display results
        print("\n" + "="*100)
        print("üìä BACKTEST RESULTS")
        print("="*100)
        print(f"\nüéØ PERFORMANCE METRICS")
        print(f"  Total Trades: {total_trades}")
        print(f"  Winners: {winning_trades} ({win_rate:.1f}%)")
        print(f"  Losers: {losing_trades} ({100-win_rate:.1f}%)")
        print(f"  Average Win: {avg_win:.2f}%")
        print(f"  Average Loss: {avg_loss:.2f}%")
        print(f"  Average Trade: {avg_pnl:.2f}%")
        print(f"  Profit Factor: {profit_factor:.2f}")
        print(f"  Sharpe Ratio: {sharpe_ratio:.2f}")
        print(f"  Max Drawdown: {max_dd:.2f}%")
        
        print(f"\nüí∞ P&L ANALYSIS")
        print(f"  Cumulative P&L: {total_pnl_pct:.2f}%")
        print(f"  Best Trade: {df_trades['pnl_pct'].max():.2f}%")
        print(f"  Worst Trade: {df_trades['pnl_pct'].min():.2f}%")
        
        print(f"\n‚è±Ô∏è EXIT BREAKDOWN")
        for reason, count in exit_reasons.items():
            pct = (count / total_trades) * 100
            print(f"  {reason}: {count} ({pct:.1f}%)")
        
        print(f"\nüèÜ TOP 10 TRADES")
        print("-"*100)
        top_trades = df_trades.nlargest(10, 'pnl_pct')[['ticker', 'entry_time', 'pnl_pct', 'exit_reason', 'bars_held']]
        for idx, trade in top_trades.iterrows():
            print(f"  {trade['ticker']:6} | {trade['entry_time']} | P&L: {trade['pnl_pct']:+6.2f}% | {trade['exit_reason']:6} | {trade['bars_held']} bars")
        
        print("\n" + "="*100)
        
        # Save results
        filename = f"backtest_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        df_trades.to_csv(filename, index=False)
        print(f"\nüíæ Full trade log saved to: {filename}")
        
        return df_trades


# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Script entry point: configure the run, print a banner describing the
    # active filters and the date window, run the backtest, then print
    # follow-up hints.

    # ========== CUSTOMIZE YOUR SETTINGS HERE ==========

    # Select tickers to backtest
    test_tickers = [
        "NVDA", "AAPL", "MSFT", "TSLA", "AMD", "GOOGL", "META", "AMZN",
        "NFLX", "PLTR", "COIN", "HOOD", "RBLX", "SNOW", "CRWD", "AVGO",
        "QCOM", "AMAT", "INTC", "MU", "CRM", "NOW", "ORCL", "ADBE"
    ]

    # Set your backtest date range (MUST BE WITHIN LAST 60 DAYS!)
    # Using automatic recent dates
    END_DATE = datetime.now().strftime('%Y-%m-%d')
    START_DATE = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')

    # Or manually set specific dates (uncomment to use):
    # START_DATE = '2024-10-20'
    # END_DATE = '2024-11-07'

    # Initial capital (passed to ScalpingBacktest, which only stores it)
    INITIAL_CAPITAL = 10000

    # ========== END CUSTOMIZATION ==========

    # Banner: printed once (it was previously emitted twice in a row).
    print("\n" + "="*100)
    print("üîÑ STARTING BACKTEST - RELAXED PARAMETERS VERSION")
    print("="*100)
    print("\nüìã RELAXED FILTERS:")
    print("  ‚Ä¢ RSI Range: 40-75 (was 45-70)")
    print("  ‚Ä¢ Volume: 1.2x average (was 1.5x)")
    print("  ‚Ä¢ Momentum: >-0.5% (was >0%)")
    print("  ‚Ä¢ VWAP: Within 0.2% (was exact)")
    print("  ‚Ä¢ ATR: >0.2% (was >0.3%)")
    print("\n" + "="*100)
    print(f"üìÖ Date Range: {START_DATE} to {END_DATE}")

    # Calculate and display date info
    try:
        start_dt = datetime.strptime(START_DATE, '%Y-%m-%d')
        end_dt = datetime.strptime(END_DATE, '%Y-%m-%d')
    except ValueError:
        # Malformed dates are reported by run_backtest() below; here the
        # informational summary is simply skipped.
        pass
    else:
        days_range = (end_dt - start_dt).days
        days_ago = (datetime.now() - start_dt).days

        print(f"üìä Period: {days_range} days | Data from {days_ago} days ago")

        if days_ago > 60:
            print("‚ö†Ô∏è  WARNING: Data may be limited (>60 days back)")

    print(f"üéØ Testing {len(test_tickers)} tickers")
    print(f"üí∞ Initial Capital: ${INITIAL_CAPITAL:,}")
    print("="*100)

    # Initialize and run backtest
    bt = ScalpingBacktest(
        tickers=test_tickers,
        start_date=START_DATE,
        end_date=END_DATE,
        initial_capital=INITIAL_CAPITAL
    )

    results = bt.run_backtest()

    if results is not None:
        print("\n" + "="*100)
        print("‚úÖ BACKTEST COMPLETE")
        print("="*100)
        print("\nüí° NEXT STEPS:")
        print("  1. Review win rate and profit factor")
        print("  2. Analyze which exit reasons are most profitable")
        print("  3. Consider adjusting filters if win rate < 50%")
        print("  4. Test on larger date ranges for statistical significance")
        print("  5. Paper trade the strategy before going live")
        print("\nüìù TO CUSTOMIZE:")
        print("  ‚Ä¢ Change START_DATE and END_DATE (must be within last 60 days)")
        print("  ‚Ä¢ Modify test_tickers to backtest different stocks")
        print("  ‚Ä¢ Adjust INITIAL_CAPITAL for position sizing")
        print("="*100)
    else:
        print("\n‚ö†Ô∏è  Backtest failed. Please check the error messages above.")


🔄 STARTING BACKTEST - RELAXED PARAMETERS VERSION

📋 RELAXED FILTERS:
  • RSI Range: 40-75 (was 45-70)
  • Volume: 1.2x average (was 1.5x)
  • Momentum: >-0.5% (was >0%)
  • VWAP: Within 0.2% (was exact)
  • ATR: >0.2% (was >0.3%)


🔄 STARTING BACKTEST - RELAXED PARAMETERS VERSION

📋 RELAXED FILTERS:
  • RSI Range: 40-75 (was 45-70)
  • Volume: 1.2x average (was 1.5x)
  • Momentum: >-0.5% (was >0%)
  • VWAP: Within 0.2% (was exact)
  • ATR: >0.2% (was >0.3%)

📅 Date Range: 2025-10-13 to 2025-11-12
📊 Period: 30 days | Data from 30 days ago
🎯 Testing 24 tickers
💰 Initial Capital: $10,000

🔄 RUNNING BACKTEST: 2025-10-13 to 2025-11-12
Testing 24 tickers...

[24/24] Testing ADBE  ...

✓ Tested 24 tickers | 24 had sufficient data | 0 trades found


❌ NO TRADES GENERATED

🔍 POSSIBLE REASONS:
  1. Date range is in the future or too far back (>60 days)
  2. Market was closed during this period
  3. No stocks met all entry criteria (too restr