<a href="https://colab.research.google.com/github/NeilMitra/2WD-ObstacleAvoidingRobot/blob/master/Daily_Mover_Adjusted.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Algorithm

In [1]:
!pip install pandas numpy yfinance



In [15]:
# imports

import pandas as pd
import numpy as np
import yfinance as yf
import datetime
import time
import random
import os
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
import warnings
warnings.filterwarnings('ignore')

In [16]:
def get_top_100_sp500_by_market_cap():
    """
    Get the top 100 S&P 500 stocks by market capitalization with rate limit handling.
    Caches results to avoid repeated API calls.
    """
    cache_file = "sp500_top100_cache.csv"

    # Check if we have a recent cache file (less than 24 hours old)
    if os.path.exists(cache_file) and (datetime.datetime.now() -
            datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))).total_seconds() < 86400:
        print(f"Loading top 100 stocks from cache file: {cache_file}")
        return pd.read_csv(cache_file)['Symbol'].tolist()

    # Get S&P 500 tickers
    sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    sp500_table = pd.read_html(sp500_url)[0]
    sp500_symbols = sp500_table['Symbol'].tolist()

    # Get market cap for all symbols with rate limiting
    market_caps = {}
    for i, symbol in enumerate(sp500_symbols):
        try:
            # Add random delay between requests to avoid rate limiting
            if i > 0 and i % 5 == 0:
                delay = random.uniform(1.0, 3.0)
                print(f"Sleeping for {delay:.2f} seconds to avoid rate limiting...")
                time.sleep(delay)

            ticker = yf.Ticker(symbol)
            market_cap = ticker.info.get('marketCap', 0)
            if market_cap:
                market_caps[symbol] = market_cap
                print(f"[{i+1}/{len(sp500_symbols)}] {symbol}: ${market_cap:,}")
            else:
                print(f"[{i+1}/{len(sp500_symbols)}] {symbol}: No market cap data available")
        except Exception as e:
            print(f"[{i+1}/{len(sp500_symbols)}] Error fetching data for {symbol}: {e}")
            # If we hit a rate limit, pause for a longer time
            if "rate limit" in str(e).lower():
                pause_time = random.uniform(10.0, 15.0)
                print(f"Rate limit detected. Pausing for {pause_time:.2f} seconds...")
                time.sleep(pause_time)

    # Sort by market cap and get top 100
    sorted_by_market_cap = sorted(market_caps.items(), key=lambda x: x[1], reverse=True)
    top_100 = [symbol for symbol, _ in sorted_by_market_cap[:100]]

    # Save to cache file
    pd.DataFrame({'Symbol': top_100}).to_csv(cache_file, index=False)
    print(f"Saved top 100 stocks to cache file: {cache_file}")

    return top_100

def get_trading_date(date, calendar):
    """
    Get the nearest trading date for a given date.
    If date is a weekend or holiday, return the previous trading day.
    """
    us_bd = CustomBusinessDay(calendar=calendar)
    dt = pd.Timestamp(date)

    # If it's a business day, return the date
    if dt in calendar:
        return dt

    # Otherwise, return the previous business day
    return dt - us_bd

def analyze_date_patterns(tickers, years=10):
    """
    Analyze date patterns for a list of ticker symbols over a specified number of years.
    Implements caching and rate limit handling.

    Parameters:
    tickers (list): List of ticker symbols
    years (int): Number of years to analyze

    Returns:
    tuple: (up_patterns, down_patterns) where each is a dictionary mapping dates to stocks
    """
    # Create directory for cached data
    cache_dir = "stock_data_cache"
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    # Create a calendar for US trading days
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start=datetime.datetime.now() - datetime.timedelta(days=years*365),
                           end=datetime.datetime.now())

    # Initialize pattern dictionaries
    up_patterns = {}  # date -> [symbols that consistently go up]
    down_patterns = {}  # date -> [symbols that consistently go down]

    # Get current date
    now = datetime.datetime.now()

    # For each ticker, get historical data and analyze patterns
    for i, ticker in enumerate(tickers):
        print(f"[{i+1}/{len(tickers)}] Analyzing {ticker}...")

        try:
            # Define cache file path
            cache_file = os.path.join(cache_dir, f"{ticker}_data.csv")

            # Check if we have cached data and it's recent (less than 24 hours old)
            if os.path.exists(cache_file) and (datetime.datetime.now() -
                    datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))).total_seconds() < 86400:
                print(f"  Loading cached data for {ticker}")
                historical_data = pd.read_csv(cache_file, index_col=0, parse_dates=True)
            else:
                # Implement rate limiting - add delay between batches of requests
                if i > 0 and i % 3 == 0:
                    delay = random.uniform(2.0, 5.0)
                    print(f"  Sleeping for {delay:.2f} seconds to avoid rate limiting...")
                    time.sleep(delay)

                # Get historical data for the past 'years' years
                start_date = (now - datetime.timedelta(days=years*365)).strftime('%Y-%m-%d')
                print(f"  Downloading data for {ticker} from {start_date} to now...")

                # Try with exponential backoff for rate limiting
                max_retries = 5
                retry_count = 0
                backoff_time = 1

                while retry_count < max_retries:
                    try:
                        historical_data = yf.download(ticker, start=start_date, progress=False)
                        break
                    except Exception as e:
                        if "rate limit" in str(e).lower() or "connection" in str(e).lower():
                            retry_count += 1
                            if retry_count >= max_retries:
                                raise Exception(f"Max retries exceeded for {ticker}")

                            backoff_time *= 2  # Exponential backoff
                            wait_time = backoff_time + random.uniform(0, 1)
                            print(f"  Rate limit hit. Retry {retry_count}/{max_retries}. Waiting {wait_time:.2f} seconds...")
                            time.sleep(wait_time)
                        else:
                            raise

                # Save to cache
                historical_data.to_csv(cache_file)
                print(f"  Saved data to cache: {cache_file}")

            if historical_data.empty:
                print(f"  No data available for {ticker}")
                continue

            # Create month-day column for grouping
            historical_data['month_day'] = historical_data.index.strftime('%m-%d')

            # Calculate daily returns
            if 'return' not in historical_data.columns:
                historical_data['return'] = historical_data['Close'].pct_change()

            # Group by month-day and analyze patterns
            for month_day, group in historical_data.groupby('month_day'):
                if len(group) >= years/2:  # Require at least half of the possible years to have data
                    positive_returns = (group['return'] > 0).sum()
                    total_returns = (~group['return'].isna()).sum()

                    # If stock goes up on this date at least 80% of the time
                    if total_returns > 0 and positive_returns / total_returns >= 0.8:
                        if month_day not in up_patterns:
                            up_patterns[month_day] = []
                        up_patterns[month_day].append((ticker, positive_returns, total_returns))

                    # If stock goes down on this date at least 80% of the time
                    if total_returns > 0 and positive_returns / total_returns <= 0.2:
                        if month_day not in down_patterns:
                            down_patterns[month_day] = []
                        down_patterns[month_day].append((ticker, total_returns - positive_returns, total_returns))

        except Exception as e:
            print(f"  Error analyzing {ticker}: {e}")

            # If we hit a rate limit or connection error, pause for longer
            if "rate limit" in str(e).lower() or "connection" in str(e).lower():
                pause_time = random.uniform(15.0, 30.0)
                print(f"  Rate limit or connection issue detected. Pausing for {pause_time:.2f} seconds...")
                time.sleep(pause_time)

    return up_patterns, down_patterns

def format_results(patterns, pattern_type):
    """
    Format pattern results for display.

    Parameters:
    patterns (dict): Dictionary mapping dates to stocks with patterns
    pattern_type (str): "UP" or "DOWN"

    Returns:
    DataFrame: Formatted results
    """
    results = []

    for date, stocks in patterns.items():
        for stock, matches, total in stocks:
            month, day = date.split('-')
            current_year = datetime.datetime.now().year

            # Create a list of dates for the past 10 years on which this pattern occurs
            historical_dates = []
            for year in range(current_year - 10, current_year + 1):
                try:
                    pattern_date = datetime.datetime(year, int(month), int(day))
                    day_of_week = pattern_date.strftime('%A')

                    # Check if it's a weekend
                    is_weekend = day_of_week in ['Saturday', 'Sunday']

                    historical_dates.append(f"{pattern_date.strftime('%Y-%m-%d')} ({day_of_week}){' - Weekend' if is_weekend else ''}")
                except ValueError:
                    # Skip invalid dates (e.g., February 29 in non-leap years)
                    pass

            results.append({
                'Stock': stock,
                'Pattern Date': date,
                'Pattern Type': pattern_type,
                'Success Rate': f"{matches}/{total} ({(matches/total)*100:.1f}%)",
                'Historical Dates': ', '.join(historical_dates)
            })

    if results:
        return pd.DataFrame(results)
    else:
        return pd.DataFrame(columns=['Stock', 'Pattern Date', 'Pattern Type', 'Success Rate', 'Historical Dates'])

def main():
    """
    Main function to run the stock date pattern screener.
    """
    print("==== Stock Date Pattern Screener ====")
    print("This script uses caching to minimize API requests and avoid rate limiting.")

    # Allow for batch mode to process stocks in smaller batches
    batch_size = 10  # Process 10 stocks at a time

    print("\nGetting top 100 S&P 500 stocks by market cap...")
    top_100_stocks = get_top_100_sp500_by_market_cap()
    print(f"Retrieved {len(top_100_stocks)} stocks.")

    # Process in batches to avoid rate limiting
    all_up_patterns = {}
    all_down_patterns = {}

    for i in range(0, len(top_100_stocks), batch_size):
        batch = top_100_stocks[i:i+batch_size]
        print(f"\nProcessing batch {i//batch_size + 1}/{(len(top_100_stocks) + batch_size - 1)//batch_size}")
        print(f"Analyzing date patterns for stocks {i+1}-{min(i+batch_size, len(top_100_stocks))} of {len(top_100_stocks)}...")

        up_patterns, down_patterns = analyze_date_patterns(batch)

        # Merge results
        for date, stocks in up_patterns.items():
            if date not in all_up_patterns:
                all_up_patterns[date] = []
            all_up_patterns[date].extend(stocks)

        for date, stocks in down_patterns.items():
            if date not in all_down_patterns:
                all_down_patterns[date] = []
            all_down_patterns[date].extend(stocks)

        # Save intermediate results
        print("Saving intermediate results...")
        intermediate_up = format_results(all_up_patterns, "UP")
        intermediate_down = format_results(all_down_patterns, "DOWN")

        intermediate_results = pd.concat([intermediate_up, intermediate_down]).reset_index(drop=True)
        if not intermediate_results.empty:
            intermediate_results.to_csv(f"stock_date_patterns_batch_{i//batch_size + 1}.csv", index=False)

        # Wait between batches to avoid rate limiting
        if i + batch_size < len(top_100_stocks):
            delay = random.uniform(5.0, 10.0)
            print(f"Waiting {delay:.2f} seconds before processing next batch...")
            time.sleep(delay)

    print("\nFormatting final results...")
    up_results = format_results(all_up_patterns, "UP")
    down_results = format_results(all_down_patterns, "DOWN")

    # Combine results
    all_results = pd.concat([up_results, down_results]).reset_index(drop=True)

    # Sort by pattern date
    all_results = all_results.sort_values(['Pattern Date', 'Stock']).reset_index(drop=True)

    print("\nTop date patterns found:")
    if not all_results.empty:
        # Print top 10 most consistent patterns
        display_results = all_results.copy()
        display_results['Success Rate Numeric'] = display_results['Success Rate'].apply(
            lambda x: float(x.split('(')[1].split('%')[0])
        )
        display_results = display_results.sort_values('Success Rate Numeric', ascending=False).head(10)
        display_results = display_results.drop('Success Rate Numeric', axis=1)
        print(display_results)

        # Save results to CSV
        output_file = "stock_date_patterns_final.csv"
        all_results.to_csv(output_file, index=False)
        print(f"\nFull results saved to {output_file}")
    else:
        print("No significant patterns found.")

    print("\nAnalysis complete!")

if __name__ == "__main__":
    main()

==== Stock Date Pattern Screener ====
This script uses caching to minimize API requests and avoid rate limiting.

Getting top 100 S&P 500 stocks by market cap...
Loading top 100 stocks from cache file: sp500_top100_cache.csv
Retrieved 100 stocks.

Processing batch 1/10
Analyzing date patterns for stocks 1-10 of 100...
[1/10] Analyzing AAPL...
  Loading cached data for AAPL
  Error analyzing AAPL: 'Index' object has no attribute 'strftime'
[2/10] Analyzing MSFT...
  Loading cached data for MSFT
  Error analyzing MSFT: 'Index' object has no attribute 'strftime'
[3/10] Analyzing NVDA...
  Loading cached data for NVDA
  Error analyzing NVDA: 'Index' object has no attribute 'strftime'
[4/10] Analyzing AMZN...
  Loading cached data for AMZN
  Error analyzing AMZN: 'Index' object has no attribute 'strftime'
[5/10] Analyzing GOOG...
  Loading cached data for GOOG
  Error analyzing GOOG: 'Index' object has no attribute 'strftime'
[6/10] Analyzing GOOGL...
  Loading cached data for GOOGL
  Error

KeyboardInterrupt: 

# Backtesting