In [7]:
# NSE symbols in Yahoo Finance notation (".NS" suffix), one company per line.
indian_stocks = [
    'RELIANCE.NS',    # Reliance Industries
    'TCS.NS',         # Tata Consultancy Services
    'HDFCBANK.NS',    # HDFC Bank
    'INFY.NS',        # Infosys
    'HINDUNILVR.NS',  # Hindustan Unilever
    'ICICIBANK.NS',   # ICICI Bank
    'SBIN.NS',        # State Bank of India
    'BAJFINANCE.NS',  # Bajaj Finance
    'WIPRO.NS',       # Wipro
    'AXISBANK.NS',    # Axis Bank
]

# Short list of US symbols used for quick test runs.
us_stocks = [
    'AAPL',
    'MSFT',
    'GOOGL',
]

In [8]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import os
import time
from datetime import datetime

# --- Configuration ---
# Folder (relative to the working directory) that receives one CSV per ticker.
DATA_DIR = "hourly_data"

# Download window: from one calendar year before "now" ...
START_DATE = datetime.today() - pd.DateOffset(years=1)
# ... up to today's date, formatted as a YYYY-MM-DD string.
END_DATE = f"{datetime.today():%Y-%m-%d}"

# --- Get S&P 500 Tickers ---
def get_sp500_tickers():
    """Scrape the S&P 500 constituent symbols from Wikipedia.

    The first cell of every data row in the table with id ``constituents`` is
    taken as the symbol, and dots are replaced with dashes (``BRK.B`` becomes
    ``BRK-B``).

    Returns:
        list[str]: Ticker symbols in the order they appear in the table.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page does not contain the ``constituents`` table.
    """
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    # NOTE(review): Wikimedia asks clients to send a descriptive User-Agent and
    # may reject generic library defaults; adjust this string as appropriate.
    headers = {"User-Agent": "sp500-ticker-notebook/1.0 (python-requests)"}

    # A timeout keeps the notebook from hanging forever on a stalled connection,
    # and raise_for_status() surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'constituents'})
    if table is None:
        raise ValueError(
            f"Could not find the 'constituents' table at {url}; "
            "the page layout may have changed"
        )

    tickers = []
    for row in table.find_all('tr')[1:]:  # Skip header
        cells = row.find_all('td')
        if cells:
            ticker = cells[0].text.strip()
            tickers.append(ticker.replace('.', '-'))  # Fix tickers like BRK.B

    return tickers
def get_indian_tickers():
    """Return the predefined list of common Indian stocks (NSE).

    A new list is returned on every call, so callers may slice, sort or extend
    the result without modifying the module-level ``indian_stocks``.

    Returns:
        list[str]: A copy of ``indian_stocks``.
    """
    return list(indian_stocks)
# --- Download Hourly Data ---
def download_hourly_data(tickers, max_retries=3):
    """Download hourly price data for each ticker and save it as a CSV file.

    One file named ``<ticker>_hourly.csv`` is written to ``DATA_DIR`` per
    ticker. Tickers that already have such a file are skipped, so the function
    can be re-run to resume an interrupted download.

    Args:
        tickers: Iterable of ticker symbols accepted by ``yf.download``. It must
            support ``len()`` because progress is printed as ``i/len``.
        max_retries: Maximum number of download attempts per ticker when
            ``yf.download`` raises an exception.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # Single source of truth for the file naming scheme: used both to build
    # output names and to recognise files from previous runs.
    hourly_suffix = "_hourly.csv"

    # Create data directory
    os.makedirs(DATA_DIR, exist_ok=True)

    # Recover the ticker from each existing file by removing the exact suffix.
    # Splitting on '.' is not enough: it yields "AAPL_hourly" for AAPL and
    # "RELIANCE" for RELIANCE.NS, neither of which equals the ticker.
    downloaded = {
        name[:-len(hourly_suffix)]
        for name in os.listdir(DATA_DIR)
        if name.endswith(hourly_suffix)
    }

    for i, ticker in enumerate(tickers):
        if ticker in downloaded:
            print(f"Skipping {ticker} (already downloaded)")
            continue

        print(f"Downloading {ticker} ({i+1}/{len(tickers)})")

        for attempt in range(max_retries):
            try:
                # Download data with 1-hour intervals
                data = yf.download(
                    ticker,
                    start=START_DATE,
                    end=END_DATE,
                    interval='1h',
                    progress=True
                )

                if not data.empty:
                    # Save to CSV
                    filename = os.path.join(DATA_DIR, f"{ticker}{hourly_suffix}")
                    data.to_csv(filename)
                    print(f"Saved {filename}")
                else:
                    # An empty frame is reported but not retried.
                    print(f"No data found for {ticker}")

                break  # No exception raised - exit retry loop

            except Exception as e:
                # Best effort: report the failure and move on so that one bad
                # ticker does not abort the whole batch.
                print(f"Attempt {attempt+1} failed for {ticker}: {str(e)}")
                if attempt < max_retries - 1:
                    time.sleep(2)  # Add delay between retries
                else:
                    print(f"Failed to download {ticker} after {max_retries} attempts")

        time.sleep(1)  # Be polite to Yahoo's servers

In [9]:

# Get all S&P 500 tickers by scraping the Wikipedia constituents table.
# NOTE: a later cell rebinds `sp500_tickers` to the short `us_stocks` list.
sp500_tickers = get_sp500_tickers()

  for row in table.findAll('tr')[1:]:  # Skip header
  cells = row.findAll('td')


In [10]:
# Keep this run small: only the first three NSE symbols ...
indian_tickers = get_indian_tickers()[:3]
# ... and the short US list in place of the full S&P 500 universe.
sp500_tickers = us_stocks

In [11]:
# Fetch the Indian symbols first, then the US symbols.
for ticker_group in (indian_tickers, sp500_tickers):
    download_hourly_data(ticker_group)

[*********************100%***********************]  1 of 1 completed


Downloading RELIANCE.NS (1/3)
Saved hourly_data/RELIANCE.NS_hourly.csv


[*********************100%***********************]  1 of 1 completed

Downloading TCS.NS (2/3)
Saved hourly_data/TCS.NS_hourly.csv



[*********************100%***********************]  1 of 1 completed

Downloading HDFCBANK.NS (3/3)
Saved hourly_data/HDFCBANK.NS_hourly.csv



[*********************100%***********************]  1 of 1 completed

Downloading AAPL (1/3)
Saved hourly_data/AAPL_hourly.csv



[*********************100%***********************]  1 of 1 completed

Downloading MSFT (2/3)
Saved hourly_data/MSFT_hourly.csv



[*********************100%***********************]  1 of 1 completed

Downloading GOOGL (3/3)





Saved hourly_data/GOOGL_hourly.csv
