In [None]:
import requests
import pandas as pd
import os
import time
import yfinance as yf

# Create the output directory up front: download_stock_data_yfinance writes
# one CSV per symbol under "stock_data/". exist_ok=True keeps re-runs from
# failing when the directory is already there.
os.makedirs("stock_data", exist_ok=True)

# Step 1: Download Latest NSE 500 Stock List
def download_nse500_list():
    """Download the NIFTY 500 constituent list and load it as a DataFrame.

    The CSV is fetched from the NSE archives, written to
    ``nifty500_stocks.csv`` in the current working directory, and then parsed
    with pandas.

    Returns:
        pandas.DataFrame: the parsed list with whitespace stripped from the
        column names, or ``None`` when the request fails, times out, or the
        server answers with a non-200 status.
    """
    url = "https://nsearchives.nseindia.com/content/indices/ind_nifty500list.csv"
    file_path = "nifty500_stocks.csv"

    # A browser-like User-Agent is sent with the request
    # (presumably the server rejects the default client UA -- TODO confirm).
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"}

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Network-level failures follow the same "return None" contract as a
        # bad HTTP status, so the caller's `is not None` check covers both.
        print(f"Failed to download NSE 500 list: {exc}")
        return None

    if response.status_code != 200:
        print("Failed to download NSE 500 list.")
        return None

    with open(file_path, "wb") as file:
        file.write(response.content)
    print(f"NSE 500 stock list downloaded successfully: {file_path}")

    df = pd.read_csv(file_path)

    # Ensure column names are correctly formatted (remove any leading/trailing spaces)
    df.columns = df.columns.str.strip()
    return df

# Step 2: Select a fixed number of stocks per sector (up to 225 with the default quotas)
def filter_top_stocks(df, sector_counts=None):
    """Pick the first N rows of each requested sector from the stock list.

    Rows are taken in the order they appear in ``df`` (``head(count)``), so
    the selection is only a ranking if ``df`` is already sorted that way.

    Args:
        df: DataFrame with an ``Industry`` column holding each stock's sector.
        sector_counts: optional mapping of sector name -> maximum number of
            rows to take. When omitted, the built-in quotas below are used.

    Returns:
        pandas.DataFrame: the selected rows, grouped in ``sector_counts``
        order, keeping their original index. Empty (same columns) if nothing
        was requested.

    Raises:
        KeyError: if ``df`` has no ``Industry`` column.
    """
    if sector_counts is None:
        sector_counts = {
            "Financial Services": 30,
            "Information Technology": 25,
            "Healthcare": 25,
            "Consumer Goods": 30,
            "Automobile and Auto Components": 20,
            "Oil Gas & Consumable Fuels": 20,
            "Metals & Mining": 20,
            "Construction Materials": 15,
            "Power": 15,
            "Consumer Durables": 15,
            "Textiles": 10,
        }

    if "Industry" not in df.columns:
        raise KeyError("Industry")

    # Compare on whitespace-trimmed values so stray padding in the CSV does
    # not silently exclude rows from their sector.
    industry = df["Industry"].astype(str).str.strip()

    selected_stocks = []
    for sector, count in sector_counts.items():
        sector_stocks = df[industry == sector.strip()].head(count)
        if len(sector_stocks) < count:
            # Surface short or unmatched sectors instead of dropping them
            # silently; a zero here usually means the name does not match
            # any value in the Industry column.
            print(
                f"Warning: only {len(sector_stocks)} of {count} requested "
                f"stocks found for sector '{sector}'."
            )
        selected_stocks.append(sector_stocks)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    top_stocks_df = pd.concat(selected_stocks) if selected_stocks else df.head(0)
    print(f"Selected {len(top_stocks_df)} stocks for AI model training.")
    return top_stocks_df

# Step 3: Download Historical Price Data using yfinance
def download_stock_data_yfinance(stock_symbol, session):
    """Fetch five years of price history for one ticker and save it as CSV.

    Args:
        stock_symbol: ticker string passed to ``yf.Ticker``; also used to
            build the output file name under ``stock_data/``.
        session: HTTP session object handed to ``yf.Ticker``.

    Any exception is caught and reported with a printed message, so a single
    failing ticker does not stop the caller. Nothing is returned.
    """
    try:
        history = yf.Ticker(stock_symbol, session=session).history(period="5y")

        # Nothing came back: report it and skip writing a file.
        if history.empty:
            print(f"No data available for {stock_symbol}")
            return

        history.to_csv(f"stock_data/{stock_symbol}_data.csv")
        print(f"Downloaded data for {stock_symbol}")

    except Exception as e:
        print(f"Error fetching data for {stock_symbol}: {e}")

# Step 4: Handle IP Blocking by Using a Custom Session
def get_custom_yfinance_session():
    """Return a new ``requests.Session`` with a browser-like User-Agent header."""
    custom_session = requests.Session()
    custom_session.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    return custom_session

# Main Execution Flow: download the index list, pick the per-sector selection,
# save it, then pull price history for every selected symbol.
nse500_df = download_nse500_list()
if nse500_df is not None:
    top_stocks_df = filter_top_stocks(nse500_df)

    # Persist the selection so it can be inspected or reused later.
    top_stocks_df.to_csv("top_250_stocks.csv", index=False)
    print("Saved selected stocks to top_250_stocks.csv.")

    # One shared session (custom User-Agent) is reused for every ticker.
    session = get_custom_yfinance_session()

    for symbol in top_stocks_df["Symbol"]:
        # yfinance expects NSE tickers with a ".NS" suffix.
        full_symbol = "{}.NS".format(symbol)
        download_stock_data_yfinance(full_symbol, session)

        # Pause between requests to reduce the chance of being blocked.
        time.sleep(2)


NSE 500 stock list downloaded successfully: nifty500_stocks.csv
Selected 179 stocks for AI model training.
Saved selected stocks to top_250_stocks.csv.


Failed to get ticker '360ONE.NS' reason: Expecting value: line 1 column 1 (char 0)
$360ONE.NS: possibly delisted; No price data found  (period=5y)


No data available for 360ONE.NS


Failed to get ticker 'AUBANK.NS' reason: Expecting value: line 1 column 1 (char 0)
$AUBANK.NS: possibly delisted; No price data found  (period=5y)


No data available for AUBANK.NS


KeyboardInterrupt: 