In [1]:
import requests
import pandas as pd
import os
import time
import yfinance as yf

# Create the "stock_data" output directory used for the per-symbol price CSVs.
# exist_ok=True makes this safe to re-run when the directory already exists.
os.makedirs("stock_data", exist_ok=True)

# Step 1: Download Latest NSE 500 Stock List
def download_nse500_list():
    """Download the NIFTY 500 constituent CSV from NSE and load it into a DataFrame.

    The raw response body is written to ``nifty500_stocks.csv`` in the current
    working directory and then parsed with pandas. Column names are stripped of
    leading/trailing whitespace.

    Returns:
        pandas.DataFrame: the parsed constituent list, or
        None: if the request fails (network error, timeout, or a non-200 status).
    """
    url = "https://nsearchives.nseindia.com/content/indices/ind_nifty500list.csv"
    file_path = "nifty500_stocks.csv"

    # A browser-like User-Agent is sent with the request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"}

    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Keep the "return None on failure" contract for connection errors/timeouts too.
        print(f"Failed to download NSE 500 list: {exc}")
        return None

    if response.status_code != 200:
        print("Failed to download NSE 500 list.")
        return None

    with open(file_path, "wb") as file:
        file.write(response.content)
    print(f"NSE 500 stock list downloaded successfully: {file_path}")

    df = pd.read_csv(file_path)

    # Ensure column names are correctly formatted (remove any leading/trailing spaces)
    df.columns = df.columns.str.strip()

    return df

# Step 2: Select up to a fixed number of stocks per sector (first rows in list order)
def filter_top_stocks(df, sector_counts=None):
    """Select up to a fixed number of stocks from each requested sector.

    For every sector, the rows whose ``Industry`` value equals the sector name
    are taken in their existing order and truncated to that sector's quota
    (``DataFrame.head``). No ranking is applied.

    Args:
        df: DataFrame with an ``Industry`` column.
        sector_counts: optional mapping of sector name -> maximum number of
            rows to keep. Defaults to the built-in quota table below.

    Returns:
        pandas.DataFrame: the selected rows, grouped in the order the sectors
        appear in ``sector_counts``. Empty (same columns as ``df``) when
        nothing matches.

    Raises:
        KeyError: if ``df`` has no ``Industry`` column.
    """
    if sector_counts is None:
        sector_counts = {
            "Financial Services": 30,
            "Information Technology": 25,
            "Healthcare": 25,
            "Consumer Goods": 30,
            "Automobile and Auto Components": 20,
            "Oil Gas & Consumable Fuels": 20,
            "Metals & Mining": 20,
            "Construction Materials": 15,
            "Power": 15,
            "Consumer Durables": 15,
            "Textiles": 10
        }

    selected_stocks = []
    unmatched_sectors = []
    for sector, count in sector_counts.items():
        sector_rows = df[df["Industry"] == sector]
        if sector_rows.empty:
            # A name that matches nothing is usually a spelling mismatch with the
            # data; surface it instead of silently selecting fewer stocks.
            unmatched_sectors.append(str(sector))
            continue
        sector_stocks = sector_rows.head(count)
        if not sector_stocks.empty:
            selected_stocks.append(sector_stocks)

    if unmatched_sectors:
        print(f"Warning: no stocks found for sector(s): {', '.join(unmatched_sectors)}")

    if selected_stocks:
        top_stocks_df = pd.concat(selected_stocks)
    else:
        # pd.concat raises on an empty list; return an empty frame with df's columns.
        top_stocks_df = df.iloc[0:0]

    print(f"Selected {len(top_stocks_df)} stocks for AI model training.")
    return top_stocks_df

# Step 3: Download Historical Price Data using yfinance
def download_stock_data_yfinance(stock_symbol, session, period="10y", output_dir="stock_data"):
    """Fetch price history for one ticker via yfinance and save it as a CSV.

    The history is written to ``<output_dir>/<stock_symbol>_data.csv``. Any
    exception raised while fetching or saving is caught and reported with
    ``print`` so that one failing symbol does not stop a batch run.

    Args:
        stock_symbol: ticker as understood by yfinance (the caller passes
            NSE symbols with a ``.NS`` suffix).
        session: session object forwarded to ``yf.Ticker``.
        period: lookback window passed to ``Ticker.history``; defaults to
            ``"10y"`` (ten years).
        output_dir: directory that receives the CSV; created if missing.

    Returns:
        None
    """
    try:
        stock = yf.Ticker(stock_symbol, session=session)
        data = stock.history(period=period)  # default lookback is 10 years

        if not data.empty:
            # Create the target directory here so the function does not rely on
            # it having been created elsewhere.
            os.makedirs(output_dir, exist_ok=True)
            data.to_csv(os.path.join(output_dir, f"{stock_symbol}_data.csv"))
            print(f"Downloaded data for {stock_symbol}")
        else:
            print(f"No data available for {stock_symbol}")

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller continue.
        print(f"Error fetching data for {stock_symbol}: {e}")

# Step 4: Handle IP Blocking by Using a Custom Session
def get_custom_yfinance_session():
    """Build a ``requests.Session`` that sends a browser-like User-Agent header.

    Returns:
        requests.Session: a new session whose default headers carry the
        custom ``User-Agent`` value.
    """
    # NOTE(review): newer yfinance releases may not accept a plain
    # requests.Session — confirm against the installed version.
    browser_session = requests.Session()
    browser_session.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    return browser_session

# Main Execution Flow
# Pause between consecutive price-history requests to reduce the chance of being blocked.
REQUEST_DELAY_SECONDS = 2

nse500_df = download_nse500_list()
if nse500_df is not None:
    top_stocks_df = filter_top_stocks(nse500_df)

    # Save selected stocks to CSV
    top_stocks_df.to_csv("top_250_stocks.csv", index=False)
    print("Saved selected stocks to top_250_stocks.csv.")

    # Get a session with a custom user-agent to avoid blocking
    session = get_custom_yfinance_session()

    # Drop missing symbols so a NaN is never turned into the bogus ticker "nan.NS".
    symbols = top_stocks_df["Symbol"].dropna().tolist()

    # Download historical price data for each stock
    for position, symbol in enumerate(symbols):
        full_symbol = f"{symbol}.NS"  # NSE India format for yfinance
        download_stock_data_yfinance(full_symbol, session)

        # Delay only between requests; there is nothing to wait for after the last one.
        if position < len(symbols) - 1:
            time.sleep(REQUEST_DELAY_SECONDS)


NSE 500 stock list downloaded successfully: nifty500_stocks.csv
Selected 181 stocks for AI model training.
Saved selected stocks to top_250_stocks.csv.
Downloaded data for 360ONE.NS
Downloaded data for AUBANK.NS
Downloaded data for AADHARHFC.NS
Downloaded data for AAVAS.NS
Downloaded data for ABCAPITAL.NS
Downloaded data for ABSLAMC.NS
Downloaded data for ANANDRATHI.NS
Downloaded data for ANGELONE.NS
Downloaded data for APTUS.NS
Downloaded data for AIIL.NS
Downloaded data for AXISBANK.NS
Downloaded data for BSE.NS
Downloaded data for BAJFINANCE.NS
Downloaded data for BAJAJFINSV.NS
Downloaded data for BAJAJHLDNG.NS
Downloaded data for BAJAJHFL.NS
Downloaded data for BANDHANBNK.NS
Downloaded data for BANKBARODA.NS
Downloaded data for BANKINDIA.NS
Downloaded data for MAHABANK.NS
Downloaded data for CRISIL.NS
Downloaded data for CANFINHOME.NS
Downloaded data for CANBK.NS
Downloaded data for CGCL.NS
Downloaded data for CENTRALBK.NS
Downloaded data for CDSL.NS
Downloaded data for CHOLAHLDNG.

Could not get exchangeTimezoneName for ticker 'NMDC.NS' reason: 'chart'
$NMDC.NS: possibly delisted; no price data found  (period=10y)


No data available for NMDC.NS
Downloaded data for NSLNISP.NS
Downloaded data for NATIONALUM.NS
Downloaded data for SARDAEN.NS
Downloaded data for SAIL.NS
Downloaded data for TATASTEEL.NS
Downloaded data for VEDL.NS
Downloaded data for ACC.NS
Downloaded data for AMBUJACEM.NS
Downloaded data for DALBHARAT.NS
Downloaded data for GRASIM.NS
Downloaded data for INDIACEM.NS
Downloaded data for JKCEMENT.NS
Downloaded data for SHREECEM.NS
Downloaded data for RAMCOCEM.NS
Downloaded data for ULTRACEMCO.NS
Downloaded data for ACMESOLAR.NS
Downloaded data for ADANIENSOL.NS
Downloaded data for ADANIGREEN.NS
Downloaded data for ADANIPOWER.NS
Downloaded data for CESC.NS


$DUMMYSIEMS.NS: possibly delisted; no price data found  (period=10y) (Yahoo error = "No data found, symbol may be delisted")


No data available for DUMMYSIEMS.NS
Downloaded data for JSWENERGY.NS
Downloaded data for JPPOWER.NS
Downloaded data for NHPC.NS
Downloaded data for NLCINDIA.NS
Downloaded data for NTPCGREEN.NS
Downloaded data for NTPC.NS
Downloaded data for NAVA.NS
Downloaded data for POWERGRID.NS
Downloaded data for RPOWER.NS
Downloaded data for AMBER.NS
Downloaded data for ASIANPAINT.NS
Downloaded data for BATAINDIA.NS
Downloaded data for BERGEPAINT.NS
Downloaded data for BLUESTARCO.NS
Downloaded data for CAMPUS.NS
Downloaded data for CENTURYPLY.NS
Downloaded data for CERA.NS
Downloaded data for CROMPTON.NS
Downloaded data for DIXON.NS
Downloaded data for HAVELLS.NS
Downloaded data for KAJARIACER.NS
Downloaded data for KALYANKJIL.NS
Downloaded data for KANSAINER.NS
Downloaded data for PGEL.NS
Downloaded data for ALOKINDS.NS
Downloaded data for KPRMILL.NS
Downloaded data for PAGEIND.NS
Downloaded data for RAYMONDLSL.NS
Downloaded data for TRIDENT.NS
Downloaded data for VTL.NS
Downloaded data for WELSP