# Libraries

In [1]:
import pandas as pd

import requests
from bs4 import BeautifulSoup
from alpha_vantage.timeseries import TimeSeries
import yfinance as yf

from tqdm import tqdm

# Helpers

In [2]:
def crawl_top30_components_of_nifty50():
    response = requests.get('https://finance.yahoo.com/quote/%5ENSEI/components?p=%5ENSEI', headers={'user-agent': 'Mozilla 5.0'}).text
    bs4 = BeautifulSoup(response, 'lxml')
    bs4.select('section > div > table')[0].select('tr')[0].select('th')

    components = []
    for i, row in enumerate(bs4.select('section > div > table')[0].select('tr')):
        if i == 0:
            headers = [component.text for component in row.select('th')]
        else:
            components.append([component.text for component in row.select('td')])
    
    data = pd.DataFrame(components, columns=headers)
    
    return data

# Crawling

## Components of NSEI

In [3]:
symbols = crawl_top30_components_of_nifty50()

In [4]:
symbols

Unnamed: 0,Symbol,Company Name,Last Price,Change,% Change,Volume
0,ULTRACEMCO.NS,UltraTech Cement Limited,10016.05,1.25,+0.01%,163130.0
1,MARUTI.NS,Maruti Suzuki India Limited,10017.0,1.7,+0.02%,671886.0
2,TECHM.NS,Tech Mahindra Limited,1248.3,0.8,+0.06%,1756733.0
3,BAJFINANCE.NS,Bajaj Finance Limited,7711.15,5.6,+0.07%,1052331.0
4,BHARTIARTL.NS,Bharti Airtel Limited,1046.6,-1.3,-0.12%,4007957.0
5,COALINDIA.NS,Coal India Limited,384.25,-0.5,-0.13%,8367270.0
6,APOLLOHOSP.NS,Apollo Hospitals Enterprise Limited,5754.25,-7.95,-0.14%,144884.0
7,HDFCLIFE.NS,HDFC Life Insurance Company Limited,646.75,-1.2,-0.19%,1393692.0
8,TITAN.NS,Titan Company Limited,3712.55,-7.15,-0.19%,963392.0
9,TATACONSUM.NS,Tata Consumer Products Limited,1122.95,-2.6,-0.23%,1207096.0


## data

In [5]:
data = []
for symbol in tqdm(symbols.Symbol):
    data.append(yf.Ticker(symbol).history(start='1990-01-01').assign(symbol=symbol))

data = pd.concat(data)

 97%|█████████▋| 29/30 [00:16<00:00,  1.82it/s]MM.NS: No timezone found, symbol may be delisted
100%|██████████| 30/30 [00:16<00:00,  1.84it/s]


In [6]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,symbol,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2002-08-12 00:00:00+05:30,281.341091,284.926805,280.881383,284.191284,13300.0,0.0,0.0,ULTRACEMCO.NS,
2002-08-13 00:00:00+05:30,278.169098,287.777000,278.169098,285.846222,14200.0,0.0,0.0,ULTRACEMCO.NS,
2002-08-14 00:00:00+05:30,278.674771,286.857561,278.674771,286.122040,40000.0,0.0,0.0,ULTRACEMCO.NS,
2002-08-15 00:00:00+05:30,286.122040,286.122040,286.122040,286.122040,0.0,0.0,0.0,ULTRACEMCO.NS,
2002-08-16 00:00:00+05:30,285.018756,286.857587,283.225885,285.984131,30600.0,0.0,0.0,ULTRACEMCO.NS,
...,...,...,...,...,...,...,...,...,...
2024-01-01 00:00:00+05:30,3790.000000,3832.000000,3773.000000,3811.100098,825907.0,0.0,0.0,TCS.NS,
2024-01-02 00:00:00+05:30,3811.100098,3811.100098,3767.250000,3783.199951,1344068.0,0.0,0.0,TCS.NS,
2024-01-03 00:00:00+05:30,3767.000000,3771.850098,3687.050049,3691.750000,1803075.0,0.0,0.0,TCS.NS,
2024-01-04 00:00:00+05:30,3701.750000,3719.000000,3651.000000,3666.800049,3598144.0,0.0,0.0,TCS.NS,


## Save

In [7]:
data.to_pickle('../data/data.pickle')