# Imports

In [1]:
import yfinance as yf
import requests
import bs4
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tqdm
import datetime
import time
import glob
import os
from tqdm import tqdm
import webdriver_manager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from IPython.display import clear_output

# Indices

## Download Index Data

In [30]:
indices = pd.read_csv("Data/Ticker_Data/indices.csv")
indices.head()

Unnamed: 0,Symbol,Name
0,^GSPC,S&P 500
1,^DJI,Dow Jones Industrial Average
2,^IXIC,NASDAQ Composite
3,^BUK100P,Cboe UK 100
4,^VIX,Vix


In [32]:
symbols = indices["Symbol"].values
names = indices["Name"].values
len(symbols)

24

In [34]:
data = yf.download(list(symbols), period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  24 of 24 completed

5 Failed downloads:
- ^INDIAVIX: 1d data not available for startTime=-2208994789 and endTime=1671771919. Only 100 years worth of day granularity data are allowed to be fetched per request.
- ^NSEI: No timezone found, symbol may be delisted
- ^AXJO: No data found for this date range, symbol may be delisted
- ^N225: No data found for this date range, symbol may be delisted
- ^KS11: No data found for this date range, symbol may be delisted


In [35]:
def clean_data(ticker):
    index = data[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [42]:
data_dict = (dict(zip(symbols, map(clean_data, symbols))))

In [46]:
failed_indices = []
for i in tqdm.tqdm(range(len(symbols))):
    if data_dict[symbols[i]].empty:
        failed_indices.append(symbols[i])

100%|██████████| 24/24 [00:00<00:00, 40184.95it/s]


In [58]:
data_retry = yf.download(failed_indices, period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  5 of 5 completed

1 Failed download:
- ^INDIAVIX: 1d data not available for startTime=-2208994789 and endTime=1671772519. Only 100 years worth of day granularity data are allowed to be fetched per request.


In [60]:
def clean_data_retry(ticker):
    index = data_retry[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [62]:
data_dict_retry = (dict(zip(failed_indices, map(clean_data_retry, failed_indices))))

In [71]:
for key in data_dict_retry.keys():
    data_dict[key] = data_dict_retry[key]

for key in tqdm.tqdm(data_dict.keys(), desc="Saving Data"):
    file_name = "Data/Indices/" + key.replace("^", "") + ".csv"
    data_dict[key].to_csv(file_name, index=False)

Saving Data: 100%|██████████| 24/24 [00:01<00:00, 17.21it/s]


# Stocks

## Download Index Data

### NASDAQ 200

In [118]:
indices = pd.read_csv("Data/nasdaq200.csv")
indices.head()

Unnamed: 0,Symbol,Name,Country,Sector,IPO Year,Industry
0,AAPL,Apple Inc. Common Stock,United States,Technology,1980.0,Computer Manufacturing
1,MSFT,Microsoft Corporation Common Stock,United States,Technology,1986.0,Computer Software: Prepackaged Software
2,GOOG,Alphabet Inc. Class C Capital Stock,United States,Technology,2004.0,Internet and Information Services
3,GOOGL,Alphabet Inc. Class A Common Stock,United States,Technology,2004.0,Internet and Information Services
4,AMZN,Amazon.com Inc. Common Stock,United States,Consumer Discretionary,1997.0,Catalog/Specialty Distribution


In [119]:
symbols = indices["Symbol"].values
names = indices["Name"].values
len(symbols)

200

In [120]:
data = yf.download(list(symbols), period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  200 of 200 completed


In [121]:
def clean_data(ticker):
    index = data[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [122]:
data_dict = (dict(zip(symbols, map(clean_data, symbols))))

In [124]:
failed_indices = []
for i in tqdm.tqdm(range(len(symbols))):
    if data_dict[symbols[i]].empty:
        failed_indices.append(symbols[i])

100%|██████████| 200/200 [00:00<00:00, 105212.69it/s]


In [125]:
failed_indices

[]

In [126]:
if failed_indices:
    data_retry = yf.download(failed_indices, period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

In [127]:
def clean_data_retry(ticker):
    index = data_retry[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [128]:
data_dict_retry = (dict(zip(failed_indices, map(clean_data_retry, failed_indices))))

In [130]:
for key in data_dict_retry.keys():
    data_dict[key] = data_dict_retry[key]

for key in tqdm.tqdm(data_dict.keys(), desc="Saving Data"):
    file_name = "Data/nasdaq200/" + key.replace("^", "") + ".csv"
    data_dict[key].to_csv(file_name, index=False)

Saving Data: 100%|██████████| 200/200 [00:14<00:00, 13.89it/s]


In [155]:
len(glob.glob("Data/nasdaq200/*.csv"))

200

### Nifty 200

In [131]:
indices = pd.read_csv("Data/nifty200.csv")
indices.head()

Unnamed: 0,Symbol,Name,Industry
0,ABB,ABB India Ltd.,Capital Goods
1,ACC,ACC Ltd.,Construction Materials
2,AUBANK,AU Small Finance Bank Ltd.,Financial Services
3,ABBOTINDIA,Abbott India Ltd.,Healthcare
4,ADANIENT,Adani Enterprises Ltd.,Metals & Mining


In [134]:
symbols = indices["Symbol"].values
symbols = list(map(lambda x:x+".BO", symbols))
print(symbols[10])
names = indices["Name"].values
len(symbols)

ABCAPITAL.BO


200

In [None]:
data = yf.download(list(symbols), period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

In [136]:
def clean_data(ticker):
    index = data[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [137]:
data_dict = (dict(zip(symbols, map(clean_data, symbols))))

In [138]:
len(data_dict)

200

In [139]:
failed_indices = []
for i in tqdm.tqdm(range(len(symbols))):
    if data_dict[symbols[i]].empty:
        failed_indices.append(symbols[i])

100%|██████████| 200/200 [00:00<00:00, 129115.10it/s]


In [141]:
len(failed_indices)

37

In [144]:
if failed_indices:
    data_retry = yf.download(failed_indices, period="5y", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  37 of 37 completed

3 Failed downloads:
- DEEPAKNTR.BO: No data found, symbol may be delisted
- MCDOWELL-N.BO: No data found, symbol may be delisted
- GUJGASLTD.BO: No data found, symbol may be delisted


In [145]:
def clean_data_retry(ticker):
    index = data_retry[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [146]:
data_dict_retry = (dict(zip(failed_indices, map(clean_data_retry, failed_indices))))

In [149]:
for key in data_dict_retry.keys():
    data_dict[key] = data_dict_retry[key]

for key in tqdm.tqdm(data_dict.keys(), desc="Saving Data"):
    file_name = "Data/nifty200/" + key.replace(".BO", "") + ".csv"
    data_dict[key].to_csv(file_name, index=False)

Saving Data: 100%|██████████| 200/200 [00:07<00:00, 26.15it/s]


In [158]:
final_retry = ["DEEPAKNTR.NS", "MCDOWELL-N.NS","GUJGASLTD.NS"]
data_retry = yf.download(final_retry, period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  3 of 3 completed


In [160]:
for t in final_retry:
    index = data_retry[t]
    index = index.dropna()
    index = index.reset_index()
    file_name = "Data/nifty200/" + t.replace(".NS", "") + ".csv"
    index.to_csv(file_name, index=False)

In [161]:
len(glob.glob("Data/nifty200/*.csv"))

200

# Commodities

## Download Commodities Data

In [49]:
commodities = pd.read_csv("Data/Ticker_Data/commodities.csv")
commodities.head()

Unnamed: 0,Ticker,Name
0,GC=F,Gold
1,SI=F,Silver
2,CL=F,Oil
3,NG=F,Gas
4,HG=F,Copper


In [56]:
symbols = commodities["Ticker"].values
names = commodities["Name"].values
sym_name = dict((symbol, name) for symbol, name in zip(symbols, names))
len(symbols)

14

In [51]:
data = yf.download(list(symbols), period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  14 of 14 completed


In [52]:
def clean_data(ticker):
    index = data[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [59]:
data_dict = (dict(zip(symbols, map(clean_data, symbols))))

In [60]:
failed_indices = []
for i in tqdm(range(len(symbols))):
    if data_dict[symbols[i]].empty:
        failed_indices.append(symbols[i])

100%|██████████| 14/14 [00:00<00:00, 66126.41it/s]


In [61]:
if failed_indices:
    data_retry = yf.download(failed_indices, period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

In [62]:
def clean_data_retry(ticker):
    index = data_retry[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [63]:
data_dict_retry = (dict(zip(failed_indices, map(clean_data_retry, failed_indices))))

In [65]:
for key in data_dict_retry.keys():
    data_dict[key] = data_dict_retry[key]

for key in tqdm(data_dict.keys(), desc="Saving Data"):
    file_name = "Data/Commodities/" + sym_name[key] + ".csv"
    data_dict[key].to_csv(file_name, index=False)

Saving Data: 100%|██████████| 14/14 [00:01<00:00, 11.36it/s]


# Bonnds

## US Bonds

In [20]:
us_bonds = pd.read_csv("Data/Ticker_Data/us_bonds.csv")
us_bonds.head()

Unnamed: 0,Symbol,Name
0,^IRX,13 Week Treasury Bill
1,^FVX,Treasury Yield 5 Years
2,^TNX,Treasury Yield 10 Years
3,^TYX,Treasury Yield 30 Years


In [21]:
symbols = us_bonds["Symbol"].values
names = us_bonds["Name"].values
len(symbols)

4

In [22]:
data = yf.download(list(symbols), period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  4 of 4 completed


In [23]:
def clean_data(ticker):
    index = data[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [24]:
data_dict = (dict(zip(symbols, map(clean_data, symbols))))

In [25]:
failed_indices = []
for i in tqdm(range(len(symbols))):
    if data_dict[symbols[i]].empty:
        failed_indices.append(symbols[i])

100%|██████████| 4/4 [00:00<00:00, 10492.32it/s]


In [26]:
if failed_indices:
    data_retry = yf.download(failed_indices, period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

In [27]:
def clean_data_retry(ticker):
    index = data_retry[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [28]:
data_dict_retry = (dict(zip(failed_indices, map(clean_data_retry, failed_indices))))

In [30]:
for key in data_dict_retry.keys():
    data_dict[key] = data_dict_retry[key]

for key in tqdm(data_dict.keys(), desc="Saving Data"):
    file_name = "Data/Bonds/US/" + key.replace("^", "") + ".csv"
    data_dict[key].to_csv(file_name, index=False)

Saving Data: 100%|██████████| 4/4 [00:00<00:00,  7.41it/s]


## Indian Bonds

In [109]:
TODAY = datetime.date.today()
TODAY = TODAY.strftime("%d-%m-%Y")
print(TODAY)

27-12-2022


In [157]:
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

In [77]:
indian_bonds = pd.read_csv("Data/Ticker_Data/indian_bonds.csv")
indian_bonds.head()

Unnamed: 0,Name,URL
0,India 3M,https://in.investing.com/rates-bonds/india-3-m...
1,India 6M,https://in.investing.com/rates-bonds/india-6-m...
2,India 1Y,https://in.investing.com/rates-bonds/india-1-y...
3,India 5Y,https://in.investing.com/rates-bonds/india-5-y...
4,India 10Y,https://in.investing.com/rates-bonds/india-10-...


In [79]:
urls = indian_bonds["URL"].values
names = indian_bonds["Name"].values
len(urls)

9

In [146]:
def set_date(date):
    date_btn = driver.find_element(By.ID, "widgetFieldDateRange")
    date_btn.click()
    print("Setting Date to", date)
    start_date = driver.find_element(By.ID, "startDate")
    for _ in range(len(date)):
        start_date.send_keys(Keys.BACK_SPACE)
    start_date.send_keys(date)
    apply_btn = driver.find_element(By.ID, "applyBtn")
    apply_btn.click()

def find_table():
    print("Finding Table")
    table = driver.find_element(By.TAG_NAME, "table")
    table = table.get_attribute("innerHTML")
    table = "<table>"+table+"</table>"
    table = pd.read_html(table)[0]
    table["Date"] = pd.to_datetime(table["Date"])
    return table

In [169]:
def get_table(url):
    driver.get(url)
    set_date("1990")

    time.sleep(3)

    table = find_table()
    
    last = table["Date"][0] + datetime.timedelta(1)
    last = last.strftime("%d-%m-%Y")
    if last==TODAY:
        return table
        
    set_date(last)
    time.sleep(3)
    
    table_2 = find_table()
    table = pd.concat([table_2, table])
    return table

    

In [148]:
table = get_table(urls[0])

Setting Date to 1990
Finding Table
Setting Date to 16-03-2021
Finding Table


In [149]:
table

Unnamed: 0,Date,Price,Open,High,Low,Change %
0,2022-12-27,6.37,6.38,6.38,6.37,0.00%
1,2022-12-26,6.37,6.38,6.40,6.37,-0.31%
2,2022-12-23,6.39,6.39,6.40,6.37,0.16%
3,2022-12-22,6.38,6.43,6.43,6.37,-1.24%
4,2022-12-21,6.46,6.45,6.46,6.44,0.47%
...,...,...,...,...,...,...
4994,2002-01-30,6.58,6.58,6.58,6.58,0.30%
4995,2002-01-29,6.56,6.56,6.56,6.56,-0.15%
4996,2002-01-28,6.57,6.57,6.57,6.57,-0.45%
4997,2002-01-25,6.60,6.60,6.60,6.60,0.00%


In [159]:
bonds = {
    names[0]:table
}

In [172]:
for i in range(1, len(names)):
    url = urls[i]
    print(names[i])
    table = get_table(url)
    clear_output(wait=True)
    bonds[names[i]] = table

India 30Y
Setting Date to 1990
Finding Table
Setting Date to 28-12-2022
Finding Table


In [177]:
for key, value in bonds.items():
    values = value[::-1]
    values["Date"] = pd.to_datetime(values["Date"])
    values.columns = ["Date", "Close", "Open", "High", "Low", "Change"]
    values = values[["Date", "Open", "High", "Low", "Close"]]
    value.to_csv("Data/Bonds/Indian/"+key.split(" ")[-1]+".csv", index=False)

# Currency

## Download Currency Data

In [37]:
currency = pd.read_csv("Data/Ticker_Data/currency.csv")
currency.head()

Unnamed: 0,Symbol,Name
0,EURUSD=X,EUR/USD
1,JPY=X,USD/JPY
2,GBPUSD=X,GBP/USD
3,AUDUSD=X,AUD/USD
4,NZDUSD=X,NZD/USD


In [38]:
symbols = currency["Symbol"].values
names = currency["Name"].values
len(currency)

28

In [39]:
data = yf.download(list(symbols), period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  26 of 26 completed


In [40]:
def clean_data(ticker):
    index = data[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [41]:
data_dict = (dict(zip(symbols, map(clean_data, symbols))))

In [42]:
failed_indices = []
for i in tqdm(range(len(symbols))):
    if data_dict[symbols[i]].empty:
        failed_indices.append(symbols[i])

100%|██████████| 28/28 [00:00<00:00, 70577.23it/s]


In [43]:
if failed_indices:
    data_retry = yf.download(failed_indices, period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

In [44]:
def clean_data_retry(ticker):
    index = data_retry[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [45]:
data_dict_retry = (dict(zip(failed_indices, map(clean_data_retry, failed_indices))))

In [48]:
for key in data_dict_retry.keys():
    data_dict[key] = data_dict_retry[key]

for key in tqdm(data_dict.keys(), desc="Saving Data"):
    file_name = "Data/Currency/" + key.replace("^", "") + ".csv"
    data_dict[key].to_csv(file_name, index=False)

Saving Data: 100%|██████████| 26/26 [00:01<00:00, 16.41it/s]


# Crypto

## Download Crypto Data

In [66]:
crypto = pd.read_csv("Data/Ticker_Data/crypto.csv")
crypto.head()

Unnamed: 0,Symbol,Name
0,BTC-USD,Bitcoin USD
1,ETH-USD,Ethereum USD
2,USDT-USD,Tether USD
3,USDC-USD,USD Coin USD
4,BNB-USD,BNB USD


In [67]:
symbols = crypto["Symbol"].values
names = crypto["Name"].values
len(symbols)

20

In [68]:
data = yf.download(list(symbols), period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

[*********************100%***********************]  20 of 20 completed


In [69]:
def clean_data(ticker):
    index = data[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [70]:
data_dict = (dict(zip(symbols, map(clean_data, symbols))))

In [71]:
failed_indices = []
for i in tqdm(range(len(symbols))):
    if data_dict[symbols[i]].empty:
        failed_indices.append(symbols[i])

100%|██████████| 20/20 [00:00<00:00, 57972.41it/s]


In [72]:
if failed_indices:
    data_retry = yf.download(failed_indices, period="max", interval="1d", group_by="ticker", threads = True, auto_adjust = True)

In [73]:
def clean_data_retry(ticker):
    index = data_retry[ticker]
    index = index.dropna()
    index = index.reset_index()
    return index

In [74]:
data_dict_retry = (dict(zip(failed_indices, map(clean_data_retry, failed_indices))))

In [75]:
for key in data_dict_retry.keys():
    data_dict[key] = data_dict_retry[key]

for key in tqdm(data_dict.keys(), desc="Saving Data"):
    file_name = "Data/Crypto/" + key.replace("^", "") + ".csv"
    data_dict[key].to_csv(file_name, index=False)

Saving Data: 100%|██████████| 20/20 [00:00<00:00, 69.21it/s]


# Other

In [3]:
data = yf.download(  # or pdr.get_data_yahoo(...
        # tickers list or string as well
        tickers = "SPY AAPL MSFT",

        # use "period" instead of start/end
        # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
        # (optional, default is '1mo')
        period = "max",

        # fetch data by interval (including intraday if period < 60 days)
        # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
        # (optional, default is '1d')
        interval = "1d",

        # Whether to ignore timezone when aligning ticker data from 
        # different timezones. Default is True. False may be useful for 
        # minute/hourly data.
        ignore_tz = False,

        # group by ticker (to access via data['SPY'])
        # (optional, default is 'column')
        group_by = 'ticker',

        # adjust all OHLC automatically
        # (optional, default is False)
        auto_adjust = True,

        # attempt repair of missing data or currency mixups e.g. $/cents
        repair = False,

        # use threads for mass downloading? (True/False/Integer)
        # (optional, default is True)
        threads = True,
    )

[*********************100%***********************]  3 of 3 completed


In [4]:
data["AAPL"]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-12-12 00:00:00-05:00,0.099874,0.100308,0.099874,0.099874,469033600
1980-12-15 00:00:00-05:00,0.095098,0.095098,0.094663,0.094663,175884800
1980-12-16 00:00:00-05:00,0.088149,0.088149,0.087715,0.087715,105728000
1980-12-17 00:00:00-05:00,0.089886,0.090320,0.089886,0.089886,86441600
1980-12-18 00:00:00-05:00,0.092492,0.092927,0.092492,0.092492,73449600
...,...,...,...,...,...
2022-12-16 00:00:00-05:00,136.690002,137.649994,133.729996,134.509995,160080100
2022-12-19 00:00:00-05:00,135.110001,135.199997,131.320007,132.369995,79592600
2022-12-20 00:00:00-05:00,131.389999,133.250000,129.889999,132.300003,77432800
2022-12-21 00:00:00-05:00,132.979996,136.809998,132.750000,135.449997,85928000


In [6]:
apple = data["AAPL"]
apple = apple.dropna()
apple = apple.reset_index()
apple.to_csv("Data/Stocks/nasdaq200/AAPL.csv", index=False)

In [7]:
aapl = yf.Ticker("AAPL")

In [14]:
nifty = yf.download("^NSEI", period="ytd", interval="1d")

[*********************100%***********************]  1 of 1 completed


In [15]:
nifty

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-03,17387.150391,17646.650391,17383.300781,17625.699219,17625.699219,200500
2022-01-04,17681.400391,17827.599609,17593.550781,17805.250000,17805.250000,247400
2022-01-05,17820.099609,17944.699219,17748.849609,17925.250000,17925.250000,251500
2022-01-06,17768.500000,17797.949219,17655.550781,17745.900391,17745.900391,236500
2022-01-07,17797.599609,17905.000000,17704.550781,17812.699219,17812.699219,239300
...,...,...,...,...,...,...
2022-12-19,18288.099609,18431.650391,18244.550781,18420.449219,18420.449219,154800
2022-12-20,18340.300781,18404.900391,18202.650391,18385.300781,18385.300781,169000
2022-12-21,18435.150391,18473.349609,18162.750000,18199.099609,18199.099609,187600
2022-12-22,18288.800781,18318.750000,18068.599609,18127.349609,18127.349609,195700
