In [2]:
import numpy as np
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import scipy.stats as st
from scipy.stats import rankdata
import os
import datetime

In [3]:
def get_daily_ohlcv(tickers,years=23):
    outo=list()
    outh=list()
    outl=list()
    outc=list()
    outv=list()
    outdf=list()
    months=str(years*12)
    for ticker in tickers:
        try:
            tick = yf.Ticker(ticker)
            df=tick.history(period=months+"mo")
            df=df.reset_index()
            df.dropna(inplace=True)
            df['Symbol']=ticker
            del df['Dividends']
            del df['Stock Splits']
            df.to_csv('Data_800stocks/{}.csv'.format(ticker),index=False)
            outdf.append(df)
        except Exception as e:
            print(e)
            continue
    return outdf

In [4]:
data=pd.read_excel("ticker2.xlsx")

In [5]:
unique_ticker = [str(i) for i in list(data['Ticker'].unique())]

In [6]:
unique_ticker = pd.DataFrame(unique_ticker,columns=['Ticker'])
unique_ticker

Unnamed: 0,Ticker
0,MROTK IN Equity
1,BLSTR IN Equity
2,ITC IN Equity
3,LIIL IN Equity
4,NCRL IN Equity
...,...
1573,UJJIVANS IN Equity
1574,AACL IN Equity
1575,PSPPL IN Equity
1576,SWSOLAR IN Equity


In [7]:
data1=pd.read_excel("ISIN_final.xlsx")

In [8]:
df=pd.merge(unique_ticker, data1[["Ticker","Symbol"]], on="Ticker")

In [9]:
df1=df["Symbol"].tolist()
df2=[tick+'.NS' for tick in df1]
df2

['MRO-TEK.NS',
 'BLUESTARCO.NS',
 'ITC.NS',
 'LINDEINDIA.NS',
 'CUMMINSIND.NS',
 'INDHOTEL.NS',
 'ATFL.NS',
 'BALMLAWRIE.NS',
 'INDIANCARD.NS',
 'GILLETTE.NS',
 'EVEREADY.NS',
 'TATACOFFEE.NS',
 'RUCHI.NS',
 'BLUEDART.NS',
 'M&M.NS',
 'FOSECOIND.NS',
 'INGERRAND.NS',
 'GODREJCP.NS',
 'ZENSARTECH.NS',
 'ACCELYA.NS',
 'KOTHARIPRO.NS',
 'SINTEX.NS',
 'SREINFRA.NS',
 'SEAMECLTD.NS',
 'JBCHEPHARM.NS',
 'BSL.NS',
 'SAKHTISUG.NS',
 'ARCHIES.NS',
 'ZODIACLOTH.NS',
 'JAYSREETEA.NS',
 'SESHAPAPER.NS',
 'BSOFT.NS',
 '3MINDIA.NS',
 'BIRLACABLE.NS',
 'NRBBEARING.NS',
 'KSB.NS',
 'KAJARIACER.NS',
 'KAKATCEM.NS',
 'BATAINDIA.NS',
 'BAJFINANCE.NS',
 'BRITANNIA.NS',
 'LT.NS',
 'SBIN.NS',
 'TIPSINDLTD.NS',
 'HINDALCO.NS',
 'PNBGILTS.NS',
 'NXTDIGITAL.NS',
 'TATAMOTORS.NS',
 'CENTURYTEX.NS',
 'BAJAJHLDNG.NS',
 'CANFINHOME.NS',
 'GESHIP.NS',
 'GRAPHITE.NS',
 'INDIACEM.NS',
 'EIHOTEL.NS',
 'BIRLACORPN.NS',
 'RAYMOND.NS',
 'CEATLTD.NS',
 'MRF.NS',
 'SCI.NS',
 'NLCINDIA.NS',
 'HCC.NS',
 'DABUR.NS',
 'LICHSGF

In [9]:
%%time
df3=get_daily_ohlcv(df2)
df3

- GDL.NS: No data found, symbol may be delisted
'Dividends'
Wall time: 11min 9s


[           Date       Open       High        Low      Close  Volume  \
 0    2002-08-12  12.465753  12.465753  12.248326  12.320802   21196   
 1    2002-08-13  12.212089  12.538227  12.212089  12.320802   10678   
 2    2002-08-14  12.248325  12.357037  12.103375  12.175849   11222   
 3    2002-08-15  12.175849  12.175849  12.175849  12.175849       0   
 4    2002-08-16  12.393276  12.393276  11.958424  12.030900   14615   
 ...         ...        ...        ...        ...        ...     ...   
 4874 2022-03-30  53.000000  53.950001  50.799999  52.549999    5985   
 4875 2022-03-31  51.250000  52.799999  50.000000  50.400002    5710   
 4876 2022-04-01  52.450001  53.950001  50.000000  51.250000   13364   
 4877 2022-04-04  52.900002  56.349998  52.849998  56.349998   25896   
 4878 2022-04-05  59.000000  59.000000  52.950001  53.450001   18975   
 
           Symbol  
 0     MRO-TEK.NS  
 1     MRO-TEK.NS  
 2     MRO-TEK.NS  
 3     MRO-TEK.NS  
 4     MRO-TEK.NS  
 ...          

In [26]:
def aggregate(df, frequency):
    df.reset_index(inplace=True)
    df['days'] = 1
    weekly_df = df.resample(frequency, on='Date').agg({'Open':'first','High':'max', 'Low':'min','Close':'last','Volume':'sum'})
    weekly_df['Symbol'] = df['Symbol'].iloc[0]
    return weekly_df

In [34]:
import pandas as pd
import glob

path = r'C:\Users\arushi.roy\Desktop\work1\Task2\Data_800stocks' # use your path
all_files = glob.glob(path + "/*.csv")

li = list()

for filename in all_files:
    df = pd.read_csv(filename)
    df['Date'] = pd.to_datetime(df['Date'])
    df=df.set_index(['Date'])
    weekly_df = aggregate(df, 'W-Fri')
    weekly_df.to_csv("Data_weekly_800stocks/{}".format(filename.split('\\')[-1]),index=True)
    li.append(weekly_df)
    

frame = pd.concat(li, axis=0)

In [28]:
frame

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Symbol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-04-22,520.664895,520.664895,423.591779,432.857849,3304177.0,3IINFOLTD.NS
2005-04-29,436.387824,449.007307,407.530609,412.119507,4757708.0,3IINFOLTD.NS
2005-05-06,450.066238,450.066238,409.030847,417.855652,1349423.0,3IINFOLTD.NS
2005-05-13,423.591797,425.356762,412.560766,415.031708,524773.0,3IINFOLTD.NS
2005-05-20,415.649457,451.389990,415.472949,445.036133,3099979.0,3IINFOLTD.NS
...,...,...,...,...,...,...
2022-03-11,1522.000000,1577.949951,1471.000000,1522.599976,206468.0,ZYDUSWELL.NS
2022-03-18,1535.599976,1548.750000,1481.050049,1497.599976,155121.0,ZYDUSWELL.NS
2022-03-25,1492.000000,1527.550049,1460.000000,1475.150024,437235.0,ZYDUSWELL.NS
2022-04-01,1475.150024,1565.000000,1442.050049,1559.599976,248758.0,ZYDUSWELL.NS
