In [1]:
import yfinance as yf
import math
import pandas as pd
import numpy as np
import datetime
from IPython.display import clear_output
import time


In [2]:
class StockFeatures:



    def __init__(self, ticker_symbol):
        self.ticker_symbol = ticker_symbol
        self.data = None

    def average_return(self,interval='1d',period='1y'):
        """
        Calculates the average log return based on the specified frequency.
        """
        data = yf.download(self.ticker_symbol,interval=interval,period=period)
        data['Log_Return'] = np.log(data['Close'] / data['Close'].shift(1))

        self.data = data
        mean = data["Log_Return"].mean()
        return mean*100

    def volatility(self, frequency='daily'):
        sd = self.data.Log_Return.std()
        return sd*100
    


In [3]:
stocks_list = pd.read_csv('EQUITY_L.csv')
symbols = list(stocks_list.SYMBOL)


In [4]:
stocks_list

Unnamed: 0,SYMBOL,NAME OF COMPANY,SERIES,DATE OF LISTING,PAID UP VALUE,MARKET LOT,ISIN NUMBER,FACE VALUE
0,20MICRONS,20 Microns Limited,EQ,06-OCT-2008,5,1,INE144J01027,5
1,21STCENMGM,21st Century Management Services Limited,BE,03-MAY-1995,10,1,INE253B01015,10
2,360ONE,360 ONE WAM LIMITED,EQ,19-SEP-2019,1,1,INE466L01038,1
3,3IINFOLTD,3i Infotech Limited,EQ,22-OCT-2021,10,1,INE748C01038,10
4,3MINDIA,3M India Limited,EQ,13-AUG-2004,10,1,INE470A01017,10
...,...,...,...,...,...,...,...,...
1965,ZOTA,Zota Health Care LImited,EQ,19-AUG-2019,10,1,INE358U01012,10
1966,ZUARI,Zuari Agro Chemicals Limited,EQ,27-NOV-2012,10,1,INE840M01016,10
1967,ZUARIIND,ZUARI INDUSTRIES LIMITED,EQ,12-APR-1995,10,1,INE217A01012,10
1968,ZYDUSLIFE,Zydus Lifesciences Limited,EQ,18-APR-2000,1,1,INE010B01027,1


In [5]:


def get_stock_vol(stock_symbol_list,interval,period):
  stock_vol_list = []
  error_list = []

  #ignore this. this is just for time calculating
  start_time = time.time()
  

  for stock in stock_symbol_list:
    stock_fe = StockFeatures(f'{stock}.NS')
    #print(f'Data for {stock} for interval - {interval} and period - {period}')
    i = stock_symbol_list.index(stock)

    time_elapsed = time.time() - start_time
    time_per_item = time_elapsed / (i + 1)
    items_remaining = len(stock_symbol_list) - (i + 1)
    time_remaining = items_remaining * time_per_item
    


    try:
      mean = stock_fe.average_return(interval=interval,period=period)
      sd = stock_fe.volatility()
      length = len(stock_fe.data)
     

      stock_vol_list.append({
          'name':stock,
          'avg_return': round(mean,4),
          'avg_volatility': round(sd,4),
          'len':length,
          'interval':interval,
          'period':period


      })


      print('Succesful :- ',f'{round((symbols.index(stock)+1)/len(symbols)*100,2)}% Completed','\n')
      print(f"Time elapsed: {str(datetime.timedelta(seconds=time_elapsed))} seconds")
      print(f"Time remaining: {str(datetime.timedelta(seconds=time_remaining))} seconds")
      clear_output(wait= True)

    except Exception as e:

      error_list.append({
          'name' : stock,
          'error' : e,
          'interval' : interval,
          'period' : period
      })
      print(f'Error in {stock} :- {round((symbols.index(stock)+1)/len(symbols)*100,2)}% Completed','\n')
      print(f"Time elapsed: {str(datetime.timedelta(seconds=time_elapsed))} seconds")
      print(f"Time remaining: {str(datetime.timedelta(seconds=time_remaining))} seconds")

  print(stock_vol_list)

  data = pd.DataFrame(stock_vol_list)
  error_data = pd.DataFrame(error_list)
  data.to_csv(f'stocks_sd_{interval}_{period}.csv',index=False)
  data.to_csv(f'error_stocks_sd_{interval}_{period}.csv',index=False) if not error_data.empty else print("No Error Found")

  return data,error_data

In [None]:
data,error = get_stock_vol(symbols,interval='1d',period='1y')

In [7]:
data_5m,error_5m = get_stock_vol(symbols,interval='1m',period='7d')

[{'name': '20MICRONS', 'avg_return': 0.0, 'avg_volatility': 0.2106, 'len': 1393, 'interval': '1m', 'period': '7d'}, {'name': '21STCENMGM', 'avg_return': 0.2224, 'avg_volatility': 0.6218, 'len': 53, 'interval': '1m', 'period': '7d'}, {'name': '360ONE', 'avg_return': 0.0027, 'avg_volatility': 0.1592, 'len': 2544, 'interval': '1m', 'period': '7d'}, {'name': '3IINFOLTD', 'avg_return': -0.001, 'avg_volatility': 0.1563, 'len': 2408, 'interval': '1m', 'period': '7d'}, {'name': '3MINDIA', 'avg_return': -0.0012, 'avg_volatility': 0.1065, 'len': 1771, 'interval': '1m', 'period': '7d'}, {'name': '3PLAND', 'avg_return': 0.0108, 'avg_volatility': 1.5886, 'len': 396, 'interval': '1m', 'period': '7d'}, {'name': '5PAISA', 'avg_return': 0.0013, 'avg_volatility': 0.1749, 'len': 2333, 'interval': '1m', 'period': '7d'}, {'name': '63MOONS', 'avg_return': -0.005, 'avg_volatility': 0.2117, 'len': 2049, 'interval': '1m', 'period': '7d'}, {'name': 'A2ZINFRA', 'avg_return': -0.0108, 'avg_volatility': 0.9895, 'l

In [10]:
sd_1d = pd.read_csv('stocks_sd_1d_1y.csv')
sd_1m = pd.read_csv('stocks_sd_1m_7d.csv')

In [17]:
sd_1d = sd_1d[sd_1d.len > 200].sort_values('avg_volatility',ascending=False).reset_index()
sd_1m = sd_1m[sd_1m.len > 2000].sort_values('avg_volatility',ascending=False).reset_index()

In [43]:

sd_1m = sd_1m[['name','avg_return','avg_volatility','len','interval','period',' SERIES_x']]


In [46]:
sd_1m = sd_1m[sd_1m[' SERIES_x']=='EQ']
sd_1m

Unnamed: 0,name,avg_return,avg_volatility,len,interval,period,SERIES_x
2,SHRENIK,0.0000,3.0202,2321,1m,7d,EQ
5,KBCGLOBAL,0.0047,1.4236,2261,1m,7d,EQ
7,GTLINFRA,0.0000,1.3160,2615,1m,7d,EQ
8,SUNDARAM,0.0054,1.0271,2280,1m,7d,EQ
9,SHAH,0.0031,0.8380,2231,1m,7d,EQ
...,...,...,...,...,...,...,...
991,ITC,0.0006,0.0544,2621,1m,7d,EQ
992,ICICIBANK,0.0009,0.0542,2622,1m,7d,EQ
993,ROUTE,-0.0002,0.0539,2488,1m,7d,EQ
994,RELIANCE,-0.0003,0.0538,2622,1m,7d,EQ


In [49]:
sd_1d = pd.merge(sd_1d,stocks_list,left_on='name',right_on='SYMBOL',how='inner')

In [54]:
sd_1d = sd_1d[['name','avg_return','avg_volatility','len','interval','period',' SERIES']]
sd_1d = sd_1d[sd_1d[' SERIES'] == 'EQ']


Unnamed: 0,name,avg_return,avg_volatility,len,interval,period,SERIES
6,MANORAMA,-0.3304,10.3458,245,1d,1y,EQ
7,NIITLTD,-0.4993,9.6912,246,1d,1y,EQ
8,CGCL,-0.4182,9.3350,246,1d,1y,EQ
16,ACL,-0.0400,4.6121,246,1d,1y,EQ
18,FCSSOFT,0.2688,4.4654,246,1d,1y,EQ
...,...,...,...,...,...,...,...
1841,BRITANNIA,0.0444,1.0891,245,1d,1y,EQ
1842,BATAINDIA,-0.0038,1.0842,246,1d,1y,EQ
1843,ASIANPAINT,0.0064,1.0479,246,1d,1y,EQ
1844,HINDUNILVR,-0.0521,1.0080,246,1d,1y,EQ


In [60]:
sd_1d.reset_index()['index'].drop(columns= ('index'),inplace=True)

In [63]:
sd_1d.reset_index()

Unnamed: 0,index,name,avg_return,avg_volatility,len,interval,period,SERIES
0,6,MANORAMA,-0.3304,10.3458,245,1d,1y,EQ
1,7,NIITLTD,-0.4993,9.6912,246,1d,1y,EQ
2,8,CGCL,-0.4182,9.3350,246,1d,1y,EQ
3,16,ACL,-0.0400,4.6121,246,1d,1y,EQ
4,18,FCSSOFT,0.2688,4.4654,246,1d,1y,EQ
...,...,...,...,...,...,...,...,...
1528,1841,BRITANNIA,0.0444,1.0891,245,1d,1y,EQ
1529,1842,BATAINDIA,-0.0038,1.0842,246,1d,1y,EQ
1530,1843,ASIANPAINT,0.0064,1.0479,246,1d,1y,EQ
1531,1844,HINDUNILVR,-0.0521,1.0080,246,1d,1y,EQ
