In [1]:
import os
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import yfinance as yf
import stockstats
from stockstats import StockDataFrame

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Directory that holds the cached per-ticker CSV files. It is keyed by the
# calendar date two days before today, so a new directory (and therefore a
# fresh download) is used whenever that date changes.
data_path = 'intraday_data/' + str(datetime.date.today() - datetime.timedelta(days=2))

# exist_ok=True replaces the check-then-create pattern: it is safe to re-run
# and cannot fail if the directory appears between the check and the create.
os.makedirs(data_path, exist_ok=True)

In [3]:
def DownloadStocksData(tic, interval='15m', cache=True):
    """Return intraday price bars for one ticker, backed by a CSV cache.

    Parameters
    ----------
    tic : str
        Symbol passed to ``yf.download`` (callers in this notebook pass
        NSE symbols such as ``'TATACONSUM.NS'``).
    interval : str, default '15m'
        Bar size forwarded to ``yf.download``.
    cache : bool, default True
        If true and a cached CSV for this ticker/interval exists under
        ``data_path``, it is read back instead of downloading.

    Returns
    -------
    pandas.DataFrame
        The downloaded (or cached) frame. On the cached path the index is
        rebuilt from the ISO-formatted strings stored in the ``Datetime``
        column. An empty frame is returned, and nothing is written, when the
        download yields no rows.
    """
    # The default interval keeps the historical "<tic>.csv" name so existing
    # caches stay valid; any other interval gets its own file so it can never
    # be served bars of the wrong size.
    suffix = '' if interval == '15m' else f'_{interval}'
    file_path = f'{data_path}/{tic}{suffix}.csv'

    if cache and os.path.exists(file_path):
        df = pd.read_csv(file_path, index_col='Datetime')
        df.index = df.index.map(datetime.datetime.fromisoformat)
        return df

    end_date = datetime.date.today()
    # 59-day look-back; presumably chosen to stay inside Yahoo's intraday
    # history limit -- TODO confirm.
    start_date = end_date - datetime.timedelta(days=59)
    df = yf.download(tic, start=start_date, end=end_date, group_by='ticker', interval=interval)

    # NOTE(review): some yfinance releases return (ticker, field) MultiIndex
    # columns even for a single ticker. With group_by='ticker' the field is the
    # last level, so keep only that level; this is a no-op for flat columns.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(-1)

    # Do not cache a failed/empty download: it would be served back on every
    # later call instead of being retried.
    if df.empty:
        return df

    # Do not rely on an earlier cell having created the directory.
    os.makedirs(data_path, exist_ok=True)
    df.to_csv(file_path)
    return df

In [4]:
def prepareMetrics(df):
    """Collapse intraday bars into one row per calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Close`` and ``Volume`` columns whose index entries are
        datetime-like (each must provide a ``.date()`` method).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``day`` (``datetime.date``), with two columns, ``Close``
        and ``Volume``, each holding the list of that day's values in their
        original row order.
    """
    # Work on a copy so the new column is never written onto a slice of the
    # caller's frame.
    feature = df[['Close', 'Volume']].copy()

    # date must be *called*: the bare attribute is a bound method, which made
    # the group key a method object instead of the calendar day.
    feature['day'] = [stamp.date() for stamp in feature.index]

    grouped = feature.groupby(by='day')
    return pd.concat([grouped['Close'].apply(list), grouped['Volume'].apply(list)], axis=1)

# def Normalize(x):
#     x = np.array(x)
#     x = (x - x.mean()) / x.std()
#     return x

In [5]:
# Load the constituent table and warm the on-disk cache for every symbol.
nifty50 = pd.read_csv('nifty50.csv')

# The first row of the Symbol column is skipped.
# NOTE(review): presumably that row is the index itself rather than a stock -- confirm against nifty50.csv.
stock_list = nifty50['Symbol'][1:]

for x in stock_list:
    # '.NS' is appended to every symbol before it is handed to the downloader.
    DownloadStocksData(f'{x}.NS')

In [18]:
# Hour (taken from the bar timestamps in the index) that splits each day:
# bars before it are the observed inputs, bars from it onwards are the outcome.
pivot_hour = 14


def _pivotRatios(df, pivot_hour):
    """Per-day percentage move from the last pre-pivot close to the highest post-pivot close.

    Days that lack bars on either side of ``pivot_hour`` have no defined move
    and are dropped, so they cannot dilute the percentages computed from the
    result.
    """
    featureDf = df[df.index.hour < pivot_hour]
    targetDf = df[df.index.hour >= pivot_hour]

    last_price = prepareMetrics(featureDf)['Close'].apply(lambda closes: closes[-1])
    target_high = prepareMetrics(targetDf)['Close'].apply(max)

    # Index alignment on the day leaves NaN where one side is missing.
    ratios = (target_high - last_price) / last_price
    ratios = ratios * 100
    return ratios.dropna()


scores = []
for stock in stock_list:
    df = DownloadStocksData(stock+'.NS', cache=True)
    if df.empty:
        # Nothing downloaded for this symbol; there is no day to score.
        continue

    ratios = _pivotRatios(df, pivot_hour)
    n_days = len(ratios)
    if n_days == 0:
        # Avoids a ZeroDivisionError when no day has bars on both sides of the pivot.
        continue

    scores.append({
        'name': stock,
        # Share of days (in %) whose move is at least 1% in either direction.
        'volatility': 100 * (ratios.abs() >= 1.).sum() / n_days,
        # Share of days (in %) gaining more than 0.5% after the pivot.
        'above0.5%': 100 * (ratios > 0.5).sum() / n_days,
        # Share of days (in %) where even the best post-pivot close is below the last pre-pivot close.
        'loss': 100 * (ratios < 0).sum() / n_days,
    })

# Highest "volatility" first; reverse=True keeps ties in their original order.
sorted(scores, key=lambda score: score['volatility'], reverse=True)

[{'name': 'ADANIPORTS',
  'volatility': 25.641025641025642,
  'above0.5%': 43.58974358974359,
  'loss': 25.641025641025642},
 {'name': 'TATASTEEL',
  'volatility': 20.512820512820515,
  'above0.5%': 35.8974358974359,
  'loss': 28.205128205128204},
 {'name': 'IOC',
  'volatility': 17.94871794871795,
  'above0.5%': 41.02564102564103,
  'loss': 15.384615384615385},
 {'name': 'HINDALCO',
  'volatility': 17.94871794871795,
  'above0.5%': 35.8974358974359,
  'loss': 23.076923076923077},
 {'name': 'JSWSTEEL',
  'volatility': 15.384615384615385,
  'above0.5%': 23.076923076923077,
  'loss': 28.205128205128204},
 {'name': 'ONGC',
  'volatility': 15.384615384615385,
  'above0.5%': 30.76923076923077,
  'loss': 30.76923076923077},
 {'name': 'EICHERMOT',
  'volatility': 15.384615384615385,
  'above0.5%': 33.333333333333336,
  'loss': 28.205128205128204},
 {'name': 'NTPC',
  'volatility': 15.384615384615385,
  'above0.5%': 33.333333333333336,
  'loss': 20.512820512820515},
 {'name': 'COALINDIA',
  'v

In [7]:
# Single-stock walkthrough of the pivot-hour calculation for TATACONSUM.
df = DownloadStocksData('TATACONSUM.NS', cache=True)

# Bars before pivot_hour are the inputs; bars from pivot_hour onwards are the outcome.
featureDf = df[df.index.hour < pivot_hour]
targetDf = df[df.index.hour >= pivot_hour]

# One row per day; LastPrice is the final close seen before the pivot.
features = prepareMetrics(featureDf).assign(
    LastPrice=lambda frame: frame['Close'].apply(lambda closes: closes[-1])
)

# Only the highest post-pivot close of each day is kept as the target.
targets = (
    prepareMetrics(targetDf)
    .drop(columns=['Volume'])
    .assign(Close=lambda frame: frame['Close'].apply(max))
)

# Relative move from LastPrice to the post-pivot high, expressed in percent.
ratios = (targets['Close'] - features['LastPrice']) / features['LastPrice']
ratios = ratios * 100

In [8]:
# Display the per-day percentage moves computed for TATACONSUM (values are in percent).
ratios

day
2022-04-08    1.311318
2022-04-11    1.037511
2022-04-12    0.741198
2022-04-13    0.970273
2022-04-18    1.491614
2022-04-19    0.285139
2022-04-20    1.003239
2022-04-21    1.615905
2022-04-22    1.234568
2022-04-25    0.875592
2022-04-26    1.167793
2022-04-27    0.099236
2022-04-28    1.963473
2022-04-29    0.439762
2022-05-02    1.673946
2022-05-04    0.268033
2022-05-05    0.083046
2022-05-06    1.094751
2022-05-09    1.116324
2022-05-10    0.519116
2022-05-11    0.958963
2022-05-12    1.327124
2022-05-13    0.897785
2022-05-16    0.448185
2022-05-17    1.027397
2022-05-18    1.380981
2022-05-19    0.102543
2022-05-20    0.849416
2022-05-23    0.087811
2022-05-24    0.111109
2022-05-25    0.748615
2022-05-26    2.292184
2022-05-27    0.586895
2022-05-30    1.205227
2022-05-31    1.455277
2022-06-01    0.479773
2022-06-02    0.911091
2022-06-03    0.594256
dtype: float64

In [10]:
# Display the pre-pivot per-day frame (lists of Close and Volume, plus LastPrice).
# NOTE(review): the saved output shows a DayNo column that no visible cell creates,
# so it looks stale -- re-run the notebook from a fresh kernel to confirm.
features

Unnamed: 0_level_0,Close,Volume,DayNo
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-04-07,"[2600.64990234375, 2592.10009765625, 2593.5500...","[230703, 378118, 187267, 228971, 154787, 28901...",7
2022-04-08,"[2579.39990234375, 2589.89990234375, 2594.8000...","[178744, 303806, 360256, 159839, 190480, 13317...",8
2022-04-11,"[2592.550048828125, 2588.35009765625, 2590.0, ...","[277726, 277840, 207251, 144034, 71487, 78098,...",11
2022-04-12,"[2591.60009765625, 2584.39990234375, 2573.0500...","[181799, 337618, 232696, 307430, 190392, 18958...",12
2022-04-13,"[2581.800048828125, 2584.75, 2579.050048828125...","[506305, 313464, 232661, 183496, 154940, 99860...",13
2022-04-18,"[2545.050048828125, 2545.14990234375, 2538.0, ...","[192272, 286002, 161990, 191353, 149730, 13657...",18
2022-04-19,"[2563.699951171875, 2570.0, 2582.800048828125,...","[210464, 494755, 492018, 556828, 412126, 47019...",19
2022-04-20,"[2709.39990234375, 2717.949951171875, 2722.699...","[1097092, 1410121, 966842, 404396, 285426, 258...",20
2022-04-21,"[2765.199951171875, 2767.5, 2764.0, 2768.60009...","[925698, 819792, 551576, 235983, 338333, 19472...",21
2022-04-22,"[2757.64990234375, 2752.800048828125, 2757.300...","[274052, 199353, 197968, 127364, 107540, 15034...",22


In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split