In [1]:
# Turn off Jedi-based tab completion in the IPython completer.
%config Completer.use_jedi = False

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics

In [3]:
# Name of the input CSV; the same name is reused for the exported indicators file.
file='NSEI.csv'

In [4]:
# Load the price history with the "Date" column parsed as dates and used as the index.
df = pd.read_csv(file,index_col="Date",parse_dates=True)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-04-01,5249.200195,5298.600098,5249.200195,5290.5,5290.5,0.0
2010-04-05,5291.399902,5377.549805,5291.399902,5368.399902,5368.399902,0.0
2010-04-06,5369.649902,5388.649902,5351.700195,5366.0,5366.0,0.0
2010-04-07,5365.700195,5399.649902,5345.049805,5374.649902,5374.649902,0.0
2010-04-08,5376.299805,5383.649902,5290.25,5304.450195,5304.450195,0.0


In [5]:
# Drop every row that has a missing value in any column.
df = df.dropna(how='any',axis=0) 

In [6]:
def movement(close):
    """Classify ``close`` against the value seen on the previous call.

    The previous value is kept on the function itself as ``movement.close``.
    Returns ``1`` when ``close`` is strictly greater than the remembered value
    and ``-1`` otherwise (an unchanged price, and the very first call after
    ``movement.close`` was reset to ``None``, both give ``-1``).
    """
    # On the first call there is nothing to compare with, so compare to itself.
    previous = close if movement.close is None else movement.close
    movement.close = close
    return 1 if close - previous > 0 else -1


# No previous close has been seen yet.
movement.close = None

In [7]:
# Label each row +1 when the close is higher than on the previous row and -1
# otherwise (unchanged closes and the first row, which has no predecessor,
# are -1).  Computed from ``diff()`` rather than a row-wise ``apply`` through
# the stateful ``movement`` helper, so re-running this cell always produces
# the same labels regardless of what was executed before.
df['trend'] = np.where(df['Close'].diff() > 0, 1, -1)

In [8]:
# Count rising (trend == 1) and falling (trend == -1) days for each
# 1 April .. 31 March period starting in 2010 through 2020.
year_list = []
for year in range(2010, 2021):
    fiscal_year = slice(f'{year}-04-01', f'{year + 1}-03-31')
    dec = df[df['trend'] == -1][fiscal_year]['Close'].count()
    inc = df[df['trend'] == 1][fiscal_year]['Close'].count()
    # Share of rising days, in percent, rounded to two decimals.
    inc_percent = round(inc / (inc + dec) * 100, 2)
    year_list.append({
        'year': year,
        'increase': inc,
        'inc%': inc_percent,
        'decrease': dec,
        'dec%': 100 - inc_percent,
    })

In [9]:
# One row per starting year, indexed by that year, plus a 'Total' row of sums.
summary_df = pd.DataFrame(year_list).set_index('year')
summary_df.loc['Total'] = summary_df.sum()
# Summed percentages are meaningless, so blank them on the Total row.  A single
# .loc[row, columns] assignment writes into summary_df itself, whereas the
# chained form .loc['Total']['inc%'] = ... may only modify a temporary copy.
summary_df.loc['Total', ['inc%', 'dec%']] = np.nan
summary_df

Unnamed: 0_level_0,increase,inc%,decrease,dec%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,132.0,52.17,121.0,47.83
2011,110.0,45.45,132.0,54.55
2012,127.0,52.48,115.0,47.52
2013,132.0,53.66,114.0,46.34
2014,131.0,54.58,109.0,45.42
2015,119.0,48.77,125.0,51.23
2016,135.0,54.88,111.0,45.12
2017,129.0,52.65,116.0,47.35
2018,137.0,55.92,108.0,44.08
2019,121.0,49.39,124.0,50.61


In [10]:
def sma(n):
    """Simple moving average of ``df['Close']`` over ``n`` rows, as a NumPy array.

    The first ``n - 1`` entries are NaN because their window is incomplete.
    """
    rolling_mean = df['Close'].rolling(window=n).mean()
    return np.array(rolling_mean)


df['sma_10'] = sma(10)
df['sma_20'] = sma(20)
df['sma_50'] = sma(50)

In [11]:
def wma(n, close=None):
    """Linearly weighted moving average over a window of ``n`` rows.

    Within each window the oldest value gets weight 1 and the newest weight
    ``n``; the weighted sum is divided by ``n * (n + 1) / 2``.

    Parameters
    ----------
    n : int
        Window length in rows.
    close : array-like, optional
        Values to average.  Defaults to the notebook-level ``df['Close']``.

    Returns
    -------
    numpy.ndarray
        One value per input row; the first ``n - 1`` entries (all entries when
        the input is shorter than ``n``) are NaN.
    """
    series = df['Close'] if close is None else pd.Series(close, dtype=float)
    weights = np.arange(1, n + 1)

    def _weighted_mean(window):
        # Same arithmetic as sum(w_i * x_i) / (n * (n + 1) / 2).
        return (window * weights).sum() / n / (n + 1) * 2

    # raw=True hands each window to the helper as a plain ndarray.
    return series.rolling(n).apply(_weighted_mean, raw=True).to_numpy()

# Weighted moving average of the close over a 5-row window.
df['wma_5'] = wma(5)

In [12]:
def momentum(n, close=None):
    """Price change across a window of ``n`` rows.

    Each value is the current entry minus the entry ``n - 1`` rows earlier,
    i.e. last minus first element of an ``n``-row window.

    Parameters
    ----------
    n : int
        Window length in rows.
    close : array-like, optional
        Values to difference.  Defaults to the notebook-level ``df['Close']``.

    Returns
    -------
    numpy.ndarray
        One value per input row; the first ``n - 1`` entries (all entries when
        the input is shorter than ``n``) are NaN.
    """
    series = df['Close'] if close is None else pd.Series(close, dtype=float)
    return series.diff(n - 1).to_numpy()

# Momentum over a 10-row window: the close minus the close 9 rows earlier.
df['momentum_10'] = momentum(10)

In [13]:
# Williams %R and the fast stochastic oscillator (%K) are almost exactly the same indicator.
# The only difference between the two is how they are scaled (%R = %K - 100).

# def williams_r(n):
#     arr = []
#     for i in range(n-1):
#         arr.append(np.NAN)
#     for i in range(n-1,df.Close.count()):
#         l = df.iloc[i-n+1:i]['Close'].min()
#         h = df.iloc[i-n+1:i]['Close'].max()
#         c = df.iloc[i-1]['Close']
#         k = (h-c)/(h-l)*-100
#         arr.append(k)
#     return np.array(arr)

# df['williams_r'] = williams_r(15)

In [14]:
def stck(n, close=None):
    """Stochastic %K computed from closing values only.

    For each row, ``(close - lowest) / (highest - lowest) * 100`` where
    ``lowest`` and ``highest`` are the minimum and maximum of the last ``n``
    values including the current one.  A window whose values are all equal
    has zero range and yields NaN.

    NOTE(review): the range comes from closes, not from separate high/low
    columns -- confirm this is the intended definition.

    Parameters
    ----------
    n : int
        Window length in rows.
    close : array-like, optional
        Values to use.  Defaults to the notebook-level ``df['Close']``.

    Returns
    -------
    numpy.ndarray
        One value per input row; the first ``n - 1`` entries (all entries when
        the input is shorter than ``n``) are NaN.
    """
    series = df['Close'] if close is None else pd.Series(close, dtype=float)
    window = series.rolling(n)
    lowest, highest = window.min(), window.max()
    return ((series - lowest) / (highest - lowest) * 100).to_numpy()

# Stochastic %K over a 14-row window of closes.
df['stck'] = stck(14)

In [15]:
def stcd(n):
    """Stochastic %D: ``n``-row simple moving average of ``df['stck']``.

    Returned as a NumPy array; entries whose window contains a NaN are NaN.
    """
    smoothed_k = df['stck'].rolling(window=n).mean()
    return np.array(smoothed_k)


df['stcd'] = stcd(3)

In [16]:
def cci(n, data=None):
    """Commodity Channel Index over a rolling window of ``n`` rows.

    The typical price is ``(High + Low + Close) / 3``.  For each window the
    result is ``(last - mean) / 0.015 / mean_abs_deviation`` where ``mean`` and
    ``mean_abs_deviation`` are taken over the typical prices in that window.
    A window with zero mean absolute deviation yields NaN.

    Parameters
    ----------
    n : int
        Window length in rows.
    data : pandas.DataFrame, optional
        Frame with ``High``, ``Low`` and ``Close`` columns.  Defaults to the
        notebook-level ``df``.

    Returns
    -------
    numpy.ndarray
        One value per input row; the first ``n - 1`` entries (all entries when
        the input is shorter than ``n``) are NaN.
    """
    frame = df if data is None else data
    typical = (frame['High'] + frame['Low'] + frame['Close']) / 3

    def _window_cci(window):
        centre = window.mean()
        mean_deviation = np.abs(window - centre).mean()
        return (window[-1] - centre) / 0.015 / mean_deviation

    # raw=True hands each window to the helper as a plain ndarray.
    return typical.rolling(n).apply(_window_cci, raw=True).to_numpy()

# Commodity Channel Index over 14-, 21- and 50-row windows.
df['cci_14'] = cci(14)
df['cci_21'] = cci(21)
df['cci_50'] = cci(50)

In [17]:
# Look-back length for the RSI.  This is a module-level name: the MACD signal
# line computed later in the notebook also reads it as its EMA span.
n = 14
def rma(x, n, y0):
    """Running moving average seeded with ``y0``, evaluated in closed form.

    Equivalent to the recursion ``y[k] = a * y[k-1] + x[k] / n`` with
    ``a = (n - 1) / n`` and ``y[0] = y0``, computed with a cumulative sum
    instead of a loop.

    Returns ``n`` NaNs, then ``y0``, then one value per element of ``x``
    (total length ``n + 1 + len(x)``).

    NOTE(review): ``ak`` contains ``a ** (len(x) - 1)``, which shrinks
    geometrically with the input length; for very long inputs it may
    underflow and make the division unreliable -- confirm before reuse.
    """
    a = (n-1) / n
    # Powers of a in descending order: a**(len(x)-1), ..., a**1, a**0.
    ak = a**np.arange(len(x)-1, -1, -1)
    return np.r_[np.full(n, np.nan), y0, np.cumsum(ak * x) / ak / n + y0 * a**np.arange(1, len(x)+1)]

# Row-to-row change of the close; the first row is NaN.
df['change'] = df['Close'].diff()
# Gains: the change with negative values replaced by 0.
df['gain'] = df.change.mask(df.change < 0, 0.0)
# Losses: positive changes are replaced by -0.0 and the whole column is then
# negated, leaving the size of each drop as a non-negative number.
df['loss'] = -df.change.mask(df.change > 0, -0.0)
# Seed = sum of the first n+1 entries (the leading NaN is skipped by nansum)
# divided by n; the remaining entries from position n+1 on are smoothed by rma.
df['avg_gain'] = rma(df.gain[n+1:].to_numpy(), n, np.nansum(df.gain.to_numpy()[:n+1])/n)
df['avg_loss'] = rma(df.loss[n+1:].to_numpy(), n, np.nansum(df.loss.to_numpy()[:n+1])/n)
# Relative strength and the resulting index.
df['rs'] = df.avg_gain / df.avg_loss
df['rsi_14'] = 100 - (100 / (1 + df.rs))

In [18]:
def ema(n):
    """Exponential moving average of ``df['Close']`` with span ``n``.

    Uses the recursive form (``adjust=False``) and returns a NumPy array, so
    the first entry equals the first close.
    """
    smoothed = df['Close'].ewm(span=n, adjust=False, ignore_na=True).mean()
    return np.array(smoothed)


df['ema_20'] = ema(20)
df['ema_50'] = ema(50)

# MACD line: 12-span EMA minus 26-span EMA of the close.
df['macd'] = macd = ema(12) - ema(26)

# EMA of the MACD line.  ``n`` here is the module-level value assigned in the
# RSI cell (14), not a parameter of this cell.
# NOTE(review): the MACD signal line is conventionally a 9-span EMA --
# confirm that reusing n = 14 is intended.
df['exp'] = exp = df['macd'].ewm(span=n, adjust=False, ignore_na=True).mean()

In [19]:
# Keep only the indicator columns plus the trend label ('exp' is the EMA of 'macd').
indicators = df[['sma_10','wma_5','momentum_10','stck','stcd','rsi_14','ema_20','ema_50','macd','exp','cci_14','cci_21','cci_50','trend']]
indicators

Unnamed: 0_level_0,sma_10,wma_5,momentum_10,stck,stcd,rsi_14,ema_20,ema_50,macd,exp,cci_14,cci_21,cci_50,trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010-04-01,,,,,,,5290.500000,5290.500000,0.000000,0.000000,,,,-1
2010-04-05,,,,,,,5297.919038,5293.554898,6.214237,0.828565,,,,1
2010-04-06,,,,,,,5304.402939,5296.395883,10.820679,2.160847,,,,-1
2010-04-07,,,,,,,5311.093126,5299.464668,14.996422,3.872257,,,,1
2010-04-08,,5343.076692,,,,,5310.460466,5299.660178,12.497140,5.022241,,,,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-23,14871.840039,14739.223372,-283.650391,37.353746,30.127302,49.291263,14860.747498,14626.442215,-4.875541,70.515964,-53.684525,-38.239461,6.963110,1
2021-03-24,14816.940039,14684.070182,-625.399414,0.000000,21.105110,42.848913,14831.095392,14623.420967,-27.671392,57.424317,-111.351773,-110.649706,-37.485726,-1
2021-03-25,14731.950098,14565.543620,-706.049804,0.000000,12.451249,38.289239,14782.886345,14611.714278,-63.124877,41.351091,-161.479707,-177.035298,-88.105775,-1
2021-03-26,14679.585059,14523.346810,-422.200195,21.461294,7.153765,43.545582,14756.640008,14607.619592,-75.632073,25.753336,-104.071631,-119.227234,-62.271734,1


In [20]:
# Non-null values per column before rows with missing indicators are removed.
indicators.count()

sma_10         2685
wma_5          2690
momentum_10    2685
stck           2681
stcd           2679
rsi_14         2680
ema_20         2694
ema_50         2694
macd           2694
exp            2694
cci_14         2681
cci_21         2674
cci_50         2645
trend          2694
dtype: int64

In [21]:
# Drop every row in which at least one indicator is missing.
indicators = indicators.dropna(how='any',axis=0) 

In [22]:
# Non-null values per column after the rows with missing indicators were removed.
indicators.count()

sma_10         2645
wma_5          2645
momentum_10    2645
stck           2645
stcd           2645
rsi_14         2645
ema_20         2645
ema_50         2645
macd           2645
exp            2645
cci_14         2645
cci_21         2645
cci_50         2645
trend          2645
dtype: int64

In [23]:
# Summary statistics of the indicator columns, leaving out the trend label.
indicators.drop(columns='trend').describe()

Unnamed: 0,sma_10,wma_5,momentum_10,stck,stcd,rsi_14,ema_20,ema_50,macd,exp,cci_14,cci_21,cci_50
count,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0,2645.0
mean,8321.94845,8333.409202,32.657335,59.415354,59.425484,54.154687,8303.34791,8250.264506,25.765568,25.619369,19.350198,23.230316,34.897643
std,2485.256663,2494.077298,296.673529,37.561276,34.944314,12.488198,2465.464784,2413.729929,117.551862,106.515835,106.0686,108.655335,112.935668
min,4667.675,4625.610124,-3005.549805,0.0,0.0,12.941799,4753.103289,4855.949583,-1005.83746,-772.093159,-314.380437,-377.993237,-403.735743
25%,5897.95498,5900.506575,-106.15039,23.057501,26.102853,45.467887,5874.685784,5851.103895,-28.021717,-25.611419,-69.980268,-63.178499,-44.224531
50%,8199.395117,8205.479948,46.550293,69.235359,68.922283,55.026519,8206.877023,8242.003133,35.644696,35.394475,39.428904,44.844699,60.575528
75%,10475.695117,10501.906641,187.450195,97.589681,92.857498,63.412394,10492.635738,10490.967441,86.598831,78.184574,102.976288,106.321424,119.651333
max,15160.59502,15251.270052,1538.700196,100.0,100.0,82.169304,14957.464768,14626.442215,357.070016,324.517714,360.65071,352.533134,344.544991


In [24]:
# Write the indicator table to indicators/<input file name>.  The output
# folder is created first so the export does not fail when it is missing.
output_dir = Path('indicators')
output_dir.mkdir(parents=True, exist_ok=True)
indicators.to_csv(output_dir / file)