## Market price prediction using Technical Indicators 

In [88]:
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
import pandas as pd
import matplotlib.pyplot as plt 
import statsmodels.formula.api as sm
from sklearn.model_selection import train_test_split
import numpy as np
from statsmodels.stats.stattools import durbin_watson
from statsmodels.regression.linear_model import OLS

In [61]:
# Feature columns to load: price/volume fields followed by the indicator columns.
# The names must match the CSV header exactly (including 'STOCKF-FastK');
# 'close' is not loaded into this frame.
feature_columns = [
    'open', 'high', 'low', 'volume',
    'SMA', 'EMA', 'WMA', 'DEMA', 'TEMA', 'TRIMA', 'KAMA',
    'MAMA-FAMA', 'MAMA-MAMA', 'T3',
    'MACD-MACD', 'MACD-MACD_Hist', 'MACD-MACD_Signal',
    'MACDEXT-MACD', 'MACDEXT-MACD_Hist', 'MACDEXT-MACD_Signal',
    'SlowD', 'SlowK', 'STOCHF-FastD', 'STOCKF-FastK',
    'RSI', 'STOCHRSI-FastD', 'STOCHRSI-FastK',
    'WILLR', 'ADX', 'ADXR', 'APO', 'PPO',
]
dataset = pd.read_csv("../Datasets/BSE.csv", usecols=feature_columns)
dataset.head()

Unnamed: 0,open,high,low,volume,SMA,EMA,WMA,DEMA,TEMA,TRIMA,...,STOCHF-FastD,STOCKF-FastK,RSI,STOCHRSI-FastD,STOCHRSI-FastK,WILLR,ADX,ADXR,APO,PPO
0,36304.4297,36389.2188,36082.9688,23300,35600.2137,35626.1063,35688.0792,35939.9651,36045.0672,35488.1156,...,89.8132,86.9458,68.7552,100.0,100.0,-13.0542,18.9468,19.0408,535.907,1.531
1,35997.2891,36253.8516,35946.2383,40500,35484.3457,35475.3678,35538.973,35708.2344,35755.7004,35453.6277,...,85.4471,93.8546,64.6462,80.2607,100.0,-6.1454,16.8818,18.0749,505.4852,1.4478
2,35635.5195,35822.1602,35605.3398,27500,35417.6309,35359.3853,35433.5806,35528.0176,35521.7972,35447.7939,...,70.7071,88.6392,58.9192,46.9273,100.0,-11.3608,15.0797,17.6229,496.6389,1.4246
3,35394.7695,35555.1602,35262.9688,19100,35338.698,35298.0221,35379.6131,35442.7651,35411.2731,35429.2964,...,42.7894,73.8474,54.5052,13.594,40.782,-26.6597,14.9778,18.2013,506.4294,1.4544
4,35118.0898,35397.2383,34896.0703,19200,35327.9699,35276.5227,35367.4677,35431.9313,35404.1068,35395.6194,...,22.294,49.6347,48.817,12.1071,0.0,-40.5319,16.4131,20.0469,492.9727,1.4155


In [62]:
# Dimensions of the loaded frame as (rows, columns).
dataset.shape

(2695, 32)

In [63]:
# Feature matrix: every column of `dataset` as a 2-D NumPy array.
X = dataset.values
# Target: the 'close' column, read on its own from the same CSV and kept 2-D
# (one column), so later cells index it as Y[:, 0].
Y = pd.read_csv("../Datasets/BSE.csv", usecols=['close']).values
# X and Y are now set for the analysis that follows.

### The Durbin-Watson autocorrelation test

Brief notes on the Durbin-Watson test. **Source**: [Wikipedia article on the Durbin-Watson statistic](https://en.wikipedia.org/wiki/Durbin%E2%80%93Watson_statistic)

\begin{equation}
d = \frac{\sum_{t=2}^{T}(e_t - e_{t-1})^2}{\sum_{t=1}^{T} e_t^2}
\end{equation}

**Here:**
**T** is the number of observations, and $e_t$ is the residual associated with the observation at time $t$ from an OLS regression.

The Durbin-Watson statistic measures how strongly each residual is related to the residual one time step before it (first-order autocorrelation).
The hypotheses for the Durbin-Watson test are:
H0 = no first-order autocorrelation.
H1 = first-order autocorrelation exists.

The Durbin-Watson test reports a test statistic with a value from 0 to 4, where:

   1. A value of 2 indicates no autocorrelation.
   2. 0 to <2 indicates positive autocorrelation (common in time series data).
   3. \>2 to 4 indicates negative autocorrelation (less common in time series data).


In [64]:
import math
def average(x):
    """Return the arithmetic mean of a non-empty sequence as a float.

    Raises:
        AssertionError: if ``x`` is empty.
    """
    assert len(x) > 0
    return float(sum(x)) / len(x)


def pearson_def(x, y):
    """Return the Pearson correlation coefficient of two equal-length sequences.

    Computed from the definition: the sum of products of deviations from the
    mean, divided by the square root of the product of the two sums of squared
    deviations.

    Args:
        x: indexable sequence of numbers (supports ``len`` and ``x[i]``).
        y: indexable sequence of numbers with the same length as ``x``.

    Returns:
        The correlation coefficient, or NaN when either input has zero
        variance (the coefficient is undefined in that case).

    Raises:
        AssertionError: if the lengths differ or the inputs are empty.
    """
    assert len(x) == len(y)
    n = len(x)
    assert n > 0
    avg_x = average(x)
    avg_y = average(y)
    diffprod = 0
    xdiff2 = 0
    ydiff2 = 0
    for idx in range(n):
        xdiff = x[idx] - avg_x
        ydiff = y[idx] - avg_y
        diffprod += xdiff * ydiff
        xdiff2 += xdiff * xdiff
        ydiff2 += ydiff * ydiff
    denominator = math.sqrt(xdiff2 * ydiff2)
    if denominator == 0:
        # A constant series has no variance: return NaN for every input type
        # instead of raising ZeroDivisionError for plain Python sequences.
        return float("nan")
    return diffprod / denominator

In [65]:
# Pearson correlation of every feature column against the closing price.
a = dataset.columns
closingpearson = list()
# Iterate over however many columns were loaded instead of a hard-coded 32.
for i in range(len(a)):
    # Compute the coefficient once and reuse it for the printed line and the list.
    corr = pearson_def(X[:, i], Y[:, 0])
    print(a[i] + " vs closing ---> " + str(corr))
    closingpearson.append(corr)

open vs closing ---> 0.9995536498595958
high vs closing ---> 0.9997959539123246
low vs closing ---> 0.9998344415913696
volume vs closing ---> -0.21500308287111963
SMA vs closing ---> 0.9705842378194656
EMA vs closing ---> 0.9764887442717711
WMA vs closing ---> 0.9760276451544717
DEMA vs closing ---> 0.9825054983212432
TEMA vs closing ---> 0.9864254649764018
TRIMA vs closing ---> 0.9656792547768334
KAMA vs closing ---> 0.9430966863164165
MAMA-FAMA vs closing ---> 0.9051751767593803
MAMA-MAMA vs closing ---> 0.9436593421446502
T3 vs closing ---> 0.9644817216437319
MACD-MACD vs closing ---> 0.26331058709187316
MACD-MACD_Hist vs closing ---> 0.160244285781585
MACD-MACD_Signal vs closing ---> 0.2208518232179358
MACDEXT-MACD vs closing ---> 0.15087348703680958
MACDEXT-MACD_Hist vs closing ---> 0.0619415891661173
MACDEXT-MACD_Signal vs closing ---> 0.1257721100594846
SlowD vs closing ---> 0.07972685241512724
SlowK vs closing ---> 0.08438360809596918
STOCHF-FastD vs closing ---> 0.084383608095

In [67]:
# Pearson correlation for every unordered pair of distinct feature columns.
a = dataset.columns
# Derive the column count from the data so the loop stays correct if the
# set of loaded columns changes.
n_features = len(a)
for i in range(n_features):
    # Start at i + 1: each pair is printed once and self-pairs are skipped.
    for j in range(i + 1, n_features):
        print(a[i] + " vs " + a[j] + "---->" + str(pearson_def(X[:, i], X[:, j])))

open vs high---->0.9998489236771398
open vs low---->0.9997425057790835
open vs volume---->-0.21403185593022903
open vs SMA---->0.971055084179541
open vs EMA---->0.9769924947688328
open vs WMA---->0.9765025463620001
open vs DEMA---->0.9829687900952694
open vs TEMA---->0.9868842027656654
open vs TRIMA---->0.9661418803161927
open vs KAMA---->0.9434855845171758
open vs MAMA-FAMA---->0.9052395600208177
open vs MAMA-MAMA---->0.9440987544120555
open vs T3---->0.9649901411663201
open vs MACD-MACD---->0.26293384621538446
open vs MACD-MACD_Hist---->0.15966884722986052
open vs MACD-MACD_Signal---->0.2207364940051338
open vs MACDEXT-MACD---->0.15008545994941846
open vs MACDEXT-MACD_Hist---->0.06100199787112238
open vs MACDEXT-MACD_Signal---->0.12554237124198833
open vs SlowD---->0.0774949819472908
open vs SlowK---->0.07723599414287173
open vs STOCHF-FastD---->0.07723599414287173
open vs STOCKF-FastK---->0.07537922393942449
open vs RSI---->0.27122858687949486
open vs STOCHRSI-FastD---->0.0452443999

TEMA vs MAMA-MAMA---->0.9610198257788937
TEMA vs T3---->0.9853285514479548
TEMA vs MACD-MACD---->0.26650235104454734
TEMA vs MACD-MACD_Hist---->0.12358673711220998
TEMA vs MACD-MACD_Signal---->0.24590436527421944
TEMA vs MACDEXT-MACD---->0.16737103261622707
TEMA vs MACDEXT-MACD_Hist---->0.07322237685640227
TEMA vs MACDEXT-MACD_Signal---->0.13639926383638404
TEMA vs SlowD---->0.07111388489524752
TEMA vs SlowK---->0.05483392916370099
TEMA vs STOCHF-FastD---->0.05483392916370099
TEMA vs STOCKF-FastK---->0.03154263713961058
TEMA vs RSI---->0.2356271061456948
TEMA vs STOCHRSI-FastD---->0.026270622785866535
TEMA vs STOCHRSI-FastK---->0.011675128975488289
TEMA vs WILLR---->0.07293246264386571
TEMA vs ADX---->0.12860579735152022
TEMA vs ADXR---->0.149178743559401
TEMA vs APO---->0.16737103261622707
TEMA vs PPO---->0.14257519649585407
TRIMA vs KAMA---->0.9713221613795778
TRIMA vs MAMA-FAMA---->0.9300767191984058
TRIMA vs MAMA-MAMA---->0.9718295304319768
TRIMA vs T3---->0.9987597804264446
TRIMA 

MACDEXT-MACD_Signal vs STOCHRSI-FastD---->-0.12334174057253243
MACDEXT-MACD_Signal vs STOCHRSI-FastK---->-0.10489371152907567
MACDEXT-MACD_Signal vs WILLR---->-0.044363404329331606
MACDEXT-MACD_Signal vs ADX---->-0.09192467355625385
MACDEXT-MACD_Signal vs ADXR---->-0.15052363194061696
MACDEXT-MACD_Signal vs APO---->0.7912856751696964
MACDEXT-MACD_Signal vs PPO---->0.7544929554101066
SlowD vs SlowK---->0.8886067984756486
SlowD vs STOCHF-FastD---->0.8886067984756486
SlowD vs STOCKF-FastK---->0.5336481534802848
SlowD vs RSI---->0.6662141317861848
SlowD vs STOCHRSI-FastD---->0.8416350090468291
SlowD vs STOCHRSI-FastK---->0.5205073725963952
SlowD vs WILLR---->0.7506359316759045
SlowD vs ADX---->-0.029043321959699137
SlowD vs ADXR---->-0.018112851583417924
SlowD vs APO---->0.10461623784505165
SlowD vs PPO---->0.10696775086110369
SlowK vs STOCHF-FastD---->1.0
SlowK vs STOCKF-FastK---->0.7973821436767013
SlowK vs RSI---->0.63057220139766
SlowK vs STOCHRSI-FastD---->0.8623068537379951
SlowK vs 

In [None]:
# Full feature-vs-feature Pearson correlation matrix, sized from X itself.
# rowvar=False makes NumPy treat each column of X as a variable and each row
# as an observation; the cast to float guards against a non-float array dtype.
pearsonmatrix = np.corrcoef(X.astype(float), rowvar=False)

In [68]:
# Display the feature-vs-feature correlation matrix computed in the previous cell.
pearsonmatrix

array([[ 1.        ,  0.99984892,  0.99974251, ...,  0.14700858,
         0.15008546,  0.12676591],
       [ 0.99984892,  1.        ,  0.99974922, ...,  0.14793185,
         0.14964603,  0.12618845],
       [ 0.99974251,  0.99974922,  1.        , ...,  0.14618918,
         0.15206395,  0.12859993],
       ...,
       [ 0.14700858,  0.14793185,  0.14618918, ...,  1.        ,
        -0.15841157, -0.17458039],
       [ 0.15008546,  0.14964603,  0.15206395, ..., -0.15841157,
         1.        ,  0.94649618],
       [ 0.12676591,  0.12618845,  0.12859993, ..., -0.17458039,
         0.94649618,  1.        ]])

In [92]:
# Durbin-Watson statistic computed over column 14 of X.
# NOTE(review): the statistic is defined on regression residuals, but a raw
# feature column is passed here (index 14; presumably 'MACD-MACD' given the
# column order shown earlier; confirm this is intended).
durbin_watson(X[:,14])

0.14620311093339552