In [None]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [None]:
import math

In [None]:
data = pd.read_csv('BSE_30.csv')

In [None]:
data['Symbol'].value_counts()

AXISBANK      2469
ITC           2469
SUNPHARMA     2469
YESBANK       2469
WIPRO         2469
ADANIPORTS    2469
MARUTI        2469
INFY          2469
HDFC          2469
KOTAKBANK     2469
INDUSINDBK    2469
LT            2469
TATAMOTORS    2469
ONGC          2469
RELIANCE      2469
TCS           2469
TATASTEEL     2469
BHARTIARTL    2469
HEROMOTOCO    2469
HDFCBANK      2469
POWERGRID     2469
ICICIBANK     2469
SBIN          2469
HINDUNILVR    2469
ASIANPAINT    2469
DRREDDY       2469
M&M           2469
BAJAJ-AUTO    2456
TATAMTRDVR    2345
COALINDIA     1852
Name: Symbol, dtype: int64

### Preparing the data

In [None]:
def RSI(series, period):
    delta = series.diff().dropna()
    u = delta * 0
    d = u.copy()
    u[delta > 0] = delta[delta > 0]
    d[delta < 0] = -delta[delta < 0]
    u[u.index[period-1]] = np.mean( u[:period] ) #first value is sum of avg gains
    u = u.drop(u.index[:(period-1)])
    d[d.index[period-1]] = np.mean( d[:period] ) #first value is sum of avg losses
    d = d.drop(d.index[:(period-1)])
    rs = pd.DataFrame.ewm(u, com=period-1, adjust=False).mean()/pd.DataFrame.ewm(d, com=period-1, adjust=False).mean()
    return 100 - (100/(1 + rs))

In [None]:
def prepare_data(my_data, x):
    my_data['RSI'] = RSI(my_data['Close'], 14)
    my_data['MACD'] = pd.DataFrame.ewm(my_data['Close'], span = 12).mean() - pd.DataFrame.ewm(my_data['Close'], span = 26).mean()
    my_data['MA5'] = pd.DataFrame.rolling(my_data['Close'], window = 6).mean()
    my_data['MA12'] = pd.DataFrame.rolling(my_data['Close'], window = 13).mean()
    my_data['MA5_disc'] = (my_data['Close'] - my_data['MA5'])/(my_data['MA5'] - (my_data['Close'])/6)
    my_data['MA12_disc'] = (my_data['Close'] - my_data['MA12'])/(my_data['MA12'] - (my_data['Close'])/13)
    my_data['PROC'] = 100*pd.DataFrame.pct_change(my_data['Close'], periods = 14)
    my_data['SOC'] = 100*(my_data['Close'] - pd.DataFrame.rolling(my_data['Low'], window = 14).min())/(pd.DataFrame.rolling(my_data['High'], window = 14).max() - pd.DataFrame.rolling(my_data['Low'], window = 14).min())
    my_data['Y'] = pd.DataFrame.pct_change(my_data['Close'])
    my_data.loc[my_data['Y']> x,'Y'] = 1
    my_data.loc[my_data['Y']< -1*x,'Y'] = -1
    my_data.loc[abs(my_data['Y']) != 1, 'Y'] = 0
    my_data.loc[my_data['MA5_disc']> 0.014,'MA5_disc'] = 1
    my_data.loc[my_data['MA5_disc']< -1*0.014,'MA5_disc'] = -1
    my_data.loc[abs(my_data['MA5_disc']) != 1, 'MA5_disc'] = 0    
    my_data.loc[my_data['MA12_disc']> 0.018,'MA12_disc'] = 1
    my_data.loc[my_data['MA12_disc']< -1*0.018,'MA12_disc'] = -1
    my_data.loc[abs(my_data['MA12_disc']) != 1, 'MA12_disc'] = 0
    my_data.dropna(inplace = True)
    return my_data[['RSI','MACD', 'MA5', 'MA12','MA5_disc', 'MA12_disc', 'PROC', 'SOC', 'Y']]


In [None]:
my_data = data.loc[data.Symbol == "ADANIPORTS"]

### Using grid search to find the optimum value of x which splits the decision variable equally

In [None]:
counts = pd.DataFrame()
for x in [0,0.2,0.4,0.6,0.8,1.2,1.4,1.6,1.8]:
    temp = prepare_data(my_data, 0.01*x)
    counts['x ='+str(x) + '%'] = temp['Y'].value_counts()/temp['Y'].count()*100  
counts

Unnamed: 0,x =0%,x =0.2%,x =0.4%,x =0.6%,x =0.8%,x =1.2%,x =1.4%,x =1.6%,x =1.8%
-1.0,49.850619,44.611421,40.950324,37.244676,33.624836,26.83678,23.638778,20.935412,18.287763
1.0,49.253094,45.556033,41.987041,37.809648,33.537385,26.924769,24.966799,22.316258,19.811744
0.0,0.896287,9.832546,17.062635,24.945676,32.837779,46.238451,51.394422,56.74833,61.900493


In [None]:
my_data = prepare_data(my_data, 0.008)

### EDA

In [None]:
print(my_data['MA5_disc'].value_counts())
print(my_data['MA12_disc'].value_counts())

 1.0    790
 0.0    785
-1.0    642
Name: MA5_disc, dtype: int64
 1.0    848
 0.0    712
-1.0    657
Name: MA12_disc, dtype: int64


In [None]:
my_data.head()

Unnamed: 0,RSI,MACD,MA5,MA12,MA5_disc,MA12_disc,PROC,SOC,Y
154,43.317337,-0.854014,58.161667,61.605385,1.0,1.0,-5.71345,56.265477,1.0
155,40.395297,-0.652916,59.020001,61.079232,1.0,1.0,-9.355018,46.557707,-1.0
156,39.288585,-0.538481,59.961667,60.594616,1.0,1.0,-10.976313,42.793453,-1.0
157,40.805287,-0.402618,61.188334,60.413847,1.0,1.0,-8.301447,46.062398,1.0
158,38.54285,-0.385684,62.411667,60.059231,-1.0,0.0,-5.862337,53.469405,-1.0


In [None]:
my_data.describe()

Unnamed: 0,RSI,MACD,MA5,MA12,MA5_disc,MA12_disc,PROC,SOC,Y
count,2217.0,2217.0,2217.0,2217.0,2217.0,2217.0,2217.0,2217.0,2217.0
mean,52.112713,0.99381,313.974102,313.489922,0.043302,0.074876,2.095299,53.357607,0.012179
std,12.346387,8.167138,141.41399,141.648457,0.814424,0.837299,10.683175,29.983597,0.828386
min,16.915279,-23.104885,26.427234,26.798631,-1.0,-1.0,-24.699421,1.313549,-1.0
25%,43.088494,-3.865393,205.215998,203.354923,-1.0,-1.0,-4.687719,26.351094,-1.0
50%,52.129092,1.371149,312.925003,314.819233,0.0,0.0,1.077009,55.895988,0.0
75%,61.773123,6.319819,431.796336,431.030771,1.0,1.0,7.450301,81.216924,1.0
max,87.138946,23.501777,588.87384,576.668006,1.0,1.0,74.767033,99.132144,1.0


In [None]:
my_data.to_csv('Tesla.csv')

### Discretisation techniques

In [None]:
my_data = pd.read_csv('Tesla.csv')

In [None]:
my_data['RSI_bin'] = pd.qcut(my_data['RSI'], q=5)
my_data['MA5_bin'] = pd.cut(my_data['MA5'], bins=10)
my_data['MACD_bin'] = pd.qcut(my_data['MACD'], q=5)
my_data['MA12_bin'] = pd.cut(my_data['MA12'], bins=10)
my_data['PROC_bin'] = pd.qcut(my_data['PROC'], q=5)
my_data['SOC_bin'] = pd.qcut(my_data['SOC'], q=5)

In [None]:
my_data[['RSI_bin','MA5_bin','MACD_bin', 'MA12_bin', 'PROC_bin', 'SOC_bin','Y']].to_csv('dataset.csv', index = False)

In [None]:
pCorrs = my_data[['RSI','MACD', 'MA5', 'MA12', 'PROC', 'SOC', 'Y']].corr(method = 'pearson').round(2)
plt.figure(figsize=(12,8))    
ax = sns.heatmap(pCorrs, cmap= plt.cm.RdBu)

# Correlation

In [None]:
pCorrs = my_data[['RSI','MACD', 'MA5', 'MA12', 'PROC', 'SOC', 'Y']].corr(method = 'kendall').round(2)
plt.figure(figsize=(12,8))    
ax = sns.heatmap(pCorrs, cmap= plt.cm.RdBu)

# Classifiers

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [None]:
my_data = pd.read_csv('MA1.8b.csv')
my_data.head()

Unnamed: 0,RSI,MACD,MA5_disc,MA12_disc,PROC,SOC,Y
0,67.973357,2.762031,0,1,15.349,68.220347,0
1,62.032024,2.402436,-1,0,9.979782,52.005648,-1
2,60.330327,1.991939,-1,0,7.056258,45.924845,0
3,51.790592,1.047833,-1,-1,0.491943,18.179167,-1
4,48.784944,0.060636,-1,-1,-5.593844,5.184122,-1


In [None]:
rfc = MLPClassifier()
x_train, x_test, y_train, y_test = train_test_split(my_data[['RSI', 'PROC']], my_data['Y'], test_size = 0.2)
rfc.fit(x_train,y_train)
y_pred = rfc.predict(x_test)
print(classification_report(y_test,y_pred))
print(accuracy_score(y_test,y_pred))

              precision    recall  f1-score   support

          -1       1.00      0.02      0.04        88
           0       0.60      0.99      0.75       278
           1       0.67      0.04      0.07       103

    accuracy                           0.60       469
   macro avg       0.76      0.35      0.29       469
weighted avg       0.69      0.60      0.47       469

0.6012793176972282


In [None]:
dataset = my_data.copy()

In [None]:
dataset.head()

Unnamed: 0,RSI,MACD,MA5,MA12,MA5_disc,MA12_disc,PROC,SOC,Y,RSI_bin,MA5_bin,MACD_bin,MA12_bin,PROC_bin,SOC_bin
154,43.317337,-0.854014,58.161667,61.605385,1.0,1.0,-5.71345,56.265477,1.0,"(42.662, 49.34]","(57.781, 96.2]","(-2.399, -0.0188]","(59.545, 97.057]","(-27.871000000000002, -5.712]","(43.623, 65.649]"
155,40.395297,-0.652916,59.020001,61.079232,1.0,1.0,-9.355018,46.557707,-1.0,"(20.249, 42.662]","(57.781, 96.2]","(-2.399, -0.0188]","(59.545, 97.057]","(-27.871000000000002, -5.712]","(43.623, 65.649]"
156,39.288585,-0.538481,59.961667,60.594616,1.0,1.0,-10.976313,42.793453,-1.0,"(20.249, 42.662]","(57.781, 96.2]","(-2.399, -0.0188]","(59.545, 97.057]","(-27.871000000000002, -5.712]","(22.449, 43.623]"
157,40.805287,-0.402618,61.188334,60.413847,1.0,1.0,-8.301447,46.062398,1.0,"(20.249, 42.662]","(57.781, 96.2]","(-2.399, -0.0188]","(59.545, 97.057]","(-27.871000000000002, -5.712]","(43.623, 65.649]"
158,38.54285,-0.385684,62.411667,60.059231,-1.0,0.0,-5.862337,53.469405,-1.0,"(20.249, 42.662]","(57.781, 96.2]","(-2.399, -0.0188]","(59.545, 97.057]","(-27.871000000000002, -5.712]","(43.623, 65.649]"


## Discretisation

In [None]:
dataset['RSI_bin'].unique()

[(42.662, 49.34], (20.249, 42.662], (49.34, 55.233], (61.804, 84.614], (55.233, 61.804]]
Categories (5, interval[float64]): [(20.249, 42.662] < (42.662, 49.34] < (49.34, 55.233] < (55.233, 61.804] < (61.804, 84.614]]

In [None]:
dict1 = {pd._libs.interval.Interval(61.287, 84.614):4, pd._libs.interval.Interval(54.876, 61.287):3, pd._libs.interval.Interval(48.988, 54.876):2, pd._libs.interval.Interval(41.987, 48.988):1, pd._libs.interval.Interval(16.924, 41.987):0}

In [None]:
dataset['RSI_bin'].replace(dict1, inplace = True)

In [None]:
dataset['MA5_bin'].unique()

[(57.781, 96.2], (96.2, 134.238], (134.238, 172.276], (172.276, 210.314], (210.314, 248.352], (248.352, 286.39], (286.39, 324.428], (324.428, 362.466], (362.466, 400.504], (400.504, 438.542]]
Categories (10, interval[float64]): [(57.781, 96.2] < (96.2, 134.238] < (134.238, 172.276] < (172.276, 210.314] ... (286.39, 324.428] < (324.428, 362.466] < (362.466, 400.504] < (400.504, 438.542]]

In [None]:
dict2 = {pd._libs.interval.Interval(171.388, 209.553):3, pd._libs.interval.Interval(56.512, 95.058):0, pd._libs.interval.Interval(95.058, 133.223):1,pd._libs.interval.Interval(133.223, 171.388):2,  pd._libs.interval.Interval(209.553, 247.718):4, pd._libs.interval.Interval(247.718, 285.882):5,pd._libs.interval.Interval(285.882, 324.047):6,pd._libs.interval.Interval(324.047, 362.212):7, pd._libs.interval.Interval(362.212, 400.377):8, pd._libs.interval.Interval(400.377, 438.542):9}
dataset['MA5_bin'].replace(dict2,inplace = True)

In [None]:
dataset['MA12_bin'].unique()

In [None]:
dict3 = {pd._libs.interval.Interval(56.236, 94.082):0, pd._libs.interval.Interval(94.082, 131.553):1, pd._libs.interval.Interval(131.553, 169.024):2, pd._libs.interval.Interval(169.024, 206.496):3,pd._libs.interval.Interval(206.496, 243.967):4, pd._libs.interval.Interval(243.967, 281.438):5,pd._libs.interval.Interval(281.438, 318.909):6, pd._libs.interval.Interval(318.909, 356.381):7, pd._libs.interval.Interval(356.381, 393.852):8,pd._libs.interval.Interval(393.852, 431.323):9}
dataset['MA12_bin'].replace(dict3,inplace = True)

In [None]:
dataset['MACD_bin'].unique()

In [None]:
dict4 = {pd._libs.interval.Interval(-13.443999999999999, -2.748):0,pd._libs.interval.Interval(-2.748, -0.265):1,pd._libs.interval.Interval(-0.265, 1.84):2,pd._libs.interval.Interval(1.84, 4.344):3,pd._libs.interval.Interval(4.344, 14.56):4}
dataset['MACD_bin'].replace(dict4,inplace = True)

In [None]:
dataset['PROC_bin'].unique()

In [None]:
dict5 = {pd._libs.interval.Interval(-34.361999999999995, -6.243):0, pd._libs.interval.Interval(-6.243, -1.251):1, pd._libs.interval.Interval(-1.251, 2.975):2, pd._libs.interval.Interval(2.975, 7.864):3,pd._libs.interval.Interval(7.864, 59.885):4}
dataset['PROC_bin'].replace(dict5,inplace = True)

In [None]:
dataset['SOC_bin'].unique()

In [None]:
dict6 = {pd._libs.interval.Interval(0.864, 20.597):0, pd._libs.interval.Interval(20.597, 42.183):1, pd._libs.interval.Interval(42.183, 64.074):2, pd._libs.interval.Interval(64.074, 81.623):3,pd._libs.interval.Interval(81.623, 98.404):4}
dataset['SOC_bin'].replace(dict6,inplace = True)

In [None]:
dataset.to_csv('dataset_.csv')

In [None]:
dataset.head()

In [None]:
rfc = MLPClassifier()
x_train, x_test, y_train, y_test = train_test_split(dataset[['RSI_bin','MACD_bin', 'MA5_disc', 'MA12_disc', 'PROC_bin', 'SOC_bin']], my_data['Y'], test_size = 0.33)
rfc.fit(x_train,y_train)
y_pred = rfc.predict(x_test)
print(classification_report(y_test,y_pred))
print(accuracy_score(y_test,y_pred))

## LSTMs

In [None]:
dataset = data.loc[data.Symbol == "ADANIPORTS"]

In [None]:
close_data = dataset.filter(['Close'])
close_data = close_data.values

In [None]:
training_data_len = math.ceil( len(dataset)*.8)

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1)) 
scaled_data = scaler.fit_transform(close_data)

In [None]:
train_data = scaled_data[0:training_data_len  , : ]
x_train=[]
y_train = []
for i in range(60,len(train_data)):
    x_train.append(train_data[i-60:i,0])
    y_train.append(train_data[i,0])

In [None]:
x_train, y_train = np.array(x_train), np.array(y_train)
unique, counts = np.unique(y_train, return_counts=True)
print(unique)
print(counts)

[0.         0.00231902 0.00279794 ...        nan        nan        nan]
[1 1 1 ... 1 1 1]


In [None]:
x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1))

In [None]:
#Build the LSTM network model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True,input_shape=(x_train.shape[1],1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=25))
model.add(Dense(units=1))

In [None]:
#Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')


In [None]:
#Train the model
model.fit(x_train, y_train, batch_size=1, epochs=1)


Epoch 1/1


<keras.callbacks.History at 0x7f4ad2c47be0>

In [None]:
#Test data set
test_data = scaled_data[training_data_len - 60: , : ]
x_test = []
y_test =  dataset[training_data_len : ] #Get all of the rows from index 1603 to the rest and all of the columns (in this case it's only column 'Close'), so 2003 - 1603 = 400 rows of data
for i in range(60,len(test_data)):
    x_test.append(test_data[i-60:i,0])

In [None]:
#Reshape the data into the shape accepted by the LSTM
x_test = np.array(x_test)

x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1],1))

In [None]:
y_pred = model.predict(x_test) 

In [None]:
y_pred = scaler.inverse_transform(y_pred)#Undo scaling


In [None]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
import math

In [None]:
from pmdarima import auto_arima

In [None]:
data = pd.read_csv('adani.csv')
data=data.dropna()
data=data[:2343]
data['Date'].value_counts()


3/19/2013     1
8/3/2015      1
11/12/2014    1
5/9/2011      1
3/11/2014     1
7/2/2015      1
11/27/2014    1
1/20/2016     1
8/25/2009     1
1/9/2015      1
3/20/2012     1
10/21/2014    1
6/2/2008      1
12/22/2016    1
9/5/2011      1
7/24/2015     1
1/21/2010     1
12/3/2008     1
9/21/2010     1
4/2/2009      1
11/30/2009    1
11/21/2011    1
7/19/2012     1
12/10/2008    1
6/26/2009     1
7/14/2016     1
8/14/2014     1
1/10/2017     1
4/7/2017      1
2/19/2013     1
             ..
6/18/2015     1
7/9/2008      1
9/16/2010     1
4/11/2012     1
12/26/2008    1
8/4/2011      1
12/30/2014    1
1/29/2014     1
12/15/2011    1
7/17/2009     1
5/31/2017     1
8/31/2010     1
6/29/2015     1
11/15/2012    1
3/12/2012     1
12/4/2008     1
3/25/2015     1
4/10/2015     1
3/17/2010     1
1/11/2017     1
10/17/2014    1
4/1/2015      1
2/18/2013     1
5/23/2013     1
5/29/2012     1
3/6/2013      1
2/15/2010     1
12/19/2016    1
3/29/2011     1
12/20/2012    1
Name: Date, Length: 2343

In [None]:
stepwise_fit = auto_arima(data['Close'], start_p = 1, start_q = 1, 
                          max_p = 3, max_q = 3, m = 12, 
                          start_P = 0, seasonal = True, 
                          d = None, D = 1, trace = True, 
                          error_action ='ignore',   # we don't want to know if an order does not work 
                          suppress_warnings = True,  # we don't want convergence warnings 
                          stepwise = True)           # set to stepwise 
  
# To print the summary 
stepwise_fit.summary() 

Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,1,1)[12] intercept   : AIC=inf, Time=107.61 sec
 ARIMA(0,0,0)(0,1,0)[12] intercept   : AIC=20544.448, Time=1.29 sec
 ARIMA(1,0,0)(1,1,0)[12] intercept   : AIC=15799.270, Time=31.50 sec
 ARIMA(0,0,1)(0,1,1)[12] intercept   : AIC=18364.660, Time=43.70 sec
 ARIMA(0,0,0)(0,1,0)[12]             : AIC=20554.771, Time=0.91 sec
 ARIMA(1,0,0)(0,1,0)[12] intercept   : AIC=16402.374, Time=3.96 sec
 ARIMA(1,0,0)(2,1,0)[12] intercept   : AIC=15527.601, Time=87.68 sec
 ARIMA(1,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=249.08 sec
 ARIMA(1,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=110.36 sec
 ARIMA(0,0,0)(2,1,0)[12] intercept   : AIC=20520.370, Time=64.56 sec
 ARIMA(2,0,0)(2,1,0)[12] intercept   : AIC=15525.840, Time=135.82 sec
 ARIMA(2,0,0)(1,1,0)[12] intercept   : AIC=15800.030, Time=53.10 sec
 ARIMA(2,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=300.36 sec
 ARIMA(2,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=112.61 sec
 ARIMA(3,0,0)(2,1,

KeyboardInterrupt: 

In [None]:
from sklearn.model_selection import train_test_split
y_train,y_test=train_test_split(data['Close'],test_size=0.15)

In [None]:
from statsmodels.tsa.arima_model import ARIMA 
  
model = ARIMA(y_train,  
                order = (4,0,5))
  
result = model.fit(disp=0) 
result.summary() 

0,1,2,3
Dep. Variable:,Close,No. Observations:,1991.0
Model:,"ARMA(4, 5)",Log Likelihood,-11757.419
Method:,css-mle,S.D. of innovations,88.79
Date:,"Thu, 03 Sep 2020",AIC,23536.838
Time:,18:54:52,BIC,23598.399
Sample:,0,HQIC,23559.447
,,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,191.9653,2.167,88.599,0.000,187.719,196.212
ar.L1.Close,-0.7163,0.419,-1.711,0.087,-1.537,0.104
ar.L2.Close,-0.4122,0.291,-1.417,0.156,-0.982,0.158
ar.L3.Close,0.2497,0.356,0.702,0.483,-0.448,0.947
ar.L4.Close,0.5931,0.291,2.037,0.042,0.022,1.164
ma.L1.Close,0.7207,0.419,1.720,0.085,-0.100,1.542
ma.L2.Close,0.4547,0.296,1.535,0.125,-0.126,1.035
ma.L3.Close,-0.2274,0.376,-0.604,0.546,-0.965,0.510
ma.L4.Close,-0.5685,0.300,-1.897,0.058,-1.156,0.019

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,-1.1757,-0.0000j,1.1757,-0.5000
AR.2,-0.3097,-0.9735j,1.0216,-0.2990
AR.3,-0.3097,+0.9735j,1.0216,0.2990
AR.4,1.3741,-0.0000j,1.3741,-0.0000
MA.1,-0.3098,-0.9619j,1.0106,-0.2996
MA.2,-0.3098,+0.9619j,1.0106,0.2996
MA.3,-1.1826,-0.0000j,1.1826,-0.5000
MA.4,1.4374,-0.0000j,1.4374,-0.0000
MA.5,27.8416,-0.0000j,27.8416,-0.0000


In [None]:
start=len(y_train)
end=len(y_train)+len(y_test)-1
predictions = result.predict(start,end, 
                             typ = 'levels').rename("Predictions") 
print(predictions)

1990    197.838392
1991    183.811004
1992    219.169994
1993    181.911670
1994    203.745371
1995    199.398645
1996    190.207264
1997    214.688725
1998    178.646383
1999    209.602080
2000    197.472053
2001    191.655556
2002    210.530462
2003    180.284036
2004    214.235835
2005    192.133234
2006    193.534337
2007    209.017581
2008    183.036118
2009    214.919503
2010    186.857846
2011    198.274799
2012    207.554318
2013    184.161849
2014    213.897358
2015    184.472811
2016    203.764423
2017    203.742358
2018    185.059641
2019    213.670053
           ...    
2312    193.383716
2313    203.688654
2314    200.358769
2315    195.684109
2316    206.811566
2317    192.930384
2318    205.029354
2319    198.684443
2320    197.252384
2321    205.972119
2322    192.880391
2323    206.057656
2324    197.129834
2325    198.931753
2326    204.828392
2327    193.227217
2328    206.726971
2329    195.779106
2330    200.628221
2331    203.453390
2332    193.942630
2333    207.

In [None]:
from sklearn.metrics import mean_squared_error 
from statsmodels.tools.eval_measures import rmse
rmse(y_test,predictions)

86.70260268301074

In [None]:
data1=pd.read_csv('adaniports_newMA.csv')
data1=data1['Y']
predictions[2300]

192.49962525175548

In [None]:
predi=[]
for i in range(1991,2341):
    if (predictions[i]-predictions[i-1]>1.6):
        predi.append(1)
    if (predictions[i]-predictions[i-1]<-1.6):
        predi.append(-1)
    else :
        predi.append(0)
       

In [None]:
print(predi)

[-1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 0, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 0, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 0, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, 1, 0, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, -1, 1, 0, -1, 1, 0, -1, 0, 1, 0, -1, 1, 0, -1, 

In [None]:
c=0
for i in range(1991,2341):
    if (predi[i-1991]==data1[i]):
       c=c+1
print(c)
accuracy=c/len(y_test)
print(accuracy)

108
0.3068181818181818


In [None]:
from sklearn.metrics import mean_squared_error 
from statsmodels.tools.eval_measures import rmse
rmse(y_test,predictions)

86.70260268301074

In [None]:
from arch import arch_model

In [None]:
model=arch_model(y_train, vol='GARCH', p=1,o=0,q=1, dist='Normal')
results=model.fit()
print(results.summary())


Iteration:      1,   Func. Count:      6,   Neg. LLF: 16165.340269880759
Iteration:      2,   Func. Count:     14,   Neg. LLF: 11764.179693413222
Iteration:      3,   Func. Count:     19,   Neg. LLF: 11764.191979851897
Iteration:      4,   Func. Count:     25,   Neg. LLF: 11764.179467650592
Iteration:      5,   Func. Count:     30,   Neg. LLF: 11764.179457442584
Iteration:      6,   Func. Count:     34,   Neg. LLF: 11764.179457442577
Optimization terminated successfully    (Exit mode 0)
            Current function value: 11764.179457442584
            Iterations: 6
            Function evaluations: 34
            Gradient evaluations: 6
                     Constant Mean - GARCH Model Results                      
Dep. Variable:                  Close   R-squared:                      -0.000
Mean Model:             Constant Mean   Adj. R-squared:                 -0.000
Vol Model:                      GARCH   Log-Likelihood:               -11764.2
Distribution:                  Normal 

In [None]:
forecasts = results.forecast(horizon=30, method='simulation', simulations=1000)
sims = forecasts.simulations
print(sims)

<function print>

In [None]:
x_train, x_test, y_train, y_test = train_test_split(my_data[['RSI','MACD', 'MA5_disc', 'MA12_disc', 'PROC', 'SOC']], my_data['Y'], test_size = 0.33)
rfc.fit(x_train,y_train)
y_pred = rfc.predict(x_test)
print(classification_report(y_test,y_pred))
print(accuracy_score(y_test,y_pred))