In [2]:
import pandas as pd
import pandas_ta as ta
import yfinance as yf
import matplotlib as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [3]:
# Pull the ^GSPC price history from Yahoo Finance for the requested date range
# and show the resulting frame as the cell output.
data = yf.download(
    tickers="^GSPC",
    start="2012-03-11",
    end="2023-08-09",
)
data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-03-12,1370.780029,1373.040039,1366.689941,1371.089966,1371.089966,3081870000
2012-03-13,1371.920044,1396.130005,1371.920044,1395.949951,1395.949951,4386470000
2012-03-14,1395.949951,1399.420044,1389.969971,1394.280029,1394.280029,4502280000
2012-03-15,1394.170044,1402.630005,1392.780029,1402.599976,1402.599976,4271650000
2012-03-16,1402.550049,1405.880005,1401.469971,1404.170044,1404.170044,5163950000
...,...,...,...,...,...,...
2023-08-02,4550.930176,4550.930176,4505.750000,4513.390137,4513.390137,4270710000
2023-08-03,4494.270020,4519.490234,4485.540039,4501.890137,4501.890137,4149120000
2023-08-04,4513.959961,4540.339844,4474.549805,4478.029785,4478.029785,4143310000
2023-08-07,4491.580078,4519.839844,4491.149902,4518.439941,4518.439941,3493920000


In [4]:
# Adding indicators
# Relative strength index: charts the current strength or weakness of the
# stock based on the closing prices of the recent trading period (length=15).
data['RSI'] = ta.rsi(data.Close, length=15)

# Exponential moving averages of the close over 20, 100 and 150 rows
# (presumably "fast", "medium" and "slow", going by the column suffixes).
data['EMAF'] = ta.ema(data.Close, length=20)
data['EMAM'] = ta.ema(data.Close, length=100)
data['EMAS'] = ta.ema(data.Close, length=150)

# Target: (Adj Close - Open) of the NEXT row. shift(-1) pulls the next row's
# value up by one position, so the final row has no target and becomes NaN.
data['Target'] = (data['Adj Close'] - data.Open).shift(-1)

# TargetClass: 1 when the next row's move is positive, otherwise 0.
# Vectorised on purpose: indexing a date-indexed Series with integers
# (data.Target[i]) relies on a positional fallback that pandas has deprecated.
# NaN > 0 evaluates to False, so rows without a target still map to 0.
data['TargetClass'] = (data['Target'] > 0).astype(int)

# TargetNextClose: the next row's adjusted close (NaN on the final row).
data['TargetNextClose'] = data['Adj Close'].shift(-1)

# Drop every row that contains a NaN (at least the final row, whose shifted
# targets are undefined; presumably also the indicator warm-up rows), move the
# date index into a column, then discard the columns not used downstream.
data = (
    data
    .dropna()
    .reset_index()
    .drop(columns=['Volume', 'Close', 'Date'])
)

In [5]:
# Lift the column-display limit so wide frames render without elision.
pd.set_option('display.max_columns', None)

# Positional selection of the first 11 columns of the prepared frame.
data_set = data.iloc[:, :11]
data_set.head(20)

Unnamed: 0,Open,High,Low,Adj Close,RSI,EMAF,EMAM,EMAS,Target,TargetClass,TargetNextClose
0,1432.819946,1443.900024,1432.819946,1432.839966,46.542416,1443.59987,1405.823514,1382.254066,-4.25,0,1428.589966
1,1432.839966,1438.430054,1425.530029,1428.589966,44.420706,1442.170355,1406.274334,1382.867786,11.380005,1,1440.130005
2,1428.75,1441.310059,1427.23999,1440.130005,50.928689,1441.976036,1406.944744,1383.626226,14.609985,1,1454.920044
3,1440.310059,1455.51001,1440.310059,1454.920044,57.72593,1443.208799,1407.89475,1384.570515,6.690063,1,1460.910034
4,1454.219971,1462.199951,1453.349976,1460.910034,60.122828,1444.894631,1408.944557,1385.581635,-3.599976,0,1457.339966
5,1460.939941,1464.02002,1452.630005,1457.339966,58.022041,1446.079901,1409.902882,1386.532076,-24.150024,0,1433.189941
6,1457.339966,1457.339966,1429.849976,1433.189941,46.297315,1444.852286,1410.364012,1387.150061,0.609985,1,1433.819946
7,1433.209961,1435.459961,1422.060059,1433.819946,46.598928,1443.801587,1410.828486,1387.768205,-20.630005,0,1413.109985
8,1433.73999,1433.73999,1407.560059,1413.109985,38.903386,1440.878577,1410.873664,1388.103858,-4.449951,0,1408.75
9,1413.199951,1420.040039,1407.099976,1408.75,37.506259,1437.818713,1410.831612,1388.377317,3.22998,1,1412.969971


In [6]:
# Rescale every column of data_set independently into the range [0, 1].
# NOTE(review): the scaler is fitted on all rows of data_set, including rows
# that are later assigned to the test split, so test-period min/max values
# influence the scaling - confirm this look-ahead is acceptable.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(data_set)
data_set_scaled = sc.transform(data_set)
print(data_set_scaled)

[[0.0230242  0.02408329 0.02603376 ... 0.48695693 0.         0.02185739]
 [0.02303    0.02250146 0.02391256 ... 0.53909866 1.         0.0252089 ]
 [0.0218449  0.02333431 0.02441012 ... 0.54987388 1.         0.0295043 ]
 ...
 [0.9101054  0.91349628 0.91430708 ... 0.3812718  0.         0.90749085]
 [0.91581072 0.91952566 0.91110917 ... 0.5907395  1.         0.91922697]
 [0.90932596 0.91359738 0.91593943 ... 0.50563887 1.         0.91369146]]


In [8]:
# Build the model inputs: several feature columns over a sliding look-back window.
backcandles = 30  # number of consecutive rows in each input window
print(data_set_scaled.shape[0])

# One sample per row position `end` >= backcandles: the `backcandles` rows that
# precede `end` (row `end` itself is excluded), restricted to the first 8
# columns. Stacking the 2-D slices yields shape (samples, backcandles, 8).
# NOTE(review): the window stops at row end-1 while the label is taken from
# row `end`, so row `end`'s own features are never seen - confirm intended.
X = np.stack([
    data_set_scaled[end - backcandles:end, :8]
    for end in range(backcandles, data_set_scaled.shape[0])
])

# Labels come from the last column; the first `backcandles` rows are skipped
# so that y[k] lines up with X[k]. Reshape to a single-column 2-D array.
yi = np.array(data_set_scaled[backcandles:, -1])
y = yi.reshape(len(yi), 1)

print(X)
print(X.shape)
print(y)
print(y.shape)

2721
[[[2.30241995e-02 2.40832928e-02 2.60337619e-02 ... 1.49511302e-02
   3.28147847e-04 0.00000000e+00]
  [2.30300003e-02 2.25014621e-02 2.39125589e-02 ... 1.45214298e-02
   4.70150579e-04 1.97429272e-04]
  [2.18448979e-02 2.33343148e-02 2.44101194e-02 ... 1.44630191e-02
   6.81320841e-04 4.41413702e-04]
  ...
  [9.97640088e-03 8.85772233e-03 1.25236894e-02 ... 0.00000000e+00
   1.10172426e-06 2.17642941e-03]
  [1.09152156e-02 1.40370264e-02 1.38738300e-02 ... 4.37694152e-04
   2.83265559e-05 2.26220157e-03]
  [1.61656383e-02 1.40341317e-02 1.58088385e-02 ... 7.51828082e-04
   3.71735020e-05 2.33465177e-03]]

 [[2.30300003e-02 2.25014621e-02 2.39125589e-02 ... 1.45214298e-02
   4.70150579e-04 1.97429272e-04]
  [2.18448979e-02 2.33343148e-02 2.44101194e-02 ... 1.44630191e-02
   6.81320841e-04 4.41413702e-04]
  [2.51945232e-02 2.74407204e-02 2.82132171e-02 ... 1.48335774e-02
   9.80560424e-04 7.45184353e-04]
  ...
  [1.09152156e-02 1.40370264e-02 1.38738300e-02 ... 4.37694152e-04
   2.

In [9]:
# Ordered 80/20 split with no shuffling: the first 80% of the samples form the
# training set and the remainder the test set.
splitlimits = int(0.8 * len(X))
print(splitlimits)

X_train, y_train = X[:splitlimits], y[:splitlimits]
X_test, y_test = X[splitlimits:], y[splitlimits:]

# Report the shapes in the order: X_test, X_train, y_train, y_test.
for part in (X_test, X_train, y_train, y_test):
    print(part.shape)

2152
(539, 30, 8)
(2152, 30, 8)
(2152, 1)
(539, 1)
