## Import Library

In [1]:
import seaborn as sns

import yfinance as yf
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import ta 
import plotly.graph_objects as go
import stock_lib as stlib

## Import data and clean up

In [2]:
data = yf.download("TSLA", period='60d', interval='5m')
data = data.reset_index()

psar = ta.trend.PSARIndicator(high=data['High'], low=data['Low'], close=data['Close'])
rsi = ta.momentum.RSIIndicator(close=data['Close'])
bb = ta.volatility.BollingerBands(close=data['Close'])
cmf = ta.volume.ChaikinMoneyFlowIndicator(high=data['High'], low=data['Low'], close=data['Close'], volume=data['Volume'])

data['psar'] = psar.psar()
data['rsi'] = rsi.rsi()
data['bb_width'] = bb.bollinger_wband()
data['cmf'] = cmf.chaikin_money_flow()

[*********************100%***********************]  1 of 1 completed


In [23]:
data1 = data.drop(['Adj Close', 'High', 'Low', 'Open', 'Volume'], axis=1)
data1['5h_close'] = data['Close'].diff(periods=-5)
# data1['5h_close'] = (data1['5h_close'].abs() / data['Close']) * 100 
data1['close_diff'] = data['Close'].diff(periods=10)
data1['rsi_diff'] = data['rsi'].diff(periods=10)
data1['cmf_diff'] = data['cmf'].diff(periods=10)
data1['psar_diff'] = data['psar'] - data['Close']
data1 = data1[data1['5h_close'] > 0]
data1['5h_close'] = (data1['5h_close'].abs() / data1['Close']) * 100 
data1.head(20)

Unnamed: 0,Datetime,Close,psar,rsi,bb_width,cmf,5h_close,close_diff,rsi_diff,cmf_diff,psar_diff
11,2021-06-24 10:25:00-04:00,685.309998,676.901994,,,,0.03013,9.419983,,,-8.408004
23,2021-06-24 11:25:00-04:00,694.86853,688.659973,81.597128,2.39247,0.403917,0.412816,8.369507,4.5127,,-6.208557
24,2021-06-24 11:30:00-04:00,694.192017,689.927979,78.606539,2.484115,0.317903,0.042062,8.452698,4.836812,,-4.264038
31,2021-06-24 12:05:00-04:00,695.240112,690.559998,70.474277,2.163697,0.210132,0.093742,3.667114,-7.338227,-0.142987,-4.680115
32,2021-06-24 12:10:00-04:00,695.830017,690.659398,71.433535,2.053858,0.282647,0.458446,3.850037,-6.894609,-0.140926,-5.170619
33,2021-06-24 12:15:00-04:00,696.669983,690.881423,72.789165,2.092604,0.221228,0.200951,1.801453,-8.807963,-0.182689,-5.78856
34,2021-06-24 12:20:00-04:00,695.95459,691.285737,69.753067,2.028301,0.197262,0.568225,1.762573,-8.853472,-0.12064,-4.668852
35,2021-06-24 12:25:00-04:00,695.945007,691.665793,69.711123,1.880235,0.186139,0.683244,3.585388,-1.303404,-0.086543,-4.279214
36,2021-06-24 12:30:00-04:00,694.588379,692.023045,63.856753,1.627332,0.168855,0.704499,2.488464,-6.126119,-0.141999,-2.565334
37,2021-06-24 12:35:00-04:00,692.640015,697.619995,56.515958,1.350274,0.164395,0.074355,-1.332703,-16.510102,-0.165909,4.97998


## Create Model

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPRegressor

In [17]:
clf = MLPRegressor(solver='adam', 
                    hidden_layer_sizes=(10, 100),
                    max_iter=1000,
                    random_state=1)

In [18]:
data1 = data1.dropna()
X = data1[['rsi', 'cmf', 'close_diff', 'rsi_diff', 'cmf_diff']].to_numpy()
y = data1[['5h_close']].to_numpy()

X = np.array(X)
y = np.array(y).ravel()

print(y.shape)

(2382,)


In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

clf.fit(X_train, y_train)
prediction = clf.predict(X_test)

In [20]:
clf.score(X, y)

0.058981235530592195

In [21]:
prediction[0:10]

array([0.34805319, 0.28051888, 0.25751742, 0.33575469, 0.28504981,
       0.29286981, 0.26789852, 0.28944764, 0.27599941, 0.28816846])

In [22]:
y_test[0:10]

array([0.13917147, 0.14345204, 0.18900562, 0.67711208, 0.84281154,
       0.04405181, 0.16177525, 0.16886596, 0.74824015, 0.06398164])

In [28]:
from sklearn import metrics

metrics.mean_absolute_error(y_test, prediction)

0.23329903928718518