# Machine Learning Forecasting example notebook

Uses market data to predict stock price movement

In [1]:
import numpy as np
import pandas as pd
import yfinance as yf

In [2]:
api_object = yf.Ticker('BAC')
raw = api_object.history(period='10y')
market_data = pd.DataFrame(raw['Close'])

In [3]:
lags = 3
cols = []
for lag in range(1, lags + 1):
    col = 'lag_{}'.format(lag)
    market_data[col] = market_data['Close'].shift(lag)
    cols.append(col)
market_data.dropna(inplace=True)

In [4]:
market_data.head()

Unnamed: 0_level_0,Close,lag_1,lag_2,lag_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-12-08 00:00:00-05:00,14.41752,14.433845,14.05014,14.115452
2014-12-09 00:00:00-05:00,14.335875,14.41752,14.433845,14.05014
2014-12-10 00:00:00-05:00,14.188929,14.335875,14.41752,14.433845
2014-12-11 00:00:00-05:00,14.262403,14.188929,14.335875,14.41752
2014-12-12 00:00:00-05:00,13.984829,14.262403,14.188929,14.335875


In [5]:
train_size = int(len(market_data) * 0.7)
train, test = market_data[1:train_size], market_data[train_size:]

In [6]:
X_train, y_train = train.iloc[:, 1:], train.iloc[:, 0]
X_test, y_test = test.iloc[:, 1:], test.iloc[:, 0]

In [7]:
def model_scoring(y_test, y_hat):
    from sklearn.metrics import mean_squared_error
    print('Mean square error : ' + str(mean_squared_error(y_test, y_hat)))
    from sklearn.metrics import root_mean_squared_error
    print('Root mean square error : ' + str(root_mean_squared_error(y_test, y_hat)))
    from sklearn.metrics import r2_score
    print('R2 score : ' + str(r2_score(y_test, y_hat)))

In [17]:
from xgboost import XGBRegressor
xgb_model = XGBRegressor()
xgb_model.fit(X_train, y_train)
y_hat = xgb_model.predict(X_test)
model_scoring(y_test, y_hat)

Mean square error : 0.7420248289287475
Root mean square error : 0.8614086306328417
R2 score : 0.9752920723619187


In [18]:
from sklearn.neural_network import MLPRegressor
dnn_model = MLPRegressor(max_iter=1000)
dnn_model.fit(X_train, y_train)
y_hat = dnn_model.predict(X_test)
model_scoring(y_test, y_hat)

Mean square error : 0.37938063214127443
Root mean square error : 0.6159388217520263
R2 score : 0.9873673914392207


In [8]:
from sklearn import svm
svr_model = svm.SVR()
svr_model.fit(X_train, y_train)
y_hat = svr_model.predict(X_test)
model_scoring(y_test, y_hat)

Mean square error : 1.1075986778414124
Root mean square error : 1.052425141205498
R2 score : 0.9631192030768951
