In [16]:
# Import libs
from yahooquery import Ticker
import pandas as pd
import pandas_ta
from matplotlib import pyplot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
import plotly.express as px
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [17]:
symbol = "GGBR4.SA"

# Open a yahooquery handle for the ticker and request its 48-month price history
stock = Ticker(symbol)
history = stock.history(period="48mo")

# Move the "symbol" index level into an ordinary column, leaving the date as the index
history = history.reset_index(level=["symbol"])

# Rebuild the remaining index as a pandas DatetimeIndex
history.index = pd.DatetimeIndex(history.index)

In [18]:
# Columns of the price history that the rest of the notebook works with
features = ['adjclose', 'low', 'high', 'volume', 'close']

# Work on an independent copy so `history` itself is left untouched
data = history.loc[:, features].copy()

# Append a 21-period exponential moving average of the adjusted close
# (pandas_ta adds it as a new column on `data`)
data.ta.ema(close='adjclose', length=21, append=True)

# Drop every row that still contains a missing value
data = data.dropna()

In [19]:
# Text dump of the prepared feature table
print(data)

# Chart the adjusted close over the full downloaded history
# (uses `history`, not the NaN-filtered `data`)
fig = go.Figure()
fig.add_trace(go.Scatter(x=history.index, y=history['adjclose']))
fig.show()


             adjclose        low       high    volume      close     EMA_21
date                                                                       
2018-03-19  12.642447  14.920000  15.450000  10213100  15.030000  13.732243
2018-03-20  13.239660  15.020000  15.880000  16580500  15.740000  13.687463
2018-03-21  13.441539  15.670000  15.980000  10501000  15.980000  13.665106
2018-03-22  12.920026  15.190000  15.950000  15905400  15.360000  13.597372
2018-03-23  12.692915  15.000000  15.490000   9854700  15.090000  13.515148
...               ...        ...        ...       ...        ...        ...
2022-02-14  27.780001  27.459999  28.250000   8495200  27.780001  28.348887
2022-02-15  27.670000  27.020000  27.879999   7343800  27.670000  28.287170
2022-02-16  27.450001  27.320000  28.090000   9305700  27.450001  28.211064
2022-02-17  25.990000  25.959999  27.309999  20896900  25.990000  28.009149
2022-02-18  25.690001  25.650000  26.240000   2901600  25.690001  27.798317

[972 rows x

In [20]:
# Features evaluation
# Single feature (adjusted close) and single target (21-period EMA).
# Both are selected as one-column DataFrames, so the model is fit on 2-D inputs.
X = data[["adjclose"]]
Y = data[['EMA_21']]

# Fixed seed: without it the shuffled split -- and therefore the fitted
# coefficient and every score reported from it -- changes on each run.
SPLIT_SEED = 42

# Hold out 30% of the rows for testing.
# NOTE(review): train_test_split shuffles rows by default, so train and test
# dates are interleaved; pass shuffle=False if a chronological hold-out is wanted.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.30, train_size=0.70, random_state=SPLIT_SEED
)

# Fit ordinary least squares on the training rows
model = LinearRegression().fit(x_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(x_test)

In [21]:
# Report the fitted coefficient(s), the 5-fold cross-validated R2 on the
# training rows, and the R2 of the predictions on the held-out rows.
print("Features importance:")

# LinearRegression stores coef_ as a 2-D array when it was fit on a 2-D
# target. Flatten it so each loop value is a plain scalar; formatting a
# 1-element array with %.5f relies on an array-to-scalar conversion that
# newer NumPy releases deprecate.
importance = np.ravel(model.coef_)

# summarize feature importance
for i, v in enumerate(importance):
    print('Feature: %0d, Score: %.5f' % (i, v))

print("--------------------------------------------------------------------------------")

# Cross validation: cross_val_score refits clones of `model` on folds of the
# training data, so the already-fitted `model` itself is not modified.
cv_scores = cross_val_score(model, x_train, y_train, scoring='r2', cv=5)
print(f"Cross Validation: {cv_scores}")

print("--------------------------------------------------------------------------------")

print(f"R2 Score: {r2_score(y_test, y_pred)}")

Features importance:
Feature: 0, Score: 0.96614
--------------------------------------------------------------------------------
Cross Validation: [0.97307809 0.97169575 0.98193976 0.96438846 0.97377512]
--------------------------------------------------------------------------------
R2 Score: 0.9717465368547161


In [22]:
# Call .squeeze() on every split and on the predictions so length-1 axes are
# dropped, then rebind the original names to the squeezed objects.
df_list = list(
    map(
        lambda obj: obj.squeeze(),
        (x_train, x_test, y_train, y_test, y_pred),
    )
)

(x_train, x_test, y_train, y_test, y_pred) = df_list

In [23]:
# Plot linear regression result
fig = go.Figure([
    go.Scatter(x=x_train, y=y_train, name='train', mode='markers'),
    go.Scatter(x=x_test,  y=y_test, name='test', mode='markers'),
    go.Scatter(x=x_test,  y=y_pred,  name='prediction')
])

fig.show()

In [24]:
# Scatter of predictions against the test-set adjusted close, with a
# 5-point rolling trendline drawn by plotly express.
axis_labels = {'x': 'Adjusted Closing Price', 'y': 'Prediction'}
rolling_options = {'window': 5}

fig = px.scatter(
    x=x_test,
    y=y_pred,
    trendline="rolling",
    trendline_options=rolling_options,
    labels=axis_labels,
    title=f"Linear Regression Stock Prediction: {symbol}",
)

fig.show()