In [1]:
 # Libraries and pre-settings
import pandas as pd
import numpy as np
import yfinance as yf
import ta

import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'ta'

In [None]:
# Importing the dataset
# ABEV price history at weekly intervals (start='2012-01-01', end='2019-12-31'),
# with every column cast to float.
# auto_adjust=False is presumably what keeps the separate 'Adj Close' column that the
# later cells read, and multi_level_index=False presumably yields flat column names —
# verify both against the installed yfinance version.
df = yf.download(
    'ABEV',
    start='2012-01-01',
    end='2019-12-31',
    interval='1wk',
    auto_adjust=False,
    multi_level_index=False,
).astype(float)

In [None]:
# Features of the paper using TA

# Input series shared by the indicators below.
# NOTE(review): 'Adj Close' is combined with the 'High'/'Low' columns as downloaded;
# if those are not adjusted the same way, the MFI and stochastic inputs sit on
# different price scales — confirm this matches the paper.
_close = df['Adj Close']
_high, _low, _volume = df['High'], df['Low'], df['Volume']

macd = ta.trend.MACD(close=_close, window_slow=26, window_fast=12, window_sign=9)
rsi = ta.momentum.RSIIndicator(close=_close, window=14)
mfi = ta.volume.MFIIndicator(high=_high, low=_low, close=_close, volume=_volume, window=14)
so = ta.momentum.stoch(high=_high, low=_low, close=_close, window=14, smooth_window=3)

# Attach each indicator to the price frame under the column name the later cells use.
for _column, _values in (
    ('MACD', macd.macd()),
    ('RSI', rsi.rsi()),
    ('MFI', mfi.money_flow_index()),
    ('SO', so),
):
    df[_column] = _values

In [None]:
# Return and target columns
# 'Return' is the one-row percentage change of the adjusted close; 'Target' is the
# following row's 'Return', i.e. the quantity the models are asked to forecast.
df['Return'] = df['Adj Close'].pct_change()
df['Target'] = df['Return'].shift(-1)
# 1 when the next return is strictly positive, otherwise 0.
df['Target_cat'] = (df['Target'] > 0).astype(int)

# Drop every row containing a missing value, e.g. the first row (no 'Return') and the
# last row (no 'Target'); presumably also the indicators' warm-up rows.
# Done in place, so re-running this cell on the already-trimmed frame removes more rows.
df.dropna(inplace=True)

In [None]:
# Splitting the data
# Ordered split without shuffling: the first 80% of the rows train the models and the
# remaining 20% are held out for testing.

half = int(len(df)*0.8)  # row position of the train/test boundary (the 80% mark, despite the name)

# Take explicit copies: later cells add prediction and trade columns to df_test, and
# writing into a bare iloc slice of df is a chained assignment (SettingWithCopyWarning,
# with results that depend on whether pandas handed back a view or a copy).
df_train = df.iloc[:half].copy()
df_test = df.iloc[half:].copy()

# Model inputs: the four technical indicators; model output: next-row return.
feature_cols = ['MACD', 'RSI', 'MFI', 'SO']

x_train = df_train[feature_cols]
y_train = df_train['Target']

x_test = df_test[feature_cols]
y_test = df_test['Target']

In [None]:
# Feature normalization, although the difference in the results is negligible.
# The scaler is fitted on the training features only and then applied to both sets,
# so the test features are transformed with training-set statistics.

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(x_train)

# transform() returns a plain array; rebuild DataFrames with the original labels.
x_train = pd.DataFrame(scaler.transform(x_train), index=x_train.index, columns=x_train.columns)
x_test = pd.DataFrame(scaler.transform(x_test), index=x_test.index, columns=x_test.columns)

In [None]:
# Training the models
# Three regressors are fitted on the same training set to forecast 'Target':
# ordinary least squares, support-vector regression and a random forest.
# Their test-set forecasts are stored on df_test as y_pred_lr / y_pred_svr / y_pred_rfr.

import statsmodels.api as sm
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

# Seed NumPy's global RNG; the estimators below are created without their own seed.
np.random.seed(42)

# Linear regression: sm.OLS fits exactly the columns it is given, so the intercept
# is supplied as an explicit constant column on both the train and test designs.
x_train1 = sm.add_constant(x_train)
x_test1 = sm.add_constant(x_test)
reg = sm.OLS(y_train, x_train1).fit()

# Support-vector regression.
# NOTE(review): epsilon is expressed in the units of y_train (period returns); 0.2 looks
# wide for that scale — confirm it is the value the paper prescribes.
svr = SVR(C=1.0, epsilon=0.2)
svr.fit(x_train, y_train)

# Random forest with library defaults. Fitted after the SVR, as before, so that any
# draws from the global RNG happen in the same order.
rfr = RandomForestRegressor()
rfr.fit(x_train, y_train)

# Out-of-sample forecasts, one column per model.
for _column, _forecast in (
    ('y_pred_lr', reg.predict(x_test1)),
    ('y_pred_svr', svr.predict(x_test)),
    ('y_pred_rfr', rfr.predict(x_test)),
):
    df_test[_column] = _forecast

In [None]:
# Print the fitted OLS model's regression report as plain text.
ols_report = reg.summary()
print(ols_report.as_text())

In [None]:
# Trading rules
# Long/short rule driven by each model's forecast of the next-row return ('Target'):
#   forecast >= 0 -> long position,  realised return = +Target
#   forecast <  0 -> short position, realised return = -Target
# The position follows the sign of the forecast; with the branches the other way round
# the strategy would short exactly when the model expects a gain.
# NOTE(review): if the replicated paper really specifies the opposite (contrarian) rule,
# swap the two np.where branches back.

for _model in ('lr', 'svr', 'rfr'):
    df_test[f'Trade_{_model}'] = np.where(
        df_test[f'y_pred_{_model}'] >= 0,
        df_test['Target'],
        -df_test['Target'],
    )

In [None]:
# Data viz
# Cumulative returns in percent (running sum, not compounded) over the test period
# for each strategy and for the Buy & Hold benchmark.
# NOTE(review): the strategy columns are built from 'Target' (the next row's return),
# whereas Buy & Hold plots 'Return' (the current row's), so the benchmark is offset
# by one row relative to the strategies — confirm this is intended.

import plotly.graph_objects as go

fig = go.Figure()

# (df_test column, legend label, line colour), drawn in this order.
_traces = [
    ('Trade_svr', "Support Vector Regression", "blue"),
    ('Trade_rfr', "Random Forest Regression", "green"),
    ('Trade_lr', "Multiple Linear Regression", "red"),
    ('Return', "Buy & Hold", "black"),
]

for _column, _label, _colour in _traces:
    fig.add_scatter(
        x=df_test.index,
        y=df_test[_column].cumsum() * 100,
        mode="lines",
        name=_label,
        line=dict(color=_colour),
    )

fig.update_layout(
    title='Paper Settings',
    xaxis_title='Date',
    yaxis_title='Accumulated Returns',
    width=1000,
    height=600,
)

fig.show()