In [27]:
# Libraries and pre-settings
import pandas as pd
import numpy as np
import yfinance as yf
import ta
import warnings
warnings.filterwarnings('ignore')

In [28]:
# Importing the dataset
df = yf.download('ABEV', start='2012-01-01', end='2024-12-31', multi_level_index=False, interval='1wk', auto_adjust=False).astype(float)

[*********************100%***********************]  1 of 1 completed


In [29]:
# Features of the paper using TA

macd = ta.trend.MACD(close=df['Adj Close'], window_fast=12, window_slow=26, window_sign=9)
rsi = ta.momentum.RSIIndicator(close=df['Adj Close'], window=14)
mfi = ta.volume.MFIIndicator(high=df['High'], low=df['Low'], close=df['Adj Close'], volume=df['Volume'], window=14)
so = ta.momentum.stoch(high=df['High'], low=df['Low'], close=df['Adj Close'], window=14, smooth_window=3)


df['MACD'] = macd.macd()
df['RSI'] = rsi.rsi()
df['MFI'] = mfi.money_flow_index()
df['SO'] = so

# Filter features

df['MM15'] = df['Adj Close'].rolling(15).mean()
df['RSL'] = (df['Adj Close']/df['MM15']) -1

In [30]:
# Return and target columns

df['Return'] = df['Adj Close'].pct_change(1)
df['Target'] = df['Return'].shift(-1)
df['Target_cat'] = np.where(df['Target'] > 0, 1, 0)

df.dropna(axis=0, inplace=True)

In [31]:
# Splitting the data

df_train = df.loc[:'2016-03-20']
df_test = df.loc['2016-03-20':]

x_train = df_train[['MACD','RSI','MFI','SO']]
y_train = df_train['Target']

x_test = df_test[['MACD','RSI','MFI','SO']]
y_test = df_test['Target']

In [32]:
# Transforming the RSL feature

df_train['RSL'], bins = pd.qcut(df_train['RSL'], 8, retbins=True, labels=False)
df_test['RSL'] = pd.cut(df_test['RSL'], bins=bins, labels=False)

In [33]:
pd.pivot_table(data=df_train, index='RSL', aggfunc={'Target':sum,
                                                    'Target_cat':np.mean})

Unnamed: 0_level_0,Target,Target_cat
RSL,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.341675,0.56
1,0.135578,0.583333
2,-0.090067,0.44
3,-0.114115,0.333333
4,-0.045672,0.583333
5,-0.22096,0.36
6,-0.133703,0.458333
7,0.2195,0.52


In [34]:
# Feature normalization, although the difference in the results is negligible.

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

x_train = pd.DataFrame(scaler.fit_transform(x_train), columns = x_train.columns, index = x_train.index)
x_test = pd.DataFrame(scaler.transform(x_test), columns = x_test.columns, index = x_test.index)

In [35]:
# Training the models

import statsmodels.api as sm
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

np.random.seed(42)

x_train1 = sm.add_constant(x_train)
x_test1 = sm.add_constant(x_test)

svr = SVR(C=1.0, epsilon=0.2)
reg = sm.OLS(y_train, x_train1).fit()
rfr = RandomForestRegressor()


svr.fit(x_train, y_train)
rfr.fit(x_train, y_train)

df_test['y_pred_lr'] = reg.predict(x_test1)
df_test['y_pred_svr'] = svr.predict(x_test)
df_test['y_pred_rfr'] = rfr.predict(x_test)

In [36]:
print(reg.summary())

                            OLS Regression Results                            
Dep. Variable:                 Target   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                 -0.015
Method:                 Least Squares   F-statistic:                    0.2610
Date:                Mon, 17 Feb 2025   Prob (F-statistic):              0.903
Time:                        18:48:19   Log-Likelihood:                 364.29
No. Observations:                 196   AIC:                            -718.6
Df Residuals:                     191   BIC:                            -702.2
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0005      0.003      0.172      0.8

In [37]:
# Trading rules

df_test['Trade_lr'] = np.where((df_test['y_pred_lr'] >= 0) & (df_test['RSL'] <= 1), df_test['Target'], 0)
df_test['Trade_svr'] = np.where((df_test['y_pred_svr'] >= 0) & (df_test['RSL'] <= 1), df_test['Target'], 0)
df_test['Trade_rfr'] = np.where((df_test['y_pred_rfr'] >= 0) & (df_test['RSL'] <= 1), df_test['Target'], 0)

df_test['Trade_lr'] = np.where((df_test['y_pred_lr'] < 0) & (df_test['RSL'] == 5), -1*df_test['Target'], df_test['Trade_lr'])
df_test['Trade_svr'] = np.where((df_test['y_pred_svr'] < 0) & (df_test['RSL'] == 5), -1*df_test['Target'], df_test['Trade_svr'])
df_test['Trade_rfr'] = np.where((df_test['y_pred_rfr'] < 0) & (df_test['RSL'] == 5), -1*df_test['Target'], df_test['Trade_rfr'])

In [38]:
# Data viz

import plotly.graph_objects as go

fig = go.Figure()
fig.add_scatter(y = df_test['Trade_svr'].cumsum(), x = df_test.index, mode = "lines", name = "Support Vector Regression" , line = dict(color = "blue"))
fig.add_scatter(y = df_test['Trade_rfr'].cumsum(), x = df_test.index, mode = "lines", name = "Random Forest Regression" , line = dict(color = "green"))
fig.add_scatter(y = df_test['Trade_lr'].cumsum(), x = df_test.index, mode = "lines", name = "Multiple Linear Regression" , line = dict(color = "red"))
fig.add_scatter(y = df_test['Return'].cumsum(), x = df_test.index, mode = "lines", name = "Buy & Hold" , line = dict(color = "black"))

fig.update_layout(
    title='Optimization',
    xaxis_title='Date',
    yaxis_title='Accumulated Returns',
    height=600,
    width=1000
)

fig.show()