# Risk Factors :

Sob a perspectiva de evitar fazer cálculos desnecessários, aumentando o risco de erros de validação, e tratamento dos dados de mercado, foram utilizados o conjunto de dados a respeito dos Fatores fornecidos pelo *NEFIN* : https://nefin.com.br/data/risk_factors.html . O conjunto de dados é dividido em 6 arquivos separados, desde $02$ de Janeiro de 2001

In case there is an error in the read_excel of ".xls" files:

In [1]:
#!pip install xlrd

### Library

In [2]:
# Initial Imports:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')



# To run models:
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from joblib import dump, load
# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# For visualizations:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


# Project files
from evaluation import *
from portfolio import *

### Data 

In [3]:
# Rm - Market Factor
MKT = pd.read_excel("./data/risk_factors/Market_Factor.xls",index_col=None)

# High minus low - Value Factor
HML = pd.read_excel("./data/risk_factors/HML_Factor.xls",index_col=None)

# Illiquid Minus Liquid - Liquidity Factor
IML = pd.read_excel("./data/risk_factors/IML_Factor.xls",index_col=None)

#Small minus big - Size Factor
SMB = pd.read_excel("./data/risk_factors/SMB_Factor.xls",index_col=None)

#Winners Minus Loser - Momentum Factor
WML = pd.read_excel("./data/risk_factors/WML_Factor.xls",index_col=None)

# Daily Risk Free - Rf
RF = pd.read_excel("./data/risk_factors/Risk_Free.xls", index_col = None)

# TEST stock:
stocks_folder = "./data/stocks"

In [4]:
HML.columns

Index(['year', 'month', 'day', 'HML'], dtype='object')

### Functions:

In [5]:
stock = choose_stock("ITUB3",stocks_folder)

Index(['Data', 'Fech Ajustado', 'Variação(%)', 'Fech Histórico',
       'Abertura Ajustado', 'Mín Ajustado', 'Médio Ajustado', 'Máx Ajustado',
       'Vol (MM R$)', 'Negócios', 'Fator', 'Tipo'],
      dtype='object')


In [6]:
stock.columns

Index(['Fech Ajustado', 'Variação(%)', 'Fech Histórico', 'Abertura Ajustado',
       'Mín Ajustado', 'Médio Ajustado', 'Máx Ajustado', 'Vol (MM R$)',
       'Negócios', 'Fator', 'Tipo'],
      dtype='object')

In [7]:
stock_prepared = prepare_portfolio(stock,1)

In [8]:
# analyse_stock(stock)

In [9]:
stock_prepared.head()

Unnamed: 0_level_0,Close,Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023/04/28,22.07235,1.191022
2023/04/27,21.812558,1.771562
2023/04/26,21.432861,-0.786309
2023/04/25,21.602726,0.886608
2023/04/24,21.412877,-0.832948


In [10]:
stock.columns

Index(['Fech Ajustado', 'Variação(%)', 'Fech Histórico', 'Abertura Ajustado',
       'Mín Ajustado', 'Médio Ajustado', 'Máx Ajustado', 'Vol (MM R$)',
       'Negócios', 'Fator', 'Tipo'],
      dtype='object')

In [11]:
HML.columns

Index(['year', 'month', 'day', 'HML'], dtype='object')

In [12]:
HML = pre_processing(HML)
MKT = pre_processing(MKT)
IML = pre_processing(IML)
SMB = pre_processing(SMB)
WML = pre_processing(WML)
RF = pre_processing(RF)

In [13]:
factors = pd.concat([MKT,HML,IML,SMB,WML,RF],axis=1)
factors.head()

Unnamed: 0_level_0,Rm_minus_Rf,HML,IML,SMB,WML,Risk_free
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001/01/02,0.006601,0.06549,0.014109,0.000524,-0.006308,0.000579
2001/01/03,0.062427,0.00939,0.00451,0.00539,-0.028644,0.000577
2001/01/04,-0.00031,-0.002327,-0.009227,0.00669,-0.000946,0.000574
2001/01/05,-0.012839,-0.002397,0.025124,0.003523,0.005985,0.000573
2001/01/08,0.003982,0.001948,-0.001175,0.007883,-0.004099,0.000573


In [14]:
MKT.head(10)

Unnamed: 0_level_0,Rm_minus_Rf
date,Unnamed: 1_level_1
2001/01/02,0.006601
2001/01/03,0.062427
2001/01/04,-0.00031
2001/01/05,-0.012839
2001/01/08,0.003982
2001/01/09,0.019986
2001/01/10,-0.004368
2001/01/11,0.004694
2001/01/12,-0.006783
2001/01/15,0.00511


In [15]:
factors.tail()

Unnamed: 0_level_0,Rm_minus_Rf,HML,IML,SMB,WML,Risk_free
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023/04/24,-0.003781,0.003445,0.002344,0.002098,-0.008456,0.000508
2023/04/25,-0.006295,-0.004699,-0.001941,-0.008449,0.008281,0.000508
2023/04/26,-0.007424,0.007253,0.012345,0.01094,0.00571,0.000508
2023/04/27,0.005182,0.010677,0.003433,0.007564,-0.01,0.000508
2023/04/28,0.014016,0.010166,0.004222,0.008966,-0.02214,0.000508


In [16]:
stock.tail()

Unnamed: 0_level_0,Fech Ajustado,Variação(%),Fech Histórico,Abertura Ajustado,Mín Ajustado,Médio Ajustado,Máx Ajustado,Vol (MM R$),Negócios,Fator,Tipo
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2001/01/08,0.949153,0,189,0.949153,0.949153,0.949153,0.949153,0.12474,2,1000,ON *EJ
2001/01/05,0.949153,0,189,0.949153,0.949153,0.949153,0.949153,0.01701,3,1000,ON *EJ
2001/01/04,0.949153,2.162162,189,0.949153,0.949153,0.949153,0.949153,0.02268,1,1000,ON *EJ
2001/01/03,0.929065,nd,185,0.929065,0.929065,0.929065,0.929065,0.00555,1,1000,ON *EJ
2001/01/02,nd,nd,nd,nd,nd,nd,nd,nd,nd,nd,nd


In [17]:
# concatenating two Dataframes:
combined_df = merge_portifolio(stock_prepared, factors)

In [18]:
combined_df

Unnamed: 0_level_0,Mkt-RF,HML,IML,SMB,WML,Close,Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001/01/04,-0.000310,-0.002327,-0.009227,0.006690,-0.000946,0.949153,2.162162
2001/01/05,-0.012839,-0.002397,0.025124,0.003523,0.005985,0.949153,0.000000
2001/01/08,0.003982,0.001948,-0.001175,0.007883,-0.004099,0.949153,0.000000
2001/01/09,0.019986,-0.000916,-0.003978,0.005928,0.009564,0.949153,0.000000
2001/01/10,-0.004368,0.012468,0.022134,0.013150,-0.000691,0.949153,0.000000
...,...,...,...,...,...,...,...
2023/04/24,-0.003781,0.003445,0.002344,0.002098,-0.008456,21.412877,-0.832948
2023/04/25,-0.006295,-0.004699,-0.001941,-0.008449,0.008281,21.602726,0.886608
2023/04/26,-0.007424,0.007253,0.012345,0.010940,0.005710,21.432861,-0.786309
2023/04/27,0.005182,0.010677,0.003433,0.007564,-0.010000,21.812558,1.771562


## Split Train / Test Method:


In order to preserve the temporal order of the data:

In [19]:
def split_data(data,rate=0.8):
    # Define X and y variables:
    X = data.drop('Returns', axis=1)
    X = X.drop('Close',axis=1)
    y = data.loc[:, 'Returns']
    # Split into Training/Testing Data:
    split = int(rate * len(X))
    X_train = X[: split]
    X_test = X[split:]
    y_train = y[: split]
    y_test = y[split:]
#     close_test=data["Close"][split:]
#     close_test
    return X_train, X_test, y_train, y_test

In [20]:
X_train, X_test, y_train, y_test = split_data(combined_df,rate=0.8)

## Make predictions:

In [21]:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [22]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [23]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()


NameError: name 'close_test' is not defined

In [None]:
# Generate and view signals dataframe using generate signals function
signals_df=generate_signals(y_test)
signals_df.head(10)

In [None]:
algo_evaluation(signals_df)

In [None]:
# Generate Metrics for Function vs. Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Generate Evaluation table:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

## ANOVA Table / Other Visualizations for 3-Factor Models:

### ATT

In [None]:
# To run models:
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from joblib import dump, load

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:

# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()

In [None]:
# Plot P&L Histrogram:
trade_evaluation_df["Profit/Loss"].hist(bins=20)

In [None]:

# Generate Cumulative Return plot using above defined function:
underlying_returns(signals_df).plot(figsize=(20,10))

# VALE3


In [None]:
ticker = "VALE3"
stock = choose_stock(ticker,stocks_folder)
stock.head()

In [None]:
stock_prepared = prepare_portfolio(stock,1)
stock_prepared.head()

In [None]:
# concatenating two Dataframes:
combined_df = merge_portifolio(stock_prepared, factors)
combined_df.head()

In [None]:
X_train, X_test, y_train, y_test = split_data(combined_df,rate=0.8)

In [None]:
# Create, train, and predict model:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [None]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()

In [None]:
# Generate signals Dataframe using generate signals function
signals_df=generate_signals(y_test)

In [None]:
# Generate Metrics table for Algorithm:
algo_evaluation(signals_df)

In [None]:
# Generate Metrics table for Algorithm vs. Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Generate Metrics table for Disney using pre-defined function:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()


In [None]:
# Plot Cumulative Returns:
underlying_returns(signals_df).plot(figsize=(20,10))