In [26]:
import pandas as pd
import numpy as np

# Step 1 - import data

In [27]:
# Load the two raw inputs: the Fama-French daily factor file (CSV) and the
# daily closing-price file (Parquet).
ff_csv_path = "data+files/Fama_French_daily.csv"
prices_parquet_path = "data+files/sp500_daily_prices.parquet"

FF = pd.read_csv(ff_csv_path)  # Fama-French dataset
daily_prices = pd.read_parquet(prices_parquet_path)

Mkt: Refere-se ao retorno do mercado. Este é o retorno de um portfólio amplo de ações, como o retorno do mercado de ações em geral (por exemplo, S&P 500).
RF: Refere-se à taxa livre de risco (Risk-Free rate). Esta é a taxa de retorno de um investimento considerado livre de risco, como os títulos do governo dos EUA de curto prazo.
Mkt_RF: É o retorno excedente do mercado sobre a taxa livre de risco, isto é, Mkt_RF = Mkt − RF (logo, Mkt = Mkt_RF + RF).

In [28]:
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF
0,19260701,0.10,-0.25,-0.27,0.009
1,19260702,0.45,-0.33,-0.06,0.009
2,19260706,0.17,0.30,-0.39,0.009
3,19260707,0.09,-0.58,0.02,0.009
4,19260708,0.21,-0.38,0.19,0.009
...,...,...,...,...,...
25518,20230626,-0.48,-0.32,1.30,0.019
25519,20230627,1.21,0.47,-0.51,0.019
25520,20230628,0.04,0.55,-0.40,0.019
25521,20230629,0.44,0.53,0.77,0.019


In [29]:
daily_prices

Unnamed: 0,date,close,symbol
2544,2010-01-04,31.30,A
2545,2010-01-05,30.96,A
2546,2010-01-06,30.85,A
2547,2010-01-07,30.81,A
2548,2010-01-08,30.80,A
...,...,...,...
24292494,2023-08-01,185.51,ZTS
24292495,2023-08-02,182.00,ZTS
24292496,2023-08-03,180.88,ZTS
24292497,2023-08-04,178.63,ZTS


## Step 2: Cleaning, Calculations and Merging
1) Parse dates
2) Fama-French Dataset - Calculate Market Return and rescale
3) Daily Stocks Dataset - Calculate daily returns
4) Merge
5) Calculate daily excess returns (stock return minus market return)
6) Detect and delete outliers
7) Exploratory statistics

In [30]:
FF.head()

Unnamed: 0,date,Mkt_RF,SMB,HML,RF
0,19260701,0.1,-0.25,-0.27,0.009
1,19260702,0.45,-0.33,-0.06,0.009
2,19260706,0.17,0.3,-0.39,0.009
3,19260707,0.09,-0.58,0.02,0.009
4,19260708,0.21,-0.38,0.19,0.009


In [31]:
# Convert the YYYYMMDD date stamps (e.g. 19260701) into pandas datetime values.
FF["date"] = pd.to_datetime(FF["date"], format="%Y%m%d")

In [32]:
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF
0,1926-07-01,0.10,-0.25,-0.27,0.009
1,1926-07-02,0.45,-0.33,-0.06,0.009
2,1926-07-06,0.17,0.30,-0.39,0.009
3,1926-07-07,0.09,-0.58,0.02,0.009
4,1926-07-08,0.21,-0.38,0.19,0.009
...,...,...,...,...,...
25518,2023-06-26,-0.48,-0.32,1.30,0.019
25519,2023-06-27,1.21,0.47,-0.51,0.019
25520,2023-06-28,0.04,0.55,-0.40,0.019
25521,2023-06-29,0.44,0.53,0.77,0.019


In [33]:
# Recover the total market return by adding the risk-free rate back onto the
# market's excess return.
FF["Mkt"] = FF["Mkt_RF"].add(FF["RF"])

In [34]:
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF,Mkt
0,1926-07-01,0.10,-0.25,-0.27,0.009,0.109
1,1926-07-02,0.45,-0.33,-0.06,0.009,0.459
2,1926-07-06,0.17,0.30,-0.39,0.009,0.179
3,1926-07-07,0.09,-0.58,0.02,0.009,0.099
4,1926-07-08,0.21,-0.38,0.19,0.009,0.219
...,...,...,...,...,...,...
25518,2023-06-26,-0.48,-0.32,1.30,0.019,-0.461
25519,2023-06-27,1.21,0.47,-0.51,0.019,1.229
25520,2023-06-28,0.04,0.55,-0.40,0.019,0.059
25521,2023-06-29,0.44,0.53,0.77,0.019,0.459


In [35]:
# Divide every column from "Mkt_RF" through "Mkt" (label slice, both endpoints
# inclusive) by 100.
# NOTE(review): this cell is not idempotent — running it a second time divides
# the same columns by 100 again.
scaled_cols = FF.loc[:, "Mkt_RF":"Mkt"].columns
FF[scaled_cols] = FF[scaled_cols] / 100

In [36]:
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF,Mkt
0,1926-07-01,0.0010,-0.0025,-0.0027,0.00009,0.00109
1,1926-07-02,0.0045,-0.0033,-0.0006,0.00009,0.00459
2,1926-07-06,0.0017,0.0030,-0.0039,0.00009,0.00179
3,1926-07-07,0.0009,-0.0058,0.0002,0.00009,0.00099
4,1926-07-08,0.0021,-0.0038,0.0019,0.00009,0.00219
...,...,...,...,...,...,...
25518,2023-06-26,-0.0048,-0.0032,0.0130,0.00019,-0.00461
25519,2023-06-27,0.0121,0.0047,-0.0051,0.00019,0.01229
25520,2023-06-28,0.0004,0.0055,-0.0040,0.00019,0.00059
25521,2023-06-29,0.0044,0.0053,0.0077,0.00019,0.00459


In [37]:
daily_prices.head()

Unnamed: 0,date,close,symbol
2544,2010-01-04,31.3,A
2545,2010-01-05,30.96,A
2546,2010-01-06,30.85,A
2547,2010-01-07,30.81,A
2548,2010-01-08,30.8,A


In [38]:
# Daily simple return per ticker: close_t / close_(t-1) - 1.
#
# Rows are ordered by (symbol, date) before differencing so that each return is
# measured against the previous available date of the same symbol, even if the
# input file is not stored chronologically. The result is assigned back by
# index alignment, so the row order of `daily_prices` itself is unchanged
# (this relies on the frame's index labels being unique).
#
# fill_method=None leaves missing closes as NaN instead of forward-filling them
# before the change is computed; the implicit forward-fill default is
# deprecated in recent pandas releases.
daily_prices["ret"] = (
    daily_prices
    .sort_values(["symbol", "date"])
    .groupby("symbol")["close"]
    .pct_change(periods=1, fill_method=None)
)

In [39]:
daily_prices

Unnamed: 0,date,close,symbol,ret
2544,2010-01-04,31.30,A,
2545,2010-01-05,30.96,A,-0.010863
2546,2010-01-06,30.85,A,-0.003553
2547,2010-01-07,30.81,A,-0.001297
2548,2010-01-08,30.80,A,-0.000325
...,...,...,...,...
24292494,2023-08-01,185.51,ZTS,-0.013717
24292495,2023-08-02,182.00,ZTS,-0.018921
24292496,2023-08-03,180.88,ZTS,-0.006154
24292497,2023-08-04,178.63,ZTS,-0.012439


In [40]:
# Attach the market return to every price row by date.
# - how="inner" keeps only dates present in both frames, so price rows whose
#   date is missing from FF are dropped.
# - validate="many_to_one" makes the merge raise if FF ever contains a
#   duplicated date, which would otherwise silently duplicate price rows.
# The result is then ordered by symbol and date.
data = (
    daily_prices
    .merge(FF[["date", "Mkt"]], on="date", how="inner", validate="many_to_one")
    .sort_values(["symbol", "date"])
)

In [41]:
# Spot-check the merged rows for a single ticker.
data[data["symbol"].eq("AAPL")]

Unnamed: 0,date,close,symbol,ret,Mkt
2,2010-01-04,7.6432,AAPL,,0.01690
433,2010-01-05,7.6564,AAPL,0.001727,0.00310
864,2010-01-06,7.5346,AAPL,-0.015908,0.00130
1296,2010-01-07,7.5207,AAPL,-0.001845,0.00400
1728,2010-01-08,7.5707,AAPL,0.006648,0.00330
...,...,...,...,...,...
1595893,2023-06-26,185.2700,AAPL,-0.007553,-0.00461
1596385,2023-06-27,188.0600,AAPL,0.015059,0.01229
1596877,2023-06-28,189.2500,AAPL,0.006328,0.00059
1597369,2023-06-29,189.5900,AAPL,0.001797,0.00459


In [44]:
# Row-wise difference between the stock return and the market return.
# NOTE(review): "excess return" usually denotes the return over the risk-free
# rate; this column is the return over the market (Mkt) — confirm that is the
# intended definition.
data["excess_ret"] = data["ret"].sub(data["Mkt"])

In [45]:
data

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret
0,2010-01-04,31.30,A,,0.01690,
431,2010-01-05,30.96,A,-0.010863,0.00310,-0.013963
862,2010-01-06,30.85,A,-0.003553,0.00130,-0.004853
1294,2010-01-07,30.81,A,-0.001297,0.00400,-0.005297
1726,2010-01-08,30.80,A,-0.000325,0.00330,-0.003625
...,...,...,...,...,...,...
1596382,2023-06-26,167.54,ZTS,-0.004930,-0.00461,-0.000320
1596874,2023-06-27,170.67,ZTS,0.018682,0.01229,0.006392
1597366,2023-06-28,169.10,ZTS,-0.009199,0.00059,-0.009789
1597858,2023-06-29,171.87,ZTS,0.016381,0.00459,0.011791


In [46]:
data.describe()

Unnamed: 0,date,close,ret,Mkt,excess_ret
count,1598351,1598351.0,1597859.0,1598351.0,1597859.0
mean,2016-11-20 14:37:36.100694016,98.79279,0.0006527672,0.000544998,0.000112112
min,2010-01-04 00:00:00,0.699,-0.8205564,-0.11994,-0.8227364
25%,2013-07-30 00:00:00,35.09,-0.008191202,-0.00402,-0.007071735
50%,2016-12-12 00:00:00,60.78,0.0006826521,0.00077,-6.954733e-05
75%,2020-03-31 00:00:00,107.37,0.009552057,0.006,0.007058342
max,2023-06-30 00:00:00,6350.62,9.270531,0.09346,9.262831
std,,180.7181,0.0234912,0.0114706,0.02046229


In [47]:
# Number of distinct tickers in the merged frame.
data["symbol"].nunique()

492