In [26]:
import pandas as pd
import numpy as np

# Step 1 - import data

In [27]:
# Read the two raw inputs from disk: the Fama-French daily factor file (CSV)
# and the daily closing-price file (Parquet).
FF = pd.read_csv(filepath_or_buffer="data+files/Fama_French_daily.csv")
daily_prices = pd.read_parquet(path="data+files/sp500_daily_prices.parquet")

Mkt: Refere-se ao retorno do mercado. Este é o retorno de um portfólio amplo de ações, como o retorno do mercado de ações em geral (por exemplo, S&P 500).
RF: Refere-se à taxa livre de risco (Risk-Free rate). Esta é a taxa de retorno de um investimento considerado livre de risco, como os títulos do governo dos EUA de curto prazo.
Mkt_RF: É o retorno excedente do mercado sobre a taxa livre de risco, ou seja, $Mkt\_RF = Mkt - RF$.

In [28]:
# Preview the factor table exactly as loaded (dates are still in YYYYMMDD form at this point).
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF
0,19260701,0.10,-0.25,-0.27,0.009
1,19260702,0.45,-0.33,-0.06,0.009
2,19260706,0.17,0.30,-0.39,0.009
3,19260707,0.09,-0.58,0.02,0.009
4,19260708,0.21,-0.38,0.19,0.009
...,...,...,...,...,...
25518,20230626,-0.48,-0.32,1.30,0.019
25519,20230627,1.21,0.47,-0.51,0.019
25520,20230628,0.04,0.55,-0.40,0.019
25521,20230629,0.44,0.53,0.77,0.019


In [29]:
# Preview the price table as loaded: columns date, close and symbol.
daily_prices

Unnamed: 0,date,close,symbol
2544,2010-01-04,31.30,A
2545,2010-01-05,30.96,A
2546,2010-01-06,30.85,A
2547,2010-01-07,30.81,A
2548,2010-01-08,30.80,A
...,...,...,...
24292494,2023-08-01,185.51,ZTS
24292495,2023-08-02,182.00,ZTS
24292496,2023-08-03,180.88,ZTS
24292497,2023-08-04,178.63,ZTS


## Step 2: Cleaning, Calculations and Merging
1) Parse dates
2) Fama-French Dataset - Calculate Market Return and rescale
3) Daily Stocks Dataset - Calculate daily returns
4) Merge
5) Calculate daily excess returns (stock return minus market return)
6) Detect and delete outliers
7) Exploratory statistics

In [30]:
# First five factor rows, before any cleaning.
FF.head(n=5)

Unnamed: 0,date,Mkt_RF,SMB,HML,RF
0,19260701,0.1,-0.25,-0.27,0.009
1,19260702,0.45,-0.33,-0.06,0.009
2,19260706,0.17,0.3,-0.39,0.009
3,19260707,0.09,-0.58,0.02,0.009
4,19260708,0.21,-0.38,0.19,0.009


In [31]:
# Turn the YYYYMMDD stamps into real datetimes so the factor table can be
# joined to the price table on "date" further down.
FF["date"] = pd.to_datetime(FF["date"], format="%Y%m%d")

In [32]:
# Check the factor table after date parsing.
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF
0,1926-07-01,0.10,-0.25,-0.27,0.009
1,1926-07-02,0.45,-0.33,-0.06,0.009
2,1926-07-06,0.17,0.30,-0.39,0.009
3,1926-07-07,0.09,-0.58,0.02,0.009
4,1926-07-08,0.21,-0.38,0.19,0.009
...,...,...,...,...,...
25518,2023-06-26,-0.48,-0.32,1.30,0.019
25519,2023-06-27,1.21,0.47,-0.51,0.019
25520,2023-06-28,0.04,0.55,-0.40,0.019
25521,2023-06-29,0.44,0.53,0.77,0.019


In [33]:
# Recover the market return by adding the risk-free rate back onto the
# market-minus-risk-free column (values are still in the file's original units here).
FF["Mkt"] = FF["Mkt_RF"].add(FF["RF"])

In [34]:
# Check the factor table with the new "Mkt" column.
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF,Mkt
0,1926-07-01,0.10,-0.25,-0.27,0.009,0.109
1,1926-07-02,0.45,-0.33,-0.06,0.009,0.459
2,1926-07-06,0.17,0.30,-0.39,0.009,0.179
3,1926-07-07,0.09,-0.58,0.02,0.009,0.099
4,1926-07-08,0.21,-0.38,0.19,0.009,0.219
...,...,...,...,...,...,...
25518,2023-06-26,-0.48,-0.32,1.30,0.019,-0.461
25519,2023-06-27,1.21,0.47,-0.51,0.019,1.229
25520,2023-06-28,0.04,0.55,-0.40,0.019,0.059
25521,2023-06-29,0.44,0.53,0.77,0.019,0.459


In [35]:
# Divide every column from "Mkt_RF" through "Mkt" (inclusive label slice) by 100
# so the factors are on the same scale as the fractional stock returns computed later.
# NOTE: this cell is not idempotent - running it twice rescales twice.
FF.loc[:, "Mkt_RF":"Mkt"] = FF.loc[:, "Mkt_RF":"Mkt"].div(100)

In [36]:
# Check the factor table after rescaling.
FF

Unnamed: 0,date,Mkt_RF,SMB,HML,RF,Mkt
0,1926-07-01,0.0010,-0.0025,-0.0027,0.00009,0.00109
1,1926-07-02,0.0045,-0.0033,-0.0006,0.00009,0.00459
2,1926-07-06,0.0017,0.0030,-0.0039,0.00009,0.00179
3,1926-07-07,0.0009,-0.0058,0.0002,0.00009,0.00099
4,1926-07-08,0.0021,-0.0038,0.0019,0.00009,0.00219
...,...,...,...,...,...,...
25518,2023-06-26,-0.0048,-0.0032,0.0130,0.00019,-0.00461
25519,2023-06-27,0.0121,0.0047,-0.0051,0.00019,0.01229
25520,2023-06-28,0.0004,0.0055,-0.0040,0.00019,0.00059
25521,2023-06-29,0.0044,0.0053,0.0077,0.00019,0.00459


In [37]:
# First five price rows, before returns are added.
daily_prices.head(n=5)

Unnamed: 0,date,close,symbol
2544,2010-01-04,31.3,A
2545,2010-01-05,30.96,A
2546,2010-01-06,30.85,A
2547,2010-01-07,30.81,A
2548,2010-01-08,30.8,A


In [38]:
# Daily simple return per symbol: close_t / close_{t-1} - 1.
# pct_change works on consecutive ROWS, so order each symbol chronologically first
# instead of relying on the order the file happened to be stored in.
daily_prices = daily_prices.sort_values(["symbol", "date"])
# fill_method=None: do not silently forward-fill missing closes before differencing
# (the implicit fill default is deprecated in recent pandas). The first row of each
# symbol has no previous close and therefore gets NaN.
daily_prices["ret"] = (
    daily_prices
    .groupby("symbol")["close"]
    .pct_change(periods=1, fill_method=None)
)

In [39]:
# Check the price table with the new "ret" column (first row of a symbol is NaN).
daily_prices

Unnamed: 0,date,close,symbol,ret
2544,2010-01-04,31.30,A,
2545,2010-01-05,30.96,A,-0.010863
2546,2010-01-06,30.85,A,-0.003553
2547,2010-01-07,30.81,A,-0.001297
2548,2010-01-08,30.80,A,-0.000325
...,...,...,...,...
24292494,2023-08-01,185.51,ZTS,-0.013717
24292495,2023-08-02,182.00,ZTS,-0.018921
24292496,2023-08-03,180.88,ZTS,-0.006154
24292497,2023-08-04,178.63,ZTS,-0.012439


In [40]:
# Attach the market return to every price row. The inner join keeps only dates
# that exist in both tables; afterwards rows are ordered by symbol, then date.
data = pd.merge(
    daily_prices,
    FF.loc[:, ["date", "Mkt"]],
    how="inner",
    on="date",
)
data = data.sort_values(by=["symbol", "date"])

In [41]:
# Spot-check the merged rows for a single ticker.
data[data["symbol"].eq("AAPL")]

Unnamed: 0,date,close,symbol,ret,Mkt
2,2010-01-04,7.6432,AAPL,,0.01690
433,2010-01-05,7.6564,AAPL,0.001727,0.00310
864,2010-01-06,7.5346,AAPL,-0.015908,0.00130
1296,2010-01-07,7.5207,AAPL,-0.001845,0.00400
1728,2010-01-08,7.5707,AAPL,0.006648,0.00330
...,...,...,...,...,...
1595893,2023-06-26,185.2700,AAPL,-0.007553,-0.00461
1596385,2023-06-27,188.0600,AAPL,0.015059,0.01229
1596877,2023-06-28,189.2500,AAPL,0.006328,0.00059
1597369,2023-06-29,189.5900,AAPL,0.001797,0.00459


In [44]:
# "Excess" here means in excess of the market: stock return minus market return
# (not stock return minus the risk-free rate).
data["excess_ret"] = data["ret"].sub(data["Mkt"])

In [45]:
# Check the merged frame with the new "excess_ret" column.
data

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret
0,2010-01-04,31.30,A,,0.01690,
431,2010-01-05,30.96,A,-0.010863,0.00310,-0.013963
862,2010-01-06,30.85,A,-0.003553,0.00130,-0.004853
1294,2010-01-07,30.81,A,-0.001297,0.00400,-0.005297
1726,2010-01-08,30.80,A,-0.000325,0.00330,-0.003625
...,...,...,...,...,...,...
1596382,2023-06-26,167.54,ZTS,-0.004930,-0.00461,-0.000320
1596874,2023-06-27,170.67,ZTS,0.018682,0.01229,0.006392
1597366,2023-06-28,169.10,ZTS,-0.009199,0.00059,-0.009789
1597858,2023-06-29,171.87,ZTS,0.016381,0.00459,0.011791


In [46]:
# Summary statistics of the merged frame (step 7 of the cleaning plan).
data.describe()

Unnamed: 0,date,close,ret,Mkt,excess_ret
count,1598351,1598351.0,1597859.0,1598351.0,1597859.0
mean,2016-11-20 14:37:36.100694016,98.79279,0.0006527672,0.000544998,0.000112112
min,2010-01-04 00:00:00,0.699,-0.8205564,-0.11994,-0.8227364
25%,2013-07-30 00:00:00,35.09,-0.008191202,-0.00402,-0.007071735
50%,2016-12-12 00:00:00,60.78,0.0006826521,0.00077,-6.954733e-05
75%,2020-03-31 00:00:00,107.37,0.009552057,0.006,0.007058342
max,2023-06-30 00:00:00,6350.62,9.270531,0.09346,9.262831
std,,180.7181,0.0234912,0.0114706,0.02046229


In [47]:
# Number of distinct tickers in the merged frame.
data["symbol"].nunique()

492

## Step 3: Calculate Momentum Indicators/Proxies
100-Day & 200-Day Simple Moving Averages (SMA) of daily excess returns

In [48]:
# Move "date" into the index ahead of the per-symbol rolling windows computed next,
# then display the re-indexed frame.
data = data.set_index(keys="date")
data

Unnamed: 0_level_0,close,symbol,ret,Mkt,excess_ret
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,31.30,A,,0.01690,
2010-01-05,30.96,A,-0.010863,0.00310,-0.013963
2010-01-06,30.85,A,-0.003553,0.00130,-0.004853
2010-01-07,30.81,A,-0.001297,0.00400,-0.005297
2010-01-08,30.80,A,-0.000325,0.00330,-0.003625
...,...,...,...,...,...
2023-06-26,167.54,ZTS,-0.004930,-0.00461,-0.000320
2023-06-27,170.67,ZTS,0.018682,0.01229,0.006392
2023-06-28,169.10,ZTS,-0.009199,0.00059,-0.009789
2023-06-29,171.87,ZTS,0.016381,0.00459,0.011791


In [52]:
# Trailing 100-row and 200-row means of excess_ret, computed separately per symbol.
# A window only yields a value once it is completely filled, so the early rows of
# each symbol are NaN. reset_index() turns the (symbol, date) index back into columns.
MA_100 = data.groupby("symbol")["excess_ret"].rolling(window=100).mean().reset_index()
MA_200 = data.groupby("symbol")["excess_ret"].rolling(window=200).mean().reset_index()

In [53]:
# Peek at the layout of the rolling-mean table: symbol, date, value.
MA_200.head(n=2)

Unnamed: 0,symbol,date,excess_ret
0,A,2010-01-04,
1,A,2010-01-05,


In [56]:
# Give each rolling-mean column its own name so both can be merged into `data`
# without clashing with the existing "excess_ret" column.
MA_100 = MA_100.rename(columns={"excess_ret": "MA_100"})
MA_200 = MA_200.rename(columns={"excess_ret": "MA_200"})

In [57]:
# Confirm the rename; the first rows are NaN because the window is not yet full.
MA_100.head(n=5)

Unnamed: 0,symbol,date,MA_100
0,A,2010-01-04,
1,A,2010-01-05,
2,A,2010-01-06,
3,A,2010-01-07,
4,A,2010-01-08,


In [58]:
# Bring "date" back as an ordinary column so it can serve as a merge key.
data = data.reset_index(drop=False)
data.head(n=2)

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret
0,2010-01-04,31.3,A,,0.0169,
1,2010-01-05,30.96,A,-0.010863,0.0031,-0.013963


In [61]:
# Join both moving-average tables onto the main frame by (symbol, date).
data = (
    data
    .merge(MA_100, on=["symbol", "date"])
    .merge(MA_200, on=["symbol", "date"])
)

In [62]:
# First ten rows after the merge; MA_100 / MA_200 are still NaN this early in a symbol's history.
data.head(n=10)

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret,MA_100,MA_200
0,2010-01-04,31.3,A,,0.0169,,,
1,2010-01-05,30.96,A,-0.010863,0.0031,-0.013963,,
2,2010-01-06,30.85,A,-0.003553,0.0013,-0.004853,,
3,2010-01-07,30.81,A,-0.001297,0.004,-0.005297,,
4,2010-01-08,30.8,A,-0.000325,0.0033,-0.003625,,
5,2010-01-11,30.82,A,0.000649,0.0013,-0.000651,,
6,2010-01-12,30.45,A,-0.012005,-0.01,-0.002005,,
7,2010-01-13,30.69,A,0.007882,0.0085,-0.000618,,
8,2010-01-14,31.15,A,0.014989,0.0024,0.012589,,
9,2010-01-15,30.43,A,-0.023114,-0.0112,-0.011914,,


In [64]:
# Discard every row that has a missing value in any column - in practice the
# warm-up rows where the return or a moving average is not yet defined.
data = data.dropna(axis=0, how="any")

In [65]:
# Check the frame after the rows containing NaN have been dropped.
data

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret,MA_100,MA_200
200,2010-10-19,34.02,A,-0.018465,-0.01669,-0.001775,-0.000075,0.000272
201,2010-10-20,34.66,A,0.018812,0.01061,0.008202,0.000009,0.000383
202,2010-10-21,34.75,A,0.002597,0.00111,0.001487,0.000106,0.000414
203,2010-10-22,34.75,A,0.000000,0.00291,-0.002910,0.000105,0.000426
204,2010-10-25,34.90,A,0.004317,0.00311,0.001207,0.000116,0.000451
...,...,...,...,...,...,...,...,...
1598346,2023-06-26,167.54,ZTS,-0.004930,-0.00461,-0.000320,-0.000390,-0.000126
1598347,2023-06-27,170.67,ZTS,0.018682,0.01229,0.006392,-0.000346,0.000005
1598348,2023-06-28,169.10,ZTS,-0.009199,0.00059,-0.009789,-0.000478,-0.000144
1598349,2023-06-29,171.87,ZTS,0.016381,0.00459,0.011791,-0.000279,-0.000093


## Step 4: Calculate forward-looking returns windows
50-day, 100-day, 200-day returns

In [66]:
# Starting point for the forward-return calculation (same frame as displayed in the previous step).
data

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret,MA_100,MA_200
200,2010-10-19,34.02,A,-0.018465,-0.01669,-0.001775,-0.000075,0.000272
201,2010-10-20,34.66,A,0.018812,0.01061,0.008202,0.000009,0.000383
202,2010-10-21,34.75,A,0.002597,0.00111,0.001487,0.000106,0.000414
203,2010-10-22,34.75,A,0.000000,0.00291,-0.002910,0.000105,0.000426
204,2010-10-25,34.90,A,0.004317,0.00311,0.001207,0.000116,0.000451
...,...,...,...,...,...,...,...,...
1598346,2023-06-26,167.54,ZTS,-0.004930,-0.00461,-0.000320,-0.000390,-0.000126
1598347,2023-06-27,170.67,ZTS,0.018682,0.01229,0.006392,-0.000346,0.000005
1598348,2023-06-28,169.10,ZTS,-0.009199,0.00059,-0.009789,-0.000478,-0.000144
1598349,2023-06-29,171.87,ZTS,0.016381,0.00459,0.011791,-0.000279,-0.000093


In [71]:
# For each look-ahead horizon, fetch the close that sits that many rows later
# within the same symbol (a negative shift looks forward). Rows too close to the
# end of a symbol's history have no such close and receive NaN.
for horizon in (50, 100, 200):
    data[f"close_{horizon}_d"] = data.groupby("symbol")["close"].shift(-horizon)


In [73]:
# Peek at the new forward-close columns.
data.head(n=3)

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret,MA_100,MA_200,close_100_d,close_200_d,close_50_d
200,2010-10-19,34.02,A,-0.018465,-0.01669,-0.001775,-7.5e-05,0.000272,44.84,36.91,41.59
201,2010-10-20,34.66,A,0.018812,0.01061,0.008202,9e-06,0.000383,43.82,36.59,41.43
202,2010-10-21,34.75,A,0.002597,0.00111,0.001487,0.000106,0.000414,42.33,32.56,41.88


In [74]:
# Simple return from today's close to the close `horizon` rows ahead:
# future_close / close - 1.
for horizon in (50, 100, 200):
    data[f"ret_{horizon}_d"] = data[f"close_{horizon}_d"].div(data["close"]).sub(1)

In [75]:
# Peek at the new forward-return columns.
data.head(n=3)

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret,MA_100,MA_200,close_100_d,close_200_d,close_50_d,ret_50_d,ret_100_d,ret_200_d
200,2010-10-19,34.02,A,-0.018465,-0.01669,-0.001775,-7.5e-05,0.000272,44.84,36.91,41.59,0.222516,0.318048,0.08495
201,2010-10-20,34.66,A,0.018812,0.01061,0.008202,9e-06,0.000383,43.82,36.59,41.43,0.195326,0.264282,0.055684
202,2010-10-21,34.75,A,0.002597,0.00111,0.001487,0.000106,0.000414,42.33,32.56,41.88,0.20518,0.218129,-0.063022


In [82]:
# Keep only rows where every column is populated, i.e. rows with a close available
# at all three horizons, so the three return columns share one sample.
# The explicit copy makes `data` an independent frame before more columns are added.
data = data.dropna(axis=0, how="any").copy(deep=True)

In [77]:
# Summary statistics of the final analysis sample, including the forward-return columns.
data.describe()

Unnamed: 0,date,close,ret,Mkt,excess_ret,MA_100,MA_200,close_100_d,close_200_d,close_50_d,ret_50_d,ret_100_d,ret_200_d
count,1402626,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0,1402626.0
mean,2016-11-14 13:40:26.759522048,98.11851,0.0006550309,0.0005348223,0.0001202086,0.0001216846,0.0001204164,102.3882,106.9315,100.1669,0.03027741,0.06036261,0.1221028
min,2010-10-19 00:00:00,0.699,-0.8205564,-0.11994,-0.8227364,-0.01862517,-0.01229943,0.699,0.699,0.699,-0.8870043,-0.9135802,-0.960952
25%,2013-12-10 00:00:00,36.45,-0.007945498,-0.0038,-0.006947803,-0.0007646852,-0.0005201356,37.775,39.08,37.14,-0.04169684,-0.04865263,-0.0485054
50%,2016-12-05 00:00:00,62.0,0.0007194245,0.00083,-5.90502e-05,8.745907e-05,7.524932e-05,64.36,66.78,63.17,0.02845576,0.0513089,0.09337532
75%,2019-10-31 00:00:00,107.25,0.009391075,0.0058,0.006957556,0.0009313717,0.000672048,112.27,116.95,109.65,0.09721277,0.1513752,0.2433121
max,2022-09-13 00:00:00,5959.33,9.270531,0.09346,9.262831,0.09398033,0.05049223,5959.33,6350.62,5959.33,10.32796,12.24611,25.40994
std,,172.776,0.02343554,0.01136151,0.02046154,0.001981916,0.001422747,180.3714,190.8029,176.1298,0.159116,0.2309812,0.3620384


## Step 5: Form Portfolios according to Stock Momentum

In [84]:
# Cross-sectional ranking: on each date, split the symbols into up to ten
# quantile buckets by their moving-average value (0 = lowest, highest = top bucket).
# duplicates="drop" merges buckets whose edges coincide, so a date can end up
# with fewer than ten distinct labels.
for window in (100, 200):
    data[f"decile_{window}"] = (
        data.groupby("date")[f"MA_{window}"]
        .transform(lambda ma: pd.qcut(ma, 10, labels=False, duplicates="drop"))
    )


In [85]:
# Peek at the new decile columns.
data.head(n=2)

Unnamed: 0,date,close,symbol,ret,Mkt,excess_ret,MA_100,MA_200,close_100_d,close_200_d,close_50_d,ret_50_d,ret_100_d,ret_200_d,decile_100,decile_200
200,2010-10-19,34.02,A,-0.018465,-0.01669,-0.001775,-7.5e-05,0.000272,44.84,36.91,41.59,0.222516,0.318048,0.08495,4,5
201,2010-10-20,34.66,A,0.018812,0.01061,0.008202,9e-06,0.000383,43.82,36.59,41.43,0.195326,0.264282,0.055684,4,5


In [88]:
# Pool all (date, symbol) observations within each decile and summarise the three
# forward-return horizons by mean, median and standard error of the mean.
# NOTE(review): forward windows on consecutive dates overlap, so the pooled
# observations are presumably not independent and "sem" may be understated - confirm.
result_ma_100 = (
    data.groupby("decile_100")[["ret_50_d", "ret_100_d", "ret_200_d"]]
    .agg(["mean", "median", "sem"])
)
result_ma_200 = (
    data.groupby("decile_200")[["ret_50_d", "ret_100_d", "ret_200_d"]]
    .agg(["mean", "median", "sem"])
)

In [89]:
# Forward-return summary per 200-day moving-average decile.
result_ma_200

Unnamed: 0_level_0,ret_50_d,ret_50_d,ret_50_d,ret_100_d,ret_100_d,ret_100_d,ret_200_d,ret_200_d,ret_200_d
Unnamed: 0_level_1,mean,median,sem,mean,median,sem,mean,median,sem
decile_200,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
0,0.033545,0.027218,0.000488,0.062386,0.049229,0.000695,0.132939,0.085783,0.001174
1,0.027559,0.026132,0.000368,0.056995,0.048627,0.00054,0.113635,0.086474,0.000895
2,0.028203,0.027355,0.0004,0.056108,0.049222,0.000536,0.110543,0.087768,0.000822
3,0.026247,0.025404,0.000441,0.056578,0.048783,0.000696,0.109273,0.087215,0.000916
4,0.026423,0.027364,0.000412,0.05446,0.049474,0.000595,0.109718,0.089448,0.00092
5,0.026037,0.028021,0.000327,0.053422,0.050877,0.000486,0.107328,0.092293,0.000812
6,0.026183,0.027698,0.000321,0.053227,0.049516,0.000489,0.106393,0.092526,0.000779
7,0.028467,0.029471,0.000337,0.055288,0.050908,0.000529,0.113609,0.095331,0.000884
8,0.03317,0.031412,0.000418,0.064911,0.05582,0.000636,0.130762,0.105138,0.000993
9,0.046733,0.036496,0.000632,0.089915,0.062413,0.000851,0.186022,0.118283,0.001304
