In [None]:
import pandas as pd
import pandas_datareader as web
import yfinance as yf
import numpy as np
import statsmodels.api as smf
import matplotlib.pyplot as plt

## CAPM - Un solo fattore

Lewinson (2020) Python for Finance Cookbook - cap. 4

### Calcolo del beta vs mercato azionario

In [None]:
risky_asset = 'AMZN' # Amazon
market_benchmark = '^GSPC' # S&P 500
start_date = '2014-05-31'
end_date = '2024-04-30'

In [None]:
df = yf.download([risky_asset, market_benchmark],
                       start = start_date, end = end_date)

In [None]:
df.head()

In [None]:
X = df['Adj Close'].rename(columns={risky_asset:'asset', market_benchmark:'market'})\
.resample('M').last().pct_change().dropna()


In [None]:
X.head()

In [None]:
X.cov()

In [None]:
covariance = X.cov().iloc[0,1]
covariance

In [None]:
X.corr()

In [None]:
benchmark_variance = X.market.var()

In [None]:
X.market.var()

In [None]:
beta = covariance / benchmark_variance
beta

### CAPM - 1 solo fattore

In [None]:
y = X.pop('asset')
y

In [None]:

X = smf.add_constant(X)
X

In [None]:
capm_model = smf.OLS(y, X).fit()
print(capm_model.summary())

#### Inseriamo il risk free

In [None]:
risk_free = '^IRX'

In [None]:
df = yf.download([risky_asset, market_benchmark, risk_free],
                       start = start_date, end = end_date)

In [None]:
df.head()

In [None]:
X1 = df['Adj Close'].rename(columns={risky_asset:'asset', market_benchmark:'market', risk_free: 'risk_free'})


In [None]:
X1.head()

In [None]:
X1 = X1.resample('M').last()
X1

In [None]:
plt.plot(X1['risk_free'])
plt.grid()

In [None]:
X1['mrf'] = (1 + X1['risk_free']/100)**(30/360) - 1

In [None]:
X1[50:70]

In [None]:
X1.tail()

In [None]:
X1['asset_nmr'] = X1['asset'].pct_change() - X1['mrf']

In [None]:
X1['market_nmr'] = X1['market'].pct_change() - X1['mrf']

In [None]:
X1.head()

In [None]:
X1.tail()

In [None]:
X1.dropna(inplace=True)

In [None]:
X1.head()

In [None]:
X2 = X1[['asset_nmr', 'market_nmr']]

In [None]:
X2.head()

In [None]:
y = X2.pop('asset_nmr')

In [None]:
X = smf.add_constant(X2)
X

In [None]:
capm_model = smf.OLS(y, X).fit()
print(capm_model.summary())

In [None]:
#Il tasso risk free si può anche scaricare dal database della Banca della Riserva Federale di Saint Louis (FRED)

rf = web.DataReader("TB3MS", "fred", start = start_date, end = end_date)

In [None]:
rf = (1 + (rf/100))**(1/12)-1

In [None]:
rf.plot(title = "Risk-free rate (3-Month Treasury Bill)")
plt.grid()

### Il modello di Fama - French a tre fattori

I tre fattori del modello di Fama e French (*) sono:
- il fattore mercato, cioè la dipendenza dall'andamento del mercato azionario  (MKT)
- il fattore dimensione (SMB) costruito come rendimento in eccesso delle azioni a piccola capitalizzazione rispetto alle grandi
- il fattore valore (HML) costruito come rendimento in eccesso delle azioni con un rapporto fra patrimonio e prezzo alto (Value stocks) rispetto a quelle con un rapporto fra patrimonio e prezzo basso (Growth stocks)

I dati vanno scaricati da questo sito

https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html

#### Fama, E.F. - French, K.R. (1993) "Common risk factors in the returns on stocks and bonds" Journal of Financial Economics, 33 (1) 

In [None]:
factor_df = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows=3)

In [None]:
factor_df.head()

Definiamo i parametri

In [None]:
risky_asset = 'X' # stock to analyse against the risk factors; reassigned to 'TSLA' before the price download
start_date = '2014-05-31'
end_date = '2024-03-31'

Eliminiamo i dati annuali in fondo al file

In [None]:
# Label of the row that introduces the annual section of the file.
stringa = ' Annual Factors: January-December '
# Compare on stripped text so the match does not depend on the exact padding
# around the label.
indices = factor_df.iloc[:, 0].astype(str).str.strip() == stringa.strip()
# Keep only the rows above the marker; if the marker is absent, keep everything.
if indices.any():
    start_of_annual = factor_df[indices].index[0]
    factor_df = factor_df[factor_df.index < start_of_annual]
# Work on an independent copy so the column assignments in the next cells do
# not raise SettingWithCopyWarning.
factor_df = factor_df.copy()

Rinominiamo le colonne, definiamo come indice la data, filtriamo l'intervallo che ci interessa, dividiamo tutto per 100

In [None]:
factor_df.columns = ['data', 'mkt', 'smb', 'hml', 'rf']

In [None]:
factor_df['data'] = pd.to_datetime(factor_df['data'], format='%Y%m').dt.strftime("%Y-%m")

In [None]:
factor_df.head()

In [None]:
factor_df = factor_df.set_index('data')
factor_df = factor_df[start_date:end_date]

In [None]:
factor_df.head()

In [None]:
factor_df.tail()

In [None]:
factor_df = factor_df.apply(pd.to_numeric, errors='coerce').div(100)

In [None]:
factor_df.head()

Scarichiamo i prezzi dell'azione e calcoliamo i rendimenti mensili

In [None]:
risky_asset = 'TSLA'
asset_df = yf.download(risky_asset, start_date, end_date) 

In [None]:
y = asset_df['Adj Close'].resample('M').last().pct_change().dropna()

In [None]:
y.index = y.index.strftime('%Y-%m')

In [None]:
y.name = 'rtn'

In [None]:
y.head()

Uniamo i due df e calcoliamo i rendimenti in eccesso rispetto al rf dell'azione

In [None]:
ff_data = factor_df.join(y)
ff_data['excess_rtn'] = ff_data.rtn - ff_data.rf

In [None]:
ff_data.head()

Stimiamo il modello a tre fattori

In [None]:
# NOTE: this import rebinds `smf`, which the import cell at the top bound to
# statsmodels.api. From here on `smf.ols` is available, while `smf.OLS` /
# `smf.add_constant` from the CAPM cells are not until the kernel is restarted.
import statsmodels.formula.api as smf
# Three-factor regression of the excess return on mkt, smb and hml.
ff_model = smf.ols('excess_rtn ~ mkt + smb + hml', ff_data).fit()
print(ff_model.summary())

#### Modello Fama French a tre fattori rolling su un portafoglio

Parametri

In [None]:
# Portfolio constituents and their weights (0.25 each), plus the sample window.
assets = ['JPM', 'X', 'JNJ', 'CSCO']
weights = [0.25] * len(assets)
start_date, end_date = '2014-05-31', '2024-03-31'

Carichiamo i dati di Fama French in un altro modo

In [None]:
df_three_factor = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start_date, end_date)[0]

In [None]:
df_three_factor.head()

In [None]:
df_three_factor = df_three_factor.div(100)
df_three_factor.index = df_three_factor.index.format()

In [None]:
df_three_factor.head()

Carichiamo i prezzi delle quattro azioni 

In [None]:
asset_df = yf.download(assets, start_date, end_date) 

In [None]:
asset_df = asset_df['Adj Close'].resample('M').last().pct_change().dropna()

In [None]:
asset_df.index = asset_df.index.strftime('%Y-%m')
asset_df.head()

Calcoliamo i rendimenti del portafoglio

In [None]:
asset_df['portfolio_returns'] = np.matmul(asset_df[assets].values, weights)

In [None]:
asset_df.head()

In [None]:
asset_df.plot(figsize=(16,9), grid = True);

Uniamo i due df

In [None]:
ff_data = asset_df.join(df_three_factor).drop(assets, axis = 1)

In [None]:
ff_data.columns = ['portf_rtn', 'mkt', 'smb', 'hml', 'rf']
ff_data['portf_ex_rtn'] = ff_data.portf_rtn - ff_data.rf

In [None]:
ff_data.head()

Creiamo una funzione per calcolare la regressione ricorrente

In [None]:
def rolling_factor_model(input_data, formula, window_size):
    """Fit an OLS factor model on every rolling window of `input_data`.

    Parameters
    ----------
    input_data : pd.DataFrame
        Observations, one row per period, containing every variable named in
        `formula`. Windows are taken over the rows in the order given.
    formula : str
        Model formula forwarded to the statsmodels formula-API `ols`.
    window_size : int
        Number of consecutive rows in each estimation window.

    Returns
    -------
    pd.DataFrame
        One row of fitted parameters per window, indexed by the label of the
        last row of that window. Empty when `input_data` has fewer rows than
        `window_size`.

    Raises
    ------
    ValueError
        If `window_size` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size!r}")

    # Imported here so the function does not depend on which statsmodels
    # module the notebook-level alias `smf` happens to be bound to.
    from statsmodels.formula.api import ols

    n_windows = len(input_data) - window_size + 1  # <= 0 -> no windows at all

    # Fit one regression per window and keep only its parameter vector.
    coeffs = [
        ols(formula=formula,
            data=input_data.iloc[start_index:start_index + window_size]).fit().params
        for start_index in range(n_windows)
    ]

    # The i-th window ends at row i + window_size - 1; label the rows accordingly.
    coeffs_df = pd.DataFrame(
        coeffs,
        index=input_data.index[window_size - 1:])

    return coeffs_df

In [None]:
model_formula = 'portf_ex_rtn ~ mkt + smb + hml'
results_df = rolling_factor_model(ff_data, model_formula, window_size = 36)

In [None]:
results_df.plot(title = 'Rolling Fama-French Three Factor model')
plt.grid();

### Modello di Carhart a quattro fattori e Fama e French a cinque fattori

In [None]:
risky_asset = 'AMZN'
start_date = '2014-05-31'
end_date = '2024-03-31'

In [None]:
# tre fattori
df_three_factor = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start = start_date)[0]
df_three_factor.index = df_three_factor.index.format()

In [None]:
# quattro fattori con fattore "momentum" di Carhart
df_mom = web.DataReader('F-F_Momentum_factor', 'famafrench', start = start_date)[0]
df_mom.index = df_mom.index.format()

In [None]:
# cinque fattori con fattori Robust Minus Weak (RMW) e Conservative Minus Aggressive (CMA)
df_five_factor = web.DataReader('F-F_Research_Data_5_Factors_2x3', 'famafrench', start = start_date)[0]
df_five_factor.index = df_five_factor.index.format()

In [None]:
asset_df = yf.download(risky_asset,
                       start = start_date, end = end_date)

In [None]:
y = asset_df['Adj Close'].resample('M').last().pct_change().dropna()
y.index = y.index.strftime('%Y-%m')
y.name = 'return'

In [None]:
y

In [None]:
# Three-factor table + momentum table + asset returns, joined on the index.
four_factor_data = df_three_factor.join(df_mom).join(y)

In [None]:
# Positional relabelling: assumes the joined columns arrive as the three-factor
# columns, then momentum, then the return series - TODO confirm.
four_factor_data.columns = ['mkt', 'smb', 'hml', 'rf', 'mom', 'rtn']

In [None]:
# Divide every column except 'rtn' by 100.
# NOTE(review): in-place scaling - re-running this cell divides by 100 again.
four_factor_data.loc[:,four_factor_data.columns != 'rtn'] /= 100

In [None]:
#four_factor_data.index = pd.to_datetime(four_factor_data.index, format = '%Y-%m')
# Drop, in place, every row that contains a missing value.
four_factor_data.dropna(inplace = True)

In [None]:
# Restrict to the analysis window (label-based slice on the index).
four_factor_data = four_factor_data.loc[start_date:end_date]

In [None]:
# Asset return in excess of the risk-free rate.
four_factor_data['excess_rtn'] = four_factor_data['rtn'] - four_factor_data['rf']

In [None]:
four_factor_data.head()

In [None]:
# Four-factor regression. Relies on `smf` having been rebound to
# statsmodels.formula.api by the three-factor section above.
four_factor_model = smf.ols(formula ='excess_rtn ~ mkt + smb + hml + mom', data = four_factor_data).fit()

In [None]:
print(four_factor_model.summary())

In [None]:
# Five-factor table + asset returns, joined on the index.
five_factor_data = df_five_factor.join(y)

In [None]:
# Positional relabelling: assumes the joined columns arrive in exactly this
# order, with the return series last - TODO confirm.
five_factor_data.columns = ['mkt', 'smb', 'hml', 'rmw', 'cma', 'rf', 'rtn']

In [None]:
# Divide every column except 'rtn' by 100.
# NOTE(review): in-place scaling - re-running this cell divides by 100 again.
five_factor_data.loc[:, five_factor_data.columns != 'rtn'] /= 100

In [None]:
#five_factor_data.index = pd.to_datetime(five_factor_data.index, format = '%Y-%m')

In [None]:
# Drop, in place, every row that contains a missing value.
five_factor_data.dropna(inplace = True)

In [None]:
# Restrict to the analysis window (label-based slice on the index).
five_factor_data = five_factor_data.loc[start_date:end_date]

In [None]:
# Asset return in excess of the risk-free rate.
five_factor_data['excess_rtn'] = five_factor_data['rtn'] - five_factor_data['rf']

In [None]:
five_factor_data.head()

In [None]:
# Five-factor regression. Relies on `smf` having been rebound to
# statsmodels.formula.api by the three-factor section above.
five_factor_model = smf.ols(formula ='excess_rtn ~ mkt + smb + hml + rmw + cma', data = five_factor_data).fit()

In [None]:
print(five_factor_model.summary())

#### Andamento nel tempo degli indici MSCI

In [None]:
data = pd.read_excel('MSCI_Value_Growth.xlsx', index_col = 0, parse_dates = True)

In [None]:
returns = (data/data.shift(1)) - 1
returns.dropna(inplace=True)

In [None]:
returns.head()

In [None]:
# `cum` was never defined in the notebook, so this cell failed on a fresh
# kernel. Build it from the simple returns as the cumulative growth of one
# unit invested. NOTE(review): assumed to be the intended series - confirm.
cum = (1 + returns).cumprod()
plt.plot(cum)
plt.grid();