In [2]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import yfinance as yf
from pandas_datareader import data as web
import statsmodels.api as sm

plt.rcParams["figure.figsize"] = (12,6)

tickers = ["AAPL","MSFT","GOOGL","AMZN","META","NVDA","JPM","XOM","PG","JNJ"]
start, end, window = "2005-01-01", "2024-12-31", 36

px = yf.download(tickers, start=start, end=end, auto_adjust=True)["Close"]
px_m = px.resample("ME").last()
rets_m = px_m.pct_change().dropna()

ff5 = web.DataReader("F-F_Research_Data_5_Factors_2x3", "famafrench", start=start)[0]
try:
    mom = web.DataReader("F-F_Momentum_Factor", "famafrench", start=start)[0]
    ff = ff5.join(mom, how="left").rename(columns={"Mom   ": "MOM"})
except Exception:
    ff = ff5.copy()

ff = ff/100.0
rf = ff[["RF"]]
factors = ff.drop(columns=["RF"])

idx = rets_m.index.intersection(factors.index)
rets_m, factors, rf = rets_m.loc[idx], factors.loc[idx], rf.loc[idx]
excess = rets_m.sub(rf["RF"], axis=0)

excess.tail(), factors.tail()


[*********************100%***********************]  10 of 10 completed
  ff5 = web.DataReader("F-F_Research_Data_5_Factors_2x3", "famafrench", start=start)[0]
  ff5 = web.DataReader("F-F_Research_Data_5_Factors_2x3", "famafrench", start=start)[0]
  mom = web.DataReader("F-F_Momentum_Factor", "famafrench", start=start)[0]
  mom = web.DataReader("F-F_Momentum_Factor", "famafrench", start=start)[0]


(Empty DataFrame
 Columns: [AAPL, AMZN, GOOGL, JNJ, JPM, META, MSFT, NVDA, PG, XOM]
 Index: [],
 Empty DataFrame
 Columns: [Mkt-RF, SMB, HML, RMW, CMA, Mom]
 Index: [])

In [3]:
def rolling_alpha_and_tstat(excess_series: pd.Series,
                            factors_df: pd.DataFrame,
                            window: int = 36):
    """
    Calcule alpha_hat et tstat(alpha) via OLS roulante pour un titre.
    """
    alphas, tstats, dates = [], [], []
    y = excess_series
    X = sm.add_constant(factors_df)  # inclut constante (alpha)

    for t in range(window, len(y)+1):
        y_win = y.iloc[t-window:t]
        X_win = X.iloc[t-window:t]
        df_win = pd.concat([y_win, X_win], axis=1).dropna()

        if len(df_win) < window * 0.8:  # si trop de NaNs, skip
            alphas.append(np.nan); tstats.append(np.nan); dates.append(y.index[t-1])
            continue

        y_w = df_win.iloc[:,0]
        X_w = df_win.iloc[:,1:]

        model = sm.OLS(y_w, X_w).fit()
        alpha_hat = model.params.get("const", np.nan)
        t_alpha   = model.tvalues.get("const", np.nan)

        alphas.append(alpha_hat)
        tstats.append(t_alpha)
        dates.append(y.index[t-1])

    return pd.Series(alphas, index=dates, name=excess_series.name), \
           pd.Series(tstats, index=dates, name=excess_series.name)


# --- Appliquer à tous les tickers ---
alpha_hat = pd.DataFrame(index=excess.index, columns=excess.columns, dtype=float)
t_alpha   = pd.DataFrame(index=excess.index, columns=excess.columns, dtype=float)

for col in excess.columns:
    a, t = rolling_alpha_and_tstat(excess[col], factors, window=36)
    alpha_hat.loc[a.index, col] = a.values
    t_alpha.loc[t.index, col]   = t.values

alpha_hat.tail(), t_alpha.tail()


(Empty DataFrame
 Columns: [AAPL, AMZN, GOOGL, JNJ, JPM, META, MSFT, NVDA, PG, XOM]
 Index: [],
 Empty DataFrame
 Columns: [AAPL, AMZN, GOOGL, JNJ, JPM, META, MSFT, NVDA, PG, XOM]
 Index: [])

In [4]:
# 1) Prix -> rendements mensuels
px = yf.download(tickers, start=start, end=end, auto_adjust=True)["Close"]
px_m = px.resample("ME").last()              # 'ME' = month end
rets_m = px_m.pct_change().dropna()

# 2) Facteurs Fama-French
ff5 = web.DataReader("F-F_Research_Data_5_Factors_2x3", "famafrench", start=start)[0]
try:
    mom = web.DataReader("F-F_Momentum_Factor", "famafrench", start=start)[0]
    ff = ff5.join(mom, how="left").rename(columns={"Mom   ": "MOM"})
except Exception:
    ff = ff5.copy()

ff = ff / 100.0

# --- NORMALISER LES INDEX EN "FIN DE MOIS" TIMESTAMP ---
# Fama-French arrive souvent en PeriodIndex mensuel -> convertissons en Timestamp fin de mois
if isinstance(ff.index, pd.PeriodIndex):
    ff.index = ff.index.to_timestamp("M")  # fin de mois calendrier

# De notre côté, assurons-nous que rets_m est aussi "fin de mois" calendrier
rets_m.index = rets_m.index.to_period("M").to_timestamp("M")

# 3) Séparation RF / facteurs et alignement
rf = ff[["RF"]]
factors = ff.drop(columns=["RF"])

idx = rets_m.index.intersection(factors.index)
rets_m  = rets_m.loc[idx]
factors = factors.loc[idx]
rf      = rf.loc[idx]

# 4) Excès de rendements
excess = rets_m.sub(rf["RF"], axis=0)

print("Shapes:", rets_m.shape, factors.shape)
print("First dates:", rets_m.index[:3].tolist(), factors.index[:3].tolist())


[*********************100%***********************]  10 of 10 completed
  ff5 = web.DataReader("F-F_Research_Data_5_Factors_2x3", "famafrench", start=start)[0]
  ff5 = web.DataReader("F-F_Research_Data_5_Factors_2x3", "famafrench", start=start)[0]


Shapes: (151, 10) (151, 6)
First dates: [Timestamp('2012-06-30 00:00:00'), Timestamp('2012-07-31 00:00:00'), Timestamp('2012-08-31 00:00:00')] [Timestamp('2012-06-30 00:00:00'), Timestamp('2012-07-31 00:00:00'), Timestamp('2012-08-31 00:00:00')]


  mom = web.DataReader("F-F_Momentum_Factor", "famafrench", start=start)[0]
  mom = web.DataReader("F-F_Momentum_Factor", "famafrench", start=start)[0]


In [5]:
alpha_hat = pd.DataFrame(index=excess.index, columns=excess.columns, dtype=float)
t_alpha   = pd.DataFrame(index=excess.index, columns=excess.columns, dtype=float)

for col in excess.columns:
    a, t = rolling_alpha_and_tstat(excess[col], factors, window=36)
    alpha_hat.loc[a.index, col] = a.values
    t_alpha.loc[t.index, col]   = t.values

alpha_hat.tail(), t_alpha.tail()


(Ticker          AAPL      AMZN     GOOGL       JNJ       JPM      META  \
 Date                                                                     
 2024-08-31  0.006644  0.008400  0.002422 -0.006245  0.002364  0.021725   
 2024-09-30  0.005739  0.007720  0.002245 -0.005641 -0.000811  0.022624   
 2024-10-31  0.005602  0.008782  0.003262 -0.005981  0.001537  0.023681   
 2024-11-30  0.003919  0.007661  0.002498 -0.006468  0.002396  0.023836   
 2024-12-31  0.005377  0.009520  0.005772 -0.009059  0.004190  0.023137   
 
 Ticker          MSFT      NVDA        PG       XOM  
 Date                                                
 2024-08-31  0.001716  0.049017 -0.003539  0.010691  
 2024-09-30  0.001121  0.043530 -0.004988  0.009097  
 2024-10-31 -0.001653  0.046688 -0.006139  0.007795  
 2024-11-30 -0.000851  0.039105 -0.002130  0.009008  
 2024-12-31 -0.000948  0.041281 -0.005313  0.008277  ,
 Ticker          AAPL      AMZN     GOOGL       JNJ       JPM      META  \
 Date              

In [6]:
# Option : annualiser les alphas si tu veux les comparer à des rendements annuels
alpha_ann = alpha_hat * 12

# Notre score = t-stat de l'alpha
score = t_alpha.copy()

# Vérif rapide
score.tail()


Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,META,MSFT,NVDA,PG,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-08-31,0.740748,0.917373,0.26845,-0.909294,0.323153,1.042737,0.283062,2.560174,-0.535214,0.939572
2024-09-30,0.637457,0.844286,0.248441,-0.833647,-0.10769,1.086339,0.184856,2.247412,-0.760137,0.805046
2024-10-31,0.633462,0.996181,0.368765,-0.90031,0.206117,1.173844,-0.300341,2.422174,-0.937554,0.70491
2024-11-30,0.477967,1.013611,0.284172,-1.011546,0.328022,1.195412,-0.155713,2.243893,-0.306859,0.844847
2024-12-31,0.64025,1.226577,0.614841,-1.43591,0.590009,1.166622,-0.174688,2.502009,-0.752413,0.781263


In [7]:
def long_short_signal(scores_df, top_q=0.2, bottom_q=0.2):
    """
    Classe les titres chaque mois selon le score.
    Top 20% -> +1 (long), Bottom 20% -> -1 (short).
    """
    ranks = scores_df.rank(axis=1, pct=True, method="first")
    sig = pd.DataFrame(0, index=scores_df.index, columns=scores_df.columns, dtype=int)
    sig[ranks >= (1 - top_q)] = 1
    sig[ranks <= bottom_q] = -1
    return sig

signal = long_short_signal(score, top_q=0.2, bottom_q=0.2)
signal.tail()


Ticker,AAPL,AMZN,GOOGL,JNJ,JPM,META,MSFT,NVDA,PG,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-08-31,0,0,0,-1,0,1,0,1,-1,1
2024-09-30,0,1,0,-1,0,1,0,1,-1,0
2024-10-31,0,1,0,-1,0,1,0,1,-1,0
2024-11-30,0,1,0,-1,0,1,0,1,-1,0
2024-12-31,0,1,0,-1,0,1,0,1,-1,0
