In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from scipy.optimize import minimize
from sklearn.preprocessing import StandardScaler

In [17]:
df = pd.read_csv('/Users/dominicprenovost/Programmation/TP2-PF-management/48_Industry_Portfolios.CSV', header=6)
df = df.rename(columns={'Unnamed: 0': 'Date'})

df_48ind = df.iloc[:1171].copy()
df_48ind['Date'] = pd.to_datetime(df_48ind['Date'], format='%Y%m')
df_48ind.set_index('Date', inplace=True)
df_48ind = df_48ind.apply(pd.to_numeric, errors='coerce')
df_48ind.replace(-99.99, np.nan, inplace = True)
df_48ind.replace(-999, np.nan, inplace = True)
df_48ind.dropna(inplace = True)

df_numfirm = df.iloc[2564-20:3735-20].copy()
df_numfirm['Date'] = pd.to_datetime(df_numfirm['Date'], format='%Y%m')
df_numfirm.set_index('Date', inplace=True)
df_numfirm = df_numfirm.apply(pd.to_numeric, errors='coerce')

df_avgsize = df.iloc[3739-22:4910-22].copy()
df_avgsize['Date'] = pd.to_datetime(df_avgsize['Date'], format='%Y%m')
df_avgsize.set_index('Date', inplace=True)
df_avgsize = df_avgsize.apply(pd.to_numeric, errors='coerce')


market_cap = df_numfirm.multiply(df_avgsize, axis=0)
market_cap = market_cap.loc[df_48ind.index]


df_BtoM = df.iloc[4890:4988].copy()
df_BtoM = df_BtoM.apply(pd.to_numeric, errors='coerce')

df_BtoM = df_BtoM.loc[df_BtoM.index.repeat(12)].reset_index(drop=True)

df_BtoM['Date'] = pd.to_datetime(df_BtoM['Date'], format='%Y')

df_BtoM = df_BtoM.drop('Date', axis=1)

df_BtoM.replace(-99.99, np.nan, inplace = True)
df_BtoM.replace(-999, np.nan, inplace = True)
df_BtoM.dropna(inplace = True)

df_BtoM = pd.DataFrame(data = df_BtoM.iloc[5:].values, index = df_48ind.index, columns = df_48ind.columns)


df_mom = df_48ind.rolling(window=12).mean()

df_mom.replace(-99.99, np.nan, inplace = True)
df_mom.replace(-999, np.nan, inplace = True)
df_mom.dropna(inplace = True)


# Créer un objet StandardScaler
scaler = StandardScaler()

def standardize(df):
    # Standardiser les données
    df_scaled = scaler.fit_transform(df)
    
    df_standardized = pd.DataFrame(df_scaled, columns=df.columns, index=df.index)
    
    return df_standardized

standardized_market_caps = standardize(market_cap)
standardized_BtoM = standardize(df_BtoM)
standardized_mom = standardize(df_mom)

market_weights = market_cap.div(market_cap.sum(axis=1), axis=0)


standardized_market_caps = standardized_market_caps.loc[standardized_mom.index]
standardized_BtoM = standardized_BtoM.loc[standardized_mom.index]
standardized_mom = standardized_mom.loc[standardized_mom.index]
market_weights = market_weights.loc[standardized_mom.index]
df_48ind = df_48ind.loc[standardized_mom.index]

df4 = df_48ind.shift(-1).dropna()

standardized_market_caps = standardized_market_caps.loc[df4.index]
standardized_BtoM = standardized_BtoM.loc[df4.index]
standardized_mom = standardized_mom.loc[df4.index]
market_weights = market_weights.loc[df4.index]
df_48ind = df_48ind.loc[df4.index]

def Expanding_window_optimization(MC, BM, MOM, r, gamma=5):
    
    def u(x, gamma=5):
        return (1+x) ** (1 - gamma) / (1 - gamma)

    def function_to_maximize(beta, MC, BM, MOM, r, T, N, gamma=5):
        wi_t = market_weights
        return -(1/T) * np.sum(u(np.sum((wi_t + (1/N) * (beta[0] * MC + beta[1] * BM + beta[2] * MOM)) * (r),axis=1), gamma))

    optimal_betas = {}  # Define optimal_betas here to store the optimal coefficients for each period
    
    start_year = df_48ind.index.min() # Date de début fixée à la première date dans les données de rendement
    
    # Date de fin initiale fixée à décembre 1973
    initial_end_year = pd.to_datetime('1973-12-01')
    # Date de fin courante, commence par initial_end_year et sera ajustée dans la boucle
    current_end_year = initial_end_year

    while current_end_year <= df_48ind.index.max(): # Boucle jusqu'à la dernière date dans les données de rendement 
        
        # Extraire les composantes nécessaires pour l'optimisation des coefficients pour la période sélectionnée dans la window
        MC = standardized_market_caps.loc[start_year:current_end_year]
        BM = standardized_BtoM.loc[start_year:current_end_year]
        MOM = standardized_mom.loc[start_year:current_end_year]
        r = df_48ind.loc[start_year:current_end_year]
            
        N = df_48ind.shape[1]
        T = df_48ind.shape[0]

        # Initial guess for theta
        initial_beta = np.array([0.3, 0.3, 0.3])

        # Run the optimization for the current period
        result = minimize(function_to_maximize, initial_beta, args=( MC, BM, MOM, r, T, N, gamma), method='SLSQP')

        # Store the optimal theta for the current period
        optimal_betas[current_end_year] = result.x if result.success else None

        # Élargir la fenêtre pour la prochaine itération de window en ajoutant 12 mois à la dernière date courante de fin 
        current_end_year += pd.DateOffset(months=12)

    return optimal_betas


optimal_betas = Expanding_window_optimization(standardized_market_caps, standardized_BtoM, standardized_mom, df_48ind)

optimal_betas = pd.DataFrame(optimal_betas).T

#optimal_weights = market_weights + (1 / len(market_weights.columns)) * (optimal_betas[0] * standardized_market_caps + optimal_betas[1] * standardized_BtoM + optimal_betas[2] * standardized_mom)

#optimal_weights.dropna(inplace = True)


In [26]:
optimal_weights_list = []  # Liste pour stocker les poids optimaux pour chaque ligne de coefficients

for index, row in optimal_betas.iterrows():  # Itérer sur chaque ligne de coefficients
    # Multiplication des coefficients par les caractéristiques correspondantes
    weighted_factors = row.values[0] * standardized_market_caps.values + row.values[1] * standardized_BtoM.values + row.values[2] * standardized_mom.values
    
    # Calcul des poids optimaux en ajoutant les contributions pondérées des caractéristiques à la formule
    optimal_weights = market_weights + (1 / 48) * weighted_factors
    
    optimal_weights.dropna(inplace=True)  # Supprimer les lignes avec des valeurs manquantes
    
    optimal_weights_list.append(optimal_weights)  # Ajouter les poids optimaux de cette période à la liste

# Concaténer les poids optimaux pour chaque période dans un seul DataFrame
optimal_weights_df = pd.concat(optimal_weights_list)

# Afficher les poids optimaux pour chaque période
optimal_weights_df


Unnamed: 0_level_0,Agric,Food,Soda,Beer,Smoke,Toys,Fun,Books,Hshld,Clths,...,Boxes,Trans,Whlsl,Rtail,Meals,Banks,Insur,RlEst,Fin,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-06-01,0.067347,0.095789,0.036190,0.058779,0.043221,0.069147,0.062625,0.086186,0.146485,0.074922,...,0.077308,0.114066,0.095274,0.112214,0.081924,0.059296,0.071017,0.054601,0.090597,0.117119
1970-07-01,0.061948,0.085752,0.029754,0.051499,0.038150,0.058431,0.057452,0.071530,0.138408,0.063175,...,0.069738,0.096821,0.084881,0.098127,0.074689,0.052651,0.056622,0.043698,0.078689,0.108101
1970-08-01,0.065204,0.088933,0.034453,0.059177,0.040115,0.055496,0.061458,0.073593,0.136506,0.060241,...,0.067478,0.094755,0.084576,0.102229,0.075625,0.056865,0.062472,0.048217,0.081133,0.104378
1970-09-01,0.055833,0.082461,0.034616,0.054303,0.042232,0.053147,0.051953,0.068706,0.129451,0.051575,...,0.067836,0.087241,0.072313,0.096504,0.063551,0.055445,0.060822,0.039022,0.074038,0.096792
1970-10-01,0.070733,0.089829,0.036206,0.064758,0.043350,0.058585,0.061115,0.075116,0.136408,0.058111,...,0.071946,0.095274,0.087030,0.103117,0.073166,0.065282,0.073337,0.045818,0.085872,0.110863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,759.710411,258.286886,-714.759956,-384.735364,-130.005691,-52.228667,-842.111217,324.890938,-637.313847,-615.468249,...,59.901749,-685.863669,-801.315776,-693.187858,-904.514029,-86.308773,-1049.161439,-290.215171,-560.783274,-26.202007
2023-09-01,667.419560,199.947602,-734.365909,-398.411693,-212.085068,-114.821952,-805.790376,267.308173,-690.291842,-735.243253,...,-74.146316,-797.421541,-877.116275,-708.400295,-937.661948,-146.301625,-1128.477188,-318.544647,-613.858151,-56.675474
2023-10-01,863.002072,395.982372,-680.805593,-253.109193,-67.322042,7.013568,-749.913870,354.694079,-637.663770,-690.140860,...,18.587250,-674.917086,-733.425290,-712.919252,-833.140832,-1.567119,-1024.119234,-236.984498,-469.954168,40.997067
2023-11-01,901.492814,365.367832,-654.154988,-269.739951,-65.718439,-18.503077,-769.977855,418.667948,-583.459677,-625.748468,...,10.718598,-711.955738,-767.648978,-753.495414,-853.917078,-82.676027,-1041.605073,-250.604035,-531.790627,39.905283
