In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
from scipy.optimize import minimize
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [3]:
# Monthly 48-industry file. The CSV stacks several tables on top of each other,
# so every table is cut out of `df_monthly` by row position below.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
df_monthly = pd.read_csv('/Users/dominicprenovost/Programmation/TP2-PF-management/48_Industry_Portfolios.CSV', header=6)
df_monthly = df_monthly.rename(columns={'Unnamed: 0': 'Date'})

# Numeric codes that stand for "missing" and are turned into NaN before use.
MISSING_CODES = [-99.99, -999]


def _monthly_section(raw, first_row, last_row):
    """Return rows [first_row, last_row) of `raw` as a numeric, month-indexed frame.

    The 'Date' column is parsed as YYYYMM and becomes the index; every other
    column is coerced to numeric (cells that cannot be parsed become NaN).
    """
    section = raw.iloc[first_row:last_row].copy()
    section['Date'] = pd.to_datetime(section['Date'], format='%Y%m')
    return section.set_index('Date').apply(pd.to_numeric, errors='coerce')


# Monthly returns: keep only the months in which every column holds a valid value.
df_48ind = _monthly_section(df_monthly, 0, 1171).replace(MISSING_CODES, np.nan).dropna()

# Row bounds are kept exactly as originally written.
# NOTE(review): presumably "line number in the CSV minus a header offset" -- confirm against the file.
df_numfirm = _monthly_section(df_monthly, 2564 - 20, 3735 - 20)
df_avgsize = _monthly_section(df_monthly, 3739 - 22, 4910 - 22)

# Element-wise product of the two tables, restricted to the months kept in df_48ind.
df_MC = df_numfirm.multiply(df_avgsize, axis=0)
df_MC = df_MC.loc[df_48ind.index]

# Annual table: each yearly row is repeated 12 times to obtain one row per month.
# The 'Date' column is discarded up front because the result is re-indexed
# positionally below (it used to be parsed and then dropped, which was dead work).
df_BtoM = df_monthly.iloc[4890:4988].drop(columns='Date').apply(pd.to_numeric, errors='coerce')
df_BtoM = df_BtoM.loc[df_BtoM.index.repeat(12)].reset_index(drop=True)
df_BtoM = df_BtoM.replace(MISSING_CODES, np.nan).dropna()

# Positional alignment: after skipping 5 rows the remaining rows must match
# df_48ind one-to-one, otherwise the constructor raises.
# NOTE(review): the 5-row offset is taken as-is -- confirm it matches the intended month alignment.
df_BtoM = pd.DataFrame(data=df_BtoM.iloc[5:].values, index=df_48ind.index, columns=df_48ind.columns)

# 12-month rolling mean of returns; the first 11 rows are NaN and are dropped.
# (df_48ind is already free of the missing-value codes, so no further cleaning is needed.)
df_mom = df_48ind.rolling(window=12).mean().dropna()

# Put every monthly table on the same dates as df_mom.
df_48ind = df_48ind.loc[df_mom.index]
df_MC = df_MC.loc[df_mom.index]
df_BtoM = df_BtoM.loc[df_mom.index]

# Next-month return for each date (the last row is NaN).
df_ret_shift = df_48ind.shift(-1)

In [4]:
# Daily 48-industry file.
# low_memory=False makes pandas infer each column's type from the whole file.
# The notebook output echoes this read_csv line, which is how pandas reports a
# parser warning (presumably the mixed-dtype warning caused by stacked tables).
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
df_daily = pd.read_csv('/Users/dominicprenovost/Programmation/TP2-PF-management/48_Industry_Portfolios_Daily.csv', header=5, low_memory=False)
df_daily = df_daily.rename(columns={'Unnamed: 0': 'Date'})


def _daily_section(raw, n_rows):
    """Return the first `n_rows` rows of `raw` as a clean, date-indexed numeric frame.

    'Date' is parsed as YYYYMMDD and becomes the index, the remaining columns
    are coerced to numeric, the codes -99.99 / -999 are treated as missing and
    every row that contains a missing value is dropped.
    """
    section = raw.iloc[:n_rows].copy()
    section['Date'] = pd.to_datetime(section['Date'], format='%Y%m%d')
    section = section.set_index('Date').apply(pd.to_numeric, errors='coerce')
    return section.replace([-99.99, -999], np.nan).dropna()


df_daily_ret = _daily_section(df_daily, 25670)


df_FF = pd.read_csv('/Users/dominicprenovost/Programmation/TP2-PF-management/F-F_Research_Data_Factors_daily.CSV', header=3)
df_FF = df_FF.rename(columns={'Unnamed: 0': 'Date'})
df_FF_daily = _daily_section(df_FF, 25670)
# Keep exactly the dates of the daily returns (raises if a date is missing from the factors).
df_FF_daily = df_FF_daily.loc[df_daily_ret.index]

# Daily returns without their first 231 rows.
# NOTE(review): the 231-row offset is taken as-is -- presumably chosen so the
# monthly grid built from this frame lines up with the monthly tables; confirm.
df_daily_ret_reshaped = df_daily_ret.iloc[231:].copy()

  df_daily = pd.read_csv('/Users/dominicprenovost/Programmation/TP2-PF-management/48_Industry_Portfolios_Daily.csv', header=5)


In [5]:
########## Loop over every column, between a start date and an end date, for each month


def calculate_betas(start_date, end_date, df_daily_ret_reshaped, df_FF_daily):
    """Estimate the market beta of every return column over a date window.

    For each column of `df_daily_ret_reshaped`, the return in excess of 'RF' is
    regressed (OLS with an intercept) on the 'Mkt-RF' factor, using the rows
    whose index lies in [start_date, end_date] (label slicing, both ends
    inclusive).

    Parameters
    ----------
    start_date, end_date : index labels bounding the estimation window.
    df_daily_ret_reshaped : DataFrame of returns, one column per asset.
    df_FF_daily : DataFrame with at least the columns 'RF' and 'Mkt-RF',
        indexed like the returns.

    Returns
    -------
    numpy.ndarray
        One slope coefficient per column, in column order.
    """
    # Restrict both inputs to the requested date range.
    window_returns = df_daily_ret_reshaped.loc[start_date:end_date]
    window_factors = df_FF_daily.loc[start_date:end_date]

    risk_free = window_factors['RF']
    market_excess = window_factors['Mkt-RF']

    # The regressor matrix is identical for every column, so build it once.
    design = sm.add_constant(market_excess)

    betas = []
    for column in window_returns.columns:
        excess_return = window_returns[column] - risk_free
        fitted = sm.OLS(excess_return, design).fit()
        # Look the slope up by label: `params` is indexed by regressor name, and
        # integer position lookups on a labelled Series are deprecated in pandas.
        betas.append(fitted.params['Mkt-RF'])

    return np.array(betas)

# Date span of the trimmed daily sample; it defines the monthly grid below.
first_date = df_daily_ret_reshaped.index.min()
last_date = df_daily_ret_reshaped.index.max()

# One month-end timestamp per month in that span.
# NOTE: the 'M' alias is deprecated in pandas >= 2.2 in favour of 'ME'; it is
# kept here so the cell still runs on older pandas versions.
date_range = pd.date_range(start=first_date, end=last_date, freq='M')

# For each month-end, estimate betas on the window [month-end - 12 months, month-end].
# The untrimmed df_daily_ret is passed, so the window may reach back before first_date.
monthly_betas = {}
for date in date_range:
    start_date = date - pd.DateOffset(months=12)
    end_date = date
    betas = calculate_betas(start_date, end_date, df_daily_ret, df_FF_daily)
    monthly_betas[date] = betas

# The monthly rows are relabelled *positionally* with df_48ind's dates, so the
# two must have the same length; fail with a clear message if they do not.
if len(monthly_betas) != len(df_48ind.index):
    raise ValueError(
        f"{len(monthly_betas)} monthly beta rows cannot be aligned with "
        f"{len(df_48ind.index)} rows of df_48ind"
    )

# One row per month, one column per industry, built directly from the beta vectors.
df_monthly_betas = pd.DataFrame(
    list(monthly_betas.values()),
    index=df_48ind.index,
    columns=df_48ind.columns,
)

In [6]:
def calculate_idiosyncratic_volatility(start_date, end_date, df_daily_ret_reshaped, df_FF_daily):
    """Compute the residual volatility of every return column over a date window.

    For each column of `df_daily_ret_reshaped`, the return in excess of 'RF' is
    regressed (OLS with an intercept) on the 'Mkt-RF', 'SMB' and 'HML' factors,
    using the rows whose index lies in [start_date, end_date] (label slicing,
    both ends inclusive). The volatility is `np.std` of the regression residuals.

    Parameters
    ----------
    start_date, end_date : index labels bounding the estimation window.
    df_daily_ret_reshaped : DataFrame of returns, one column per asset.
    df_FF_daily : DataFrame with at least the columns 'RF', 'Mkt-RF', 'SMB'
        and 'HML', indexed like the returns.

    Returns
    -------
    numpy.ndarray
        One residual standard deviation per column, in column order.
    """
    # Restrict both inputs to the requested date range.
    window_returns = df_daily_ret_reshaped.loc[start_date:end_date]
    window_factors = df_FF_daily.loc[start_date:end_date]

    risk_free = window_factors['RF']

    # The factor matrix is identical for every column, so build it once.
    factors = pd.concat(
        [window_factors['Mkt-RF'], window_factors['SMB'], window_factors['HML']],
        axis=1,
    )
    design = sm.add_constant(factors)

    volatilities = []
    for column in window_returns.columns:
        excess_return = window_returns[column] - risk_free
        fitted = sm.OLS(excess_return, design).fit()
        volatilities.append(np.std(fitted.resid))

    return np.array(volatilities)

# Date span of the trimmed daily sample; it defines the monthly grid below.
first_date = df_daily_ret_reshaped.index.min()
last_date = df_daily_ret_reshaped.index.max()

# One month-end timestamp per month in that span.
# NOTE: the 'M' alias is deprecated in pandas >= 2.2 in favour of 'ME'; it is
# kept here so the cell still runs on older pandas versions.
date_range = pd.date_range(start=first_date, end=last_date, freq='M')

# For each month-end, estimate the residual volatility on that calendar month.
# `date` is the LAST day of the month, so the window runs from the first day of
# the same month up to `date`. (Starting the window at `date` and adding one
# month, as before, measured the *following* month instead.)
monthly_volatilities = {}
for date in date_range:
    start_date = date.replace(day=1)
    end_date = date
    volatilities = calculate_idiosyncratic_volatility(start_date, end_date, df_daily_ret, df_FF_daily)
    monthly_volatilities[date] = volatilities

# The monthly rows are relabelled *positionally* with df_48ind's dates, so the
# two must have the same length; fail with a clear message if they do not.
if len(monthly_volatilities) != len(df_48ind.index):
    raise ValueError(
        f"{len(monthly_volatilities)} monthly volatility rows cannot be aligned with "
        f"{len(df_48ind.index)} rows of df_48ind"
    )

# One row per month, one column per industry, built directly from the volatility vectors.
df_monthly_vol = pd.DataFrame(
    list(monthly_volatilities.values()),
    index=df_48ind.index,
    columns=df_48ind.columns,
)

In [7]:
def select_extreme_values(row, num_values=5):
    """Return the largest and smallest entries of a Series.

    Parameters
    ----------
    row : pandas.Series
        Values to rank.
    num_values : int, default 5
        Number of entries to keep at each end.

    Returns
    -------
    tuple of (pandas.Series, pandas.Series)
        `(top_values, bottom_values)`, both taken from the row sorted in
        descending order: the first `num_values` entries and the last
        `num_values` entries. The two groups overlap when the row has fewer
        than `2 * num_values` valid entries.
    """
    # Missing values sort last and would otherwise be picked as the "bottom"
    # group, so they are excluded before ranking.
    sorted_row = row.dropna().sort_values(ascending=False)
    top_values = sorted_row.head(num_values)
    bottom_values = sorted_row.tail(num_values)
    return top_values, bottom_values

In [8]:
# Apply select_extreme_values to every row (date) of df_MC with the default
# num_values; each element of the result is that row's (top, bottom) pair.
top_bottom_values = df_MC.apply(select_extreme_values, axis=1)

In [11]:
def get_total_returns_ew(top_bottom_values, df_ret, num_positions):
    """Equal-weighted long-short return for every date.

    Parameters
    ----------
    top_bottom_values : mapping-like with `.items()`
        Yields `(date, (top, bottom))`, where `top` and `bottom` are Series
        whose index holds the column labels of the long and short legs.
    df_ret : DataFrame
        Returns, looked up with `df_ret.loc[date, labels]`.
    num_positions : int
        Each position receives the weight `1 / num_positions`.

    Returns
    -------
    list
        One value per date: weighted sum of the long leg minus weighted sum of
        the short leg. Missing returns are skipped by the sums, so a date with
        no available return yields 0.0.
    """
    per_position = 1.0 / num_positions

    def _long_short(date, legs):
        long_labels, short_labels = legs[0].index, legs[1].index
        long_leg = df_ret.loc[date, long_labels] * per_position
        # The short leg carries a negative weight.
        short_leg = df_ret.loc[date, short_labels] * per_position * -1
        return long_leg.sum() + short_leg.sum()

    return [_long_short(date, legs) for date, legs in top_bottom_values.items()]

# Long-short series with 5 positions per leg, evaluated on df_ret_shift
# (the returns shifted by -1, i.e. the following row's return for each date).
ret_ew = get_total_returns_ew(top_bottom_values, df_ret_shift, 5)

In [12]:
def get_total_returns_vw(top_bottom_values, df_ret, df_weights=None):
    """Value-weighted long-short return for every date.

    Within each leg, positions are weighted in proportion to a size measure
    (weights sum to 1 per leg). Previously the weights were derived from the
    absolute realised returns themselves, which is not a value weighting and
    uses the very returns being measured.

    Parameters
    ----------
    top_bottom_values : mapping-like with `.items()`
        Yields `(date, (top, bottom))`, where `top` and `bottom` are Series
        indexed by the column labels of the long and short legs.
    df_ret : DataFrame
        Returns, looked up with `df_ret.loc[date, labels]`.
    df_weights : DataFrame, optional
        Size measure (e.g. market capitalisation) looked up with
        `df_weights.loc[date, labels]`. When omitted, the values stored in
        `top` / `bottom` are used as sizes, which is appropriate when the legs
        were selected on a non-negative size measure such as market cap.
        Pass `df_weights` explicitly when the legs were ranked on anything else.

    Returns
    -------
    list
        One value per date: weighted long-leg return minus weighted short-leg
        return. Missing returns are skipped by the sums, so a date with no
        available return yields 0.0.
    """
    returns = []
    for date, values in top_bottom_values.items():
        top_selected, bottom_selected = values[0], values[1]
        top_returns = df_ret.loc[date, top_selected.index]
        bottom_returns = df_ret.loc[date, bottom_selected.index]

        if df_weights is None:
            top_sizes, bottom_sizes = top_selected, bottom_selected
        else:
            top_sizes = df_weights.loc[date, top_selected.index]
            bottom_sizes = df_weights.loc[date, bottom_selected.index]

        # Normalise sizes so that each leg's weights sum to one.
        top_weights = top_sizes / top_sizes.sum()
        bottom_weights = bottom_sizes / bottom_sizes.sum()

        long_leg = (top_returns * top_weights).sum()
        # The short leg carries a negative weight.
        short_leg = (bottom_returns * bottom_weights * -1).sum()
        returns.append(long_leg + short_leg)
    return returns

# Long-short series from get_total_returns_vw, evaluated on df_ret_shift
# (the returns shifted by -1, i.e. the following row's return for each date).
ret_vw = get_total_returns_vw(top_bottom_values, df_ret_shift)

In [None]:
def get_returns(top_bottom_values, df_48ind, df_ret_shift, num_positions):
    """Per-leg and total long-short returns, equal- and value-weighted.

    Both weighting schemes are evaluated on the same returns, `df_ret_shift`.
    (The equal-weighted legs previously read `df_48ind`, i.e. a different
    return series than the value-weighted legs of the same function.)
    Value weights are proportional to the values stored in `top_bottom_values`
    (appropriate when the legs were selected on a non-negative size measure
    such as market capitalisation); previously they were derived from the
    absolute realised returns.

    Parameters
    ----------
    top_bottom_values : mapping-like with `.items()`
        Yields `(date, (top, bottom))`, where `top` and `bottom` are Series
        indexed by the column labels of the long and short legs.
    df_48ind : DataFrame
        Unused; kept so existing calls keep working.
    df_ret_shift : DataFrame
        Returns, looked up with `df_ret_shift.loc[date, labels]`.
    num_positions : int
        Each equal-weighted position receives the weight `1 / num_positions`.

    Returns
    -------
    tuple
        `(returns_ew, total_returns_ew, returns_vw, total_returns_vw)`.
        `returns_*` are lists of `(long_leg, short_leg)` pairs, the short leg
        already carrying its negative sign; `total_returns_*` are the lists of
        their sums.
    """
    weight = 1.0 / num_positions
    returns_ew = []
    returns_vw = []

    for date, values in top_bottom_values.items():
        top_selected, bottom_selected = values[0], values[1]
        top_returns = df_ret_shift.loc[date, top_selected.index]
        bottom_returns = df_ret_shift.loc[date, bottom_selected.index]

        # Equal weights; the short leg carries a negative weight.
        returns_ew.append((
            (top_returns * weight).sum(),
            (bottom_returns * weight * -1).sum(),
        ))

        # Size-proportional weights, normalised to sum to one within each leg.
        top_weights = top_selected / top_selected.sum()
        bottom_weights = bottom_selected / bottom_selected.sum()
        returns_vw.append((
            (top_returns * top_weights).sum(),
            (bottom_returns * bottom_weights * -1).sum(),
        ))

    total_returns_ew = [sum(pair) for pair in returns_ew]
    total_returns_vw = [sum(pair) for pair in returns_vw]

    return returns_ew, total_returns_ew, returns_vw, total_returns_vw

# Per-leg and total long-short returns from get_returns, with 5 positions per leg.
returns_ew, total_returns_ew, returns_vw, total_returns_vw = get_returns(top_bottom_values, df_48ind, df_ret_shift, 5)