In [None]:
import pandas as pd
import calendar
import numpy as np
from sklearn.decomposition import PCA
from datetime import datetime

Loading and data pre-processing

In [ ]:
# Load the daily managed-portfolio returns and prepare the daily / monthly
# return frames used by every later cell.

# momentum factors, and factors constructed from momentum, are excluded
momentum_list = ['r_mom', 'r_indmom', 'r_valmom', 'r_valmomprof', 'r_mom12', 'r_momrev', 'r_indmomrev']

r_daily = (
    pd.read_csv('managed_portfolios_anom_d_50.csv')
    .drop(columns=momentum_list)
    # parse the date column and use it as a DatetimeIndex
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    # following the procedure in the paper, missing observations are set to 0
    .fillna(0)
)

# TODO: missing list = ['dur', 'divg', 'invaci', 'ipo'] -> are these built from
# daily stock data by means of a signals list?

# names of the factor columns (everything prefixed with 'r_'); collected before
# the two non-factor return columns below are removed
factors = [name for name in r_daily.columns if name.startswith('r_')]
r_daily = r_daily.drop(columns=['rme', 're_ew'])

# monthly returns are obtained by summing the daily returns within each
# calendar month; the index is relabelled as 'YYYY-MM' strings
r_monthly = r_daily.resample('M').sum()
r_monthly.index = r_monthly.index.strftime('%Y-%m')

Principal component analysis and calculation of PC returns

In [ ]:
# Expanding-window PCA.
#
# For every formation month t the PCA is re-fitted on all daily returns from
# `start_date` up to the end of month t. The resulting eigenvectors are used to
#   * compute the PC returns of month t+1 (stored in `r_pc`, indexed by t+1), and
#   * compute the average PC return over the 12 months t-11 .. t
#     (stored in `pc_avg_df`, indexed by t), which is the momentum signal.

# initialize pca model with one component per factor
pca = PCA(n_components=len(factors))

# first day of the estimation sample
start_date = pd.to_datetime("1963-07-01")

# Formation months t. The first PC return belongs to July 1973, so the first
# formation month is June 1973. The last formation month is November 2017 so
# that the PC returns run until December 2017, the last month used by the
# demeaning / leveraging cells.
formation_months = pd.period_range("1973-06", "2017-11", freq="M")

pc_labels = [f"PC{i+1}" for i in range(len(factors))]

# one Series (indexed by PC label) per month; turned into DataFrames after the
# loop, which keeps the dtype numeric and avoids growing a frame cell by cell
pc_return_rows = {}
pc_avg_rows = {}

for period in formation_months:
    t = period.strftime('%Y-%m')
    t_plus_1 = (period + 1).strftime('%Y-%m')
    window_start = (period - 11).strftime('%Y-%m')

    # fit the PCA on all daily observations up to and including the last day of month t
    end_date = period.end_time.normalize()
    pca_data = r_daily.loc[start_date:end_date, factors]
    pca.fit(pca_data)

    # pca.components_ has shape (n_components, n_features): one ROW per PC.
    # Transpose it so that rows are factors and each column holds the loadings
    # of one PC.
    components_df = pd.DataFrame(pca.components_.T, index=factors, columns=pc_labels)

    # PC returns of month t+1: loadings estimated through month t applied to
    # the factor returns of month t+1
    r_next_month = r_monthly.loc[t_plus_1, factors]
    pc_return_rows[t_plus_1] = components_df.mul(r_next_month, axis=0).sum()

    # Average PC return over the formation window t-11 .. t. The PC return is
    # linear in the factor returns, so averaging the factor returns first gives
    # the same result as averaging the twelve monthly PC returns.
    formation_mean = r_monthly.loc[window_start:t, factors].mean()
    pc_avg_rows[t] = components_df.mul(formation_mean, axis=0).sum()

# rows = months, columns = PC1 .. PCn
r_pc = pd.DataFrame(pc_return_rows).T
r_pc.index.name = 'date'
pc_avg_df = pd.DataFrame(pc_avg_rows).T

print(r_pc)
print(pc_avg_df)

Demean and leverage our dataframes

In [ ]:
# Demean and leverage the PC returns.
#
# For every month t the PC returns observed up to t are demeaned, then scaled
# so that the variance of each PC equals the average variance of the individual
# factor returns up to t. Only the row of month t is kept.

start_date_dt = pd.to_datetime("1963-07-01")
start_date = start_date_dt.strftime('%Y-%m')

# r_pc can arrive with object dtype (it is assembled from dicts that mix a date
# string with floats); the statistics below need real floats
r_pc_float = r_pc.astype(float)

lev_rows = []

# The first PC return belongs to July 1973. We start in August 1973 because at
# least two observations are needed to estimate a variance.
for period in pd.period_range("1973-08", "2017-12", freq="M"):
    t = period.strftime('%Y-%m')

    # months for which no PC return was computed cannot be leveraged
    if t not in r_pc_float.index:
        continue

    # average variance of the individual factor returns up until month t
    r_indiv_f_t = r_monthly.loc[start_date:t]
    avg_var_indiv_f_t = r_indiv_f_t.var(axis=0).mean()

    # mean and variance of the PC returns up until month t
    r_pc_t = r_pc_float.loc[:t]
    leverage_t = np.sqrt(avg_var_indiv_f_t / r_pc_t.var(axis=0))

    # demean the PC returns of month t and apply the leverage factor
    lev_row = (r_pc_t.loc[t] - r_pc_t.mean()) * leverage_t
    lev_rows.append(lev_row.rename(t))

# rows = months, columns = PCs
lev_r_pc = pd.DataFrame(lev_rows)
print(lev_r_pc)

In [ ]:
# Leverage the individual factor returns (as described in the paper): every
# factor is scaled so that its variance up to month t equals the average
# variance across all factors up to month t.

start_date_dt = pd.to_datetime("1963-07-01")
start_date = start_date_dt.strftime('%Y-%m')


def _leveraged_factor_row(month, first_month):
    """Return a one-row frame with the leveraged factor returns of `month`.

    The leverage factor of each column is sqrt(average variance / own variance),
    both estimated on the monthly returns from `first_month` up to `month`.
    """
    history = r_monthly.loc[first_month:month]
    factor_variance = history.var(axis=0)
    scale = np.sqrt(factor_variance.mean() / factor_variance)
    return r_monthly.loc[month].to_frame().T * scale


# the individual factor returns are leveraged from July 1973 until December 2017
holding_months = pd.period_range("1973-07", "2017-12", freq="M").strftime('%Y-%m')

indiv_lev_dfs = [_leveraged_factor_row(month, start_date) for month in holding_months]

# missing values produced by the scaling are replaced by 0
lev_r_indiv = pd.concat(indiv_lev_dfs).fillna(0)
print(lev_r_indiv)

Constructing the momentum strategies

In [ ]:
# Constructing the PC momentum strategies (PC 1-10, PC 11-20, etc.).
#
# The signal of month t (row t of pc_avg_df) is the sign of the average PC
# return over the formation window ending in t. It is traded in month t+1:
# long PCs with a positive average, short PCs with a negative average.

positive_returns_PC = pc_avg_df > 0
negative_returns_PC = pc_avg_df < 0

long_portfolio_PC = positive_returns_PC.astype(int)
short_portfolio_PC = negative_returns_PC.astype(int)

# +1 = long, -1 = short, 0 = no position
position_PC = long_portfolio_PC - short_portfolio_PC

r_pc_1_10 = [f'PC{i}' for i in range(1, 11)]
r_pc_11_20 = [f'PC{i}' for i in range(11, 21)]
r_pc_21_30 = [f'PC{i}' for i in range(21, 31)]
r_pc_31_40 = [f'PC{i}' for i in range(31, 41)]
r_pc_41_43 = [f'PC{i}' for i in range(41, 44)]


def _pc_momentum_return(pc_columns):
    """Return the monthly return of the momentum strategy on `pc_columns`.

    The position formed in month t is moved one row forward (shift(1)) so that
    it lines up with the leveraged PC return of month t+1. Shifting in the other
    direction would pair a return with a signal that is formed later.
    This assumes the rows of pc_avg_df are consecutive months.
    The result is restricted to the months for which a leveraged return exists,
    so no artificial zero returns are created for other months.
    """
    returns = lev_r_pc[pc_columns].astype(float)
    held_position = position_PC[pc_columns].shift(1).reindex(returns.index)
    return (held_position * returns).sum(axis=1)


# NOTE: the names below are kept for compatibility; "11_21", "21_31" and
# "31_41" hold the strategies on PC 11-20, PC 21-30 and PC 31-40 respectively.
pc_mom_return_1_10 = _pc_momentum_return(r_pc_1_10)
pc_mom_return_11_21 = _pc_momentum_return(r_pc_11_20)
pc_mom_return_21_31 = _pc_momentum_return(r_pc_21_30)
pc_mom_return_31_41 = _pc_momentum_return(r_pc_31_40)
pc_mom_return_41_43 = _pc_momentum_return(r_pc_41_43)

In [ ]:
# Constructing the momentum strategy on the individual factors.
# TODO: not sure whether we need this one.
#
# The signal of month t is the sign of the average factor return over the
# twelve months ending in t; it is traded in month t+1.

average_returns_indiv = r_monthly.rolling(window=12).mean()

positive_returns_indiv = average_returns_indiv > 0
negative_returns_indiv = average_returns_indiv < 0

long_portfolio = positive_returns_indiv.astype(int)
short_portfolio = negative_returns_indiv.astype(int)

# +1 = long, -1 = short, 0 = no position. shift(1) moves the position formed in
# month t onto the row of month t+1, the month in which it is held. Shifting
# the other way would pair each return with a signal whose 12-month window
# already contains that return.
# The positions are restricted to the months for which leveraged returns exist,
# so earlier months do not show up as artificial zero returns.
held_position = (long_portfolio - short_portfolio).shift(1).reindex(lev_r_indiv.index)

mom_strategy_return = (held_position * lev_r_indiv).sum(axis=1)
print(mom_strategy_return)