In [154]:

import pandas as pd
from sklearn.cross_decomposition import PLSRegression
import calendar
import numpy as np

#loading dataframes 

File_url_d = 'https://raw.githubusercontent.com/Sebasleen/Seminargroup/Seminar/managed_portfolios_anom_d_50.csv'

r_daily = pd.read_csv(File_url_d)

# drop all momentum factors or factors that are constructed based on momentum

r_daily['date'] = pd.to_datetime(r_daily['date'])
r_daily.set_index('date', inplace=True)

# select our Y variable of interest 

y_daily = r_daily['r_mom']
y_monthly = y_daily.resample('M').sum()
y_monthly.index = y_monthly.index.strftime('%Y-%m')


momentum_list = ['r_mom', 'r_indmom', 'r_valmom', 'r_valmomprof', 'r_mom12', 'r_momrev', 'r_indmomrev']



# following the procedure in the paper, if there are observations missing we set them to 0. 

r_daily.fillna(0, inplace=True)
r_daily.drop(columns=['rme', 're_ew','r_mom', 'r_indmom', 'r_valmom', 'r_valmomprof', 'r_mom12', 'r_momrev', 'r_indmomrev'], inplace=True)



# create a list of factors for later analysis purposes 

factors = [col for col in r_daily.columns if col.startswith('r_')]

 
# create a monthly return dataframe for later analysis purposes 

r_monthly = r_daily.resample('M').sum()
r_monthly.index = r_monthly.index.strftime('%Y-%m')




In [182]:

# select our start date 

start_date = pd.to_datetime("1963-07-01")

pls_avg_df = pd.DataFrame()

# create an empty list for the pls return dataframes. These will be concated in a later stage to one large dataframe

pls_return_dfs = []


# Create an empty dataframe for monthly PLS returns
pls_return_df = pd.DataFrame(columns=['date'] + [f"PLS{i+1}" for i in range(len(factors))])

# Initialize an empty list to store PLS weights
pls_weights_list = []

# Loop through years and months
for year in range(1973, 2018):
    # as our out of sample procedure starts July 1973, we start in the 7th month in 1973
    for mo in range(6,13) if year == 1973 else range(1, 13):
        # first we have to find the last month of the day. For this we use the calender function with inputs from the loop variables
        last_day = calendar.monthrange(year, mo)[1]

        # we select our new end_date variable for which the Pls analysis is done, also with inputs from our loop and the last_day variable
        end_date = pd.to_datetime(f'{year}-{mo}-{last_day}')

        t_dt = pd.to_datetime(f'{year}-{mo}')
        t = t_dt.strftime('%Y-%m')

        # Select data range for PLS analysis
        pls_data = r_daily.loc[start_date:end_date]
        pls_data_y = y_daily.loc[start_date:end_date]

        # Separate features (X) and target (y)
        X = pls_data
        y = pls_data_y

        # Fit PLS model
        pls = PLSRegression(n_components=43)  
        pls.fit(X, y)


        # Extract and store PLS weights
        pls_weights = pd.DataFrame(
            np.array(pls.x_weights_),  
            index=factors,
            columns=[f"PLS{i+1}" for i in range(43)] 
        )
        # calculating return for month t+1. If mo = 12, then year will increment with 1. 

        t_plus_1_year = year + 1 if mo == 12 else year
        t_plus_1_month = (mo % 12) + 1

        # creating a datetime variable for the month t+1 and storing this in our pls_return_data variable

        t_plus_1_dt =pd.to_datetime(f'{t_plus_1_year}-{t_plus_1_month}')
        t_plus_1 = t_plus_1_dt.strftime('%Y-%m')

        pls_return_data = {'date': t_plus_1}


        # in this loop we calculate the monthly factor returns (f) using the principal components and returns

        for f in range(len(factors)):
            # select our factor and extract its principal component from principal_df and its return from r_daily for all observations in month mo 
            pls = pls_weights.iloc[:, f]
            r_month = r_monthly.loc[t]

            # multiply the principal components with the returns and sum them up to get Pls factor return for month mo 
            pls_return = (pls*r_month).sum()

            # place this in our dictionary for later transposing to dataframe

            pls_return_data[pls_weights.columns[f]] = pls_return

            r_pls_month_n_list = []

            for n in range(1, 12):
                # calculate the datetime for t - n
                t_minus_n_dt = t_dt - pd.DateOffset(months=n)

                # transpose it to our YYYY-MM format
                t_minus_n = t_minus_n_dt.strftime('%Y-%m')

                # select the return corresponding to our month t-n
                r_month_n = r_monthly.shift(n).loc[t_minus_n]
                pls_return_n = (pls*r_month_n).sum()
                r_pls_month_n_list.append(pls_return_n)

            r_pls_month_mean = (np.mean(r_pls_month_n_list))
            pls_avg_df.loc[t, f'PLS{f+1}'] = r_pls_month_mean


        pls_return_df = pd.DataFrame.from_dict(pls_return_data, orient='index').T
        pls_return_df.set_index('date', inplace=True)
        pls_return_dfs.append(pls_return_df)

r_pls = pd.concat(pls_return_dfs)
print(r_pls)
print(pls_avg_df)


             PLS1      PLS2      PLS3      PLS4      PLS5      PLS6      PLS7  \
date                                                                            
1973-07 -0.055711  0.129687  0.037249 -0.024036 -0.020074 -0.021063   0.04419   
1973-08   0.02762  -0.50513 -0.212508  0.007123  0.097147 -0.138669  0.084288   
1973-09  -0.00516  0.054417  0.009852 -0.019354  0.007235  0.006986  0.028687   
1973-10 -0.022413 -0.085402  0.061641 -0.181277  0.038235  0.056513   0.03602   
1973-11 -0.000221  0.222399  0.037388 -0.108471 -0.005582  0.057491 -0.041453   
...           ...       ...       ...       ...       ...       ...       ...   
2017-09  0.066839 -0.081665  0.019334  0.020837  0.026032  0.060983 -0.023135   
2017-10 -0.027442   0.05729 -0.047574 -0.047265 -0.096295 -0.060849 -0.021593   
2017-11   0.06784 -0.058746  0.044482   0.05033  0.004865  0.026983 -0.037586   
2017-12  0.005897  0.060105  0.100813  0.085414 -0.059364 -0.032266  0.051222   
2018-01 -0.028599  0.016518 

In [194]:

# Define the start date
start_date_dt = pd.to_datetime("1963-07-01")
start_date = start_date_dt.strftime('%Y-%m')

# Create an empty list to store leverage-adjusted PLS factors
lev_pls_dfs = []

for year in range(1973, 2018):
    # Loop through months
    for mo in range(7, 13) if year == 1973 else range(1, 13):
        # Define the current date
        t_dt = pd.to_datetime(f'{year}-{mo}')
        t = t_dt.strftime('%Y-%m')

        # Calculate the variance of the individual factor returns up until month t 
        r_indiv_f_t = r_monthly.loc[start_date:t]
        var_indiv_f_t = r_indiv_f_t.var(axis=0)
        avg_var_indiv_f_t = var_indiv_f_t.mean()

        # Calculate the mean and variance of the PLS factors up until month t 
        r_pls_t = r_pls.loc[:t]
        demeaned_r_pls_t = r_pls_t.loc[t].to_frame().T - r_pls_t.mean()

        # Calculate the leverage factor
        leverage_t = np.sqrt(avg_var_indiv_f_t / r_pls_t.var(axis=0))

        # Multiply leverage factor with the demeaned PLS factors
        lev_r_pls_t = demeaned_r_pls_t * leverage_t

        # Append the leveraged PLS factors to the list
        lev_pls_dfs.append(lev_r_pls_t)

# Concatenate the leveraged PLS factors into a single DataFrame
lev_r_pls = pd.concat(lev_pls_dfs)

# Check average returns are close to zero
avg_returns_lev_pls = lev_r_pls.mean(axis=0)
max_abs_avg_return = np.max(np.abs(avg_returns_lev_pls))
tolerance = 0.001  # Adjust tolerance as needed

# Check if variances are equal (use appropriate checks based on your chosen comparison)
lev_r_pls_vars = lev_r_pls.var(axis=0)

if max_abs_avg_return <= tolerance and np.std(lev_r_pls_vars) < tolerance:
    print("Average returns are close to zero and variances are equal within chosen tolerances.")
else:
    print("Some conditions might not be met. Investigate further.")
print(avg_returns_lev_pls)

Some conditions might not be met. Investigate further.
PLS1     0.000823
PLS2    -0.001276
PLS3     0.000675
PLS4     0.010758
PLS5    -0.002898
PLS6     0.000298
PLS7    -0.001293
PLS8    -0.004450
PLS9    -0.000069
PLS10    0.004192
PLS11    0.001809
PLS12   -0.001431
PLS13   -0.001688
PLS14    0.000557
PLS15   -0.003934
PLS16   -0.001967
PLS17   -0.003361
PLS18    0.000703
PLS19   -0.005010
PLS20   -0.001797
PLS21   -0.003605
PLS22    0.001662
PLS23   -0.005580
PLS24   -0.004361
PLS25   -0.000108
PLS26    0.001353
PLS27   -0.003533
PLS28    0.001919
PLS29   -0.000385
PLS30    0.002888
PLS31   -0.002937
PLS32    0.006248
PLS33    0.003680
PLS34   -0.000517
PLS35    0.000251
PLS36    0.002222
PLS37   -0.000095
PLS38    0.007622
PLS39   -0.001885
PLS40   -0.004254
PLS41    0.004402
PLS42   -0.000103
PLS43   -0.001350
dtype: float64


In [184]:
# Define the start date
start_date_dt = pd.to_datetime("1963-07-01")
start_date = start_date_dt.strftime('%Y-%m')

# Create an empty list to store leveraged individual factor momentum returns
indiv_lev_dfs = []

for year in range(1973, 2018):
    # Loop through months
    for mo in range(7, 13) if year == 1973 else range(1, 13):
        # Define the current date
        t_dt = pd.to_datetime(f'{year}-{mo}')
        t = t_dt.strftime('%Y-%m')

        # Calculate the variance of the individual factor returns up until month t 
        r_indiv_f_t = r_monthly.loc[start_date:t]
        var_indiv_f_t = r_indiv_f_t.var(axis=0)
        avg_var_indiv_f_t = var_indiv_f_t.mean()

        # Calculate the leverage factor 
        leverage_factor = np.sqrt(avg_var_indiv_f_t / var_indiv_f_t)

        # Multiply the leverage factor with the returns
        indiv_lev_df = r_monthly.loc[t].to_frame().T * leverage_factor

        # Append the leveraged individual factor momentum returns to the list
        indiv_lev_dfs.append(indiv_lev_df)

# Concatenate the leveraged individual factor momentum returns into a single DataFrame
lev_r_indiv = pd.concat(indiv_lev_dfs)
lev_r_indiv.fillna(0, inplace=True)

# Print the DataFrame
print(lev_r_indiv)

           r_size   r_value    r_prof  r_valprof  r_fscore  r_debtiss  \
1973-07 -0.048953 -0.125946  0.129852   0.033222  0.031672  -0.303715   
1973-08  0.020838  0.009044 -0.005810  -0.016223  0.035741  -0.004212   
1973-09 -0.037885  0.016908  0.009182  -0.005630  0.085452  -0.205952   
1973-10 -0.005048  0.044587 -0.034781   0.003408 -0.006801  -0.085912   
1973-11  0.087170  0.081589 -0.126876  -0.068979 -0.102567   0.162956   
...           ...       ...       ...        ...       ...        ...   
2017-08  0.025351 -0.029649  0.036218  -0.024006 -0.004275   0.047617   
2017-09 -0.036670  0.021026 -0.040234  -0.010097 -0.050019   0.001135   
2017-10 -0.005193 -0.011033  0.018336  -0.007419  0.045026   0.029219   
2017-11  0.008796 -0.013538  0.042746   0.050914  0.016230   0.015697   
2017-12  0.009780 -0.001831  0.006061   0.003697 -0.055648   0.014643   

         r_repurch   r_nissa  r_accruals  r_growth  ...   r_strev    r_ivol  \
1973-07   0.173426  0.015890    0.028854  0.

In [192]:
# Define your ranges for PLS factors
r_pls_1_10 = ['PLS1', 'PLS2', 'PLS3', 'PLS4', 'PLS5', 'PLS6', 'PLS7', 'PLS8', 'PLS9', 'PLS10']

# Initialize a list to store the momentum returns for each range
pls_mom_returns = []

# Loop through each range of PLS factors
for r in [r_pls_1_10]:
    # Calculate the long and short portfolios based on positive and negative average returns
    positive_returns_pls = pls_avg_df[r] > 0
    negative_returns_pls = pls_avg_df[r] < 0

    long_portfolio_pls = positive_returns_pls.astype(int)
    short_portfolio_pls = negative_returns_pls.astype(int)

    # Calculate momentum returns for the current range
    mom_return = (long_portfolio_pls.shift(-1) * lev_r_pls[r]).sum(axis=1) - \
                 (short_portfolio_pls.shift(-1) * lev_r_pls[r]).sum(axis=1)
    # Shift the returns by one period
    mom_return = mom_return.shift(1)

    # Append the momentum returns to the list
    pls_mom_returns.append(mom_return)

# Concatenate the momentum returns for different ranges into a single DataFrame
pls_mom_returns_df = pd.concat(pls_mom_returns, axis=1)

# Calculate the average momentum return across different ranges
average_mom_return = pls_mom_returns_df.mean(axis=1)

print(average_mom_return)



1973-06         NaN
1973-07    0.000000
1973-08    0.000000
1973-09    0.110818
1973-10    0.122549
             ...   
2017-08   -0.072997
2017-09    0.098384
2017-10    0.103905
2017-11    0.289313
2017-12    0.097110
Length: 535, dtype: float64


In [189]:
print(pls_mom_returns_df)

                0   Average
1973-06       NaN       NaN
1973-07  0.000000  0.000000
1973-08  0.000000  0.000000
1973-09  0.110818  0.110818
1973-10  0.122549  0.122549
...           ...       ...
2017-08 -0.072997 -0.072997
2017-09  0.098384  0.098384
2017-10  0.103905  0.103905
2017-11  0.289313  0.289313
2017-12  0.097110  0.097110

[535 rows x 2 columns]
