# Necessary Package Import

In [1]:
import pandas as pd
import numpy as np
from pandas.tseries.holiday import USFederalHolidayCalendar
import matplotlib.pyplot as plt
import seaborn as sns

# Data Import

The Excel workbook contains 4 sheets in total.

- `Universe of broad assets` -> 11-asset daily returns from 07/31/00 to 09/06/24.
- `Equity factor baskets domestic` -> 46-asset daily returns from 01/03/01 to 08/30/24 with some N/A values of several assets in the early period.
- `Equity factor baskets global` -> 52-asset daily returns from 04/14/03 to 04/11/23 with some N/A values of several assets in the early period.
- `MPT Stats` -> 3 dataframes for each asset (benchmark): daily returns, stats, and annual returns.
    - For `Universe of broad assets (benchmark)`, the ST-DT is 08/12/02.
    - For `Equity factor baskets domestic (benchmark)`, the ST-DT is 02/04/03.
    - For `Equity factor baskets global (benchmark)`, the ST-DT is 06/02/05.

In [2]:
# Open the workbook once and pull its four sheets by position:
# 0 = broad assets, 1 = domestic equity factors, 2 = global equity factors,
# 3 = MPT stats.
# NOTE(review): `xls` is left open in case later cells read from it again;
# close it once no further reads are needed.
file_path = 'Quantitative ranking and weighting algorithm.xlsx'
xls = pd.ExcelFile(file_path)
broad_assets, eq_factor_dom, eq_factor_glb, stats_sheet = (
    pd.read_excel(xls, sheet_name=sheet_position) for sheet_position in range(4)
)

# Preview the first rows of the three return tables.
display(
    broad_assets.head(),
    eq_factor_dom.head(),
    eq_factor_glb.head(),
)

Unnamed: 0,Date,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,Asset 10,Asset 11
0,2000-07-31,0.015154,0.075677,-0.002266,0.000897,0.042162,0.004952,0.037313,0.002093,-0.002477,-0.008385,-0.031787
1,2000-08-01,0.009898,-0.04914,0.007874,0.006272,-0.011478,0.040433,-0.029227,0.010693,0.009594,0.000455,0.014613
2,2000-08-02,0.000718,-0.017754,0.00153,0.003732,0.009817,-0.009512,-0.000527,0.000973,0.011188,0.001176,0.018693
3,2000-08-03,0.019197,0.076083,0.007209,0.003099,-0.003059,-0.020954,-0.030686,-0.009466,0.006064,-0.026208,0.007214
4,2000-08-04,0.014248,-0.002954,0.006556,0.006231,0.016388,-0.024891,0.012934,0.004234,-0.001179,-0.003552,0.032614


Unnamed: 0,Date,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,...,Asset 37,Asset 38,Asset 39,Asset 40,Asset 41,Asset 42,Asset 43,Asset 44,Asset 45,Asset 46
0,2001-01-03,,,,-0.0338,,,,-0.0137,,...,0.0115,0.0419,0.0675,0.111315,0.0419,0.0177,-0.0411,-0.01177,-0.0171,-0.00126
1,2001-01-04,,,,-0.0471,,,,-0.00474,,...,-0.0255,-0.0447,-0.00193,-0.0247,-0.0155,-0.00337,-0.0719,0.00715,0.00649,0.00302
2,2001-01-05,,,,0.0231,,,,-0.000782,,...,-0.0301,-0.0328,-0.038,-0.0665,-0.0196,-0.0294,0.00541,0.00556,0.00461,0.00327
3,2001-01-08,,,,0.0138,,,,0.00892,,...,0.00268,0.00247,-0.000688,0.0115,0.005,-0.00917,0.0179,0.00271,0.00035,0.00142
4,2001-01-09,,,,-0.0126,,,,-0.0104,,...,0.0041,-0.00766,0.0324,0.0113,-0.00907,0.00821,-0.00934,-0.00386,-0.00261,-0.00183


Unnamed: 0,Date,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,...,Asset 43,Asset 44,Asset 45,Asset 46,Asset 47,Asset 48,Asset 49,Asset 50,Asset 51,Asset 52
0,2003-04-14,0.005857,0.003495,,0.006887,,,0.008971,0.004245,,...,,,,-0.00275,-0.00085,-0.00281,,,0.020654,0.020384
1,2003-04-15,0.039083,0.02697,,0.024801,,,0.012464,0.017988,,...,,,,0.0054,0.00073,0.00352,,,0.009331,0.009604
2,2003-04-16,0.019012,0.007082,,0.004513,,,0.001826,-0.011566,,...,,,,0.00412,0.00012,0.00222,,,-0.017042,-0.002283
3,2003-04-17,0.0197,0.023314,,0.012538,,,-0.002422,0.018749,,...,,,,0.00159,-0.00085,-0.00128,,,0.014844,0.022883
4,2003-04-21,0.002787,0.044131,,0.024778,,,0.009728,0.012977,,...,,,,-0.00216,0.00061,-0.00187,,,0.001005,0.003729


### 1. Data Preparation
#### 1.1 Broad Assets (Sheet: broad_assets)

- Confirm the date range. Based on the benchmark, it runs from 08/12/2002 to 09/06/2024.
- Convert data into a weekly frequency for the broad assets.
- Holiday adjustment trading logic:
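    1. Identify U.S. Federal Holidays: we check for standard U.S. federal holidays that typically affect trading using `USFederalHolidayCalendar` from `pandas.tseries.holiday`. The holidays most relevant to the rebalancing schedule are listed below (note that the code checks each date against every holiday in that calendar, not only these):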
        - New Year's Day (January 1)
        - Independence Day (July 4)
        - Thanksgiving Day (fourth Thursday in November)
        - Christmas Day (December 25)
        - Memorial Day (last Monday in May)
        - Labor Day (first Monday in September)
        - Presidents' Day (third Monday in February)

    2. Check if Holiday Falls on a Rebalancing Monday: If the identified holiday falls on a rebalancing Monday, we will adjust the rebalancing date to the next available trading day, typically Tuesday or Wednesday.

    3. Ensure Weekly Gaps are Maintained: Even after adjusting for holidays, we must ensure the rebalancing periods still span roughly a week (e.g., adjust to Tuesday one week, and back to Monday the following week).

In [4]:
# Restrict the daily returns to the benchmark window and compound them into
# weekly returns.
#
# The cell is written so it can be re-executed: once 'Date' has been moved
# into the index, the parsing/indexing step is skipped instead of raising
# KeyError on the missing column.
start_date = pd.Timestamp('2002-08-12')

if 'Date' in broad_assets.columns:
    broad_assets = (
        broad_assets
        .assign(Date=pd.to_datetime(broad_assets['Date']))
        .set_index('Date')
    )
broad_assets = broad_assets.loc[broad_assets.index >= start_date]

# 'W-MON' with closed='left'/label='left' builds Monday-to-next-Monday bins
# (left edge included, right edge excluded) labelled by the opening Monday.
# Daily simple returns are compounded within each bin: prod(1 + r) - 1.
weekly_returns = (
    broad_assets
    .resample('W-MON', label='left', closed='left')
    .apply(lambda x: (x + 1).prod() - 1)
    .reset_index()
)
weekly_returns

Unnamed: 0,Date,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,Asset 10,Asset 11
0,2002-08-12,0.043421,0.122573,-0.001281,-0.016690,0.037419,0.043725,0.001073,0.036815,-0.027764,-0.004438,0.099801
1,2002-08-19,0.024508,0.025306,0.025571,0.017949,0.019668,0.001863,0.017212,0.022577,0.023542,-0.044443,-0.007836
2,2002-08-26,-0.052367,-0.133730,0.017482,0.012286,-0.046635,0.037325,-0.059524,-0.018172,-0.018949,0.036719,0.018215
3,2002-09-02,-0.049785,-0.048782,0.016953,0.016174,0.000352,-0.015561,-0.055104,-0.063007,-0.002808,0.047992,0.042825
4,2002-09-09,-0.009469,0.001476,0.033178,0.014859,-0.008024,-0.006973,-0.060685,0.023402,0.021201,-0.021669,-0.025293
...,...,...,...,...,...,...,...,...,...,...,...,...
1147,2024-08-05,-0.003170,0.004955,-0.042034,-0.020935,-0.029227,-0.003668,0.004093,0.019678,-0.000718,-0.011385,0.024481
1148,2024-08-12,0.079746,0.109137,0.016432,0.002680,0.058302,0.002246,0.064435,0.063662,-0.012633,0.062138,0.001468
1149,2024-08-19,0.028158,0.020263,0.016296,0.010215,0.070880,0.071190,0.055672,0.018223,-0.033153,0.002323,0.001540
1150,2024-08-26,0.004289,-0.016023,-0.033388,-0.011248,-0.001519,0.007773,0.007896,-0.022477,0.020246,-0.008527,-0.014907


In [5]:
# Every date in pandas' U.S. federal holiday calendar that falls between the
# first and the last weekly bucket label.
us_holidays = USFederalHolidayCalendar().holidays(
    start=weekly_returns['Date'].min(),
    end=weekly_returns['Date'].max(),
)

def adjust_for_holiday(date, holidays=None):
    """Move a rebalancing date that falls on a holiday to the next business day.

    Parameters
    ----------
    date : pd.Timestamp
        Candidate rebalancing date.
    holidays : container of pd.Timestamp, optional
        Dates to avoid; any object supporting ``in`` (e.g. a DatetimeIndex).
        Defaults to the notebook-level ``us_holidays``.

    Returns
    -------
    pd.Timestamp
        ``date`` itself when it is not in ``holidays``; otherwise the first
        later day that is neither in ``holidays`` nor a Saturday/Sunday.
        A non-holiday input is returned unchanged even if it is a weekend.
    """
    if holidays is None:
        holidays = us_holidays

    if date not in holidays:
        return date

    one_day = pd.DateOffset(days=1)
    candidate = date + one_day
    # Step forward past holidays and weekends (weekday() is 5 for Saturday and
    # 6 for Sunday) so a Friday holiday lands on Monday, not Saturday.
    while candidate in holidays or candidate.weekday() >= 5:
        candidate += one_day
    return candidate

# Compute the holiday-adjusted label for every weekly bucket.
weekly_returns['Adjusted Date'] = weekly_returns['Date'].apply(adjust_for_holiday)

# Keep the rows whose label was moved, for inspection.
adjusted_dates = weekly_returns.loc[
    weekly_returns['Date'].ne(weekly_returns['Adjusted Date'])
]

# Replace the original label with the adjusted one; the new 'Date' column
# therefore ends up as the last column of the frame.
weekly_returns = weekly_returns.drop(columns=['Date'])
weekly_returns = weekly_returns.rename(columns={'Adjusted Date': 'Date'})

# to_csv is called with its defaults, so the positional index is written as
# the first (unnamed) column of the file.
weekly_returns.to_csv('broad_assets_rtn.csv')