# Import packages and load data

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Connect with Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Read the signal data for all the 10 years
# Here, next_day_return refers to the daily return for next day
# weekly_return refers to the weekly return for next week
# monthly_return refers to the monthly return for next month

file_path = "/content/drive/My Drive/Thesis/sentiment_returns_all_years.parquet"
df = pd.read_parquet(file_path)
df

Unnamed: 0,gvkey,date,next_day_return,weekly_return,monthly_return,Capital Structure,Company Performance,Competition & Challenge,ESG,Risk,...,Capital Structure_QA,Company Performance_QA,Competition & Challenge_QA,ESG_QA,Strategy and Operation_QA,Trend_QA,Risk_QA,Technology_QA,Technology_Pre,Urgency and Emphasis_QA
0,1004,2015-03-30,-0.024158,-0.004576,-0.012006,0.166549,0.390272,0.499978,,-0.127812,...,-0.000141,0.090313,0.999999,,0.611526,0.599943,-0.315781,,,
1,1004,2015-07-14,-0.043210,-0.077526,-0.108956,0.480665,0.146112,-0.026671,,0.344811,...,,0.272562,-0.026671,,0.326759,1.000000,0.402183,,,
2,1004,2015-09-24,-0.095215,0.073506,0.219991,-0.000061,0.223542,-0.046888,0.001573,-0.098396,...,-0.000230,0.300677,-0.046888,,0.389017,0.249848,-0.122991,,,
3,1004,2015-12-17,0.068606,0.080456,-0.030069,0.268410,0.324073,0.186048,-0.000023,0.391562,...,0.020225,0.999996,0.465299,-0.000023,-0.198682,0.997565,0.499726,,,
4,1004,2016-03-22,0.049681,0.025877,0.060966,0.276640,0.652487,-0.000239,0.998155,0.186078,...,0.001974,0.324465,-0.000239,0.998155,-0.148496,0.206847,-0.464402,,,-0.003277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119227,349337,2023-11-14,-0.014545,-0.045801,0.061068,,0.079446,,,0.037070,...,,0.012824,,,,,,0.869467,0.591287,
119228,349337,2024-03-27,0.004717,0.117372,-0.131454,0.500326,0.238157,0.085643,,0.164576,...,,0.454525,,,0.020255,-0.000043,0.000392,0.213412,0.693661,
119229,349337,2024-05-07,0.005814,0.023392,-0.093567,-0.278886,0.285979,-0.076892,0.994889,0.225685,...,-0.000473,-0.083236,,0.989779,-0.001855,0.023601,0.008075,0.000067,0.488974,
119230,349337,2024-08-12,-0.096386,-0.038710,-0.167743,0.143871,0.305791,-0.130532,,0.211139,...,,0.317151,,,0.227898,,0.333315,-0.238407,0.701661,


In [3]:
file_path = "/content/drive/My Drive/Thesis/F-F_Research_Data_5_Factors_2x3.csv"

# Get FF5F montly data
FF_5F_monthly = pd.read_csv(file_path)
FF_5F_monthly

Unnamed: 0.1,Unnamed: 0,Mkt-RF,SMB,HML,RMW,CMA,RF
0,196307,-0.39,-0.41,-0.97,0.68,-1.18,0.27
1,196308,5.07,-0.80,1.80,0.36,-0.35,0.25
2,196309,-1.57,-0.52,0.13,-0.71,0.29,0.27
3,196310,2.53,-1.39,-0.10,2.80,-2.01,0.29
4,196311,-0.85,-0.88,1.75,-0.51,2.24,0.27
...,...,...,...,...,...,...,...
733,202408,1.61,-3.65,-1.13,0.85,0.86,0.48
734,202409,1.74,-1.02,-2.59,0.04,-0.26,0.40
735,202410,-0.97,-0.88,0.89,-1.38,1.03,0.39
736,202411,6.51,4.78,-0.05,-2.62,-2.17,0.40


In [4]:
file_path = "/content/drive/My Drive/Thesis/F-F_Research_Data_Factors_weekly.csv"

# Get FF3F weekly data
FF_3F_weekly = pd.read_csv(file_path)
FF_3F_weekly

Unnamed: 0.1,Unnamed: 0,Mkt-RF,SMB,HML,RF
0,19260702,1.60,-0.62,-0.83,0.056
1,19260710,0.36,-0.88,0.31,0.056
2,19260717,1.01,0.59,-1.44,0.056
3,19260724,-2.05,0.10,-0.18,0.056
4,19260731,3.04,-1.82,-0.90,0.056
...,...,...,...,...,...
5134,20241129,0.95,0.57,-0.62,0.099
5135,20241206,1.04,-0.20,-3.03,0.092
5136,20241213,-0.93,-0.80,-0.16,0.092
5137,20241220,-2.28,-1.52,-1.42,0.092


# weekly rebalanced portfolio

In [5]:
def compute_weekly_long_short_strategy(
    df,
    signal_cols,
    top_q=0.8,
    bottom_q=0.2,
    date_col='date',
    return_col='weekly_return'
):
    """
    Build a weekly cross-sectional long-short strategy and calculate portfolio returns, aligned by next week's performance.

    Each row in the returned DataFrame represents: "positions established in the previous week" and "returns realized in the current week."
    """

    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col])

    # Use Monday of the current week as the identifier
    df['week_id'] = df[date_col] - pd.to_timedelta(df[date_col].dt.weekday, unit='d')

    # List of all unique weeks
    unique_weeks = sorted(df['week_id'].unique())

    # Initialize the strategy result table
    strategy_df = pd.DataFrame({'week_id': unique_weeks})
    strategy_df['previous_week_id'] = pd.to_datetime(strategy_df['week_id']) - pd.Timedelta(days=7)
    strategy_df['week_end'] = strategy_df['week_id'] + pd.Timedelta(days=4)

    for signal in signal_cols:
        tag_col = f'{signal}_position'

        # Assign long/short/neutral tags based on cross-sectional ranking
        df[tag_col] = df.groupby('week_id')[signal].transform(
            lambda x: np.where(
                x >= x.quantile(top_q), 1,
                np.where(x <= x.quantile(bottom_q), -1, 0)
            )
        )

        # Strategy return = position established this week × return realized next week
        df[f'{signal}_strategy_return'] = df[tag_col] * df[return_col]

        # Aggregate to weekly portfolio performance
        weekly_returns = df.groupby('week_id')[f'{signal}_strategy_return'].mean()

        # Map returns to the strategy table
        strategy_df[signal] = strategy_df['previous_week_id'].map(weekly_returns)

    return strategy_df


In [6]:
# Select signal columns
signal_cols = [col for col in df.columns if col not in ['gvkey', 'date', 'next_day_return','weekly_return', 'monthly_return', 'week_id']]

# Construct weely rebalanced long-short portfolios
weekly_result_df = compute_weekly_long_short_strategy(
    df,
    signal_cols,
    top_q=0.9,
    bottom_q=0.1,
    date_col='date',
    return_col='weekly_return'
)
weekly_result_df

Unnamed: 0,week_id,previous_week_id,week_end,Capital Structure,Company Performance,Competition & Challenge,ESG,Risk,Strategy and Operation,Technology,...,Capital Structure_QA,Company Performance_QA,Competition & Challenge_QA,ESG_QA,Strategy and Operation_QA,Trend_QA,Risk_QA,Technology_QA,Technology_Pre,Urgency and Emphasis_QA
0,2015-01-05,2014-12-29,2015-01-09,,,,,,,,...,,,,,,,,,,
1,2015-01-12,2015-01-05,2015-01-16,0.008114,0.000429,0.000182,-0.003963,-0.002462,0.001513,-0.003692,...,0.001797,-0.001302,-0.000641,0.003217,-0.001654,-0.000111,-0.004793,-0.000561,0.000316,0.000139
2,2015-01-19,2015-01-12,2015-01-23,-0.003059,0.007967,-0.002230,-0.000463,0.003240,0.002152,0.004214,...,0.000849,0.004003,0.000059,0.001699,0.001910,0.005708,0.001591,-0.000326,0.003711,-0.001134
3,2015-01-26,2015-01-19,2015-01-30,-0.002663,0.000843,-0.000758,-0.000490,0.003160,-0.001040,0.000342,...,-0.000251,-0.000418,0.000034,0.000024,0.002142,0.001225,0.002289,0.000174,0.000427,-0.000080
4,2015-02-02,2015-01-26,2015-02-06,0.000414,0.000664,-0.002029,0.000091,0.000171,-0.000621,0.001110,...,0.000174,-0.001594,-0.001478,-0.000417,-0.001418,0.000072,0.000103,-0.000600,0.000741,0.000184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,2024-11-25,2024-11-18,2024-11-29,-0.000176,-0.001808,-0.002866,0.001037,-0.001664,0.002825,0.006292,...,-0.000205,0.002947,-0.001645,0.000145,-0.002307,-0.007267,-0.001303,0.000028,0.001115,0.000643
516,2024-12-02,2024-11-25,2024-12-06,-0.004273,0.004368,0.000701,-0.000143,0.006796,0.002034,0.006906,...,-0.002938,0.012105,0.001088,-0.000199,0.003314,0.001457,-0.000793,0.004187,-0.002970,0.000509
517,2024-12-09,2024-12-02,2024-12-13,-0.019095,0.002475,-0.011134,0.000086,0.007256,-0.000954,-0.000912,...,-0.001253,0.005466,-0.004492,-0.000797,0.004034,-0.002539,0.000971,-0.003480,-0.000604,0.000214
518,2024-12-16,2024-12-09,2024-12-20,-0.004090,-0.033668,0.001194,0.006173,0.002675,0.038466,0.010060,...,-0.000778,-0.004262,-0.000147,-0.000655,-0.003397,0.006051,0.005869,0.001029,0.000431,0.001712


In [7]:
# Merge with FF3F weekly data
FF_3F_weekly['week_end'] = pd.to_datetime(FF_3F_weekly['Unnamed: 0'], format='%Y%m%d')
weekly_result_df = weekly_result_df.merge(
    FF_3F_weekly[['week_end', 'Mkt-RF', 'SMB', 'HML', 'RF']],
    on='week_end',
    how='left'
)

weekly_result_df

Unnamed: 0,week_id,previous_week_id,week_end,Capital Structure,Company Performance,Competition & Challenge,ESG,Risk,Strategy and Operation,Technology,...,Strategy and Operation_QA,Trend_QA,Risk_QA,Technology_QA,Technology_Pre,Urgency and Emphasis_QA,Mkt-RF,SMB,HML,RF
0,2015-01-05,2014-12-29,2015-01-09,,,,,,,,...,,,,,,,-0.78,-0.36,-2.36,0.000
1,2015-01-12,2015-01-05,2015-01-16,0.008114,0.000429,0.000182,-0.003963,-0.002462,0.001513,-0.003692,...,-0.001654,-0.000111,-0.004793,-0.000561,0.000316,0.000139,-1.32,0.43,-0.47,0.000
2,2015-01-19,2015-01-12,2015-01-23,-0.003059,0.007967,-0.002230,-0.000463,0.003240,0.002152,0.004214,...,0.001910,0.005708,0.001591,-0.000326,0.003711,-0.001134,1.64,-0.68,-0.28,0.000
3,2015-01-26,2015-01-19,2015-01-30,-0.002663,0.000843,-0.000758,-0.000490,0.003160,-0.001040,0.000342,...,0.002142,0.001225,0.002289,0.000174,0.000427,-0.000080,-2.50,0.60,-0.58,0.000
4,2015-02-02,2015-01-26,2015-02-06,0.000414,0.000664,-0.002029,0.000091,0.000171,-0.000621,0.001110,...,-0.001418,0.000072,0.000103,-0.000600,0.000741,0.000184,3.32,0.21,1.52,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,2024-11-25,2024-11-18,2024-11-29,-0.000176,-0.001808,-0.002866,0.001037,-0.001664,0.002825,0.006292,...,-0.002307,-0.007267,-0.001303,0.000028,0.001115,0.000643,0.95,0.57,-0.62,0.099
516,2024-12-02,2024-11-25,2024-12-06,-0.004273,0.004368,0.000701,-0.000143,0.006796,0.002034,0.006906,...,0.003314,0.001457,-0.000793,0.004187,-0.002970,0.000509,1.04,-0.20,-3.03,0.092
517,2024-12-09,2024-12-02,2024-12-13,-0.019095,0.002475,-0.011134,0.000086,0.007256,-0.000954,-0.000912,...,0.004034,-0.002539,0.000971,-0.003480,-0.000604,0.000214,-0.93,-0.80,-0.16,0.092
518,2024-12-16,2024-12-09,2024-12-20,-0.004090,-0.033668,0.001194,0.006173,0.002675,0.038466,0.010060,...,-0.003397,0.006051,0.005869,0.001029,0.000431,0.001712,-2.28,-1.52,-1.42,0.092


In [12]:
# All alpha signal columns
alpha_columns = [
    'Capital Structure_QA',
       'Company Performance_QA', 'Competition & Challenge_QA', 'ESG_QA',
       'Strategy and Operation_QA', 'Trend_QA', 'Risk_QA', 'Technology_QA',
        'Urgency and Emphasis_QA', 'Capital Structure_Pre',
       'Company Performance_Pre', 'Competition & Challenge_Pre', 'ESG_Pre',
       'Risk_Pre', 'Strategy and Operation_Pre', 'Trend_Pre',
       'Urgency and Emphasis_Pre','Technology_Pre', 'Capital Structure', 'Company Performance', 'Competition & Challenge',
       'ESG', 'Risk', 'Strategy and Operation', 'Technology', 'Trend',
       'Urgency and Emphasis'
]

# Fama-French factors
# factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
# factor_cols = ['Mkt-RF', 'SMB', 'HML']
factor_cols = ['Mkt-RF']

# Construct a dictionary to save the result
ir_results = {}

# Run regression for each alpha signal
for alpha_col in alpha_columns:

    y = weekly_result_df[weekly_result_df['week_id'] >= '2023-06-01'][alpha_col]
    X = weekly_result_df[weekly_result_df['week_id'] >= '2023-06-01'][factor_cols] / 100

    X = sm.add_constant(X)
    model = sm.OLS(y, X, missing='drop').fit()

    residuals = model.resid.dropna()

    # Calculate Information Ratio
    alpha = model.params['const']
    std_resid = residuals.std()
    ir = alpha / std_resid
    ir_annualized = ir * np.sqrt(52)

    ir_results[alpha_col] = ir_annualized

# Print IR
for factor, ir in ir_results.items():
    print(f"{factor}: IR = {ir:.4f}")


Capital Structure_QA: IR = 0.1719
Company Performance_QA: IR = -0.4062
Competition & Challenge_QA: IR = 0.3309
ESG_QA: IR = 1.5823
Strategy and Operation_QA: IR = -0.8282
Trend_QA: IR = -0.6533
Risk_QA: IR = 0.3219
Technology_QA: IR = -1.5545
Urgency and Emphasis_QA: IR = 1.9974
Capital Structure_Pre: IR = -0.7374
Company Performance_Pre: IR = -0.1761
Competition & Challenge_Pre: IR = 1.0525
ESG_Pre: IR = 0.4875
Risk_Pre: IR = 1.0645
Strategy and Operation_Pre: IR = 1.2650
Trend_Pre: IR = 0.0385
Urgency and Emphasis_Pre: IR = -1.9087
Technology_Pre: IR = -0.1063
Capital Structure: IR = -1.3279
Company Performance: IR = 0.0068
Competition & Challenge: IR = 0.4465
ESG: IR = 0.4843
Risk: IR = 0.6771
Strategy and Operation: IR = 0.9051
Technology: IR = 0.3414
Trend: IR = 0.5790
Urgency and Emphasis: IR = -0.8253


In [20]:
# Set index to week_id to ensure proper alignment
returns_data = weekly_result_df.set_index('week_id')
returns_data = returns_data[returns_data.index > '2023-01-01']

# Exclude non-strategy columns
exclude_cols = ['previous_week_id', 'week_end', 'next_day_return', 'weekly_rf', 'Mkt-RF', 'SMB', 'HML',
       'RF', 'weekly_rf']
strategy_cols = [col for col in returns_data.columns if col not in exclude_cols]

# Initialize dictionary to store results
strategy_metrics = {}

# Calculate performance metrics for each strategy
for col in strategy_cols:
    excess = returns_data[col]
    ann_return = excess.mean() * 52
    ann_vol = excess.std() * (52 ** 0.5)
    sharpe = ann_return / ann_vol if ann_vol != 0 else float('nan')

    # Calculate maximum drawdown
    cum_return = (1 + excess.fillna(0)).cumprod()
    peak = cum_return.cummax()
    drawdown = (cum_return - peak) / peak
    max_drawdown = drawdown.min()  # This is a negative value

    # Store results
    strategy_metrics[col] = {
        'Expected Annual Return': ann_return,
        'Annualized Volatility': ann_vol,
        'Annualized Sharpe Ratio': sharpe,
        'Maximum Drawdown': max_drawdown
    }

# Convert the result dictionary into a DataFrame and format the output
strategy_metrics_df = pd.DataFrame(strategy_metrics).T
strategy_metrics_df = strategy_metrics_df.sort_values(by='Annualized Sharpe Ratio', ascending=False)
strategy_metrics_df = strategy_metrics_df.style.format({
    'Expected Annual Return': '{:.2%}',
    'Annualized Volatility': '{:.2%}',
    'Annualized Sharpe Ratio': '{:.2f}',
    'Maximum Drawdown': '{:.2%}'
})

strategy_metrics_df

Unnamed: 0,Expected Annual Return,Annualized Volatility,Annualized Sharpe Ratio,Maximum Drawdown
Strategy and Operation_Pre,9.46%,7.03%,1.34,-2.55%
ESG_QA,2.23%,1.82%,1.22,-1.08%
Risk_Pre,8.10%,7.81%,1.04,-3.55%
Strategy and Operation,6.96%,7.10%,0.98,-3.39%
Risk_QA,2.76%,2.88%,0.96,-1.83%
Urgency and Emphasis_QA,1.54%,1.67%,0.92,-2.01%
Capital Structure_QA,3.39%,3.87%,0.87,-3.41%
Technology,3.63%,4.18%,0.87,-4.80%
Competition & Challenge_Pre,5.27%,6.67%,0.79,-4.02%
Technology_Pre,2.69%,4.25%,0.63,-4.65%


# monthly rebalanced portfolio

In [13]:
def compute_monthly_long_short_strategy(
    df,
    signal_cols,
    top_q=0.9,
    bottom_q=0.1,
    date_col='date',
    return_col='monthly_return'
):
    """
    Build a monthly cross-sectional long-short strategy and calculate portfolio returns (based on monthly_return).
    Each row in the returned DataFrame represents: "positions established in the previous month" and "returns realized in the current month."
    """

    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col])

    # Add month identifier
    df['month_id'] = df[date_col].dt.to_period('M').dt.to_timestamp()

    # Initialize the strategy result table
    unique_months = sorted(df['month_id'].unique())
    strategy_df = pd.DataFrame({'month_id': unique_months})
    strategy_df['previous_month_id'] = pd.to_datetime(strategy_df['month_id']) - pd.DateOffset(months=1)
    strategy_df['month_end'] = strategy_df['month_id'] + pd.offsets.MonthEnd(0)

    for signal in signal_cols:
        tag_col = f'{signal}_position'

        # Assign long/short/neutral tags based on cross-sectional ranking
        df[tag_col] = df.groupby('month_id')[signal].transform(
            lambda x: np.where(
                x >= x.quantile(top_q), 1,
                np.where(x <= x.quantile(bottom_q), -1, 0)
            )
        )

        # Strategy return = position established this month × return realized next month
        df[f'{signal}_strategy_return'] = df[tag_col] * df[return_col]

        # Aggregate to monthly portfolio performance for the given signal
        monthly_returns = df.groupby('month_id')[f'{signal}_strategy_return'].mean()

        # Map returns to the strategy result table: representing portfolio performance for this month
        strategy_df[signal] = strategy_df['previous_month_id'].map(monthly_returns)

    return strategy_df


In [15]:
# Construct monthly rebalanced long-short portfolios
signal_cols = [col for col in df.columns if col not in ['gvkey', 'date', 'weekly_return', 'monthly_return', 'week_id']]

monthly_result_df = compute_monthly_long_short_strategy(df, signal_cols)

# Change data type for FF5F
FF_5F_monthly['month_id'] = pd.to_datetime(FF_5F_monthly['Unnamed: 0'].astype(str), format='%Y%m')
monthly_result_df['month_id'] = pd.to_datetime(monthly_result_df['month_id'])

# Merge the data
monthly_result_df = pd.merge(monthly_result_df, FF_5F_monthly, on='month_id', how='left')
monthly_result_df

Unnamed: 0.1,month_id,previous_month_id,month_end,next_day_return,Capital Structure,Company Performance,Competition & Challenge,ESG,Risk,Strategy and Operation,...,Technology_QA,Technology_Pre,Urgency and Emphasis_QA,Unnamed: 0,Mkt-RF,SMB,HML,RMW,CMA,RF
0,2015-01-01,2014-12-01,2015-01-31,,,,,,,,...,,,,201501,-3.11,-0.92,-3.59,1.61,-1.65,0.00
1,2015-02-01,2015-01-01,2015-02-28,0.001577,-0.000351,0.001374,-0.001007,0.000062,0.001707,-0.000369,...,-0.000172,-0.000128,0.000301,201502,6.13,0.32,-1.86,-1.12,-1.82,0.00
2,2015-03-01,2015-02-01,2015-03-31,0.004553,0.000642,0.005338,0.000832,0.001922,-0.000308,0.001762,...,0.000378,-0.000734,-0.000657,201503,-1.12,3.07,-0.38,0.09,-0.52,0.00
3,2015-04-01,2015-03-01,2015-04-30,0.006334,-0.000501,0.003878,-0.000433,-0.000393,0.003585,-0.002317,...,-0.002211,-0.000591,0.000210,201504,0.59,-3.09,1.82,0.06,-0.61,0.00
4,2015-05-01,2015-04-01,2015-05-31,0.000475,0.001261,0.003008,0.001834,0.001612,0.000769,0.001287,...,-0.000552,0.000835,0.000215,201505,1.36,0.84,-1.15,-1.80,-0.75,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,2024-08-01,2024-07-01,2024-08-31,0.000770,0.001510,0.002650,0.000956,-0.000621,0.001710,0.001693,...,0.000022,0.000339,0.000478,202408,1.61,-3.65,-1.13,0.85,0.86,0.48
116,2024-09-01,2024-08-01,2024-09-30,0.003423,0.001560,0.003773,-0.002940,-0.001143,0.001225,0.005135,...,-0.000405,0.001612,-0.000744,202409,1.74,-1.02,-2.59,0.04,-0.26,0.40
117,2024-10-01,2024-09-01,2024-10-31,0.000637,-0.001539,0.001448,0.009494,0.006875,0.009973,0.005845,...,-0.001339,0.006139,0.000143,202410,-0.97,-0.88,0.89,-1.38,1.03,0.39
118,2024-11-01,2024-10-01,2024-11-30,0.003647,0.001492,0.002425,0.001618,-0.000923,0.001103,-0.001279,...,-0.000438,-0.000919,0.000582,202411,6.51,4.78,-0.05,-2.62,-2.17,0.40


In [16]:
# All alpha signals
alpha_columns = [
    'Capital Structure_QA',
       'Company Performance_QA', 'Competition & Challenge_QA', 'ESG_QA',
       'Strategy and Operation_QA', 'Trend_QA', 'Risk_QA', 'Technology_QA',
        'Urgency and Emphasis_QA', 'Capital Structure_Pre',
       'Company Performance_Pre', 'Competition & Challenge_Pre', 'ESG_Pre',
       'Risk_Pre', 'Strategy and Operation_Pre', 'Trend_Pre',
       'Urgency and Emphasis_Pre','Technology_Pre', 'Capital Structure', 'Company Performance', 'Competition & Challenge',
       'ESG', 'Risk', 'Strategy and Operation', 'Technology', 'Trend',
       'Urgency and Emphasis',
]

# Fama-French factors
factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
# factor_cols = ['Mkt-RF', 'SMB', 'HML']
# factor_cols = ['Mkt-RF']

# Construct dictionary to save result
ir_results = {}

# Run regression for each signal
for alpha_col in alpha_columns:

    y = monthly_result_df[monthly_result_df['month_id'] >= '2023-01-01'][alpha_col]
    X = monthly_result_df[monthly_result_df['month_id'] >= '2023-01-01'][factor_cols] / 100  # Fama-French因子原本单位是百分数

    X = sm.add_constant(X)
    model = sm.OLS(y, X, missing='drop').fit()

    residuals = model.resid.dropna()

    # Calculate information ratio
    alpha = model.params['const']
    std_resid = residuals.std()
    ir = alpha / std_resid
    ir_annualized = ir * np.sqrt(12)

    ir_results[alpha_col] = ir_annualized

# Print IR
for factor, ir in ir_results.items():
    print(f"{factor}: IR = {ir:.4f}")


Capital Structure_QA: IR = 1.6259
Company Performance_QA: IR = -0.1554
Competition & Challenge_QA: IR = -0.1032
ESG_QA: IR = -0.1909
Strategy and Operation_QA: IR = 0.5957
Trend_QA: IR = 0.1874
Risk_QA: IR = 3.2347
Technology_QA: IR = 2.7573
Urgency and Emphasis_QA: IR = 2.2957
Capital Structure_Pre: IR = 0.2844
Company Performance_Pre: IR = 3.9212
Competition & Challenge_Pre: IR = 1.4170
ESG_Pre: IR = 0.4569
Risk_Pre: IR = 2.2884
Strategy and Operation_Pre: IR = 3.8956
Trend_Pre: IR = 0.4738
Urgency and Emphasis_Pre: IR = -1.7388
Technology_Pre: IR = 0.9887
Capital Structure: IR = -0.6148
Company Performance: IR = 4.4509
Competition & Challenge: IR = 1.0047
ESG: IR = 1.3750
Risk: IR = 3.8261
Strategy and Operation: IR = 3.9946
Technology: IR = 1.3156
Trend: IR = 0.2803
Urgency and Emphasis: IR = -0.3387


In [25]:
# Set index to month_id
returns_data = monthly_result_df.set_index('month_id')
returns_data = returns_data[returns_data.index >= '2020-06-01' ]

# Exclude non-strategy columns
exclude_cols = ['previous_month_id', 'month_end', 'monthly_rf', 'next_day_return', 'Unnamed: 0', 'Mkt-RF',
       'SMB', 'HML', 'RMW', 'CMA', 'RF', 'monthly_rf']
strategy_cols = [col for col in returns_data.columns if col not in exclude_cols]

# Initialize a dictionary to store performance metrics
strategy_metrics = {}

# Calculate performance metrics for each strategy
for col in strategy_cols:
    excess = returns_data[col]
    ann_return = excess.mean() * 12
    ann_vol = excess.std() * (12 ** 0.5)
    sharpe = ann_return / ann_vol if ann_vol != 0 else float('nan')

    # Calculate maximum drawdown
    cum_return = (1 + excess.fillna(0)).cumprod()
    peak = cum_return.cummax()
    drawdown = (cum_return - peak) / peak
    max_drawdown = drawdown.min()  # 是负值

    # Store the results
    strategy_metrics[col] = {
        'Expected Annual Return': ann_return,
        'Annualized Volatility': ann_vol,
        'Annualized Sharpe Ratio': sharpe,
        'Maximum Drawdown': max_drawdown
    }

# Convert the result dictionary into a DataFrame and format the output
strategy_metrics_df = pd.DataFrame(strategy_metrics).T
strategy_metrics_df = strategy_metrics_df.sort_values(by='Annualized Sharpe Ratio', ascending=False)
strategy_metrics_df = strategy_metrics_df.style.format({
    'Expected Annual Return': '{:.2%}',
    'Annualized Volatility': '{:.2%}',
    'Annualized Sharpe Ratio': '{:.2f}',
    'Maximum Drawdown': '{:.2%}'
})

strategy_metrics_df

Unnamed: 0,Expected Annual Return,Annualized Volatility,Annualized Sharpe Ratio,Maximum Drawdown
Risk_QA,1.10%,0.93%,1.18,-0.92%
Trend_QA,1.05%,0.98%,1.07,-0.65%
Technology_QA,0.42%,0.42%,1.02,-0.40%
Urgency and Emphasis_QA,0.31%,0.38%,0.82,-0.27%
Trend,0.96%,1.30%,0.74,-1.00%
Capital Structure_QA,0.55%,0.80%,0.69,-1.11%
Competition & Challenge,0.71%,1.27%,0.56,-1.18%
ESG_Pre,0.50%,0.94%,0.53,-1.56%
Competition & Challenge_Pre,0.52%,1.00%,0.52,-1.27%
Strategy and Operation,0.77%,1.66%,0.46,-2.37%
