In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Load the dataset
data = pd.read_stata("anomalies.dta")

# Generate yyyymm variable
data['yyyymm'] = 100 * data['year'] + data['month']

# Collapse data by global and anomaly
collapsed_data = data.groupby(['global', 'anomaly']).agg(
    ret=('ret', 'mean'),
    sd=('ret', np.std),
    semean=('ret', lambda x: np.std(x) / np.sqrt(len(x))),
    n=('ret', 'count'),
    min_ret=('ret', 'min'),
    yyyymm=('yyyymm', lambda x: x.iloc[0])  # Take the first yyyymm value as representative
).reset_index()

# Calculate t-statistic
collapsed_data['tstat'] = collapsed_data['ret'] / collapsed_data['semean']

# Multiply ret by 12 and sd by sqrt(12)
collapsed_data['ret'] *= 12
collapsed_data['sd'] *= np.sqrt(12)

# Format columns
collapsed_data['ret'] = collapsed_data['ret'].map("{:.2f}".format)
collapsed_data['sd'] = collapsed_data['sd'].map("{:.2f}".format)
collapsed_data['tstat'] = collapsed_data['tstat'].map("{:.2f}".format)

# Keep relevant columns and reorder
collapsed_data = collapsed_data[['anomaly', 'yyyymm', 'ret', 'sd', 'tstat']]

print(collapsed_data)

# ===============================================================================
# Table II and AI: factor returns conditional of their past returns 
# ===============================================================================

# Generate flag column
data['flag'] = np.sign(data['MA'])
data['flag'].replace(-1, 0, inplace=True)

# Individual time series regressions of factor returns on their past 12 month returns
reg_results = []

for group, group_data in data.groupby('global'):
    if group_data['Pan'].nunique() == 1:
        model = sm.OLS(group_data['ret'], sm.add_constant(group_data['MA'])).fit()
    else:
        model = sm.OLS(group_data['ret'], sm.add_constant(group_data['MA'])).fit(cov_type='cluster', cov_kwds={'groups': group_data['time']})
    
    a0 = model.params['const']
    a0t = model.tvalues['const']
    b0 = model.params['MA']
    b0t = model.tvalues['MA']
    
    model = sm.OLS(group_data['ret'], sm.add_constant(group_data['flag'])).fit()
    
    a1 = model.params['const']
    a1t = model.tvalues['const']
    b1 = model.params['flag']
    b1t = model.tvalues['flag']
    
    reg_results.append({
        'global': group,
        'a0': a0, 'a0t': a0t, 'b0': b0, 'b0t': b0t,
        'a1': a1, 'a1t': a1t, 'b1': b1, 'b1t': b1t
    })

# Collapse results
reg_results_df = pd.DataFrame(reg_results)
collapsed_reg_results = reg_results_df.groupby(['global', 'anomaly']).mean().reset_index()

print(collapsed_reg_results)


In [7]:
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_stata("oos_tsmom_scs.dta")

# Convert tsmom variables to percentage
tsmom_cols = [col for col in data.columns if col.startswith('tsmom')]
data[tsmom_cols] *= 100

# Calculate standard deviations
for var in tsmom_cols:
    data[f'sd_{var}'] = data[var]

# Panel A - Total sample statistics
total_sample_stats = data[tsmom_cols + [f'sd_{col}' for col in tsmom_cols]].describe().transpose()[['mean', 'std', 'count']]
total_sample_stats.columns = ['mean_tsmom', 'mean_sd_tsmom', 'count_N']
total_sample_stats['sharpe'] = total_sample_stats['mean_tsmom'] / total_sample_stats['mean_sd_tsmom']
total_sample_stats['tstat'] = np.sqrt(total_sample_stats['count_N']) * (total_sample_stats['mean_tsmom'] / total_sample_stats['mean_sd_tsmom'])
print(total_sample_stats[['mean_tsmom', 'tstat']])

# Panel A - Total sample statistics
total_sample_stats = data.groupby('period').agg(
    mean_tsmom=('tsmom', 'mean'),
    mean_sd_tsmom=('sd_tsmom', 'mean'),
    count_N=('N', 'first')
)

# Panel A - Statistics in the first half and second half
period_stats = data.groupby(['period', 'subset']).agg(
    mean_tsmom=('tsmom', 'mean'),
    mean_sd_tsmom=('sd_tsmom', 'mean'),
    count_N=('N', 'first')
)

period_stats['sharpe'] = period_stats['mean_tsmom'] / period_stats['mean_sd_tsmom']
period_stats['tstat'] = np.sqrt(period_stats['count_N']) * (period_stats['mean_tsmom'] / period_stats['mean_sd_tsmom'])
print(period_stats[['mean_tsmom', 'tstat']])

# Panel B and C - Spanning tests
factor_data = pd.read_stata("fffactors.dta")
data = pd.merge(data, factor_data, on='yyyymm')

# Panel B: Explaining factor momentum in low-eigenvalue PC factors
for i in range(2, 6):
    formula = f'tsmom{i} ~ mktrf + smb + hml + rmw + cma + tsmom1 + period'
    reg_other = sm.OLS.from_formula(formula, data=data).fit()
    print(reg_other.summary())

# Panel C: Explaining factor momentum in high-eigenvalue PC factors
formula = 'tsmom1 ~ mktrf + smb + hml + rmw + cma + tsmom2 + tsmom3 + tsmom4 + tsmom5 + period'
reg_main = sm.OLS.from_formula(formula, data=data).fit()
print(reg_main.summary())


           mean_tsmom     tstat
tsmom1       0.192298  7.069135
tsmom2       0.126633  5.227156
tsmom3       0.101961  5.019928
tsmom4       0.100712  4.048101
tsmom5       0.072299  2.506116
sd_tsmom1    0.192298  7.069135
sd_tsmom2    0.126633  5.227156
sd_tsmom3    0.101961  5.019928
sd_tsmom4    0.100712  4.048101
sd_tsmom5    0.072299  2.506116


KeyError: 'period'

In [None]:
import pandas as pd
from scipy.stats import pearsonr, norm
from math import atanh, sqrt  # Import the necessary functions
# Load the dataset and clean it
data = pd.read_stata("anomalies.dta")
data = data[~data['anomaly'].isin(["umd", "glumd"])]  # Drop rows where anomaly is "umd" or "glumd"

# Generate moving average of returns over a 12-month window
data['MA'] = data.groupby('global')['ret'].transform(lambda x: x.rolling(window=12).mean())

# Drop rows where moving average contains NaN values
data = data.dropna(subset=['MA'])

# Merge with FactorUMD.dta
factor_data = pd.read_stata("FactorUMD.dta")
merged_data = pd.merge(data, factor_data, on=['year', 'month'])

# Group by 'Pan' (or 'global' in Python) and calculate correlations for different conditions
groups = merged_data.groupby('global')

correlations = []
for name, group in groups:
    corr_all, _ = pearsonr(group['ret'], group['umd'])  # Adjust column names from 'AnomalyRet' to 'ret'
    corr_up, _ = pearsonr(group[group['MA'] > 0]['ret'], group[group['MA'] > 0]['umd'])
    corr_down, _ = pearsonr(group[group['MA'] < 0]['ret'], group[group['MA'] < 0]['umd'])
    corr_cond, _ = pearsonr(group['MA'] * group['ret'], group['umd'])  # Calculate conditional correlation
    
    # Calculate Fisher's Z-test for conditional vs. unconditional correlations
    mu_Z = (atanh(corr_cond) - atanh(corr_all))
    sigma_Z = sqrt(1/(len(group) - 3) + 1/(len(group) - 3))
    Z = mu_Z / sigma_Z
    pvalue = 2 * norm.cdf(-abs(Z))
    
    # Calculate unbalanced Fisher's Z-test for positive vs. negative moving averages
    mu_Z_un = (atanh(corr_up) - atanh(corr_down))
    sigma_Z_un = sqrt(1/(len(group[group['MA'] > 0]) - 3) + 1/(len(group[group['MA'] < 0]) - 3))
    Z_un = mu_Z_un / sigma_Z_un
    pvalue_un = 2 * norm.cdf(-abs(Z_un))
    
    correlations.append({
        'global': name,
        'corr_all': corr_all,
        'corr_up': corr_up,
        'corr_down': corr_down,
        'corr_cond': corr_cond,
        'Z': Z,
        'pvalue': pvalue,
        'Z_un': Z_un,
        'pvalue_un': pvalue_un
    })

correlations_df = pd.DataFrame(correlations)
print(correlations_df)
