In [None]:
#Poging table3
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Panel A - total sample statistics
data = pd.read_stata("oos_tsmom_scs.dta")

for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

aggregated_data = data.filter(like='tsmom').agg(['mean', 'std', 'count'])
N = data['tsmom1'].count()
aggregated_data['N'] = N
reshape_data = pd.melt(data, id_vars=data.columns[data.columns.str.startswith('tsmom')].tolist(), 
                       value_vars=data.filter(like='tsmom').columns, var_name='subset', value_name='tsmom')
reshape_data['sd_tsmom'] = data.filter(like='sd_tsmom').values.flatten()
reshape_data['sharpe'] = reshape_data['tsmom'] / reshape_data['sd_tsmom']
reshape_data['tstat'] = np.sqrt(N) * reshape_data['tsmom'] / reshape_data['sd_tsmom']
print(reshape_data[['subset', 'tsmom', 'tstat']])

# Panel A - statistics in the first half and second half
data['period'] = 1
data.loc[data['yyyymm'] >= data['yyyymm'].quantile(0.5), 'period'] = 2
for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

# Group by period and calculate N_period
aggregated_data_period = data.groupby('period').agg({'tsmom1': ['mean', 'std', 'count']})
aggregated_data_period.columns = [f"{agg}_{stat}" for stat, agg in aggregated_data_period.columns]
N_period = data.groupby('period')['tsmom1'].count()
aggregated_data_period['N'] = N_period.values

# Reshape data for Panel A
reshape_data_period = pd.melt(data, id_vars=['period'] + data.columns[data.columns.str.startswith('tsmom')].tolist(), 
                              value_vars=data.filter(like='tsmom').columns, var_name='subset', value_name='tsmom')
reshape_data_period['sd_tsmom'] = data.filter(like='sd_tsmom').values.flatten()
reshape_data_period['sharpe'] = reshape_data_period['tsmom'] / reshape_data_period['sd_tsmom']

# Calculate tstat using N_period
repeated_N_period = np.repeat(N_period.values, len(data) // len(N_period))

# Now calculate tstat
repeated_N_period_tiled = np.tile(repeated_N_period, len(reshape_data_period) // len(repeated_N_period))
reshape_data_period['tstat'] = np.sqrt(repeated_N_period_tiled) * reshape_data_period['tsmom'] / reshape_data_period['sd_tsmom']

print(reshape_data_period[['subset', 'period', 'tsmom', 'tstat']])

# Panel B and C - spanning tests
data = pd.read_stata("oos_tsmom_scs.dta")
fffactors = pd.read_stata("fffactors.dta")

# Merge on 'yyyymm' column
data = pd.merge(data, fffactors, on='yyyymm', how='inner', suffixes=('', '_fffactors'))

# Define period based on yyyymm
data['period'] = 1
data.loc[data['yyyymm'] >= data['yyyymm'].quantile(0.5), 'period'] = 2
data['x'] = 1

# Clear estimates
estimates = []

# Loop for estimating regression models
for i in range(2, 6):
    model_main = sm.OLS(data['tsmom1'], sm.add_constant(data[['mktrf', 'smb', 'hml', 'rmw', 'cma', f'tsmom{i}', 'period', 'x']])).fit()
    estimates.append(model_main)
    model_other = sm.OLS(data[f'tsmom{i}'], sm.add_constant(data[['mktrf', 'smb', 'hml', 'rmw', 'cma', 'tsmom1', 'period', 'x']])).fit()
    estimates.append(model_other)

model_main6 = sm.OLS(data['tsmom1'], sm.add_constant(data[['mktrf', 'smb', 'hml', 'rmw', 'cma'] + [f'tsmom{i}' for i in range(2, 6)] + ['period', 'x']])).fit()
estimates.append(model_main6)

# Panel B: Explaining factor momentum in low-eigenvalue PC factors
print(estimates[1].summary())

# Panel C: Explaining factor momentum in high-eigenvalue PC factors
print(estimates[-1].summary())



In [None]:
# Poging table3
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Panel A - total sample statistics
data = pd.read_stata("oos_tsmom_scs.dta")

for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

aggregated_data = data.filter(like='tsmom').agg(['mean', 'std', 'count'])
N = data['tsmom1'].count()
aggregated_data['N'] = N
reshape_data = pd.melt(data, id_vars=data.columns[data.columns.str.startswith('tsmom')].tolist(), 
                       value_vars=data.filter(like='tsmom').columns, var_name='subset', value_name='tsmom')
reshape_data['sd_tsmom'] = data.filter(like='sd_tsmom').values.flatten()
reshape_data['sharpe'] = reshape_data['tsmom'] / reshape_data['sd_tsmom']
reshape_data['tstat'] = np.sqrt(N) * reshape_data['tsmom'] / reshape_data['sd_tsmom']
print(reshape_data[['subset', 'tsmom', 'tstat']])

# Panel A - statistics in the first half and second half
data['period'] = 1
data.loc[data['yyyymm'] >= data['yyyymm'].quantile(0.5), 'period'] = 2
for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

# Group by period and calculate N_period
aggregated_data_period = data.groupby('period').agg({'tsmom1': ['mean', 'std', 'count']})
aggregated_data_period.columns = [f"{agg}_{stat}" for stat, agg in aggregated_data_period.columns]
N_period = data.groupby('period')['tsmom1'].count()
aggregated_data_period['N'] = N_period.values

# Calculate tstat using N_period
reshape_data_period = pd.melt(data, id_vars=['period'] + data.columns[data.columns.str.startswith('tsmom')].tolist(), 
                              value_vars=data.filter(like='tsmom').columns, var_name='subset', value_name='tsmom')
reshape_data_period['sd_tsmom'] = data.filter(like='sd_tsmom').values.flatten()
reshape_data_period['sharpe'] = reshape_data_period['tsmom'] / reshape_data_period['sd_tsmom']

# Calculate tstat using N_period
repeated_N_period = np.repeat(N_period.values, len(data) // len(N_period))

# Now calculate tstat
repeated_N_period_tiled = np.tile(repeated_N_period, len(reshape_data_period) // len(repeated_N_period))
reshape_data_period['tstat'] = np.sqrt(repeated_N_period_tiled) * reshape_data_period['tsmom'] / reshape_data_period['sd_tsmom']

print(reshape_data_period[['subset', 'period', 'tsmom', 'tstat']])

# Panel B and C - spanning tests
data = pd.read_stata("oos_tsmom_scs.dta")
fffactors = pd.read_stata("fffactors.dta")

# Merge on 'yyyymm' column
data = pd.merge(data, fffactors, on='yyyymm', how='inner', suffixes=('', '_fffactors'))

# Define period based on yyyymm
data['period'] = 1
data.loc[data['yyyymm'] >= data['yyyymm'].quantile(0.5), 'period'] = 2
data['x'] = 1

# Clear estimates
estimates = []

# Loop for estimating regression models
for i in range(2, 6):
    model_main = sm.OLS(data['tsmom1'], sm.add_constant(data[['mktrf', 'smb', 'hml', 'rmw', 'cma', f'tsmom{i}', 'period', 'x']])).fit()
    estimates.append(model_main)
    model_other = sm.OLS(data[f'tsmom{i}'], sm.add_constant(data[['mktrf', 'smb', 'hml', 'rmw', 'cma', 'tsmom1', 'period', 'x']])).fit()
    estimates.append(model_other)

model_main6 = sm.OLS(data['tsmom1'], sm.add_constant(data[['mktrf', 'smb', 'hml', 'rmw', 'cma'] + [f'tsmom{i}' for i in range(2, 6)] + ['period', 'x']])).fit()
estimates.append(model_main6)

# Panel B: Explaining factor momentum in low-eigenvalue PC factors
print(estimates[1].summary())

# Panel C: Explaining factor momentum in high-eigenvalue PC factors
print(estimates[-1].summary())


In [None]:
import pandas as pd
import numpy as np

# Load data
data = pd.read_stata("oos_tsmom_scs.dta")

# Panel A - total sample statistics
data = pd.read_stata("oos_tsmom_scs.dta")

for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

aggregated_data = data.filter(like='tsmom').agg(['mean', 'std', 'count'])
N = data['tsmom1'].count()
aggregated_data['N'] = N
reshape_data = pd.melt(data, id_vars=data.columns[data.columns.str.startswith('tsmom')].tolist(), 
                       value_vars=data.filter(like='tsmom').columns, var_name='subset', value_name='tsmom')
reshape_data['sd_tsmom'] = data.filter(like='sd_tsmom').values.flatten()
reshape_data['sharpe'] = reshape_data['tsmom'] / reshape_data['sd_tsmom']
reshape_data['tstat'] = np.sqrt(N) * reshape_data['tsmom'] / reshape_data['sd_tsmom']
print(reshape_data[['subset', 'tsmom', 'tstat']])

# Display Panel A - Total sample statistics
print("Panel A - Total sample statistics:")
print(reshape_data[['subset', 'tsmom', 'tstat']])
# Panel A - statistics in the first half and second half
data['period'] = 1
data.loc[data['yyyymm'] >= data['yyyymm'].quantile(0.5), 'period'] = 2
for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

# Group by period and calculate N_period
aggregated_data_period = data.groupby('period').agg({'tsmom1': ['mean', 'std', 'count']})
aggregated_data_period.columns = [f"{agg}_{stat}" for stat, agg in aggregated_data_period.columns]
N_period = data.groupby('period')['tsmom1'].count()
aggregated_data_period['N'] = N_period.values

# Reshape data
reshape_data_period = pd.melt(data, id_vars=['period'] + data.columns[data.columns.str.startswith('tsmom')].tolist(), 
                              value_vars=data.filter(like='tsmom').columns, var_name='subset', value_name='tsmom')
reshape_data_period['sd_tsmom'] = data.filter(like='sd_tsmom').values.flatten()
reshape_data_period['sharpe'] = reshape_data_period['tsmom'] / reshape_data_period['sd_tsmom']

# Repeat each element of N_period.values to match the length of tsmom and sd_tsmom columns
repeated_N_period = np.repeat(N_period.values, len(reshape_data_period) // len(N_period))

# Now calculate tstat
reshape_data_period['tstat'] = np.sqrt(repeated_N_period) * reshape_data_period['tsmom'] / reshape_data_period['sd_tsmom']

# Display Panel A - Statistics in the first half and second half
print("\nPanel A - Statistics in the first half and second half:")
print(reshape_data_period[['subset', 'period', 'tsmom', 'tstat']])


In [37]:
#Poging 3, panel A
#Poging 3, panel A
import pandas as pd
import numpy as np

# Read the dataset
data = pd.read_stata("oos_tsmom_scs.dta")

# Multiply 'tsmom*' variables by 100 and generate corresponding 'sd_tsmom*' variables
for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

# Define a function to calculate tstat
def calculate_tstat(x):
    N = x.count()
    if N > 0:
        tstat = np.sqrt(N) * x / x.std()
        return tstat
    else:
        return np.nan

# Group by 'subset' and apply the function
reshape_data = data.filter(like='tsmom').apply(calculate_tstat, axis=0)

# Reshape the data to long format
reshape_data = pd.melt(reshape_data, value_vars=data.filter(like='tsmom').columns, 
                       var_name='subset', value_name='tstat')

# Display the results
print(reshape_data)

# Get unique subset names
unique_subsets = reshape_data['subset'].unique()

print("Unique subset names:", unique_subsets)
# Calculate the averages of each unique subset
subset_averages = reshape_data.groupby('subset')['tstat'].mean().reset_index()

# Display the subset averages
print("Subset Averages:")
print(subset_averages)


         subset      tstat
0        tsmom1  -4.930779
1        tsmom1  11.293370
2        tsmom1   5.437384
3        tsmom1 -13.586415
4        tsmom1  19.774437
...         ...        ...
5575  sd_tsmom5  -9.033099
5576  sd_tsmom5 -11.367080
5577  sd_tsmom5  -5.192739
5578  sd_tsmom5  -1.144776
5579  sd_tsmom5 -36.953552

[5580 rows x 2 columns]
Unique subset names: ['tsmom1' 'tsmom2' 'tsmom3' 'tsmom4' 'tsmom5' 'sd_tsmom1' 'sd_tsmom2'
 'sd_tsmom3' 'sd_tsmom4' 'sd_tsmom5']
Subset Averages:
      subset     tstat
0  sd_tsmom1  7.069136
1  sd_tsmom2  5.227156
2  sd_tsmom3  5.019928
3  sd_tsmom4  4.048101
4  sd_tsmom5  2.506116
5     tsmom1  7.069136
6     tsmom2  5.227156
7     tsmom3  5.019928
8     tsmom4  4.048101
9     tsmom5  2.506116


In [39]:
import pandas as pd

# Read the dataset
data = pd.read_stata("oos_tsmom_scs.dta")

# Multiply 'tsmom*' variables by 100 and generate corresponding 'sd_tsmom*' variables
for var in data.filter(like='tsmom').columns:
    data[var] *= 100
    data[f"sd_{var}"] = data[var]

# Reshape the data to long format
reshape_data = pd.melt(data, id_vars=data.columns[data.columns.str.startswith('tsmom')].tolist(), 
                       value_vars=data.filter(like='tsmom').columns, var_name='subset', value_name='tsmom')

# Calculate the average of 'tsmom' per 'subset'
subset_averages = reshape_data.groupby('subset')['tsmom'].mean().reset_index()

# Display the subset averages
print("Subset Averages:")
print(subset_averages)


Subset Averages:
      subset     tsmom
0  sd_tsmom1  0.192298
1  sd_tsmom2  0.126633
2  sd_tsmom3  0.101961
3  sd_tsmom4  0.100712
4  sd_tsmom5  0.072299
