In [71]:
##########################################
# January effect in US samll companies   #
# An stock return seasonality pattern    #
# NYSE/AMEX/NASDAQ stocks                #
# Sample period 1933-1977                #
# December 2024                          #                        
# Lexuan Chen                            #
##########################################
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd 
from dateutil.relativedelta import *
from pandas.tseries.offsets import *
from scipy import stats  

In [1]:
import pandas as pd  
from sas7bdat import SAS7BDAT  
file_path = 'msf.sas7bdat'
df = pd.read_sas('msf.sas7bdat') 

In [73]:
# Derive Sample 'NYSE/AMEX/NASDAQ stocks in the sample period 1933-1977'
columns_to_drop = ['CUSIP', 'PERMCO','ISSUNO','HSICCD','BIDLO','ASKHI','VOL','BID',\
                   'ASK','CFACPR','CFACSHR','SPREAD','ALTPRC','ALTPRCDT']
df.drop(columns=columns_to_drop, axis=1, inplace=True)

df['DATE'] = pd.to_datetime(df['DATE'])  
start_date = '1933-01-31'  
end_date = '1977-12-31'  
df = df[(df['DATE'] >= start_date) & (df['DATE'] <= end_date)] 

df = df[df['HEXCD']== 1]

df = df[(df['PRC'].notna()) & (df['SHROUT'].notna()) & (df['RET'].notna())]

df[['PERMNO','SHROUT','HEXCD']] = df[['PERMNO','SHROUT','HEXCD']].astype(int)
df = df.sort_values(['PERMNO','DATE']).reset_index()

In [74]:
# Derive market cap
df['Mkt_cap'] = df['SHROUT'] * df['PRC'].abs()

#Monthly observations
df['Year'] = df['DATE'].dt.year
df['Month'] = df['DATE'].dt.month
df['Date'] = df['DATE'].dt.to_period('M')  

#Derive more accurate return
df['Return']= df['RETX']
df['Return'] = np.where(
    df['Return'].isna(),
    df['RET'],
    df['RETX']
)

df.drop(columns=['HEXCD','RET','RETX','DATE'],axis=1,inplace=True)  

In [75]:
# Calculate lag market cap
df['cap_lag'] = df.groupby(['PERMNO'])['Mkt_cap'].shift(1)

# if first month of a permno, cap_lag is mkt_Cap/(1+ret)
df['cap_lag'] = np.where(df['cap_lag'].isna(), df['Mkt_cap']/(1+df['Return']),df['cap_lag'])

In [76]:
# Calculate Break Point for lag market cap
Port_bp = df.groupby(['Date'])['cap_lag']\
    .describe(percentiles=[0, .2, .4, .6, .8, 1]).reset_index()

Port_bp = Port_bp.rename(columns={'0%':'pct0', '20%':'pct20','40%':'pct40','60%':'pct60',\
                                  '80%':'pct80', '100%':'pct100'})

Port_bp = Port_bp.drop(['count','mean','std','min','max'], axis=1)

In [77]:
Month_decile = pd.merge(df,Port_bp,how='left', on='Date')

In [78]:
# Function to assign size group
def Mktcap_grp(row):
    if (row['pct0']<=row['cap_lag']) and (row['cap_lag']<row['pct20']):
        group='1'
    elif (row['pct20']<=row['cap_lag']) and (row['cap_lag']<row['pct40']):
        group='2'
    elif (row['pct40']<=row['cap_lag']) and (row['cap_lag']<row['pct60']):
        group='3'
    elif (row['pct60']<=row['cap_lag']) and (row['cap_lag']<row['pct80']):
        group='4'
    elif (row['pct80']<=row['cap_lag']) and (row['cap_lag']<row['pct100']):
        group='5'
    else:
        group=np.nan
    return group

# Assign stocks to portfolios at each month 
Month_decile['Group'] = Month_decile.apply(Mktcap_grp, axis=1)
Month_decile = Month_decile[(Month_decile['Group'].notna())]
Month_decile = Month_decile[['Date','Year','Month','Return','Group']]\
    .sort_values(['Group', 'Year', 'Month']).reset_index() 

# Derive portfolio monthly return
Month_decile['Port_ret'] = Month_decile.groupby(['Year','Month','Group'])['Return']\
    .transform('mean')

In [79]:
# Retain necessary data and remove duplicate values
Port_month = Month_decile[['Group','Date','Port_ret']]
Port_month = Port_month.drop_duplicates().reset_index(drop=True)
Port_month['Portfolio'] = 'Portfolio '+ Port_month['Group']

In [80]:
# Summary table for monthly portfolio return
Portfolio_result = Port_month.pivot(index='Date', columns='Portfolio', values='Port_ret')  
Portfolio_result = Portfolio_result.round(6) 
Portfolio_result.to_csv('Portfolio_Results.csv', index=True) 

In [81]:
### January Effect ###

Port_January = Month_decile[Month_decile['Month'] == 1][['Group', 'Year', 'Month', 'Port_ret']]\
    .drop_duplicates().reset_index(drop=True)
Port_Other = Month_decile[Month_decile['Month'] != 1][['Group', 'Year', 'Month', 'Port_ret']]\
    .drop_duplicates().reset_index(drop=True)

# Derive portfolios' average return in January and other months yearly
Port_January['Avg_return_jan'] = Port_January.groupby(['Group', 'Year'])['Port_ret']\
    .transform('mean')
Port_Other['Avg_return_other'] = Port_Other.groupby(['Group', 'Year'])['Port_ret']\
    .transform('mean')

Port_January = Port_January.drop(columns=['Port_ret','Month']).drop_duplicates().reset_index(drop=True)
Port_Other = Port_Other.drop(columns=['Port_ret','Month']).drop_duplicates().reset_index(drop=True)

Port_year = pd.merge(Port_January,Port_Other,how = 'left', on=['Group','Year'])

In [82]:
# Derive return spread between January and other months
Port_year['ret_spread'] = Port_year['Avg_return_jan'] - Port_year['Avg_return_other']
Port_year['Portfolio'] = 'Portfolio '+ Port_year['Group']

In [83]:
# PivotTable for portfolio's return spread 
January_effect = Port_year.pivot(index='Year', columns='Portfolio', values='ret_spread')  
January_effect['1-5'] = January_effect['Portfolio 1']-January_effect['Portfolio 5']

In [84]:
# Summary table for January effect in monthly portfolios
averages = January_effect.mean()  
t_statistics = January_effect.apply(lambda x:stats.ttest_1samp(x, 0)[0])
Summary_sta = pd.DataFrame([averages, t_statistics],index=['Average', 'T-Statistic'])
January_effect = pd.concat([January_effect,Summary_sta])
January_effect = January_effect.round(6)
January_effect.to_csv('January_effect.csv',index=True)
January_effect

Portfolio    Portfolio 1  Portfolio 2  Portfolio 3  Portfolio 4  Portfolio 5  \
Average         0.100814     0.053330     0.037292     0.021104     0.009277   
T-Statistic     4.928581     3.754034     3.231726     2.133611     1.193254   

Portfolio         1-5  
Average      0.091537  
T-Statistic  6.195378  


Portfolio,Portfolio 1,Portfolio 2,Portfolio 3,Portfolio 4,Portfolio 5,1-5
1933,-0.128671,-0.066976,-0.024397,-0.072993,-0.035387,-0.093284
1934,0.575714,0.386316,0.29083,0.209847,0.145691,0.430023
1935,-0.053887,-0.100353,-0.090916,-0.076133,-0.074616,0.020729
1936,0.326322,0.120935,0.068332,0.05094,0.054557,0.271765
1937,0.219197,0.141799,0.130723,0.103447,0.066977,0.15222
1938,0.057674,0.013043,0.001492,-0.021749,-0.031648,0.089322
1939,-0.100803,-0.103812,-0.09819,-0.087747,-0.071184,-0.029619
1940,0.00208,-0.029571,-0.013371,-0.032257,-0.01394,0.01602
1941,0.096324,-0.00227,-0.02749,-0.033295,-0.024902,0.121226
1942,0.383773,0.114484,0.05814,0.02978,0.018599,0.365173


## Analysis Conclusion
In our analysis, we performed a one-tailed test (>) with 44 degrees of freedom at a significance level of 0.05, yielding a critical t-value of approximately 1.676. The observed t-value for column '1-5' is 6.195378, indicating a statistically significant relationship between the January effect and market capitalization. Additionally, we found that Portfolios 1, 2, 3, and 4 can reject the null hypothesis at the 0.05 significance level, confirming the presence of the January effect within these portfolios. This further supports our hypothesis that the January effect exists among small firms but not among large firms.