In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline 
import datetime
import calendar

### 1997 - 2007, 2007 - end of 2008, 2009 - 2017 (ALL S&P 500)
#### (with Ordinary Least Squares)

### 1997 - 2007: 

In [20]:
# Load the ^GSPC (S&P 500) price history for the 1997-2007 window from a local CSV.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where
# that exact file exists — consider a configurable data directory.
before_crisis = pd.read_csv('/Users/Lee/Desktop/research-1/^GSPC_1997-2007.csv')

In [21]:
def clean_data(df):
    """Prepare a daily price frame for the day-of-week return analysis.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date', 'Open', 'High', 'Low', 'Close',
        'Adj Close' and 'Volume'. The caller's frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        A new frame without 'High', 'Low', 'Close' and 'Volume', with these
        columns added:
        - 'day_of_week': weekday name of 'Date'
        - 'Consecutive trading day': '<previous weekday> to <weekday>'
        - 'Overnight return': percent change from the previous row's
          'Adj Close' to this row's 'Open'
        - 'Intraday return': percent change from 'Open' to 'Adj Close'
        Rows containing any missing value are dropped; this always includes
        the first row, which has no previous row to compare against.

    Raises
    ------
    KeyError
        If one of the columns to drop ('High', 'Low', 'Close', 'Volume') or
        one of the columns read is missing.
    """
    # Work on a copy so the frame passed in by the caller is not modified.
    df = df.copy()

    df['Date'] = pd.to_datetime(df['Date'])

    # `.dt.weekday_name` was removed in pandas 1.0; `day_name()` is the replacement.
    df['day_of_week'] = df['Date'].dt.day_name()

    df = df.drop(['High', 'Low', 'Close', 'Volume'], axis=1)

    df['Consecutive trading day'] = df['day_of_week'].shift(1) + ' to ' + df['day_of_week']

    # The subtraction must be parenthesised before dividing; otherwise the
    # expression reduces to `Open - 1` because prev_close / prev_close == 1.
    prev_close = df['Adj Close'].shift(1)
    df['Overnight return'] = (df['Open'] - prev_close) / prev_close * 100

    df['Intraday return'] = (df['Adj Close'] - df['Open']) / df['Open'] * 100

    return df.dropna()

In [22]:
# Rebind the name to the cleaned frame (weekday labels and return columns added).
# NOTE(review): because the raw frame is overwritten, re-running this cell without
# re-loading the CSV fails — clean_data drops 'High'/'Low'/'Close'/'Volume', which
# are no longer present the second time.
before_crisis = clean_data(before_crisis)

In [23]:
import statsmodels.api as sm

In [24]:
def do_OLS(df):
    """Regress the row-over-row change in intraday return on weekday dummies.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'day_of_week' (weekday names) and 'Intraday return'.

    Returns
    -------
    pandas.Series
        Fitted OLS coefficients for the intercept ('const') and for
        'Monday' through 'Friday'.

    Raises
    ------
    KeyError
        If any of the five weekday names never occurs in 'day_of_week', since
        the corresponding dummy column then does not exist.
    """
    # dtype=float: recent pandas versions emit boolean dummies; combined with the
    # float intercept column they become an object array that statsmodels rejects.
    df = df.join(pd.get_dummies(df['day_of_week'], dtype=float))

    # Dependent variable: this row's intraday return minus the previous row's.
    df['Difference in return'] = df['Intraday return'] - df['Intraday return'].shift(1)

    # The first row has no predecessor, so its difference is NaN and is dropped.
    df = df.dropna()

    days = df[['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']]

    # NOTE(review): when every row falls on one of these five weekdays, the dummies
    # sum to 1 and are perfectly collinear with the intercept, so the individual
    # coefficients are not uniquely identified. Consider dropping the constant or
    # one dummy; left unchanged here to keep the returned coefficients the same.
    days = sm.add_constant(days)

    result = sm.OLS(df['Difference in return'], days).fit()

    return result.params
    
    
    

In [25]:
# Fit the weekday regression on the cleaned 1997-2007 frame and keep the coefficients.
before_crisis_summary = do_OLS(before_crisis)

In [26]:
# Display the fitted coefficients for the 1997-2007 sample.
before_crisis_summary

const       -0.000350
Monday       0.027897
Tuesday      0.009051
Wednesday   -0.013033
Thursday    -0.013020
Friday      -0.011245
dtype: float64

### Before 2007, Monday has the highest return (0.0279) and Wednesday has the lowest return (-0.01303), which agrees with the paper

### 2007 - end of 2008

In [17]:
# Load the ^GSPC (S&P 500) price history for the 2007-2008 window from a local CSV.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where
# that exact file exists — consider a configurable data directory.
during_crisis = pd.read_csv('/Users/Lee/Desktop/research-1/^GSPC_2007-2008.csv')

In [18]:
# Rebind the name to the cleaned frame (weekday labels and return columns added).
during_crisis = clean_data(during_crisis)

In [27]:
# Fit the weekday regression on the cleaned 2007-2008 frame and keep the coefficients.
during_crisis_summary = do_OLS(during_crisis)

In [28]:
# Display the fitted coefficients for the 2007-2008 sample.
during_crisis_summary

const        0.000499
Monday      -0.277651
Tuesday      0.410449
Wednesday   -0.370111
Thursday     0.060516
Friday       0.177296
dtype: float64

### From 2007 to the end of 2008, Tuesday has the highest return (0.41) and Wednesday has the lowest return (-0.37), which differs from the other time periods

### 2009 - 2017 

In [29]:
# Load the ^GSPC (S&P 500) price history from 2009 onward from a local CSV.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where
# that exact file exists — consider a configurable data directory.
after_till_now = pd.read_csv('/Users/Lee/Desktop/research-1/^GSPC_2009-now.csv')

In [30]:
# Clean the 2009-onward frame, then fit the weekday regression on the cleaned data.
after_till_now = clean_data(after_till_now)
after_till_now_summary = do_OLS(after_till_now)

In [31]:
# Display the fitted coefficients for the 2009-onward sample.
after_till_now_summary

const       -0.000121
Monday       0.007463
Tuesday      0.059266
Wednesday    0.003941
Thursday    -0.004227
Friday      -0.066564
dtype: float64

### After 2009, Tuesday has the highest return (0.059) and Friday has the lowest return (-0.067)