In [1]:
import numpy as np
from scipy.stats import f_oneway, kruskal
import yfinance as yf

## Getting Data

In [2]:
hist = yf.download('AAPL')
hist.head(5)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-12-12,0.128348,0.128906,0.128348,0.128348,0.099449,469033600
1980-12-15,0.12221,0.12221,0.121652,0.121652,0.094261,175884800
1980-12-16,0.113281,0.113281,0.112723,0.112723,0.087343,105728000
1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089504,86441600
1980-12-18,0.118862,0.11942,0.118862,0.118862,0.092099,73449600


## Calculating Returns

In [3]:
hist["Returns"] = hist["Adj Close"].pct_change()
hist = hist.dropna()

In [4]:
hist.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-12-15,0.12221,0.12221,0.121652,0.121652,0.094261,175884800,-0.05217
1980-12-16,0.113281,0.113281,0.112723,0.112723,0.087343,105728000,-0.073398
1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089504,86441600,0.024751
1980-12-18,0.118862,0.11942,0.118862,0.118862,0.092099,73449600,0.028993
1980-12-19,0.126116,0.126674,0.126116,0.126116,0.09772,48630400,0.061028


## Determining a Weekday

In [5]:
from datetime import datetime

In [6]:
hist["Weekday"] = hist.index.day_name()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hist["Weekday"] = hist.index.day_name()


In [8]:
hist.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Weekday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1980-12-15,0.12221,0.12221,0.121652,0.121652,0.094261,175884800,-0.05217,Monday
1980-12-16,0.113281,0.113281,0.112723,0.112723,0.087343,105728000,-0.073398,Tuesday
1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089504,86441600,0.024751,Wednesday
1980-12-18,0.118862,0.11942,0.118862,0.118862,0.092099,73449600,0.028993,Thursday
1980-12-19,0.126116,0.126674,0.126116,0.126116,0.09772,48630400,0.061028,Friday


# 1) test a hypothesis that daily return and volume are the same by a week day


### Applying ANOVA

In [9]:
grouped_data = [hist['Returns'][hist['Weekday'] == weekday] for weekday in hist['Weekday'].unique()]

f_statistic, p_value = f_oneway(*grouped_data)

if p_value < 0.05:
    print("The daily returns are not the same across weekdays (reject null hypothesis).")
else:
    print("The daily returns are the same across weekdays (fail to reject null hypothesis).")


The daily returns are not the same across weekdays (reject null hypothesis).


### Applying Kruskal-Wallis

In [10]:
grouped_data = [hist['Returns'][hist['Weekday'] == weekday] for weekday in hist['Weekday'].unique()]


h_statistic, p_value = kruskal(*grouped_data)


if p_value < 0.05:
    print("The daily returns are not the same across weekdays (reject null hypothesis).")
else:
    print("The daily returns are the same across weekdays (fail to reject null hypothesis).")


The daily returns are not the same across weekdays (reject null hypothesis).


# 2) test a hypothesis that overnight and over-the-weekend returns are the same statistically


In [18]:
import pandas as pd
from scipy.stats import ttest_ind

In [19]:
hist['Overnight_Returns'] = hist['Open'] - hist['Close'].shift(1)

weekend_data = hist[hist['Weekday'].isin(['Friday', 'Monday'])]
weekend_returns = weekend_data['Open'] - weekend_data['Close'].shift(1)
hist.loc[weekend_returns.index, 'Weekend_Returns'] = weekend_returns

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hist['Overnight_Returns'] = hist['Open'] - hist['Close'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hist.loc[weekend_returns.index, 'Weekend_Returns'] = weekend_returns


In [20]:
overnight_returns = hist['Overnight_Returns'].dropna()
weekend_returns = hist['Weekend_Returns'].dropna()

t_statistic, p_value = ttest_ind(overnight_returns, weekend_returns)

if p_value < 0.05:
    print("The overnight and over-the-weekend returns are not the same (reject null hypothesis).")
else:
    print("The overnight and over-the-weekend returns are the same (fail to reject null hypothesis).")


The overnight and over-the-weekend returns are the same (fail to reject null hypothesis).


# 3) test a hypothesis that return and volume on triple-witching days (the third Friday of March, June, September, and December) are different from the average return and volume

In [23]:
from scipy.stats import ttest_1samp, mannwhitneyu

In [24]:
triple_witching_data = hist[hist['Weekday'] == 'Friday']

average_return = hist['Returns'].mean()
average_volume = hist['Volume'].mean()

## Student's t-test

In [25]:
t_statistic_return, p_value_return = ttest_1samp(triple_witching_data['Returns'], average_return)
t_statistic_volume, p_value_volume = ttest_1samp(triple_witching_data['Volume'], average_volume)

In [26]:
if p_value_return < 0.05:
    print("The returns on triple-witching days are different from the average.")
else:
    print("The returns on triple-witching days are not significantly different from the average.")

if p_value_volume < 0.05:
    print("The volume on triple-witching days is different from the average.")
else:
    print("The volume on triple-witching days is not significantly different from the average.")


The returns on triple-witching days are different from the average.
The volume on triple-witching days is not significantly different from the average.


## Mann-Whitney U test

In [27]:
U_statistic_return, p_value_return = mannwhitneyu(triple_witching_data['Returns'], hist['Returns'])
U_statistic_volume, p_value_volume = mannwhitneyu(triple_witching_data['Volume'], hist['Volume'])

if p_value_return < 0.05:
    print("The returns on triple-witching days are different from the average.")
else:
    print("The returns on triple-witching days are not significantly different from the average.")

if p_value_volume < 0.05:
    print("The volume on triple-witching days is different from the average.")
else:
    print("The volume on triple-witching days is not significantly different from the average.")


The returns on triple-witching days are different from the average.
The volume on triple-witching days is not significantly different from the average.


# 4) test a hypothesis that a standard deviation of over-the-weekend and over-the-long-weekend returns are not different from a standard deviation of a weekday return

In [28]:
from scipy.stats import levene

In [29]:
weekday_returns = hist[hist['Weekday'].isin(['Monday', 'Tuesday', 'Wednesday', 'Thursday'])]['Returns']
weekend_returns = hist[hist['Weekday'] == 'Friday']['Returns']
long_weekend_returns = hist[hist['Weekday'] == 'Saturday']['Returns']

## Levene's test to compare the variances

In [30]:
statistic, p_value = levene(weekday_returns, weekend_returns, long_weekend_returns)

if p_value < 0.05:
    print("The standard deviations of returns are different (reject null hypothesis).")
else:
    print("The standard deviations of returns are not significantly different (fail to reject null hypothesis).")


The standard deviations of returns are not significantly different (fail to reject null hypothesis).


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
