In [2]:
# let the statsmodels library to be consistent with latest pandas
!pip -q install --upgrade statsmodels
!pip install yfinance

# allows us to print multiple outputs from a single Colab cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# load important libraries that we might use
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
import statsmodels.api as sm
import yfinance as yf

from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import r2_score
from scipy.stats import norm
import scipy.stats as stats


# Widen what pandas, numpy and matplotlib show by default.
# pandas: few rows, many columns, narrow column text, two decimals.
for option_name, option_value in (
    ('display.max_rows', 6),
    ('display.max_columns', 500),
    ('display.max_colwidth', 1),
    ('display.precision', 2),
):
    pd.set_option(option_name, option_value)

# numpy: very long lines, 4 decimals, no scientific notation for small values.
np.set_printoptions(
    precision=4,
    suppress=True,
    edgeitems=20,
    linewidth=10000,
)

# matplotlib: default figure size in inches (width, height).
plt.rcParams.update({'figure.figsize': [16, 6]})



In [3]:
# Ticker and sample window used for every test in this notebook.
ticker_symbol = 'NFLX'

start_date = '2020-01-01'
end_date = '2023-01-01'

# auto_adjust=False is passed explicitly so the result keeps a separate
# 'Adj Close' column. yfinance is installed unpinned above, and recent releases
# default to auto_adjust=True, which drops that column and breaks the lookup below.
stock_data = yf.download(ticker_symbol, start=start_date, end=end_date, auto_adjust=False)

# Fail loudly instead of letting an empty frame flow into the tests below,
# where it would only surface as NaN statistics.
if stock_data.empty:
    raise RuntimeError(
        f"No price data returned for {ticker_symbol} between {start_date} and {end_date}."
    )

# Recent yfinance releases may return (field, ticker) MultiIndex columns even for
# a single ticker; keep only the field level so 'Adj Close', 'Volume', ... stay
# plain column names regardless of the installed version.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Simple close-to-close return; the first row has no previous price and is NaN.
stock_data['Return'] = stock_data['Adj Close'].pct_change()

stock_data.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-02,326.1,329.98,324.78,329.81,329.81,4485800,
2020-01-03,326.78,329.86,325.53,325.9,325.9,3806900,-0.01
2020-01-06,323.12,336.36,321.2,335.83,335.83,5663100,0.03
2020-01-07,336.47,336.7,330.3,330.75,330.75,4703200,-0.02
2020-01-08,331.49,342.7,331.05,339.26,339.26,7104500,0.03


In [15]:
# Significance level (alpha) shared by both ANOVA tests
alpha = 0.05

# Bucket the rows by the weekday of their date index
weekday_groups = stock_data.groupby(stock_data.index.weekday)

# One-way ANOVA on daily return: one NaN-free sample per weekday bucket
return_data = [frame['Return'].dropna() for _weekday, frame in weekday_groups]
return_statistic, return_p_value = stats.f_oneway(*return_data)

# One-way ANOVA on trading volume, built the same way
volume_data = [frame['Volume'].dropna() for _weekday, frame in weekday_groups]
volume_statistic, volume_p_value = stats.f_oneway(*volume_data)

# Keep each p-value in its own list, as the original cell did
return_p_values = [return_p_value]
volume_p_values = [volume_p_value]

# Report the outcome for daily return
print(
    "Null hypothesis of equal means for daily return across weekdays is rejected."
    if return_p_value < alpha
    else "No statistically significant differences in means of daily return across weekdays."
)

# Report the outcome for trading volume
print(
    "Null hypothesis of equal means for trading volume across weekdays is rejected."
    if volume_p_value < alpha
    else "No statistically significant differences in means of trading volume across weekdays."
)

No statistically significant differences in means of daily return across weekdays.
No statistically significant differences in means of trading volume across weekdays.


In [17]:
# Split the close-to-close returns by the weekday they are stamped on.
# .dropna() is required: pct_change() leaves the first row's return as NaN, and
# ttest_ind propagates a NaN input into a NaN p-value. `p_value < alpha` is then
# False whatever the data say, so the "not rejected" branch would always print.
# NOTE(review): a return stamped on Friday covers Thursday close -> Friday close;
# the return that actually spans the weekend is the one stamped on Monday
# (weekday == 0). Confirm that the Friday grouping below is the intended one.
overnight_returns = stock_data['Return'].loc[stock_data.index.weekday < 4].dropna()  # Overnight returns (Monday-Thursday)
weekend_returns = stock_data['Return'].loc[stock_data.index.weekday >= 4].dropna()  # Weekend returns (Friday and weekend)

# Welch's t-test (equal_var=False): compares the two means without assuming
# the two groups share the same variance
t_statistic, p_value = stats.ttest_ind(overnight_returns, weekend_returns, equal_var=False)

# Set the significance level (alpha)
alpha = 0.05

# Interpret the test results
if p_value < alpha:
    print("Null hypothesis of equal means for overnight and weekend returns is rejected.")
    print("There are statistically significant differences between overnight and weekend returns.")
else:
    print("Null hypothesis is not rejected.")
    print("There are no statistically significant differences between overnight and weekend returns.")

Null hypothesis is not rejected.
There are no statistically significant differences between overnight and weekend returns.


In [19]:
# Triple witching days: the third Friday of March, June, September and December.
# The third Friday of a month always falls on day 15-21, so that day window
# combined with weekday == 4 (Friday) pins it down. Testing the weekday alone
# would select every Friday of those months.
# NOTE(review): if the exchange is closed on a third Friday, that quarter simply
# contributes no row here - confirm this is acceptable for the sample window.
trading_dates = stock_data.index
is_quarter_end_month = trading_dates.month.isin([3, 6, 9, 12])
is_third_friday = (trading_dates.weekday == 4) & (trading_dates.day >= 15) & (trading_dates.day <= 21)
triple_witching_days = stock_data[is_quarter_end_month & is_third_friday]

# Create two separate datasets: returns and volume for triple witching days
triple_witching_returns = triple_witching_days['Return'].dropna()
triple_witching_volume = triple_witching_days['Volume'].dropna()

# One-sample t-test: is the mean return on triple witching days different from
# the mean return over the whole sample (Series.mean() skips the NaN first row)?
t_return_statistic, return_p_value = stats.ttest_1samp(triple_witching_returns, stock_data['Return'].mean())

# One-sample t-test: same comparison for trading volume
t_volume_statistic, volume_p_value = stats.ttest_1samp(triple_witching_volume, stock_data['Volume'].mean())

# Set the significance level (alpha)
alpha = 0.05

# Interpret the test results for returns
if return_p_value < alpha:
    print("Null hypothesis of equal means for returns on triple witching days is rejected.")
    print("There are statistically significant differences in returns on triple witching days.")
else:
    print("Null hypothesis of equal means for returns on triple witching days is not rejected.")
    print("There are no statistically significant differences in returns on triple witching days.")

# Interpret the test results for volume
if volume_p_value < alpha:
    print("Null hypothesis of equal means for volume on triple witching days is rejected.")
    print("There are statistically significant differences in volume on triple witching days.")
else:
    print("Null hypothesis of equal means for volume on triple witching days is not rejected.")
    print("There are no statistically significant differences in volume on triple witching days.")

Null hypothesis of equal means for returns on triple witching days is not rejected.
There are no statistically significant differences in returns on triple witching days.
Null hypothesis of equal means for volume on triple witching days is not rejected.
There are no statistically significant differences in volume on triple witching days.


In [22]:
# Create three separate datasets for returns: weekends, extended weekends, and weekdays.
# .dropna() is required: pct_change() leaves the first row's return as NaN, and a
# NaN in any sample makes levene() return a NaN p-value, so the comparison with
# alpha below would be False regardless of the data.
# NOTE(review): the "extended weekend" sample is taken from the dates held in
# triple_witching_days (defined in an earlier cell). Those dates are Fridays, so
# this sample overlaps weekend_returns, and Levene's test assumes independent
# samples. Confirm the intended definition of an extended weekend.
weekend_returns = stock_data['Return'].loc[stock_data.index.weekday >= 4].dropna()  # Returns on weekends (Friday and weekend)
extended_weekend_returns = stock_data['Return'].loc[stock_data.index.isin(triple_witching_days.index)].dropna()  # Returns on extended weekends
weekday_returns = stock_data['Return'].loc[stock_data.index.weekday < 4].dropna()  # Returns on weekdays

# Levene's test: the null hypothesis is that all three samples have equal variance
levene_statistic, levene_p_value = stats.levene(weekend_returns, extended_weekend_returns, weekday_returns)

# Set the significance level (alpha)
alpha = 0.05

# Interpret the test results: a p-value below alpha REJECTS the null of equal variances
if levene_p_value < alpha:
    print("Null hypothesis of equal variances is rejected.")
    print("The standard deviation of returns on weekends and extended weekends is different from the standard deviation of returns on weekdays.")
else:
    print("Null hypothesis of equal variances is not rejected.")
    print("The standard deviation of returns on weekends and extended weekends is not different from the standard deviation of returns on weekdays.")

Null hypothesis of equal variances is rejected.
The standard deviation of returns on weekends and extended weekends is different from the standard deviation of returns on weekdays.
