## **How different statistically are daily return and volume by a week day?**

## Download stock prices for : SPY (spx500), CAT (Caterpillar), EBAY (ebay)

In [None]:
import pandas as pd
import requests
from datetime import datetime
import calendar
import io
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
import statsmodels.api as sm
from scipy import stats

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm

In [None]:
class YahooData:
  def fetch(ticker, start, end, frequency, events, include_adj_close = 'true'):
    """
    ticker: <str>
      name of the stock
    start/end: <str>
      interval of dates period in format 'yyyy-mm-dd'
    frequency: <str>
      the step of the table: 1d, 1wk, 1mo
    events: <str>
      what kind of data you need: history/splits/capitalGain/div
    include_adj_close: <str>
      true/false for including adjusted close price
      not bool because you add it as a part of str
    """
    headers ={'User-agent': 'Mozilla/5.0'}

    url = "https://query1.finance.yahoo.com/v7/finance/download/" + str(ticker)
    x = int(datetime.strptime(start, '%Y-%m-%d').strftime("%s"))
    y = int(datetime.strptime(end, '%Y-%m-%d').strftime("%s"))
    url += "?period1=" + str(x) + "&period2=" + str(y) + "&events"+str(events) +"&includeAdjustedClose=" + str(include_adj_close)
    
    r = requests.get(url, headers=headers)
    df = pd.read_csv(io.StringIO(r.text), index_col=0, parse_dates=True)

    return df

In [None]:
spx = YahooData.fetch("SPY", start="2015-09-01", end="2021-09-01", 
                             frequency='1d', events = 'history', include_adj_close = 'true' )

cat = YahooData.fetch("CAT", start="2015-09-01", end="2021-09-01", 
                             frequency='1d', events = 'history', include_adj_close = 'true' )

ebay = YahooData.fetch("EBAY", start="2015-09-01", end="2021-09-01", 
                             frequency='1d', events = 'history', include_adj_close = 'true' )

In [None]:
spx.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-09-01,193.119995,194.770004,190.729996,191.770004,169.398621,256000400
2015-09-02,194.619995,195.460007,192.419998,195.410004,172.613968,160269300
2015-09-03,196.259995,198.050003,194.960007,195.550003,172.737656,152087800
2015-09-04,192.850006,193.860001,191.610001,192.589996,170.122971,207081000
2015-09-08,195.940002,197.610001,195.169998,197.429993,174.398315,116025700


In [None]:
cat.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-09-01,74.5,75.940002,74.459999,74.900002,62.151791,9053100
2015-09-02,75.900002,76.160004,74.949997,76.099998,63.147541,5325900
2015-09-03,75.949997,76.010002,73.779999,74.449997,61.778389,11077100
2015-09-04,73.0,73.660004,72.349998,73.099998,60.658146,12007300
2015-09-08,74.580002,74.620003,73.559998,74.300003,61.6539,5647800


In [None]:
ebay.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-09-01,26.540001,27.09,26.299999,26.48,25.328167,15491900
2015-09-02,26.860001,26.92,26.41,26.870001,25.701202,10907600
2015-09-03,27.360001,27.6,26.93,27.040001,25.863808,12281600
2015-09-04,26.57,26.75,26.290001,26.48,25.328167,9066900
2015-09-08,26.690001,26.75,26.209999,26.360001,25.213387,14685900


## Calculate daily returns 

In [None]:
# code for the returns calculation
spx['Daily Returns']= spx['Adj Close'].pct_change(1)
cat['Daily Returns']= cat['Adj Close'].pct_change(1)
ebay['Daily Returns']= ebay['Adj Close'].pct_change(1)

In [None]:
spx = spx.dropna()
cat = cat.dropna()
ebay = ebay.dropna()

Add the day of week into each dataset
`'Mon': 0, 'Tue': 1, 'Wed': 2, 'Thu': 3, 'Fr': 4`

In [None]:
def addWeekday(df):
  df['Year'] = df.index.year
  df['Month'] = df.index.month
  df['Weekday'] = df.index.weekday
  return df

In [None]:
spx = addWeekday(spx)
cat = addWeekday(cat)
ebay = addWeekday(ebay)

In [None]:
spx.head(8)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily Returns,Year,Month,Weekday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-09-02,194.619995,195.460007,192.419998,195.410004,172.613968,160269300,0.018981,2015,9,2
2015-09-03,196.259995,198.050003,194.960007,195.550003,172.737656,152087800,0.000717,2015,9,3
2015-09-04,192.850006,193.860001,191.610001,192.589996,170.122971,207081000,-0.015137,2015,9,4
2015-09-08,195.940002,197.610001,195.169998,197.429993,174.398315,116025700,0.025131,2015,9,1
2015-09-09,199.320007,199.470001,194.350006,194.789993,172.066269,149347700,-0.013372,2015,9,2
2015-09-10,194.559998,197.220001,194.25,195.850006,173.00264,158611100,0.005442,2015,9,3
2015-09-11,195.380005,196.820007,194.529999,196.740005,173.788818,119691200,0.004544,2015,9,4
2015-09-14,196.949997,197.009995,195.429993,196.009995,173.143967,79452000,-0.003711,2015,9,0


In [None]:
ebay.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1510 entries, 2015-09-02 to 2021-08-31
Columns: 10 entries, Open to Weekday
dtypes: float64(6), int64(4)
memory usage: 129.8 KB


# Distribution checking

In [None]:
from scipy.stats import kstest, norm
from scipy import stats

In [None]:
rng = np.random.default_rng(62893267)
def ks_test(df1, df2, feature):
  rvs1 = stats.norm.rvs(size=len(df1[feature]), loc=0.5, scale=1, random_state=rng)
  rvs2 = stats.norm.rvs(size=len(df2[feature]), loc=0.5, scale=1, random_state=rng)
  return(stats.ks_2samp(rvs1, rvs2).pvalue < 0.01)

In [None]:
def analyze_df(df, field):
  combinations = []
  rejections = []
  plot_data_x = []
  plot_data_y = []

  for year1, iy1 in zip(range(2015, 2021), range(0,6)):
      for day1, id1 in zip(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"], range(0, 5)):
          tmp = []

          for year2, iy2 in zip(range(2015, 2021), range(0, 6)):
              for day2, id2 in zip(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"], range(0, 5)): 
                  if (year1 == year2 and day1 == day2) or day1==day2:
                      tmp.append(None)
                  else:
                      tmp.append(ks_test(df[df.index.year == year1][df['Weekday']==id1], df[df.index.year == year2][df['Weekday']==id2], field))
                  if tmp[-1]:
                      plot_data_y.append(iy1 * 5 + id1)
                      plot_data_x.append(iy2 * 5 + id2)
                      rejections.append([
                          {
                              "year": year1,
                              "day": day1
                          },
                          {
                              "year": year2,
                              "day": day2
                          }
                      ])
          combinations.append(tmp)
  return combinations, rejections, plot_data_x, plot_data_y

In [None]:
def print_combinations_rejections(combinations, rejections):
  print("\nRejections:")
  for rejection in rejections:
      print(f"{rejection[0]['year']}_{rejection[0]['day']} : {rejection[1]['year']}_{rejection[1]['day']}")
def draw_plots(x, y, labels):
  plt.plot(x,y, 'r', marker="x", linestyle="",)
  plt.xticks(range(0, 35), labels, fontsize = 7, rotation="vertical")
  plt.yticks(range(0, 35), labels, fontsize = 8, rotation="horizontal")
  plt.tick_params(axis="both", direction="in", pad=15)
  plt.show()

In [None]:
print("EBAY")
ebay_2 = pd.DataFrame()
labels = []
for year in [2015, 2016,2017, 2018, 2019, 2020, 2021]:
    ebay_2[str(year)] = {}
    df_tmp = ebay[ebay['Year'] == year]
    for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]:
        labels.append(f"{year}_{day}")
        ebay_2[str(year)][day] = df_tmp[df_tmp['Weekday'] == day]
print("\nBy Volume:")
combinations, rejections, aaplVolumeX, aaplVolumeY = analyze_df(ebay, 'Volume')
print_combinations_rejections(combinations, rejections)

print("\nBy Returns:")
combinations, rejections, aaplVolumeX, aaplVolumeY = analyze_df(ebay, 'Daily Returns')
print_combinations_rejections(combinations, rejections)

EBAY

By Volume:

Rejections:
2016_Monday : 2019_Friday
2016_Wednesday : 2016_Monday
2016_Friday : 2017_Tuesday
2017_Wednesday : 2019_Tuesday
2017_Thursday : 2019_Tuesday
2017_Friday : 2016_Monday
2018_Friday : 2020_Monday
2019_Tuesday : 2015_Friday
2019_Wednesday : 2016_Thursday
2020_Monday : 2015_Tuesday
2020_Monday : 2019_Thursday

By Returns:

Rejections:
2015_Tuesday : 2020_Friday
2015_Thursday : 2020_Tuesday
2017_Monday : 2018_Thursday
2017_Tuesday : 2020_Friday
2018_Wednesday : 2017_Thursday
2018_Thursday : 2018_Wednesday
2018_Friday : 2015_Wednesday
2019_Friday : 2020_Monday


In [None]:
print("SPX")
spx_2 = pd.DataFrame()
labels = []
for year in [2015, 2016,2017, 2018, 2019, 2020, 2021]:
    spx_2[str(year)] = {}
    df_tmp = spx[spx['Year'] == year]
    for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]:
        labels.append(f"{year}_{day}")
        spx_2[str(year)][day] = df_tmp[df_tmp['Weekday'] == day]
print("\nBy Volume:")
combinations, rejections, aaplVolumeX, aaplVolumeY = analyze_df(spx, 'Volume')
print_combinations_rejections(combinations, rejections)

print("\nBy Returns:")
combinations, rejections, aaplVolumeX, aaplVolumeY = analyze_df(spx, 'Daily Returns')
print_combinations_rejections(combinations, rejections)

SPX

By Volume:

Rejections:
2016_Monday : 2016_Wednesday
2017_Tuesday : 2017_Wednesday
2018_Tuesday : 2020_Monday
2020_Thursday : 2019_Monday

By Returns:

Rejections:
2015_Monday : 2017_Thursday
2019_Friday : 2017_Tuesday
2020_Monday : 2019_Wednesday
2020_Tuesday : 2017_Monday
2020_Thursday : 2015_Tuesday
2020_Thursday : 2019_Wednesday


In [None]:
print("CAT")
cat_2 = pd.DataFrame()
labels = []
for year in [2015, 2016,2017, 2018, 2019, 2020, 2021]:
    cat_2[str(year)] = {}
    df_tmp = cat[cat['Year'] == year]
    for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]:
        labels.append(f"{year}_{day}")
        cat_2[str(year)][day] = df_tmp[df_tmp['Weekday'] == day]
print("\nBy Volume:")
combinations, rejections, aaplVolumeX, aaplVolumeY = analyze_df(cat, 'Volume')
print_combinations_rejections(combinations, rejections)

print("\nBy Returns:")
combinations, rejections, aaplVolumeX, aaplVolumeY = analyze_df(cat, 'Daily Returns')
print_combinations_rejections(combinations, rejections)

CAT

By Volume:

Rejections:
2015_Friday : 2020_Thursday
2016_Monday : 2017_Wednesday
2016_Wednesday : 2016_Tuesday
2017_Monday : 2015_Wednesday
2017_Monday : 2017_Wednesday
2017_Friday : 2015_Tuesday
2018_Monday : 2018_Wednesday
2018_Tuesday : 2019_Thursday
2019_Thursday : 2015_Wednesday
2019_Thursday : 2019_Wednesday
2020_Thursday : 2020_Monday

By Returns:

Rejections:
2015_Thursday : 2019_Friday
2016_Thursday : 2020_Friday
2017_Wednesday : 2016_Monday
2019_Monday : 2018_Friday
2019_Thursday : 2016_Tuesday
2020_Thursday : 2017_Friday
