# Using Pandas for Time Series Analysis


In [None]:
!pip install yfinance
import yfinance as yf


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.2.11-py2.py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 KB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.2/112.2 KB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting cryptography>=3.3.2
  Downloading cryptography-39.0.1-cp36-abi3-manylinux_2_28_x86_64.whl (4.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting requests>=2.26
  Downloading requests-2.28.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 KB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting beautifulsoup4>=4.11.1
  Downloading beautiful

# Get data for SPY -- S&P 500 ETF.

In [None]:
# Download the daily price history for SPY.
# * period='max'      -> ask for the full history explicitly instead of relying on
#                        the library's default look-back window.
# * auto_adjust=False -> keep the raw 'Close' alongside a separate 'Adj Close' column;
#                        the rest of this notebook compares the two. Newer yfinance
#                        releases auto-adjust by default and drop 'Adj Close'.
SPY = yf.download(['SPY'], period='max', auto_adjust=False)

# Some yfinance versions return two-level columns (field, ticker) even for a single
# ticker. Flatten to the field names so SPY['Adj Close'] is a Series, as assumed below.
if SPY.columns.nlevels > 1:
    SPY.columns = SPY.columns.get_level_values(0)

[*********************100%***********************]  1 of 1 completed


In [None]:
import pandas as pd
import numpy as np

# Route every pandas `.plot()` call in this notebook through the plotly backend.
pd.set_option("plotting.backend", "plotly")

# Overlay the adjusted and the raw closing price on one chart.
SPY.loc[:, ['Adj Close', 'Close']].plot()

# Adj Close and Close
* Why are Adj Close and Close different far back in time, but the same today?


In [None]:
# Simple daily return computed by hand: today's price over the previous row's price, minus one.
SPY['Adj Close'] / SPY['Adj Close'].shift(periods=1) - 1

Date
1993-01-29         NaN
1993-02-01    0.007112
1993-02-02    0.002119
1993-02-03    0.010571
1993-02-04    0.004183
                ...   
2023-02-06   -0.006111
2023-02-07    0.013079
2023-02-08   -0.010935
2023-02-09   -0.008669
2023-02-10    0.002334
Name: Adj Close, Length: 7564, dtype: float64

In [None]:
# Same daily return, using pandas' built-in one-row percentage change.
SPY['Adj Close'].pct_change(periods=1)

Date
1993-01-29         NaN
1993-02-01    0.007112
1993-02-02    0.002119
1993-02-03    0.010571
1993-02-04    0.004183
                ...   
2023-02-06   -0.006111
2023-02-07    0.013079
2023-02-08   -0.010935
2023-02-09   -0.008669
2023-02-10    0.002334
Name: Adj Close, Length: 7564, dtype: float64

In [None]:
# Daily returns of both price series side by side, so they can be compared row by row.
df = SPY.loc[:, ['Adj Close', 'Close']].pct_change()

Unnamed: 0_level_0,Adj Close,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1993-03-19,-0.001513,-0.006207
1993-04-14,-0.001388,-0.001389
1993-05-19,0.020539,0.020538
1993-06-18,-0.008235,-0.015214
1993-09-17,-0.001939,-0.008119
...,...,...
2021-12-17,-0.010643,-0.014107
2022-03-18,0.010953,0.007822
2022-06-17,0.002156,-0.002155
2022-09-16,-0.007629,-0.011689


In [None]:
# Keep only the days where the adjusted return exceeds the raw return by more than 1e-6.
# Looks like the difference is due to splits and dividends. Divs are quarterly.
df.loc[df['Adj Close'].sub(df['Close']).gt(1E-6)]

## Let's plot daily returns


In [None]:
# Chart the series of daily returns of the adjusted close.
daily_returns = SPY['Adj Close'].pct_change()
daily_returns.plot()

In [None]:
# Sample standard deviation (ddof=1, the pandas default) of daily returns over the whole history.
SPY['Adj Close'].pct_change().std(ddof=1)

0.011919463469663421

# Let's calculate the Rolling 1 Month (21 days) volatility of the S&P 500 ETF, SPY


In [None]:
# Rolling volatility: std of daily returns over a trailing window of 21 rows.
# The first rows are NaN until the window is full.
SPY['Adj Close'].pct_change().rolling(window=21).std().plot()

## We assumed that Yahoo Finance did not miss any days. But we don't know that.

* When days are missing, a 21-row window spans more than 21 business days, which would be a problem if using this data to price financial contracts that have a stated number of days.
* Let's ensure the index has all business days.
* If prices are missing, forward fill (ffill)


In [None]:
# Our own list of business days (Mon-Fri) between the first and last date in the data.
pd.bdate_range(start=SPY.index[0], end=SPY.index[-1])

DatetimeIndex(['1993-01-29', '1993-02-01', '1993-02-02', '1993-02-03',
               '1993-02-04', '1993-02-05', '1993-02-08', '1993-02-09',
               '1993-02-10', '1993-02-11',
               ...
               '2023-01-30', '2023-01-31', '2023-02-01', '2023-02-02',
               '2023-02-03', '2023-02-06', '2023-02-07', '2023-02-08',
               '2023-02-09', '2023-02-10'],
              dtype='datetime64[ns]', length=7836, freq='B')

In [None]:
# The dates Yahoo Finance actually returned (which excludes holidays).
# Compare its length with the business-day range built from the same endpoints.
SPY.index

DatetimeIndex(['1993-01-29', '1993-02-01', '1993-02-02', '1993-02-03',
               '1993-02-04', '1993-02-05', '1993-02-08', '1993-02-09',
               '1993-02-10', '1993-02-11',
               ...
               '2023-01-27', '2023-01-30', '2023-01-31', '2023-02-01',
               '2023-02-02', '2023-02-03', '2023-02-06', '2023-02-07',
               '2023-02-08', '2023-02-09'],
              dtype='datetime64[ns]', name='Date', length=7563, freq=None)

In [None]:
# Suppose we want to work from our own business-day list, for whatever reason.
# Not all holidays are announced in advance (e.g. a president's funeral), so such a day
# might not be stipulated as a holiday in a financial contract.
# After 9/11/2001, the exchange was closed for a few days.

# Re-align the data onto every business day; dates with no quote become all-NaN rows.
business_days = pd.bdate_range(start=SPY.index[0], end=SPY.index[-1])
SPY = SPY.reindex(business_days)
SPY

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
1993-01-29,43.968750,43.968750,43.750000,43.937500,25.218218,1003200.0
1993-02-01,43.968750,44.250000,43.968750,44.250000,25.397581,480500.0
1993-02-02,44.218750,44.375000,44.125000,44.343750,25.451393,201300.0
1993-02-03,44.406250,44.843750,44.375000,44.812500,25.720446,529400.0
1993-02-04,44.968750,45.093750,44.468750,45.000000,25.828041,531500.0
...,...,...,...,...,...,...
2023-02-06,409.790009,411.290009,408.100006,409.829987,409.829987,60295300.0
2023-02-07,408.869995,416.489990,407.570007,415.190002,415.190002,90990700.0
2023-02-08,413.130005,414.529999,409.929993,410.649994,410.649994,76227500.0
2023-02-09,414.410004,414.570007,405.809998,407.089996,407.089996,78694900.0


In [None]:
# Which rows are NaN? Show every date whose adjusted close is missing.
SPY[SPY['Adj Close'].isnull()]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
1993-02-15,,,,,,
1993-04-09,,,,,,
1993-05-31,,,,,,
1993-07-05,,,,,,
1993-09-06,,,,,,
...,...,...,...,...,...,...
2022-09-05,,,,,,
2022-11-24,,,,,,
2022-12-26,,,,,,
2023-01-02,,,,,,


## Stock markets to close Tuesday for Ford funeral
https://www.nbcnews.com/id/wbna16384589

* '2007-01-01' <- Expected New Year Holiday
* '2007-01-02' <- Unexpected closure (markets closed for President Ford's funeral)

In [None]:
# Collect the dates with no adjusted close, then list those after 2006-12-20
# to find the closures around New Year 2007.
missing_rows = SPY[SPY['Adj Close'].isna()]
idx = missing_rows.index
idx[idx > pd.Timestamp('2006-12-20')]

DatetimeIndex(['2006-12-25', '2007-01-01', '2007-01-02', '2007-01-15',
               '2007-02-19', '2007-04-06', '2007-05-28', '2007-07-04',
               '2007-09-03', '2007-11-22',
               ...
               '2022-02-21', '2022-04-15', '2022-05-30', '2022-06-20',
               '2022-07-04', '2022-09-05', '2022-11-24', '2022-12-26',
               '2023-01-02', '2023-01-16'],
              dtype='datetime64[ns]', length=150, freq=None)

## Forward Fill (Impute missing prices with previous day)


In [None]:
# Carry the last known adjusted close forward into the missing days.
# Only 'Adj Close' is filled; the other columns keep their NaNs.
SPY.loc[:, 'Adj Close'] = SPY['Adj Close'].ffill()

In [None]:
# Check the fill: list any rows whose adjusted close is still missing.
SPY[SPY['Adj Close'].isna()]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume


In [None]:
# 'Close' was not forward-filled, so these rows still mark the days that had no quote.
SPY[SPY['Close'].isnull()]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
1993-02-15,,,,,25.594887,
1993-04-09,,,,,25.535543,
1993-05-31,,,,,26.076180,
1993-07-05,,,,,25.952461,
1993-09-06,,,,,26.932493,
...,...,...,...,...,...,...
2022-09-05,,,,,388.849731,
2022-11-24,,,,,400.580536,
2022-12-26,,,,,382.910004,
2023-01-02,,,,,382.429993,


# Let's recalculate the Rolling 1 Month (21 days) volatility of the S&P 500 ETF, SPY, now on the complete business-day index


In [None]:
# Same 21-row rolling volatility as before, now on the forward-filled adjusted close.
filled_returns = SPY['Adj Close'].pct_change()
filled_returns.rolling(window=21).std().plot()

## Seasonality analysis - Are Mondays more volatile than other days?



In [None]:
# Standard deviation of daily returns per day of week (0 = Monday ... 6 = Sunday).
# Group on the index's vectorised `weekday` attribute. The earlier
# `SPY.index.map(lambda dt: dt.weekday)` never *called* `weekday()`: it only produced
# integers when pandas evaluated the lambda on the whole DatetimeIndex. On pandas
# versions that apply the mapper per Timestamp it returns bound methods, which are
# useless as group keys.
SPY['Adj Close'].pct_change().groupby(SPY.index.weekday).std()

0    0.012612
1    0.011907
2    0.011296
3    0.011706
4    0.010971
Name: Adj Close, dtype: float64

In [None]:
# Bar chart of the standard deviation of daily returns per day of week (0 = Monday).
# `SPY.index.weekday` yields the integer day of week directly. The earlier
# `index.map(lambda dt: dt.weekday)` referenced the method without calling it and only
# worked when pandas evaluated the lambda on the whole index rather than per Timestamp.
SPY['Adj Close'].pct_change().groupby(SPY.index.weekday).std().plot.bar()

## What if we reindex by calendar days, not week days?


In [None]:
# Re-align onto every calendar day between the first and last date.
# Dates that were not already in the index are added as all-NaN rows.
calendar_days = pd.date_range(start=SPY.index[0], end=SPY.index[-1])
SPY = SPY.reindex(calendar_days)
SPY

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Weekday
1993-01-29,43.968750,43.968750,43.750000,43.937500,25.218218,1003200.0,4.0
1993-01-30,,,,,,,
1993-01-31,,,,,,,
1993-02-01,43.968750,44.250000,43.968750,44.250000,25.397581,480500.0,0.0
1993-02-02,44.218750,44.375000,44.125000,44.343750,25.451393,201300.0,1.0
...,...,...,...,...,...,...,...
2023-02-06,409.790009,411.290009,408.100006,409.829987,409.829987,60295300.0,0.0
2023-02-07,408.869995,416.489990,407.570007,415.190002,415.190002,90990700.0,1.0
2023-02-08,413.130005,414.529999,409.929993,410.649994,410.649994,76227500.0,2.0
2023-02-09,414.410004,414.570007,405.809998,407.089996,407.089996,78694900.0,3.0


In [None]:
# Carry the last known adjusted close forward into the newly added calendar days.
SPY.loc[:, 'Adj Close'] = SPY['Adj Close'].ffill()

* Now plot the rolling volatility over 1 month = 30 calendar days

In [None]:
# Rolling volatility over a trailing window of 30 rows (one row per calendar day here).
calendar_returns = SPY['Adj Close'].pct_change()
calendar_returns.rolling(window=30).std().plot()

In [None]:
# Standard deviation of daily returns per day of week on the calendar-day index
# (0 = Monday ... 5 = Saturday, 6 = Sunday).
# `SPY.index.weekday` yields the integer day of week directly. The earlier
# `index.map(lambda dt: dt.weekday)` referenced the method without calling it and only
# worked when pandas evaluated the lambda on the whole index rather than per Timestamp.
SPY['Adj Close'].pct_change().groupby(SPY.index.weekday).std()

0    0.012612
1    0.011907
2    0.011296
3    0.011704
4    0.010975
5    0.000000
6    0.000000
Name: Adj Close, dtype: float64

# Class discussion

* When pricing options, the option pricing calculator usually takes business days or calendar days to expiration.
* Based on the above, is it better to use business days or calendar days?
