In [68]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.io
from tqdm.auto import tqdm

# Import Data

In [40]:
# Load the cleaned 1-second ticker data.
# `datetime` is parsed on load because the resampling step groups with
# pd.Grouper(key="datetime", freq=...), which requires a datetime64 column;
# without parse_dates, read_csv leaves it as plain strings and a fresh-kernel
# run fails at the groupby.
ticker_1s_df = pd.read_csv('../data/clean/ticker_1s.csv', parse_dates=['datetime'])

In [73]:
# Preview the identifying columns together with the raw price
ticker_1s_df.loc[:, ['ticker', 'ticker_desc', 'ticker_desc_short', 'date', 'datetime', 'price']]

Unnamed: 0,ticker,ticker_desc,ticker_desc_short,date,datetime,price
0,IEF,iShares 7-10 Year Treasury Bond ETF,7-10 Year,2020-01-03,2020-01-03 09:30:00,111.150
1,IEF,iShares 7-10 Year Treasury Bond ETF,7-10 Year,2020-01-03,2020-01-03 09:30:01,111.150
2,IEF,iShares 7-10 Year Treasury Bond ETF,7-10 Year,2020-01-03,2020-01-03 09:30:02,111.150
3,IEF,iShares 7-10 Year Treasury Bond ETF,7-10 Year,2020-01-03,2020-01-03 09:30:03,111.150
4,IEF,iShares 7-10 Year Treasury Bond ETF,7-10 Year,2020-01-03,2020-01-03 09:30:04,111.150
...,...,...,...,...,...,...
701959,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:55,163.735
701960,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:56,163.745
701961,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:57,163.745
701962,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:58,163.750


# Process

In [74]:
# Natural log of the price, stored as a new column (differences of it are log returns)
ticker_1s_df['log_price'] = ticker_1s_df['price'].pipe(np.log)

In [75]:
def RV(returns):
    """Sum of squared returns over the observations that are present.

    Parameters
    ----------
    returns : pandas.Series
        Series of returns; missing values are dropped before summing.

    Returns
    -------
    numpy scalar
        Sum of the squares of the non-missing values.
    """
    # Discard missing entries (e.g. the first diff of each group is NaN)
    observed = returns.dropna()
    squared = np.square(np.array(observed))
    return np.sum(squared)

In [76]:
# Resampled dataframes, keyed by pandas frequency string (e.g. "5s")
ticker_s_df_dict = {}

# Sampling intervals to evaluate, in seconds
sec_samplerate_range = [1, 5, 10, 15, 30, 60, 120, 180]

# Build one resampled dataframe per sampling interval
for sec_samplerate in tqdm(sec_samplerate_range):

    # Frequency string understood by pd.Grouper, e.g. 30 -> "30s"
    sec_samplerate_str = f"{sec_samplerate}s"

    ticker_s_df = (
        ticker_1s_df
        # Last log price observed in each time bin, per ticker and day
        .groupby(
            ["ticker", "ticker_desc", "date", pd.Grouper(key="datetime", freq=sec_samplerate_str)]
        )["log_price"]
        .last()
        .reset_index()
        # Log return: change in log price between consecutive bins of the same
        # ticker/day (the first bin of each day has no predecessor, so it is NaN)
        .assign(
            **{
                "return": lambda frame: (
                    frame.sort_values(by=["datetime"])
                    .groupby(["date", "ticker", "ticker_desc"])["log_price"]
                    .diff(1)
                )
            }
        )
        # Daily sum of squared returns (via RV), repeated on every row of that ticker/day
        .assign(
            rv_1day=lambda frame: (
                frame.groupby(["ticker", "ticker_desc", "date"])["return"].transform(RV)
            )
        )
    )

    # Store an independent copy under its frequency string
    ticker_s_df_dict[sec_samplerate_str] = ticker_s_df.copy()

HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))




In [70]:
# Inspect the resampled dataframes; keys are the frequency strings built in the loop (e.g. '1s')
ticker_s_df_dict

{'1s':        ticker                          ticker_desc        date  \
 0         IEF  iShares 7-10 Year Treasury Bond ETF  2020-01-03   
 1         IEF  iShares 7-10 Year Treasury Bond ETF  2020-01-03   
 2         IEF  iShares 7-10 Year Treasury Bond ETF  2020-01-03   
 3         IEF  iShares 7-10 Year Treasury Bond ETF  2020-01-03   
 4         IEF  iShares 7-10 Year Treasury Bond ETF  2020-01-03   
 ...       ...                                  ...         ...   
 701959    TLT   iShares 20+ Year Treasury Bond ETF  2020-08-19   
 701960    TLT   iShares 20+ Year Treasury Bond ETF  2020-08-19   
 701961    TLT   iShares 20+ Year Treasury Bond ETF  2020-08-19   
 701962    TLT   iShares 20+ Year Treasury Bond ETF  2020-08-19   
 701963    TLT   iShares 20+ Year Treasury Bond ETF  2020-08-19   
 
                   datetime  log_price    return   rv_1day  
 0      2020-01-03 09:30:00   4.710881       NaN  0.000011  
 1      2020-01-03 09:30:01   4.710881  0.000000  0.000011  
 2   

In [4]:
# Keep only the TLT rows, as an independent copy of the 1-second data
data_df = ticker_1s_df[ticker_1s_df['ticker'] == "TLT"].copy()

In [5]:
# Inspect the filtered dataframe.
# NOTE(review): the saved output includes a `return` column that no visible cell adds
# to ticker_1s_df — possibly stale kernel state; confirm with Restart & Run All.
data_df

Unnamed: 0,ticker,ticker_desc,ticker_desc_short,date,datetime,price,log_price,return
561564,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-01-03,2020-01-03 09:30:00,138.250,4.929064,0.000000
561565,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-01-03,2020-01-03 09:30:01,138.235,4.928955,-0.000109
561566,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-01-03,2020-01-03 09:30:02,138.220,4.928847,-0.000109
561567,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-01-03,2020-01-03 09:30:03,138.230,4.928919,0.000072
561568,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-01-03,2020-01-03 09:30:04,138.250,4.929064,0.000145
...,...,...,...,...,...,...,...,...
701959,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:55,163.735,5.098249,-0.000031
701960,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:56,163.745,5.098310,0.000061
701961,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:57,163.745,5.098310,0.000000
701962,TLT,iShares 20+ Year Treasury Bond ETF,20+ Year,2020-08-19,2020-08-19 15:59:58,163.750,5.098341,0.000031
