# 1.6.2 Historical Tick Data with Aggregation

In order to download tick level data, we had to subscribe to Polygon.io Professional API. 

Each stock is queried individually from the API, and the response is limited to 50,000 trades. 

We will need to collect for all 505 SP500 stocks over a 2 month period for training data. 

In [2]:
import config
import pandas as pd 
from polygon import RESTClient
import datetime
from functools import reduce
import pandas_market_calendars as pmc
#from modules import timing

In [2]:
table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
df = table[0]
spdf.loc[spdf[spdf['Symbol'] == 'LUMN'].index[0], 'Symbol'] = 'CTL'
df.to_csv('../data/sp500/S&P500-Symbols.csv', columns=['Symbol'])
print(df.shape)
df.head()

(505, 9)


Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",1976-08-09,66740,1902
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
2,ABBV,AbbVie Inc.,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
3,ABMD,ABIOMED Inc,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
4,ACN,Accenture plc,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [3]:
df[df['Symbol'] == 'LUMN']

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
297,LUMN,Lumen Technologies,reports,Communication Services,Alternative Carriers,"Monroe, Louisiana",1999-03-25,18926,1983 (1877)


In [4]:
df.loc[297, 'Symbol'] = 'CTL'
df.loc[297]

Symbol                                      CTL
Security                     Lumen Technologies
SEC filings                             reports
GICS Sector              Communication Services
GICS Sub Industry          Alternative Carriers
Headquarters Location         Monroe, Louisiana
Date first added                     1999-03-25
CIK                                       18926
Founded                             1983 (1877)
Name: 297, dtype: object

In [5]:
df[df['Symbol'] == 'BRK.B']

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
66,BRK.B,Berkshire Hathaway,reports,Financials,Multi-Sector Holdings,"Omaha, Nebraska",2010-02-16,1067983,1839


In [6]:
with RESTClient(config.POLYGON) as client:
    resp = client.historic_trades_v2("BRK.B", "2018-03-02")
    
key_map = {key:resp.map[key]['name'] for key in resp.map}

In [7]:
key_map

{'p': 'price',
 't': 'sip_timestamp',
 'f': 'trf_timestamp',
 'i': 'id',
 'I': 'orig_id',
 'r': 'trf_id',
 's': 'size',
 'y': 'participant_timestamp',
 'q': 'sequence_number',
 'c': 'conditions',
 'e': 'correction',
 'x': 'exchange',
 'z': 'tape'}

In [8]:
sdf = pd.DataFrame(resp.results)
sdf.rename(key_map, axis=1, inplace=True)
sdf['sip_timestamp'] = pd.to_datetime(sdf['sip_timestamp'])
sdf['participant_timestamp'] = pd.to_datetime(sdf['participant_timestamp'])
sdf.head()

Unnamed: 0,sip_timestamp,participant_timestamp,sequence_number,id,exchange,size,conditions,price,tape,trf_timestamp,trf_id
0,2018-03-02 09:00:00.041691,2018-03-02 09:00:00.041398528,61701,52983525027892,11,15,"[12, 37]",202.0,1,,
1,2018-03-02 09:36:43.611813,2018-03-02 09:36:43.610889635,101901,62879129946177,12,5,"[12, 37]",200.55,1,,
2,2018-03-02 09:44:45.193672,2018-03-02 09:44:45.192772582,104501,62879129946476,12,3,"[12, 37]",202.44,1,,
3,2018-03-02 10:21:23.855016,2018-03-02 10:21:23.854120667,122101,62879129948780,12,2,"[12, 37]",201.53,1,,
4,2018-03-02 10:21:23.855049,2018-03-02 10:21:23.854321527,122201,62879129948781,12,8,"[14, 12, 37, 41]",201.53,1,,


50,000 rows comprises less than a full day of data from the market. 



In [9]:
nyse = pmc.get_calendar('NASDAQ')
days = nyse.schedule(start_date='2020-06-23', end_date='2020-09-17').index
# Show available calendars
print(pmc.get_calendar_names())

['ASX', 'BMF', 'CFE', 'NYSE', 'stock', 'NASDAQ', 'BATS', 'CME', 'CBOT', 'COMEX', 'NYMEX', 'CME_Equity', 'CBOT_Equity', 'CME_Agriculture', 'CBOT_Agriculture', 'COMEX_Agriculture', 'NYMEX_Agriculture', 'CME_Rate', 'CBOT_Rate', 'CME_InterestRate', 'CBOT_InterestRate', 'CME_Bond', 'CBOT_Bond', 'EUREX', 'HKEX', 'ICE', 'ICEUS', 'NYFE', 'JPX', 'LSE', 'OSE', 'SIX', 'SSE', 'TSX', 'TSXV', 'XBOM', 'ASEX', 'BVMF', 'CMES', 'IEPA', 'XAMS', 'XASX', 'XBKK', 'XBOG', 'XBRU', 'XBUD', 'XBUE', 'XCBF', 'XCSE', 'XDUB', 'XFRA', 'XHEL', 'XHKG', 'XICE', 'XIDX', 'XIST', 'XJSE', 'XKAR', 'XKLS', 'XKRX', 'XLIM', 'XLIS', 'XLON', 'XMAD', 'XMEX', 'XMIL', 'XMOS', 'XNYS', 'XNZE', 'XOSL', 'XPAR', 'XPHS', 'XPRA', 'XSES', 'XSGO', 'XSHG', 'XSTO', 'XSWX', 'XTAI', 'XTKS', 'XTSE', 'XWAR', 'XWBO', 'us_futures', '24/7', '24/5']


In [10]:
days = nyse.schedule(start_date='2020-06-23', end_date='2020-09-17').index

In [11]:
days

DatetimeIndex(['2020-06-23', '2020-06-24', '2020-06-25', '2020-06-26',
               '2020-06-29', '2020-06-30', '2020-07-01', '2020-07-02',
               '2020-07-06', '2020-07-07', '2020-07-08', '2020-07-09',
               '2020-07-10', '2020-07-13', '2020-07-14', '2020-07-15',
               '2020-07-16', '2020-07-17', '2020-07-20', '2020-07-21',
               '2020-07-22', '2020-07-23', '2020-07-24', '2020-07-27',
               '2020-07-28', '2020-07-29', '2020-07-30', '2020-07-31',
               '2020-08-03', '2020-08-04', '2020-08-05', '2020-08-06',
               '2020-08-07', '2020-08-10', '2020-08-11', '2020-08-12',
               '2020-08-13', '2020-08-14', '2020-08-17', '2020-08-18',
               '2020-08-19', '2020-08-20', '2020-08-21', '2020-08-24',
               '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28',
               '2020-08-31', '2020-09-01', '2020-09-02', '2020-09-03',
               '2020-09-04', '2020-09-08', '2020-09-09', '2020-09-10',
      

In [12]:
len(days)

61

In [15]:
stocks = df['Symbol']
# stocks

In [16]:
stocks[297]

'CTL'

In [17]:
missing_stocks = []

In [45]:
def recursive_ask(stock, strdate, limit, timestamp):
    try:
        current_ticks = client.historic_trades_v2(stock, 
                                                  strdate, 
                                                  limit=limit, 
                                                  timestamp=timestamp
                                                     ).results
    except HTTPError as error:
        print(error)
        print('Asking recursively')
        time.sleep(1)
        currrent_ticks = recursive_ask(stock, strdate, limit, timestamp)
    
    return currrent_ticks
    

In [None]:
def timing(start=None):
    if start == None:
        now_time = time.time()
        print('Start time {}'.format(time.strftime('%c', time.localtime(now_time))))
    else:
        now_time = time.time()
        elapsed = now_time - start
        mins, secs = divmod(elapsed, 60)
        hours, mins = divmod(mins, 60)
        print(f'Time elapsed {hours} hours, {mins} minues, {secs} seconds')
        print('Iteration start elapsed {}'.format(time.strftime('%c', time.localtime())))
    return now_time

In [1]:
# initialize an aggregation dataframe
agg_df = pd.DataFrame()
ticks_df = pd.DataFrame()
increment = 1000000000
sp500 = stocks
limit_size = 50000
start = None
# timer = timing()

table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
df = table[0]
df.loc[spdf[spdf['Symbol'] == 'LUMN'].index[0], 'Symbol'] = 'CTL'
stocks = df['Symbol']
del(df)

nyse = pmc.get_calendar('NASDAQ')
days = [nyse.schedule(start_date='2019-09-01', end_date='2020-09-17').index[0]]

for date in days: 
    start = timing(start)
    #timer.iterate()
    
    ## TO DO 
    ## Ensure that the loop gathers all the data for each day before moving on to the next day 
    stock_tracker = {stock:{'laststamp':None, 'complete':False} for stock in sp500}

    strdate = date.date().strftime('%Y-%m-%d')
    laststamps = {stock:None for stock in sp500}
    print(strdate)

    for stock in sp500:
        print(stock)
        # download a batch of data and add it to the list 
        while stock_tracker[stock]['complete'] == False:
            with RESTClient(config.POLYGON) as client:
                try:
                    current_ticks = client.historic_trades_v2(stock, 
                                                          strdate, 
                                                          limit=1, 
                                                          timestamp=stock_tracker[stock]['laststamp']
                                                         ).results
                except HTTPError as error:
                    print(error)
                    print('Asking recursively')
                    time.sleep(1)
                    current_ticks = recursive_ask(stock, 
                                                  strdate, 
                                                  limit = limit_size, 
                                                  timestamp = stock_tracker[stock]['laststamp'])
                    
                try:
                    print(current_ticks[-1])
                except:
                    missing_stocks.append(stock)
                    print(f'exception {stock}')
                    stock_tracker[stock]['complete'] = True
                    continue
                    
                stock_tracker[stock]['laststamp'] = current_ticks[-1]['t']
                current_df = pd.DataFrame(current_ticks)
                current_df.rename(key_map, axis=1, inplace=True)
                current_df['sip_timestamp'] = pd.to_datetime(current_df['sip_timestamp'])
                current_df['participant_timestamp'] = pd.to_datetime(current_df['participant_timestamp'])
                current_df['SYMBOL'] = stock

                ticks_df = pd.concat([ticks_df, current_df], axis=0)
                
                if len(current_ticks) < limit_size:
                    stock_tracker[stock]['complete'] = True

            
    ticks_df.sort_values(by='sip_timestamp', ascending=True, inplace=True)
    ticks_df.drop_duplicates(subset=list(ticks_df.columns.drop('conditions')), inplace=True)
    ticks_df['dollar_volume'] = ticks_df['size'] * ticks_df['price']
    
    if 'dv_cumsum' in ticks_df.columns:
        cumsum_start = ticks_df['dv_cumsum'].iloc[0] 
    else:
        cumsum_start = 0
        
    ticks_df['dv_cumsum'] = ticks_df['dollar_volume'].cumsum() +  cumsum_start
    
    start_increment = ticks_df['dv_cumsum'].min()// increment * increment
    end_increment = ticks_df['dv_cumsum'].max()// increment * increment + increment
    int_val = pd.interval_range(start_increment, end_increment, freq=increment)
    ticks_df['interval_range'] = pd.cut(ticks_df['dv_cumsum'], int_val)

    last_interval_ticks = ticks_df['interval_range'].max()
#     if 'interval_range' in agg_df.columns:
#         last_interval_agg = agg_df['interval_range'].iloc[-1] + 2 * increment
#     else:
#         last_interval_agg = ticks_df['interval_range'].min() + 2 * increment
    last_interval_agg = ticks_df['interval_range'].min() + 2 * increment
    
    if last_interval_ticks > last_interval_agg: 
        ## aggregate the data, insert it into agg_df, and then drop what has been aggregated from ticks_df
        mask = ticks_df['interval_range'] < ticks_df['interval_range'].max()
        agged = reduce(lambda left,right: pd.merge(left,right, how='outer', left_index=True, right_index=True), [
                                    ticks_df.groupby(['interval_range','SYMBOL'])['sip_timestamp'].first().rename('open_timestamp'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['sip_timestamp'].last().rename('close_timestamp'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['size'].sum(),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].first().rename('open'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].min().rename('low'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].max().rename('high'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].last().rename('close'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['dollar_volume'].sum()
                                                                                                                ])
        agged = agged[agged.index.get_level_values(0) != agged.index.get_level_values(0)[-1]]
        agged = agged.reset_index()
        
        with open('../data/sp500/agg_1mm_bars_623.csv', 'a') as f:
            agged.to_csv(f, header=f.tell()==0)
        # agg_df = pd.concat([agg_df, agged], axis=0)
        ticks_df = ticks_df[~mask]
    ticks_df.to_csv('../data/sp500/ticks_df.csv')
    # agg_df.to_csv('../data/sp500/aggregated_1mm_bars.csv')

NameError: name 'pd' is not defined

In [19]:
agg_df.shape

(123220, 10)

In [None]:
pd.set_option('max_rows', 1000)
ticks_df[:100]

In [21]:
agg_df['open_timestamp'] =pd.DatetimeIndex(agg_df['open_timestamp']).tz_localize('UTC').tz_convert('US/EASTERN')

In [22]:
agg_df['close_timestamp'] =pd.DatetimeIndex(agg_df['close_timestamp']).tz_localize('UTC').tz_convert('US/EASTERN')

In [23]:
agg_df['period_timedelta'] = agg_df['close_timestamp'] - agg_df['open_timestamp']

In [None]:
agg_df[agg_df['SYMBOL'] == 'AAPL'][:1000]

In [39]:
agg_df[agg_df['interval_range'] == agg_df.iloc[-1]['interval_range']].shape

(505, 11)

In [None]:
pd.set_option('max_columns', 1000)
pd.pivot(agg_df, columns='SYMBOL', index='interval_range', values=['open_timestamp','close_timestamp'])

In [301]:
agg_df.head(1)

Unnamed: 0,interval_range,SYMBOL,open_timestamp,close_timestamp,size,open,low,high,close,dollar_volume,period_timedelta
0,"(0.0, 100000000.0]",AAPL,2018-02-27 04:00:00.017746820-05:00,2018-02-27 09:30:00.321054557-05:00,348233.0,179.05,178.26,179.17,179.13,62248730.0,0 days 05:30:00.303307737


In [304]:
ticks_df.shape

(380, 16)

In [303]:
agg_df.shape

(363, 11)

In [302]:
2.8e8

280000000.0

In [160]:
agg_df['interval_range'].iloc[-1] + 2 * increment

KeyError: 'interval_range'

In [132]:
ints = pd.interval_range(start_increment, 
                                                          end_increment+increment, 
                                                          freq=increment)

In [127]:
from functools import reduce

agged = reduce(lambda left,right: pd.merge(left,right, how='outer', left_index=True, right_index=True), [
                                    ticks_df.groupby(['interval_range','SYMBOL'])['sip_timestamp'].first().rename('open_timestamp'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['sip_timestamp'].last().rename('close_timestamp'),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['size'].sum(),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].first(),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].min(),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].max(),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['price'].last(),
                                    ticks_df.groupby(['interval_range','SYMBOL'])['dollar_volume'].sum()
                                                                                                                ])

In [128]:
agged

Unnamed: 0_level_0,Unnamed: 1_level_0,open_timestamp,close_timestamp,size,price_x,price_y,price_x,price_y,dollar_volume
interval_range,SYMBOL,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"(0.0, 100000000.0]",AAPL,2018-02-27 09:00:00.017746820,2018-03-01 09:00:03.108088398,1461,179.05,176.69,179.15,178.77,261250.57
"(0.0, 100000000.0]",GOOG,2018-02-27 09:16:53.485852356,2018-03-01 09:00:02.972611340,539,1136.0,1103.68,1145.0,1117.16,607449.34
"(0.0, 100000000.0]",MSFT,2018-02-27 09:22:02.429403583,2018-03-01 09:00:03.272244407,707,95.13,93.41,95.46,94.44,66937.51


In [133]:
last = ticks_df['interval_range'].max()

In [136]:
ints

IntervalIndex([(0.0, 100000000.0], (100000000.0, 200000000.0]],
              closed='right',
              dtype='interval[float64]')

In [138]:
last < ints[-1]

True

In [139]:
last

Interval(0.0, 100000000.0, closed='right')

In [147]:
ints[0]+100000000 

Interval(100000000.0, 200000000.0, closed='right')

In [154]:
ticks_df

Unnamed: 0,sip_timestamp,participant_timestamp,sequence_number,id,exchange,size,conditions,price,tape,trf_id,trf_timestamp,SYMBOL,dollar_volume,dv_cumsum,interval_range
0,2018-02-27 09:00:00.017746820,2018-02-27 09:00:00.017360384,1074,1,11,10,"[12, 37]",179.05,3,,,AAPL,1790.50,3.581000e+03,"(0.0, 100000000.0]"
1,2018-02-27 09:00:00.017750369,2018-02-27 09:00:00.017361152,1075,2,11,5,"[12, 37]",179.05,3,,,AAPL,895.25,4.476250e+03,"(0.0, 100000000.0]"
2,2018-02-27 09:01:26.483949291,2018-02-27 09:01:26.483924880,1102,1,12,50,"[14, 12, 37, 41]",178.60,3,,,AAPL,8930.00,1.340625e+04,"(0.0, 100000000.0]"
3,2018-02-27 09:01:50.209147098,2018-02-27 09:01:50.208771328,1103,3,11,10,"[12, 37]",179.04,3,,,AAPL,1790.40,1.519665e+04,"(0.0, 100000000.0]"
4,2018-02-27 09:06:15.080563660,2018-02-27 09:06:15.080186112,1107,4,11,400,[12],179.02,3,,,AAPL,71608.00,8.680465e+04,"(0.0, 100000000.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2018-02-28 14:30:28.128631352,2018-02-28 14:30:28.128609671,15852,349,12,100,"[14, 41]",94.95,3,,,MSFT,9495.00,4.420780e+08,"(400000000.0, 500000000.0]"
996,2018-02-28 14:30:28.130594398,2018-02-28 14:30:28.130216960,15853,220,11,55,"[14, 37, 41]",94.96,3,,,MSFT,5222.80,4.420832e+08,"(400000000.0, 500000000.0]"
997,2018-02-28 14:30:28.130601454,2018-02-28 14:30:28.130216960,15854,221,11,100,"[14, 41]",94.94,3,,,MSFT,9494.00,4.420927e+08,"(400000000.0, 500000000.0]"
998,2018-02-28 14:30:28.139430801,2018-02-28 14:30:28.139057152,15855,222,11,200,"[14, 41]",94.93,3,,,MSFT,18986.00,4.421117e+08,"(400000000.0, 500000000.0]"


In [155]:
mask

[0       True
 1       True
 2       True
 3       True
 4       True
        ...  
 995    False
 996    False
 997    False
 998    False
 999    False
 Name: interval_range, Length: 6000, dtype: bool]

In [202]:
agged.reset_index()['interval_range'].astype('Interval') < ticks_df['interval_range'].max()

TypeError: '<' not supported between instances of 'IntervalArray' and 'pandas._libs.interval.Interval'

In [203]:
ticks_df['interval_range'].max()

Interval(600000000.0, 700000000.0, closed='right')

In [217]:
agged.reset_index(inplace=True)

In [228]:
agged['interval_range'] = pd.IntervalIndex(agged['interval_range'])

In [235]:
agged['interval_range'] < agged['interval_range'].iloc[-1]

TypeError: '<' not supported between instances of 'IntervalArray' and 'pandas._libs.interval.Interval'

In [248]:
agged.loc[~agged.index[-1][0], :]

TypeError: bad operand type for unary ~: 'pandas._libs.interval.Interval'

In [256]:
agged.index.get_level_values(0) != agged.index.get_level_values(0)[-1]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False, False, False])

In [267]:
agged

Unnamed: 0_level_0,Unnamed: 1_level_0,open_timestamp,close_timestamp,size,open,low,high,close,dollar_volume
interval_range,SYMBOL,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"(1000000000.0, 1100000000.0]",AAPL,2018-02-28 09:00:00.077538685,2018-02-28 10:51:53.133542627,14552.0,178.75,178.25,178.98,178.7,2601554.0
"(1000000000.0, 1100000000.0]",GOOG,2018-02-27 15:03:08.331565905,2018-02-28 10:29:39.214420986,85471.0,1134.325,1123.24,1141.73,1124.6,97248280.0
"(1000000000.0, 1100000000.0]",MSFT,2018-02-28 09:11:41.413178569,2018-02-28 10:53:02.897739446,515.0,94.62,94.51,94.82,94.52,48719.65
"(1100000000.0, 1200000000.0]",AAPL,2018-02-28 10:55:08.003881272,2018-02-28 14:30:00.345015436,243553.0,178.57,178.01,179.5,179.27,43626820.0
"(1100000000.0, 1200000000.0]",GOOG,2018-02-28 11:15:13.611714156,2018-02-28 14:30:00.310116160,36502.0,1124.3,1118.29,1126.59,1123.37,40885450.0
"(1100000000.0, 1200000000.0]",MSFT,2018-02-28 10:53:02.898555058,2018-02-28 14:30:00.324254666,30994.0,94.51,94.2,94.87,94.8,2932497.0
"(1200000000.0, 1300000000.0]",AAPL,2018-02-28 14:30:00.380433549,2018-02-28 14:30:00.460421439,925.0,179.26,179.26,179.3478,179.29,165854.8
"(1200000000.0, 1300000000.0]",GOOG,2018-02-28 14:30:00.412165961,2018-02-28 14:30:00.483442984,38563.0,1122.07,1121.0,1122.79,1121.0,43229190.0
"(1200000000.0, 1300000000.0]",MSFT,2018-02-28 14:30:00.367498203,2018-02-28 14:30:00.476411062,478037.0,94.8,94.8,94.94,94.825,45318010.0
"(1300000000.0, 1400000000.0]",AAPL,2018-02-28 14:30:00.492554835,2018-02-28 14:30:00.647495593,373866.0,179.29,179.2,179.29,179.2,66996900.0


In [265]:
agg_df

Unnamed: 0_level_0,Unnamed: 1_level_0,open_timestamp,close_timestamp,size,open,low,high,close,dollar_volume
interval_range,SYMBOL,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"(0.0, 100000000.0]",AAPL,2018-02-27 09:00:00.017746820,2018-02-27 14:30:00.321054557,348233.0,179.05,178.26,179.17,179.13,62248730.0
"(0.0, 100000000.0]",GOOG,2018-02-27 09:16:53.485852356,2018-02-27 14:30:00.231620614,9408.0,1136.0,1135.91,1145.79,1141.94,10744570.0
"(0.0, 100000000.0]",MSFT,2018-02-27 09:22:02.429403583,2018-02-27 14:30:00.303879885,39594.0,95.13,94.93,95.84,95.8,3781872.0
"(100000000.0, 200000000.0]",AAPL,2018-02-27 14:30:00.327751363,2018-02-27 14:30:00.363324273,498.0,179.18,179.17,179.23,179.23,89237.35
"(100000000.0, 200000000.0]",GOOG,2018-02-27 14:30:00.383065713,2018-02-27 14:30:00.383065713,32789.0,1140.39,1140.39,1140.39,1140.39,37392250.0
"(100000000.0, 200000000.0]",MSFT,2018-02-27 14:30:00.324491210,2018-02-27 14:30:00.381137224,708837.0,95.66,95.66,95.84,95.7,67807910.0
"(200000000.0, 300000000.0]",AAPL,2018-02-27 14:30:00.403137653,2018-02-27 14:30:00.524262924,365527.0,179.15,179.0,179.23,179.0,65429560.0
"(200000000.0, 300000000.0]",GOOG,2018-02-27 14:30:00.383171676,2018-02-27 14:30:00.479865501,32979.0,1140.39,1139.1,1144.4,1141.3,37609450.0
"(200000000.0, 300000000.0]",MSFT,2018-02-27 14:30:00.386273300,2018-02-27 14:30:00.515652702,2803.0,95.7,95.7,95.75,95.75,268287.7
"(300000000.0, 400000000.0]",AAPL,2018-02-27 14:30:00.524303295,2018-02-27 14:30:20.773753651,519594.0,179.0,178.37,179.44,179.44,93031690.0


In [282]:
5e7

50000000.0