<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#READ-ME" data-toc-modified-id="READ-ME-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>READ ME</a></span></li><li><span><a href="#Define-paths" data-toc-modified-id="Define-paths-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Define paths</a></span></li><li><span><a href="#Load-contract-expiration-table" data-toc-modified-id="Load-contract-expiration-table-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Load contract expiration table</a></span><ul class="toc-item"><li><span><a href="#Change-format" data-toc-modified-id="Change-format-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Change format</a></span></li></ul></li><li><span><a href="#Build-trades-5S-timebars" data-toc-modified-id="Build-trades-5S-timebars-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Build trades 5S timebars</a></span></li></ul></div>

# READ ME

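This notebook builds 5-second (`5S`) time bars from raw trade (transaction) data. Each contract's trades are cleaned, restricted to that contract's data window, and aggregated into bars that are saved as one pickle file per contract.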

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
import numpy as np

In [3]:
from cleaning_functions import *

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


# Define paths

In [4]:
# Directory containing the raw trade CSV files.
file_path = './../raw_data/oil_trades/'

# Full path of every '.csv' entry in that directory, in os.listdir order.
csv_names = (name for name in os.listdir(file_path) if name.endswith('.csv'))
trade_files = [file_path + name for name in csv_names]

# Load contract expiration table

In [5]:
# Read the expiration table (first CSV column becomes the index) and parse
# both date columns into datetimes.
expiration_dates = pd.read_csv('./../expiration_dates.csv', index_col=0).assign(
    expiration_date=lambda table: pd.to_datetime(table['expiration_date']),
    expiration=lambda table: pd.to_datetime(table['expiration']),
)

## Change format

In [6]:
# Keep a single symbol column: drop 'Symbol_start' and expose 'Symbol_end'
# under the name 'Symbol'.
contract_expiration = (
    expiration_dates
    .drop(['Symbol_start'], axis=1)
    .rename(columns={'Symbol_end': 'Symbol'})
)

In [7]:
# A row's data window opens 28 days and 23 hours before the expiration found
# in the previous row (shift(1)). Rows where that previous expiration is
# missing -- at least the first row -- fall back to 2008-12-01.
#
# The filled Series is assigned back to the column: calling
# fillna(inplace=True) on a column selection is chained in-place assignment,
# which does not update the frame under pandas Copy-on-Write. pandas'
# Timedelta/Timestamp are used so the cell does not depend on `datetime`
# leaking in through a star import.
contract_expiration['data_start'] = (
    contract_expiration['expiration']
    .shift(1)
    .sub(pd.Timedelta(days=28, hours=23))
    .fillna(pd.Timestamp('2008-12-01'))
)

In [8]:
# Index the table by contract symbol and keep only the two window columns.
contract_expiration = (
    contract_expiration
    .set_index('Symbol')
    .loc[:, ['data_start', 'expiration']]
)

In [9]:
# Convert to a nested dict keyed by the index: {symbol: {column: value}}.
contract_expiration = contract_expiration.to_dict(orient='index')

In [10]:
# Display the symbol -> {'data_start', 'expiration'} mapping for inspection.
contract_expiration

{'COF10': {'data_start': Timestamp('2009-10-16 00:00:00'),
  'expiration': Timestamp('2009-12-16 23:00:00')},
 'COF11': {'data_start': Timestamp('2010-10-18 00:00:00'),
  'expiration': Timestamp('2010-12-16 23:00:00')},
 'COF12': {'data_start': Timestamp('2011-10-18 00:00:00'),
  'expiration': Timestamp('2011-12-15 23:00:00')},
 'COF13': {'data_start': Timestamp('2012-10-18 00:00:00'),
  'expiration': Timestamp('2012-12-14 23:00:00')},
 'COF14': {'data_start': Timestamp('2013-10-17 00:00:00'),
  'expiration': Timestamp('2013-12-16 23:00:00')},
 'COF15': {'data_start': Timestamp('2014-10-16 00:00:00'),
  'expiration': Timestamp('2014-12-16 23:00:00')},
 'COF16': {'data_start': Timestamp('2015-10-16 00:00:00'),
  'expiration': Timestamp('2015-12-16 23:00:00')},
 'COF17': {'data_start': Timestamp('2016-10-03 00:00:00'),
  'expiration': Timestamp('2016-11-30 23:00:00')},
 'COF18': {'data_start': Timestamp('2017-10-03 00:00:00'),
  'expiration': Timestamp('2017-11-30 23:00:00')},
 'COF19': 

# Build trades 5S timebars

In [11]:
def wrapper_function(file_path, resolution='5S'):
    """Build time bars for one trade file and save them as a pickle.

    The contract symbol is taken from the last five characters of the file
    name (extension removed) and used to look up the ``data_start`` /
    ``expiration`` window in the notebook-level ``contract_expiration``
    dict. Rows whose index lies outside that window (bounds inclusive) are
    dropped before the bars are built.

    Parameters
    ----------
    file_path : str
        Path to a trades CSV whose file name ends with the contract symbol,
        e.g. ``.../COF10.csv``.
    resolution : str, optional
        Bar resolution passed to ``trades_to_bars`` and used in the output
        directory name. Defaults to ``'5S'``.

    Returns
    -------
    None
        The bars are written to
        ``../clean_data/time_bars_<resolution>/<symbol>.pickle``.

    Raises
    ------
    KeyError
        If the symbol has no entry in ``contract_expiration``.
    """
    # Strip only the file name's real extension; str.replace('.csv', '')
    # would also remove a '.csv' occurring elsewhere in the path, and slicing
    # the full path could pull directory characters into the symbol.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    symbol = stem[-5:]

    print('Processing {}...'.format(symbol))

    # Fail with a readable message before the expensive read and clean steps.
    if symbol not in contract_expiration:
        raise KeyError(
            'No expiration entry for symbol {!r} (from {!r})'.format(symbol, file_path)
        )
    window = contract_expiration[symbol]

    df = pd.read_csv(file_path)
    clean_df = clean_trades(df)

    # NOTE(review): assumes clean_trades returns a frame with a datetime
    # index comparable to the Timestamps in contract_expiration -- confirm.
    in_window = (clean_df.index >= window['data_start']) & (clean_df.index <= window['expiration'])
    clean_df = clean_df.loc[in_window, :]

    bars = trades_to_bars(clean_df, resolution)

    # Create the target directory on first use; exist_ok makes this safe when
    # several parallel workers reach this point at the same time.
    out_dir = '../clean_data/time_bars_{}'.format(resolution)
    os.makedirs(out_dir, exist_ok=True)
    bars.to_pickle('{}/{}.pickle'.format(out_dir, symbol))

    print('Processing {}...DONE'.format(symbol))

In [12]:
from joblib import Parallel, delayed
import multiprocessing

# One delayed wrapper_function call per trade file, executed by joblib with
# n_jobs=10; `results` collects the return value of every call.
bar_jobs = (delayed(wrapper_function)(trade_file) for trade_file in trade_files)
results = Parallel(n_jobs=10)(bar_jobs)