In [279]:
import csv
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.formula.api as smf
import os
from datetime import datetime

%matplotlib inline

# Functions

In [280]:
def import_dict(dict_loc):
    """ Loads a dictionary from a csv """
    df = pd.read_csv(dict_loc)
    df.index = df.iloc[:,0]
    df = df.drop(df.columns[0], axis = 1)
    return df.to_dict()

def convertDateToQuarter(date):
    quarter = (date.month-1)//3 + 1
    return (str(date.year) + 'Q' + str(quarter))

# Data Import

## Directories

In [281]:
# 605 and 606 csv directory
dir_605 = '../data/605/'
dir_606 = '../data/606/'

## Raw Data

In [282]:
## Import 605 data
# Find market center csvs
marketcenter_csv_list = [x for x in os.listdir(dir_605) if x[-4:] == '.csv']
# Get .csv directories
marketcenter_csv_dirs = [dir_605 + x for x in marketcenter_csv_list]
# Read .csv files
csv_df_list_605 = [pd.read_csv(file, sep = ',') for file in marketcenter_csv_dirs]
# Merge each marketcenter's data
rawdata_605 = pd.concat(csv_df_list_605)
# Clean up
del(csv_df_list_605)

In [283]:
## Import 606 Data

csv_df_list_606 = []
# Find broker folders
broker_folders = [x for x in os.listdir(dir_606) if '.' not in x]
# Merge .csv's for each broker
for broker in broker_folders:
    # Get file locations of csv's for each broker
    directory = dir_606 + broker
    broker_csv_list  = [x for x in os.listdir(directory)]
    broker_csv_dirs  = [dir_606 + broker + '/' + csv for csv in broker_csv_list]
    # Read csv's as dataframes
    csv_df_list_606_broker = [pd.read_csv(file) for file in broker_csv_dirs]
    csv_df_list_606.append(pd.concat(csv_df_list_606_broker))
    
# Merge each broker's data
rawdata_606 = pd.concat(csv_df_list_606)
# Clean up
del(csv_df_list_606)

## Dictionaries

In [284]:
symbol_dict = import_dict('../data/keys/symbols.csv')['Exchange']
mktctr_mpid_dict = import_dict('../data/keys/mpids.csv')['MPID']
ordertype_dict = {11: 'Market', 12: 'Limit'}
broker_vol_dict = import_dict('../data/keys/broker_volumes.csv')

# Data Prep

## Broker Data

### Prepare Raw Data

In [320]:
# Import Data
data_606 = rawdata_606.copy()

# Fix Routing Venue labels
data_606['RoutingVenue'] = data_606['RoutingVenue'].apply(
    lambda x: mktctr_mpid_dict.get(x.strip(), "(Unknown) " + str(x.strip())))

# Drop unknown routing venues
data_606 = data_606[data_606['RoutingVenue'].apply(lambda x: not x.startswith('(Unk'))]

# Convert date to quarter
data_606['Quarter'] = data_606['Date'].apply(
    lambda x: convertDateToQuarter(datetime.strptime(str(x), '%Y%m')))
data_606['Quarter'] = pd.PeriodIndex(data_606['Quarter'], freq='Q').values
data_606 = data_606.drop('Date', axis=1)

# Change column names
data_606 = data_606.rename(
    columns={'RoutingVenue': 'MarketCenter', 'Pct': 'MktShare'})

# Merge known marketcenters of same firm
data_606 = data_606.groupby(['Broker', 'Exchange', 'OrderType', 'Quarter', 'Rebate', 'MarketCenter']).sum().reset_index()

# Add binary var for presence of rebates
data_606['Rebate_Dummy'] = (data_606['Rebate'].apply(lambda x: (x > 0))
                            | data_606['Broker'].apply(lambda x: x == 'TD_Ameritrade')).apply(lambda x: int(x))


### Fill in missing 0's

In [322]:
data_606['Obs_id'] = data_606['Broker'] + '-' + data_606['MarketCenter'] + '-' + data_606['Exchange'] + '-' + data_606['OrderType']
data_606_new = data_606.copy()

dates_set = pd.Series(list(data_606['Quarter'].unique())).sort_values()

rebate_dummy_dict = {broker: data_606.query('Broker == "' + broker + '"').iloc[0]['Rebate_Dummy'] for broker in data_606['Broker'].unique()}

# from second element onwards
for quarter in dates_set.iloc[0:]: 
    
    print(quarter, end = ' ')
    mask_1 = (data_606['Quarter'] <  quarter) & (data_606['Quarter'] >= (quarter - 2)) # within last given period
    mask_2 = (data_606['Quarter'] == quarter)
    
#     print('Elements in previous quarters')
#     display(data_606.loc[mask_1].sort_values(by = 'Quarter'))
    
#     print('Elements in this quarter')
#     display(data_606.loc[mask_2])
            
    set_1 = set(data_606.loc[mask_1]['Obs_id'].unique())
    set_2 = set(data_606.loc[mask_2]['Obs_id'].unique())
    
#     display(set_2 - set_1)
    
    # missing id's (last period obs that are not in this period)
    id_list = [list(x.split('-')) for x in (set_1 - set_2)]    
#     display(id_list)
    
    # add missing id's
    print('(%d)' % len(id_list), end = ', ')
    for missing_id in id_list:
        #print(missing_id)
        
        data_606_new = data_606_new.append({'Broker': missing_id[0], 'MarketCenter': missing_id[1], 'Exchange': missing_id[2], 
                            'OrderType': missing_id[3], 'Quarter': quarter, 'Obs_id': '-'.join(missing_id),
                            'Rebate_Dummy': rebate_dummy_dict.get(missing_id[0], np.nan), 'MktShare': 0}, 
                           ignore_index = True)
    
#     print('Updated for this quarter')
#     display(data_606.loc[(data_606['Quarter'] == quarter)])

data_606 = data_606_new.copy()
data_606.head()

2010Q1 (0), 2010Q2 (0), 2010Q3 (0), 2012Q3 (0), 2014Q1 (0), 2014Q2 (0), 2014Q3 (0), 2014Q4 (52), 2015Q1 (120), 2015Q2 (140), 2015Q3 (124), 2015Q4 (60), 2016Q1 (116), 2016Q2 (104), 2016Q3 (104), 2016Q4 (224), 2017Q1 (528), 2017Q2 (376), 2017Q3 (24), 

Unnamed: 0,Broker,Exchange,OrderType,Quarter,Rebate,MarketCenter,MktShare,Rebate_Dummy,Obs_id
0,AXA,NASDAQ,Limit,2014Q1,0.0,ATDF,0.1601,0,AXA-ATDF-NASDAQ-Limit
1,AXA,NASDAQ,Limit,2014Q1,0.0,CDRG,0.1997,0,AXA-CDRG-NASDAQ-Limit
2,AXA,NASDAQ,Limit,2014Q1,0.0,G1ES,0.0293,0,AXA-G1ES-NASDAQ-Limit
3,AXA,NASDAQ,Limit,2014Q1,0.0,KCG,0.4148,0,AXA-KCG-NASDAQ-Limit
4,AXA,NASDAQ,Limit,2014Q2,0.0,ATDF,0.2115,0,AXA-ATDF-NASDAQ-Limit


## Market Center Data

In [323]:
# Import data
data_605 = rawdata_605.copy()

# Quarter column
data_605['Quarter'] = data_605['idate'].apply(lambda x: convertDateToQuarter(datetime.strptime(str(x), '%Y%m')))
data_605['Quarter'] = pd.PeriodIndex(data_605['Quarter'], freq='Q').values
data_605 = data_605.drop('idate', axis = 1)

# Temporary Variables for Aggregation
data_605['PrImp_TotalT']     = data_605['PrImpShares']    * data_605['PrImp_AvgT']
data_605['PrImp_TotalAmt']   = data_605['PrImpShares']    * data_605['PrImp_AvgAmt']
data_605['ATQ_TotalT']       = data_605['ATQShares']      * data_605['ATQ_AvgT']
data_605['OTQ_TotalT']       = data_605['OTQShares']      * data_605['OTQ_AvgT']
data_605['AvgRealSpread_T']  = data_605['AvgRealSpread']  * data_605['ExecShares']
data_605['AvgEffecSpread_T'] = data_605['AvgEffecSpread'] * data_605['ExecShares']

data_605 = data_605.groupby(['MarketCenter', 'Quarter', 'Exchange', 'OrderCode']) \
        .sum().reset_index()

# Reconstruct original variables
data_605['PrImp_AvgT']     = data_605['PrImp_TotalT']     / data_605['PrImpShares']
data_605['PrImp_AvgAmt']   = data_605['PrImp_TotalAmt']   / data_605['PrImpShares'] 
data_605['ATQ_AvgT']       = data_605['ATQ_TotalT']       / data_605['ATQShares']
data_605['OTQ_AvgT']       = data_605['OTQ_TotalT']       / data_605['OTQShares']
data_605['AvgRealSpread']  = data_605['AvgRealSpread_T']  / data_605['ExecShares']
data_605['AvgEffecSpread'] = data_605['AvgEffecSpread_T'] / data_605['ExecShares'] 
data_605['PrImp_Pct']      = data_605['PrImpShares']      / data_605['ExecShares']
data_605['ATQ_Pct']        = data_605['ATQShares']        / data_605['ExecShares']
data_605['OTQ_Pct']        = data_605['OTQShares']        / data_605['ExecShares']

## New Vars

# Absolute
data_605['OrderType']    = data_605['OrderCode'].apply(lambda x: ordertype_dict.get(x, 'Other'))
data_605['PrImp_ExpAmt'] = data_605['PrImp_AvgAmt'] * data_605['PrImp_Pct']
data_605['All_AvgT']     = (data_605['PrImp_TotalT'] + data_605['ATQ_TotalT'] + data_605['OTQ_TotalT']) \
                            / data_605['ExecShares']
data_605 = data_605.rename(columns = {'idate': 'Date'})

# Relative values
data_605_grouped = data_605.groupby(['Exchange', 'OrderType', 'Quarter'])

data_605['MktCtrAvg_PrImp_Pct']  = data_605_grouped['PrImp_Pct'].transform("mean")
data_605['Rel_PrImp_Pct']        = data_605['PrImp_Pct'] - data_605['MktCtrAvg_PrImp_Pct']
data_605['MktCtrAvg_PrImp_AvgT'] = data_605_grouped['PrImp_AvgT'].transform("mean")
data_605['Rel_PrImp_AvgT']       = data_605['PrImp_AvgT'] - data_605['MktCtrAvg_PrImp_AvgT']    
data_605['MktCtrAvg_PrImp_ExpAmt'] = data_605_grouped['PrImp_ExpAmt'].transform("mean")
data_605['Rel_PrImp_ExpAmt']       = data_605['PrImp_ExpAmt'] - data_605['MktCtrAvg_PrImp_ExpAmt']   
data_605['MktCtrAvg_All_AvgT'] = data_605_grouped['All_AvgT'].transform("mean")
data_605['Rel_All_AvgT']       = data_605['All_AvgT'] - data_605['MktCtrAvg_All_AvgT']   

data_605.head()

Unnamed: 0,MarketCenter,Quarter,Exchange,OrderCode,CoveredOrders,CoveredShares,CancelledShares,MktCtrExecShares,AwayExecShares,ExecShares_0_9,...,PrImp_ExpAmt,All_AvgT,MktCtrAvg_PrImp_Pct,Rel_PrImp_Pct,MktCtrAvg_PrImp_AvgT,Rel_PrImp_AvgT,MktCtrAvg_PrImp_ExpAmt,Rel_PrImp_ExpAmt,MktCtrAvg_All_AvgT,Rel_All_AvgT
0,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.019544,0.671599,0.745672,-0.306562,0.438198,0.636061,0.010542,0.009003,0.474635,0.196964
1,ARCA,2015Q3,NASDAQ,12,11963921,3132598691,428908786,1832975495,73922935,1906190605,...,0.001719,0.156047,0.417352,-0.316506,0.395793,-0.372792,0.00279,-0.001072,21.679366,-21.523319
2,ARCA,2015Q3,NASDAQ,13,2049676,429176278,245833327,139132834,12298742,151102939,...,,0.0,0.0,0.0,,,,,0.0,0.0
3,ARCA,2015Q3,NASDAQ,14,30626255,7740311867,6336374765,1310626905,28550727,685596002,...,,0.0,0.0,0.0,,,,,0.0,0.0
4,ARCA,2015Q3,NASDAQ,15,87537916,54307617429,53536207786,551030623,5705856,162391561,...,,0.0,0.0,0.0,,,,,0.0,0.0


# Merge Datasets

In [324]:
data_merged = data_605.merge(data_606)

data_merged = data_merged.query('OrderCode < 13')
data_merged = data_merged.drop('Obs_id', axis = 1)
data_merged.set_index(['Quarter'])
data_merged['Broker_Size'] = data_merged['Broker'].apply(lambda x: broker_vol_dict['Size'].get(x))


print('Total Observations: ' + str(len(data_merged)))
print('Brokers: ' + str(len(set(list(data_merged['Broker'])))))
print('Market Centers: ' + str(len(set(list(data_merged['MarketCenter'])))))

data_merged.head()

Total Observations: 4690
Brokers: 21
Market Centers: 10


Unnamed: 0,MarketCenter,Quarter,Exchange,OrderCode,CoveredOrders,CoveredShares,CancelledShares,MktCtrExecShares,AwayExecShares,ExecShares_0_9,...,Rel_PrImp_AvgT,MktCtrAvg_PrImp_ExpAmt,Rel_PrImp_ExpAmt,MktCtrAvg_All_AvgT,Rel_All_AvgT,Broker,Rebate,MktShare,Rebate_Dummy,Broker_Size
0,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Barclays Capital,1.0,0.0,1,23958270000.0
1,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Cowen Execution,0.0,0.0,0,50187100.0
2,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Credit Suisse,1.0,0.0,1,32667210000.0
3,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Deutsche,1.0,0.0151,1,20450090000.0
4,ARCA,2015Q3,NASDAQ,12,11963921,3132598691,428908786,1832975495,73922935,1906190605,...,-0.372792,0.00279,-0.001072,21.679366,-21.523319,Barclays Capital,1.0,0.1375,1,23958270000.0


# Data Export

## Panel

In [273]:
# test
data_merged[data_merged['Quarter'] == pd.Period('2017Q1')].query('Broker == "Deutsche" & OrderType == "Market" & Exchange == "NASDAQ"')

Unnamed: 0,MarketCenter,Quarter,Exchange,OrderCode,CoveredOrders,CoveredShares,CancelledShares,MktCtrExecShares,AwayExecShares,ExecShares_0_9,...,Rel_PrImp_AvgT,MktCtrAvg_PrImp_ExpAmt,Rel_PrImp_ExpAmt,MktCtrAvg_All_AvgT,Rel_All_AvgT,Broker,Rebate,MktShare,Rebate_Dummy,Broker_Size
363,ARCA,2017Q1,NASDAQ,11,104475,61806548,19612,36313895,25472391,61785886,...,-0.100621,0.005574,-0.003742,0.198371,-0.19543,Deutsche,1.0,0.0002,1,20450090000.0
827,BNYC,2017Q1,NASDAQ,11,345661,230246176,195583,230031653,0,229524331,...,0.343289,0.005574,-0.000604,0.198371,0.365521,Deutsche,,0.0,1,20450090000.0
2924,CDRG,2017Q1,NASDAQ,11,4338273,3869897008,7387327,3861393673,673251,3860791263,...,-0.057035,0.005574,0.002848,0.198371,-0.106257,Deutsche,,0.0,1,20450090000.0
4515,CITI,2017Q1,NASDAQ,11,6,11095,9802,0,0,0,...,,0.005574,,0.198371,,Deutsche,1.0,0.0,1,20450090000.0
7860,SGMA,2017Q1,NASDAQ,11,1138174,832502596,3819344,828673009,0,827855088,...,-0.036854,0.005574,0.002131,0.198371,-0.043589,Deutsche,,0.0,1,20450090000.0
11178,UBSS,2017Q1,NASDAQ,11,940822,698972967,845336,469982092,223946827,693152926,...,0.155265,0.005574,0.003358,0.198371,0.474875,Deutsche,1.0,0.0016,1,20450090000.0
12019,VRTU,2017Q1,NASDAQ,11,11221,5500313,5003571,496742,0,496742,...,-0.106754,0.005574,-0.004581,0.198371,-0.197968,Deutsche,1.0,0.0021,1,20450090000.0


## First Difference

In [274]:
def tempfunc(data, broker, marketcenter, exchange, ordertype):
    
    data = data.drop('Rebate_Dummy', axis = 1)
    data['Broker'] = broker
    data['MarketCenter'] = marketcenter
    data['Exchange'] = exchange
    data['OrderType'] = ordertype
    data['Rebate_Dummy'] = data_606.query(query).iloc[0]['Rebate_Dummy']
    
    return data

def getDifferencedData(data_merged, query_command):
    
    if query_command != "":
        data_merged = data_merged.query(query_command)

    data_merged_diff = data_merged.groupby(['Broker', 'MarketCenter', 'Exchange', 'OrderType']).diff(1).dropna()
    
    return data_merged_diff

data_merged_diff_list = []
data_merged_diff_lag_list = []

brokers = list(set(list(data_merged['Broker'])))
marketcenters = list(set(list(data_merged['MarketCenter'])))
exchanges = list(set(list(data_merged['Exchange'])))
ordertypes = ['Market','Limit']

for broker in brokers:
    for marketcenter in marketcenters:
        for exchange in exchanges:
            for ordertype in ordertypes:
                query = "Broker == '" + broker + "' & OrderType == '" + ordertype \
                + "' & Exchange == '" + exchange + "' & MarketCenter == '" + marketcenter + "'"
                data_merged_diff_temp = getDifferencedData(data_merged, query)
                #data_merged_diff_lag_temp = getDifferencedData(data_merged_lag, query)
                
                if len(data_merged_diff_temp) > 0:
                    data_merged_diff_temp = tempfunc(data_merged_diff_temp, broker, marketcenter, 
                                                     exchange, ordertype)
                    #data_merged_diff_lag_temp = tempfunc(data_merged_diff_lag_temp, broker, marketcenter, 
                    #                                 exchange, ordertype)
                    
                    data_merged_diff_list.append(data_merged_diff_temp)
                    #data_merged_diff_lag_list.append(data_merged_diff_lag_temp)

## With Binaries

In [275]:
data_clean = data_merged.dropna().copy()

# {Market Center, Broker, Order Type, Exchange} dummies
temp_dict = {}

for mktctr in list(set(list(data_clean['MarketCenter'])))[:-1]:
    
    print('Processing: ' + mktctr)
    
    column_label = (mktctr + '_ind')
    data_clean[column_label] = data_clean['MarketCenter'].apply(lambda x: int(x == mktctr))
    
    # query only brokers that send to this market center
    query_txt = 'MarketCenter == "' + mktctr + '"'
    
    # all brokers except the last to prevent multicollinearity
    for brk in list(set(list(data_clean.query(query_txt)['Broker'])))[:-1]: 
        
        # Add column
        column_label = mktctr + '_' + brk.replace(' ', '_') + '_ind'
        data_clean[column_label] = data_clean[mktctr + '_ind'] & data_clean['Broker'].apply(lambda x: int(x == brk))
        
        query_txt2 = 'MarketCenter == "' + mktctr + '" & Broker == "' + brk + '"'
        
        for exc in list(set(list(data_clean.query(query_txt2)['Exchange'])))[:-1]: 
            
            # Add column
            column_label = mktctr + '_' + brk.replace(' ', '_') + '_' + exc + '_ind'
            data_clean[column_label] = data_clean[mktctr + '_ind'] & data_clean['Broker'].apply(lambda x: int(x == brk)) & \
                data_clean['Exchange'].apply(lambda x: int(x == exc))
                
            query_txt3 = 'MarketCenter == "' + mktctr + '" & Broker == "' + brk + '" & Exchange == "' + exc + '"'    
                
            for ot in list(set(list(data_clean.query(query_txt3)['OrderType'])))[:-1]:     
                
                # Add column
                column_label = mktctr + '_' + brk.replace(' ', '_') + '_' + exc + '_' + ot + '_ind'
                data_clean[column_label] = data_clean[mktctr + '_ind'] & data_clean['Broker'].apply(lambda x: int(x == brk)) & \
                    data_clean['Exchange'].apply(lambda x: int(x == exc)) & data_clean['OrderType'].apply(lambda x: int(x == ot))
        
        
print('Complete')

Processing: CDRG
Processing: SGMA
Processing: G1ES
Processing: UBSS
Processing: ARCA
Processing: BNYC
Processing: CITI
Complete


## Fixed Effects

In [276]:
data_merged_demeaned = data_merged.dropna() - data_merged.dropna().groupby(
    ['Broker', 'MarketCenter', 'Exchange', 'OrderType']).transform("mean")

data_merged_demeaned[['Broker', 'Exchange', 'MarketCenter', 'OrderType', 'Quarter']
                     ] = data_merged.dropna()[['Broker', 'Exchange', 'MarketCenter', 'OrderType', 'Quarter']]

data_merged_demeaned['Rebate_Dummy'] = data_merged.dropna()['Rebate_Dummy']

## To CSV

In [326]:
# Panel
data_merged.to_csv('../data/processed/regression_data_levels.csv', index=False)
# Differenced
pd.concat(data_merged_diff_list).to_csv(
    '../data/processed/regression_data_fdiffs.csv', index=False)
# With Binary Vars
data_clean.to_csv(
    '../data/processed/regression_data_levels_binaries.csv', index=False)
# Demeaned
data_merged_demeaned.to_csv(
    '../data/processed/regression_data_levels_demeaned.csv', index=False)

# 605 and 606
data_605.to_csv('../data/processed/605_processed.csv')
data_606.to_csv('../data/processed/606_processed.csv')

# Statistics

In [159]:
data_605_q = data_605[data_605.apply(lambda x: x.Quarter == pd.Period('2015Q3'), axis = 1)]
print(data_605.columns)
data_605_q.query('Exchange == "NASDAQ" & OrderCode == 11')[['MarketCenter', 'MktCtrExecShares', 'PrImp_Pct', 'PrImp_AvgAmt', 'All_AvgT']]

Index(['MarketCenter', 'Quarter', 'Exchange', 'OrderCode', 'CoveredOrders',
       'CoveredShares', 'CancelledShares', 'MktCtrExecShares',
       'AwayExecShares', 'ExecShares_0_9', 'ExecShares_10_29',
       'ExecShares_30_59', 'ExecShares_60_299', 'ExecShares_5_30',
       'AvgRealSpread', 'AvgEffecSpread', 'PrImpShares', 'PrImp_AvgAmt',
       'PrImp_AvgT', 'ATQShares', 'ATQ_AvgT', 'OTQShares', 'OTQ_AvgAmt',
       'OTQ_AvgT', 'ExecShares', 'PrImp_TotalT', 'PrImp_TotalAmt',
       'ATQ_TotalT', 'OTQ_TotalT', 'AvgRealSpread_T', 'AvgEffecSpread_T',
       'PrImp_Pct', 'ATQ_Pct', 'OTQ_Pct', 'OrderType', 'PrImp_ExpAmt',
       'All_AvgT', 'MktCtrAvg_PrImp_Pct', 'Rel_PrImp_Pct',
       'MktCtrAvg_PrImp_AvgT', 'Rel_PrImp_AvgT', 'MktCtrAvg_PrImp_ExpAmt',
       'Rel_PrImp_ExpAmt', 'MktCtrAvg_All_AvgT', 'Rel_All_AvgT'],
      dtype='object')


Unnamed: 0,MarketCenter,MktCtrExecShares,PrImp_Pct,PrImp_AvgAmt,All_AvgT
0,ARCA,40788074,0.43911,0.044509,0.671599
177,BNYC,187389877,0.624134,0.007284,1.276349
357,CDRG,1684972233,0.812235,0.010775,0.161658
942,CITI,0,,,
1661,G1ES,848647217,0.877246,0.011175,0.217041
2090,SGMA,654205209,0.846468,0.011098,0.212152
2675,UBSS,894802777,0.874837,0.012816,0.309009


# Scratch

In [263]:
temp = data_merged
temp = temp.drop(['Rebate', 'Broker_Size'], axis = 1)
temp[temp.isnull().any(axis=1)].iloc[1]

MarketCenter                          CITI
Quarter                             2014Q4
Exchange                            NASDAQ
OrderCode                               11
CoveredOrders                          129
CoveredShares                       295715
CancelledShares                     295715
MktCtrExecShares                         0
AwayExecShares                           0
ExecShares_0_9                           0
ExecShares_10_29                         0
ExecShares_30_59                         0
ExecShares_60_299                        0
ExecShares_5_30                          0
AvgRealSpread                          NaN
AvgEffecSpread                         NaN
PrImpShares                              0
PrImp_AvgAmt                           NaN
PrImp_AvgT                             NaN
ATQShares                                0
ATQ_AvgT                               NaN
OTQShares                                0
OTQ_AvgAmt                               0
OTQ_AvgT   

In [271]:
data_merged.query('MktShare == 0')

Unnamed: 0,MarketCenter,Quarter,Exchange,OrderCode,CoveredOrders,CoveredShares,CancelledShares,MktCtrExecShares,AwayExecShares,ExecShares_0_9,...,Rel_PrImp_AvgT,MktCtrAvg_PrImp_ExpAmt,Rel_PrImp_ExpAmt,MktCtrAvg_All_AvgT,Rel_All_AvgT,Broker,Rebate,MktShare,Rebate_Dummy,Broker_Size
0,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Barclays Capital,1.0,0.0,1,2.395827e+10
1,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Cowen Execution,0.0,0.0,0,5.018710e+07
2,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Credit Suisse,1.0,0.0,1,3.266721e+10
5,ARCA,2015Q3,NASDAQ,12,11963921,3132598691,428908786,1832975495,73922935,1906190605,...,-0.372792,0.002790,-0.001072,21.679366,-21.523319,Cowen Execution,0.0,0.0,0,5.018710e+07
6,ARCA,2015Q3,NASDAQ,12,11963921,3132598691,428908786,1832975495,73922935,1906190605,...,-0.372792,0.002790,-0.001072,21.679366,-21.523319,Credit Suisse,1.0,0.0,1,3.266721e+10
20,ARCA,2015Q3,NYSE,11,332290,93147866,8771479,52918769,31457618,84371992,...,-0.279227,0.006620,0.004540,0.372442,-0.369889,Barclays Capital,1.0,0.0,1,2.395827e+10
21,ARCA,2015Q3,NYSE,11,332290,93147866,8771479,52918769,31457618,84371992,...,-0.279227,0.006620,0.004540,0.372442,-0.369889,Credit Suisse,1.0,0.0,1,3.266721e+10
23,ARCA,2015Q3,NYSE,11,332290,93147866,8771479,52918769,31457618,84371992,...,-0.279227,0.006620,0.004540,0.372442,-0.369889,Cowen Execution,,0.0,0,5.018710e+07
27,ARCA,2015Q3,NYSE,12,13739199,3370000622,163251974,2251886893,129130353,2380118966,...,-0.371367,0.001832,-0.000623,15.795013,-15.729887,Cowen Execution,,0.0,0,5.018710e+07
40,ARCA,2015Q3,Other,11,176592,118206619,15788526,73489680,28923610,101728072,...,2.503556,0.008339,0.013281,0.583612,1.134756,Barclays Capital,1.0,0.0,1,2.395827e+10
