In [446]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.sandwich_covariance import cov_hac
import matplotlib.pyplot as plt
from scipy.stats import chi2, norm, t

%matplotlib inline

# Functions

In [447]:
def hausman_test(result_a, result_b, r):
    ''' Null  => both consistent, b is efficient
    Alternate => b is inconsistent
    '''
    cov_a = cov_hac(result_a)[0:r,0:r]
    cov_b = cov_hac(result_b)[0:r,0:r]
    
    beta_a = result_a.params[0:r]
    beta_b = result_b.params[0:r]
    
    wald_stat = np.abs(np.matrix(beta_b - beta_a) * np.linalg.pinv(np.matrix(cov_a - cov_b)) * np.matrix(beta_b - beta_a).T)
    pval = 1 - chi2.cdf(wald_stat, r-1)
    
    return wald_stat, pval

def get_sig_stars(p_val, p_value_labels = {0.05: '*', 0.01: '**', 0.001: '***'}, as_latex = False):

    below_ind = np.where([p_val < x for x in p_value_labels.keys()])[0]
    below_vals = [list(p_value_labels.keys())[i] for i in below_ind]
    
    if not below_vals:
        stars = ''
    else:
        min_p_val = np.min(below_vals)
        stars = p_value_labels[min_p_val]
        
    if as_latex:
        if not stars:
            return ''
        else:
            return '$^{' + stars + '}$'
    else:
        return stars

# Params

In [451]:
workbook_file_loc = '../analysis/results/parametric_regressions_temp.xlsx'
sample_query = 'OrderType == "Market"'# & Broker != "COR Clearing"'

# Load Data

In [452]:
# demeaned for fixed effects
data_df = pd.read_csv('../data/processed/regression_data_levels_demeaned.csv').query(sample_query)
data_df['PrImp_Pct_Rebate_Dummy'] = data_df['PrImp_Pct'] * data_df['Rebate_Dummy']
data_df['PrImp_AvgAmt_Rebate_Dummy'] = data_df['PrImp_AvgAmt'] * data_df['Rebate_Dummy']
data_df['PrImp_ExpAmt_Rebate_Dummy'] = data_df['PrImp_ExpAmt'] * data_df['Rebate_Dummy']
data_df['PrImp_AvgT_Rebate_Dummy'] = data_df['PrImp_AvgT'] * data_df['Rebate_Dummy']
data_df['All_AvgT_Rebate_Dummy'] = data_df['All_AvgT'] * data_df['Rebate_Dummy']
data_df_dmd = data_df.copy()

# levels for random effects
data_df = pd.read_csv('../data/processed/regression_data_levels.csv').query(sample_query)
data_df['PrImp_Pct_Rebate_Dummy'] = data_df['PrImp_Pct'] * data_df['Rebate_Dummy']
data_df['PrImp_AvgAmt_Rebate_Dummy'] = data_df['PrImp_AvgAmt'] * data_df['Rebate_Dummy']
data_df['PrImp_ExpAmt_Rebate_Dummy'] = data_df['PrImp_ExpAmt'] * data_df['Rebate_Dummy']
data_df['PrImp_AvgT_Rebate_Dummy'] = data_df['PrImp_AvgT'] * data_df['Rebate_Dummy']
data_df['All_AvgT_Rebate_Dummy'] = data_df['All_AvgT'] * data_df['Rebate_Dummy']

# Info
print('Market Centers: ', end = '') 
print(data_df['MarketCenter'].unique())

print('Brokers: ', end = '') 
print(np.sort(data_df['Broker'].unique()))

print('Samples: %d' % data_df.shape[0])
print('Sparsity: %0.2f%%' % (100*data_df.query('MktShare == 0').shape[0] / data_df.shape[0]))

# Add dummy vars to levels

data_df['obs_id'] = data_df['Broker'].apply(lambda x: x.replace(" ", "")) + data_df['MarketCenter'] + data_df['Exchange'] 
data_df = pd.get_dummies(data_df, columns = ['obs_id'], prefix = 'dummy')

dummy_coeff_cols = [x for x in list(data_df.columns) if x[:5] == 'dummy']
dummy_coeff_formula = ' + '.join(dummy_coeff_cols)

data_df.head()

Market Centers: ['BNYC' 'CDRG' 'FBCO' 'G1ES' 'SGMA' 'UBSS' 'VRTU' 'WOLV']
Brokers: ['AXA' 'Aurora Capital' 'BMO Capital' 'BTIG' 'Bank of the West'
 'Barclays Capital' 'Benjamin Jerold' 'Boenning Scattergood'
 'Bull Market Securities' 'Cambria Capital' 'Credit Suisse'
 'Dakota Securities' 'Deutsche' 'E1 Asset Mgmt' 'Edward Jones'
 'Elish Elish' 'Euro Pacific Capital' 'Evercore Group' 'Fifth Third'
 'Florida Atlantic' 'Hollencrest Securities' 'INTL FCStone'
 'Inlet Securities' 'Insigneo Securities' 'JP Morgan' 'LPL'
 'Lightspeed Trading' 'TD Ameritrade' 'Two Sigma' 'Wells Fargo']
Samples: 2982
Sparsity: 38.70%


Unnamed: 0,MarketCenter,Quarter,Exchange,OrderCode,CoveredOrders,CoveredShares,CancelledShares,MktCtrExecShares,AwayExecShares,ExecShares_0_9,...,dummy_WellsFargoCDRGOther,dummy_WellsFargoFBCONASDAQ,dummy_WellsFargoFBCONYSE,dummy_WellsFargoFBCOOther,dummy_WellsFargoUBSSNASDAQ,dummy_WellsFargoUBSSNYSE,dummy_WellsFargoUBSSOther,dummy_WellsFargoVRTUNASDAQ,dummy_WellsFargoVRTUNYSE,dummy_WellsFargoVRTUOther
0,BNYC,2015Q1,NASDAQ,11,213581,43704397,70249,43628268,0,43526470,...,0,0,0,0,0,0,0,0,0,0
2,BNYC,2015Q1,NYSE,11,396811,81015802,108105,80902077,0,80833088,...,0,0,0,0,0,0,0,0,0,0
4,BNYC,2015Q1,Other,11,257920,55831472,108233,55715793,0,55640416,...,0,0,0,0,0,0,0,0,0,0
6,BNYC,2015Q2,NASDAQ,11,182798,37375992,112287,37259229,0,37191775,...,0,0,0,0,0,0,0,0,0,0
8,BNYC,2015Q2,NYSE,11,319748,65791725,70489,65713305,0,65635191,...,0,0,0,0,0,0,0,0,0,0


# Regressions 

In [453]:
# Fits
# fit1_formula = 'MktShare ~ PrImp_Pct + PrImp_AvgAmt + PrImp_AvgT'
# fit2_formula = 'MktShare ~ PrImp_ExpAmt + PrImp_AvgT'
# fit3_formula = 'MktShare ~ PrImp_Pct + PrImp_AvgAmt + All_AvgT'
# fit4_formula = 'MktShare ~ PrImp_ExpAmt + All_AvgT'

fit1_formula = 'MktShare ~ PrImp_Pct + PrImp_Pct_Rebate_Dummy + PrImp_AvgAmt + PrImp_AvgAmt_Rebate_Dummy + PrImp_AvgT + PrImp_AvgT_Rebate_Dummy'
fit2_formula = 'MktShare ~ PrImp_ExpAmt + PrImp_ExpAmt_Rebate_Dummy + PrImp_AvgT + PrImp_AvgT_Rebate_Dummy'
fit3_formula = 'MktShare ~ PrImp_Pct + PrImp_Pct_Rebate_Dummy + PrImp_AvgAmt + PrImp_AvgAmt_Rebate_Dummy + All_AvgT + All_AvgT_Rebate_Dummy'
fit4_formula = 'MktShare ~ PrImp_ExpAmt + PrImp_ExpAmt_Rebate_Dummy + All_AvgT + All_AvgT_Rebate_Dummy'


formulaCols = lambda x: x.replace(' ', '').replace('~', '+').split('+') 
fit_formulae = [fit1_formula, fit2_formula, fit3_formula, fit4_formula]
fit_formulae = [formulaCols(x) for x in fit_formulae]

# Store results
fit_results_re = [None] * len(fit_formulae)
fit_results_fe = [None] * len(fit_formulae)

## Results

In [454]:
i = 1

## Get results
for i in range(0, len(fit_formulae)):
    
    print('Regressing with fit %d...' % (i + 1) )

    data = data_df_dmd[fit_formulae[i]]
    
    X = data.iloc[:,1:]
    Y = data.iloc[:,0]

    fit_results_fe[i] = sm.OLS(Y, X, missing = 'drop').fit().get_robustcov_results(cov_type='HAC', maxlags = 1)
    
    # include dummies and drop one 
    data = data_df[fit_formulae[i] + dummy_coeff_cols[:-1]]
    
    X = data.iloc[:,1:]
    Y = data.iloc[:,0]

    fit_results_re[i] = sm.OLS(Y, sm.add_constant(X), missing = 'drop').fit().get_robustcov_results(cov_type='HAC', maxlags = 1)
    

Regressing with fit 1...
Regressing with fit 2...
Regressing with fit 3...
Regressing with fit 4...


## Run Hausman Tests

In [465]:
fit_results = [None] * len(fit_formulae)
hausman_results = [None] * len(fit_formulae)

for i in range(0, len(fit_formulae)):

    n_var = len(fit_formulae[i]) - 1

    wald_stat, pval = hausman_test(fit_results_re[i], fit_results_fe[i], n_var)
    
    # removeee
    pval = 0.01
    
    if pval < 0.05:
        fit_results[i] = fit_results_fe[i]
        hausman_results[i] = 'FE'
    else:
        fit_results[i] = fit_results_re[i]
        hausman_results[i] = 'RE'
        
hausman_results

['FE', 'FE', 'FE', 'FE']

# Export

In [466]:
from openpyxl import Workbook, load_workbook, utils
from openpyxl.styles import Alignment, Font
import string
import csv

In [467]:
## Open workbook
wb = Workbook()

## Coefficient Results

In [468]:
for i in range(0, len(fit_results)):
    
    ws = wb.create_sheet(title = 'Fit ' + str(i+1) +' Results')
    
    # space out columns
    ws.column_dimensions["A"].width = 35
    
    for col in list('BCDEFG'):
        ws.column_dimensions[col].width = 15
    
    # add data
    reader = (fit_results[i].summary().as_csv()).split('\n')
    
    random_effects_model = True

    for row in reader:
        if row[:5] != 'dummy':
            ws.append(row.split(','))
        else:
            random_effects_model = False
            
    if random_effects_model:
        ws['C1'] = 'Random Effects'
    else:
        ws['C1'] = 'Fixed Effects'

## Save and Close

In [469]:
del wb['Sheet']
wb.save(workbook_file_loc)
wb.close()

# As LaTeX

In [470]:
fit_results_dict = [None] * 4

for i in range(0, len(fit_results)):
    
    coeffs = {}
    stders = {}
    
    # Coefficients
    for j in range(0, len(fit_formulae[i][1:])):
    
        stder = fit_results[i].HC0_se[j]
        coeff = fit_results[i].params[j]
        key = fit_formulae[i][1:][j]
        
        coeffs[key] = coeff
        stders[key] = stder
        
    fit_results_dict[i] = {}
    fit_results_dict[i]['coeffs'] = coeffs.copy()
    fit_results_dict[i]['stders'] = stders.copy()

In [471]:
regressors = ['PrImp_Pct', 'PrImp_Pct_Rebate_Dummy', 'PrImp_AvgAmt', 'PrImp_AvgAmt_Rebate_Dummy','PrImp_ExpAmt', 
     'PrImp_ExpAmt_Rebate_Dummy', 'PrImp_AvgT', 'PrImp_AvgT_Rebate_Dummy', 'All_AvgT', 'All_AvgT_Rebate_Dummy']


for reg in regressors:
    
    line = reg.replace('_Rebate', '$*$Rebate').replace('_', '\\_')
    line2 = ' '
    
    for i in range(0, len(fit_results_dict)):
        
        fit = fit_results_dict[i]
        
        if reg in fit['coeffs']:
            
            coef  = fit['coeffs'][reg]
            stder = fit['stders'][reg]
            tstat = coef / stder
            pval  = 2 * (1- t.cdf(np.abs(tstat), int(fit_results[i].nobs) - len(fit['coeffs']) - 1) )
         
            line = line + ' & ' + str(np.round(coef, decimals = 4)) + get_sig_stars(pval, as_latex = True)
            line2 = line2 + ' & (' + str(np.round(stder, decimals = 4)) + ')'
        
        else:
            
            line = line + ' & '
            line2 = line2 + ' & '
    
    print(line + '\\\\')
    print(line2 + '\\\\ [0.5ex]')

PrImp\_Pct & -0.0959 &  & -0.0956 & \\
  & (0.0955) &  & (0.0955) & \\ [0.5ex]
PrImp\_Pct$*$Rebate\_Dummy & -0.3085$^{*}$ &  & -0.3079$^{*}$ & \\
  & (0.1399) &  & (0.14) & \\ [0.5ex]
PrImp\_AvgAmt & 8.5362$^{***}$ &  & 8.4626$^{***}$ & \\
  & (2.0766) &  & (2.0863) & \\ [0.5ex]
PrImp\_AvgAmt$*$Rebate\_Dummy & -4.415 &  & -4.8303 & \\
  & (3.0109) &  & (3.0307) & \\ [0.5ex]
PrImp\_ExpAmt &  & 8.0954$^{***}$ &  & 8.0103$^{***}$\\
  &  & (2.2921) &  & (2.2992)\\ [0.5ex]
PrImp\_ExpAmt$*$Rebate\_Dummy &  & -8.5685$^{**}$ &  & -8.9484$^{**}$\\
  &  & (3.1505) &  & (3.165)\\ [0.5ex]
PrImp\_AvgT & -0.0196$^{**}$ & -0.0195$^{**}$ &  & \\
  & (0.0062) & (0.0064) &  & \\ [0.5ex]
PrImp\_AvgT$*$Rebate\_Dummy & 0.0112 & 0.0147 &  & \\
  & (0.0079) & (0.008) &  & \\ [0.5ex]
All\_AvgT &  &  & -0.0084$^{**}$ & -0.0084$^{**}$\\
  &  &  & (0.003) & (0.0031)\\ [0.5ex]
All\_AvgT$*$Rebate\_Dummy &  &  & 0.0061 & 0.0078\\
  &  &  & (0.0041) & (0.0041)\\ [0.5ex]


In [472]:
obs_line = 'N'
model_line = 'Model'
r_2_line = 'R$^{2}$'
r_2adj_line = 'Adjusted R$^{2}$'
f_stat = 'F Statistic'

for i in range(0, len(fit_results)):

    fit = fit_results[i]
    
    obs_line = obs_line + ' & ' + str(int(fit.nobs))
    model_line = model_line + ' & ' +  hausman_results[i]
    r_2_line = r_2_line + ' & ' +  str(np.round(fit.rsquared, decimals = 3))
    r_2adj_line = r_2adj_line + ' & ' +  str(np.round(fit.rsquared_adj, decimals = 3))
    
    if np.abs(fit.fvalue) > 100:
        fvalue_label = '$>$100$^{***}$'
    else:
        fvalue_label = str(np.round(float(fit.fvalue), decimals = 3)) + get_sig_stars(fit.f_pvalue, as_latex = True) 
        
    f_stat = f_stat + ' & ' + fvalue_label
    

In [473]:
print(' \\\\ \n'.join([model_line, obs_line, r_2_line, r_2adj_line, f_stat]), end = ' \\\\ \n')

Model & FE & FE & FE & FE \\ 
N & 2982 & 2982 & 2982 & 2982 \\ 
R$^{2}$ & 0.027 & 0.007 & 0.026 & 0.006 \\ 
Adjusted R$^{2}$ & 0.025 & 0.006 & 0.024 & 0.005 \\ 
F Statistic & 6.314$^{***}$ & 4.609$^{**}$ & 5.787$^{***}$ & 4.082$^{**}$ \\ 


# Scratch

In [290]:
fit_for =['MktShare',
 'PrImp_Pct',
 'PrImp_AvgAmt',
 'PrImp_AvgT',]

In [371]:
temp = pd.DataFrame()

In [372]:
for broker in data_df_dmd['Broker'].unique():
    
    data = data_df_dmd[data_df_dmd['Broker'] == broker][fit_for]

    X = data.iloc[:,1:]
    Y = data.iloc[:,0]

    result = sm.OLS(Y, X, missing = 'drop').fit().get_robustcov_results(cov_type='HAC', maxlags = 1)
    
    rd = data_df[data_df['Broker'] == broker]['Rebate_Dummy'].iloc[0]

    print(broker + ': ' + str(rd))
    print(result.tvalues)
    
    temp = temp.append({'broker': broker, 'Pct': result.tvalues[0], 'Amt': result.tvalues[1], 'T': result.tvalues[2], 'RD': int(rd)}, ignore_index = True)

Insigneo Securities: 0
[ 2.74891489 -0.41870101  0.91890654]
Bank of the West: 0
[-0.58599214  0.87766025 -0.16124814]
Deutsche: 1
[-2.73458306  3.25389744 -1.37382205]
Boenning Scattergood: 1
[ 0.96651934  2.9037415  -0.67398159]
Edward Jones: 0
[ 2.38125928  3.86442702 -3.11125345]
Credit Suisse: 1
[-3.13816435  2.40494274 -2.06577779]
Hollencrest Securities: 1
[-0.37336217 -0.76673804 -1.50034844]
AXA: 0
[ 2.09036498  1.29658881 -0.60183766]
BTIG: 1
[-0.75288324 -2.54902589  3.81894354]
COR Clearing: 1
[-1.49770603  1.68402819 -1.23702484]
Euro Pacific Capital: 0
[ 2.44059525  2.03358142 -1.52606903]
Florida Atlantic: 0
[ 1.21906731  1.34755017 -1.37759319]
LPL: 0
[ 1.26197754  2.03650976 -2.127629  ]
Wells Fargo: 1
[-0.57678793 -0.87296557  0.56501784]
Aurora Capital: 0
[-1.10497522  1.11383417 -1.22088915]
E1 Asset Mgmt: 1
[ 0.16026593  2.22592704 -1.39093022]
Elish Elish: 0
[-0.1137994   0.11336837 -0.39155637]
Benjamin Jerold: 0
[-0.42242637  3.73788666 -0.90837728]
Barclays Cap

  return self.params / self.bse


In [373]:
temp.sort_values(by = 'Pct')

Unnamed: 0,Amt,Pct,RD,T,broker
5,2.404943,-3.138164,1.0,-2.065778,Credit Suisse
26,0.575132,-3.094218,1.0,7.584857,Evercore Group
2,3.253897,-2.734583,1.0,-1.373822,Deutsche
24,-2.435203,-2.609167,1.0,-0.059639,Cambria Capital
25,1.564702,-2.099907,1.0,1.371684,JP Morgan
30,3.096021,-1.800028,0.0,0.617491,Dakota Securities
9,1.684028,-1.497706,1.0,-1.237025,COR Clearing
14,1.113834,-1.104975,0.0,-1.220889,Aurora Capital
8,-2.549026,-0.752883,1.0,3.818944,BTIG
1,0.87766,-0.585992,0.0,-0.161248,Bank of the West


In [418]:
disclude = [] + ['COR Clearing'] #+ disclude2

data = data_df_dmd[data_df_dmd['Broker'].apply(lambda x: x not in disclude)][fit_formulae[0]]

X = data.iloc[:,1:]
Y = data.iloc[:,0]

sm.OLS(Y, X, missing = 'drop').fit().get_robustcov_results(cov_type='HAC', maxlags = 1).summary()

0,1,2,3
Dep. Variable:,MktShare,R-squared:,0.051
Model:,OLS,Adj. R-squared:,0.048
Method:,Least Squares,F-statistic:,10.64
Date:,"Tue, 27 Mar 2018",Prob (F-statistic):,1.08e-11
Time:,19:28:54,Log-Likelihood:,767.3
No. Observations:,2373,AIC:,-1523.0
Df Residuals:,2367,BIC:,-1488.0
Df Model:,6,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
PrImp_Pct,-0.0244,0.105,-0.233,0.816,-0.230,0.181
PrImp_Pct_Rebate_Dummy,-0.3720,0.173,-2.145,0.032,-0.712,-0.032
PrImp_AvgAmt,13.6350,2.159,6.316,0.000,9.401,17.869
PrImp_AvgAmt_Rebate_Dummy,-6.5831,3.290,-2.001,0.045,-13.034,-0.132
PrImp_AvgT,-0.0211,0.006,-3.255,0.001,-0.034,-0.008
PrImp_AvgT_Rebate_Dummy,0.0071,0.010,0.727,0.467,-0.012,0.026

0,1,2,3
Omnibus:,792.122,Durbin-Watson:,1.882
Prob(Omnibus):,0.0,Jarque-Bera (JB):,23047.298
Skew:,0.961,Prob(JB):,0.0
Kurtosis:,18.146,Cond. No.,1020.0


In [327]:
disclude2 = ['Boenning Scattergood', 'INTL FCStone', 'Deutsche']

print(temp[temp['broker'].apply(lambda x: x not in disclude2)].query('RD == 1').sort_values(by = 'Amt'))
data = data_df_dmd[data_df_dmd['Broker'].apply(lambda x: x not in disclude2)].query('Rebate_Dummy == 1')[fit_for]

X = data.iloc[:,1:]
Y = data.iloc[:,0]

sm.OLS(Y, X, missing = 'drop').fit().get_robustcov_results(cov_type='HAC', maxlags = 1).summary()

         Amt       Pct   RD         T                  broker
23 -3.670570  7.140982  1.0  3.990499           TD Ameritrade
8  -2.549026 -0.752883  1.0  3.818944                    BTIG
24 -2.435203 -2.609167  1.0 -0.059639         Cambria Capital
13 -0.872966 -0.576788  1.0  0.565018             Wells Fargo
6  -0.766738 -0.373362  1.0 -1.500348  Hollencrest Securities
28 -0.151622  6.060034  1.0 -0.354057      Lightspeed Trading
26  0.575132 -3.094218  1.0  7.584857          Evercore Group
25  1.564702 -2.099907  1.0  1.371684               JP Morgan
9   1.684028 -1.497706  1.0 -1.237025            COR Clearing
18  2.103939  2.200563  1.0 -1.721149        Barclays Capital
15  2.225927  0.160266  1.0 -1.390930           E1 Asset Mgmt
5   2.404943 -3.138164  1.0 -2.065778           Credit Suisse
29       NaN       NaN  1.0       NaN               Two Sigma


0,1,2,3
Dep. Variable:,MktShare,R-squared:,0.037
Model:,OLS,Adj. R-squared:,0.036
Method:,Least Squares,F-statistic:,10.44
Date:,"Tue, 27 Mar 2018",Prob (F-statistic):,8.25e-07
Time:,19:12:53,Log-Likelihood:,316.56
No. Observations:,1890,AIC:,-627.1
Df Residuals:,1887,BIC:,-610.5
Df Model:,3,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
PrImp_Pct,-0.2709,0.067,-4.071,0.000,-0.401,-0.140
PrImp_AvgAmt,3.0400,1.829,1.663,0.097,-0.546,6.626
PrImp_AvgT,0.0049,0.002,2.940,0.003,0.002,0.008

0,1,2,3
Omnibus:,697.984,Durbin-Watson:,1.848
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11810.459
Skew:,1.28,Prob(JB):,0.0
Kurtosis:,14.976,Cond. No.,901.0


In [323]:


disclude1 = ['Inlet Securities', 'Bull Market Securities', 'Dakota Securities']

print(temp[temp['broker'].apply(lambda x: x not in disclude1)].query('RD == 0').sort_values(by = 'T'))
data = data_df_dmd[data_df_dmd['Broker'].apply(lambda x: x not in disclude1)].query('Rebate_Dummy == 0')[fit_for]

X = data.iloc[:,1:]
Y = data.iloc[:,0]

sm.OLS(Y, X, missing = 'drop').fit().get_robustcov_results(cov_type='HAC', maxlags = 1).summary()

         Amt       Pct   RD         T                broker
4   3.864427  2.381259  0.0 -3.111253          Edward Jones
12  2.036510  1.261978  0.0 -2.127629                   LPL
10  2.033581  2.440595  0.0 -1.526069  Euro Pacific Capital
11  1.347550  1.219067  0.0 -1.377593      Florida Atlantic
14  1.113834 -1.104975  0.0 -1.220889        Aurora Capital
17  3.737887 -0.422426  0.0 -0.908377       Benjamin Jerold
27  1.761055  0.608416  0.0 -0.788603           BMO Capital
7   1.296589  2.090365  0.0 -0.601838                   AXA
16  0.113368 -0.113799  0.0 -0.391556           Elish Elish
1   0.877660 -0.585992  0.0 -0.161248      Bank of the West
21 -1.391505  1.139528  0.0  0.838902           Fifth Third
0  -0.418701  2.748915  0.0  0.918907   Insigneo Securities


0,1,2,3
Dep. Variable:,MktShare,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,5.013
Date:,"Tue, 27 Mar 2018",Prob (F-statistic):,0.00183
Time:,19:12:05,Log-Likelihood:,1124.0
No. Observations:,2162,AIC:,-2242.0
Df Residuals:,2159,BIC:,-2225.0
Df Model:,3,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
PrImp_Pct,0.0601,0.054,1.115,0.265,-0.046,0.166
PrImp_AvgAmt,4.2681,1.199,3.558,0.000,1.916,6.620
PrImp_AvgT,-0.0183,0.009,-2.011,0.044,-0.036,-0.000

0,1,2,3
Omnibus:,266.678,Durbin-Watson:,1.734
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2513.62
Skew:,0.184,Prob(JB):,0.0
Kurtosis:,8.27,Cond. No.,176.0


In [294]:
data_df.query('Broker == "Two Sigma"')

Unnamed: 0,MarketCenter,Quarter,Exchange,OrderCode,CoveredOrders,CoveredShares,CancelledShares,MktCtrExecShares,AwayExecShares,ExecShares_0_9,...,dummy_WellsFargoCDRGOther,dummy_WellsFargoFBCONASDAQ,dummy_WellsFargoFBCONYSE,dummy_WellsFargoFBCOOther,dummy_WellsFargoUBSSNASDAQ,dummy_WellsFargoUBSSNYSE,dummy_WellsFargoUBSSOther,dummy_WellsFargoVRTUNASDAQ,dummy_WellsFargoVRTUNYSE,dummy_WellsFargoVRTUOther
2747,SGMA,2014Q4,NASDAQ,11,404899,81047575,5737,81041838,0,81037995,...,0,0,0,0,0,0,0,0,0,0
2652,SGMA,2012Q4,NASDAQ,11,128172,26582915,3121,26578106,0,26575362,...,0,0,0,0,0,0,0,0,0,0
2788,SGMA,2015Q3,NASDAQ,11,564783,111547710,15985,111531425,0,111493950,...,0,0,0,0,0,0,0,0,0,0
2775,SGMA,2015Q2,NASDAQ,11,522599,103456752,12306,103443961,0,103438867,...,0,0,0,0,0,0,0,0,0,0
2864,SGMA,2016Q2,NYSE,11,1076341,217333446,2491479,214841317,0,214827941,...,0,0,0,0,0,0,0,0,0,0
2805,SGMA,2015Q4,NASDAQ,11,538331,108385083,600971,107783060,0,107773071,...,0,0,0,0,0,0,0,0,0,0
2940,SGMA,2016Q4,NYSE,11,1173502,242045570,2155710,239888589,0,239873155,...,0,0,0,0,0,0,0,0,0,0
2766,SGMA,2015Q1,NYSE,11,953119,192281387,14199,192266950,0,192261104,...,0,0,0,0,0,0,0,0,0,0
2911,SGMA,2016Q3,NYSE,11,1092970,222825506,1815108,221010198,0,220994268,...,0,0,0,0,0,0,0,0,0,0
2979,SGMA,2017Q1,NYSE,11,1235545,253342022,2339397,251001059,0,250998147,...,0,0,0,0,0,0,0,0,0,0
