In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# Functions

# Naive Broker Analysis

In [2]:
# Load the level-form regression panel and preview it.
# The CSV carries a saved index, which comes back as the 'Unnamed: 0' column.
panel_data = pd.read_csv(
    '../data/processed/regression_data_levels.csv',
)
display(panel_data.head(5))

Unnamed: 0,MarketCenter,Quarter,Exchange,OrderCode,CoveredOrders,CoveredShares,CancelledShares,MktCtrExecShares,AwayExecShares,ExecShares_0_9,...,Rel_PrImp_AvgT,MktCtrAvg_PrImp_ExpAmt,Rel_PrImp_ExpAmt,MktCtrAvg_All_AvgT,Rel_All_AvgT,Broker,MktShare,Rebate,Rebate_Dummy,Broker_Size
0,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Barclays Capital,0.0,1.0,1,23958270000.0
1,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Cowen Execution,0.0,0.0,0,50187100.0
2,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Credit Suisse,0.0,1.0,1,32667210000.0
3,ARCA,2015Q3,NASDAQ,11,167917,72636881,10384868,40788074,21463939,62072012,...,0.636061,0.010542,0.009003,0.474635,0.196964,Deutsche,0.0151,1.0,1,20450090000.0
4,ARCA,2015Q3,NASDAQ,12,11963921,3132598691,428908786,1832975495,73922935,1906190605,...,-0.372792,0.00279,-0.001072,21.679366,-21.523319,Barclays Capital,0.1375,1.0,1,23958270000.0


In [3]:
# Add market-share-weighted versions of the execution-quality measures to panel_data.
index_cols = ['Broker', 'MarketCenter', 'Quarter', 'Exchange', 'OrderType']
exec_qual_cols = ['PrImp_Pct', 'PrImp_AvgAmt', 'PrImp_ExpAmt', 'PrImp_AvgT', 'All_AvgT']

# One weighted column name per measure, in the same order as exec_qual_cols.
exec_qual_wgtd_cols = [measure + '_MktShWgtd' for measure in exec_qual_cols]

# Each weighted column is the raw measure scaled by that row's market share.
for measure, weighted_name in zip(exec_qual_cols, exec_qual_wgtd_cols):
    panel_data[weighted_name] = panel_data[measure] * panel_data['MktShare']

In [4]:
# Broker-level aggregates: one row per Broker / Quarter / Exchange / OrderType.
group_keys = ['Broker', 'Quarter', 'Exchange', 'OrderType']
cols_temp = group_keys + exec_qual_wgtd_cols

# numeric_only=True is spelled out so that string columns (e.g. MarketCenter) are
# left out of the sum on every pandas version; since pandas 2.0 the default would
# try to "sum" them (string concatenation, or a TypeError on mixed-type columns).
panel_data_sum = (
    panel_data
    .groupby(group_keys)
    .sum(numeric_only=True)
    .reset_index()
)

# Re-normalise the weighted sums by the total market share actually observed in
# each group, because the shares do not add up to 100% when data is missing.
# NOTE(review): the denominator sums MktShare over every row of the group, including
# rows whose quality measure is NaN (those are skipped in the numerator) - confirm
# that this is the intended weighting.
for eq_col in exec_qual_wgtd_cols:
    panel_data_sum[eq_col] = panel_data_sum[eq_col] / panel_data_sum['MktShare']

# Coefficients

In [84]:
# statsmodels' formula interface provides the OLS fits used for the per-broker regressions.
import statsmodels.formula.api as smf
import warnings
# Silence all warnings from here on.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings raised
# by later cells are hidden too - consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [86]:
# Load the demeaned regression data and keep only market orders.
# Row labels from the CSV are preserved by the filter, so the index is not contiguous.
data_dmd = (
    pd.read_csv('../data/processed/regression_data_levels_demeaned.csv')
    .query('OrderType == "Market"')
)
display(data_dmd.head(5))

Unnamed: 0,ATQShares,ATQ_AvgT,ATQ_Pct,ATQ_TotalT,All_AvgT,AvgEffecSpread,AvgEffecSpread_T,AvgRealSpread,AvgRealSpread_T,AwayExecShares,...,PrImp_Pct,PrImp_TotalAmt,PrImp_TotalT,Quarter,Rebate,Rebate_Dummy,Rel_All_AvgT,Rel_PrImp_AvgT,Rel_PrImp_ExpAmt,Rel_PrImp_Pct
0,-2254117.0,0.003149,-0.105713,20470.911111,0.57356,-0.010679,-266743.860889,-0.016655,-933359.5,672333.6,...,0.151548,542140.617311,25617320.0,2015Q3,0.0,1,0.303729,0.595502,0.006215,0.084099
1,-1923241.0,0.004073,-0.131532,74723.957143,0.55848,-0.014135,-288266.242343,-0.013174,-457344.8,1619192.0,...,0.195151,671055.459443,24760990.0,2015Q3,0.0,0,0.287919,0.552198,0.007323,0.105949
2,-3964498.0,0.003016,-0.050819,48958.6,0.539658,-0.000578,-129163.972183,-0.017645,-1325726.0,-1601207.0,...,0.055061,278451.504883,24184470.0,2015Q3,0.0,1,0.304427,0.615859,0.00343,0.020022
3,-2254117.0,0.003149,-0.105713,20470.911111,0.57356,-0.010679,-266743.860889,-0.016655,-933359.5,672333.6,...,0.151548,542140.617311,25617320.0,2015Q3,0.0,1,0.303729,0.595502,0.006215,0.084099
8,-3566037.0,-0.009826,-0.101571,-503348.988889,-0.106853,-0.011942,-664488.365378,-0.020707,-1609578.0,1604422.0,...,0.162055,274204.7396,-610157.2,2015Q3,0.0,1,-0.319162,-0.191245,0.135201,0.158075


In [183]:
# Per-broker OLS of market share on the two price-improvement measures.
# For each broker the slope coefficients and their variances are stored, then turned
# into t-statistics and combined into a single Quality_Index.
regressors = ['PrImp_ExpAmt', 'PrImp_AvgT']
cols = ['Broker', 'PrImp_ExpAmt_coef', 'PrImp_AvgT_coef', 'PrImp_ExpAmt_var', 'PrImp_AvgT_var']

# Rows are collected in a list and the frame is built once: DataFrame.append was
# removed in pandas 2.0 and copies the whole frame on every call.
reg_records = []

# groupby walks the brokers in sorted order and skips missing broker labels, so the
# output row order is reproducible (iterating a set is not) and no query string has
# to be assembled from the broker name.
for broker, broker_rows in data_dmd.groupby('Broker'):

    # NOTE(review): dropna() discards a row if ANY column is missing, not only the
    # three regression variables - confirm this is intended.
    sample = broker_rows.dropna()
    if sample.empty:
        # nothing to fit for this broker
        continue

    result = smf.ols('MktShare ~ PrImp_ExpAmt + PrImp_AvgT', data=sample).fit()
    cov = result.cov_params()

    # check if results are nonsensical (issue with data): the fit is rejected when
    # the sum of all entries of the coefficient covariance matrix (intercept
    # included) is below 1e-05
    regression_error = np.sum(cov.values) < 1e-05
    if regression_error:
        continue

    # keep the non-intercept coefficients and their variances, selected by name
    record = {'Broker': broker}
    for name in regressors:
        record[name + '_coef'] = result.params[name]
        record[name + '_var'] = cov.loc[name, name]
    reg_records.append(record)

reg_results = pd.DataFrame(reg_records, columns=cols)

# t-statistic = coefficient / standard error
for name in regressors:
    reg_results[name + '_tstat'] = reg_results[name + '_coef'] / np.sqrt(reg_results[name + '_var'])

# The index adds the PrImp_ExpAmt t-stat and subtracts the PrImp_AvgT t-stat.
reg_results['Quality_Index'] = reg_results['PrImp_ExpAmt_tstat'] - reg_results['PrImp_AvgT_tstat']

# Excel Export

In [37]:
from openpyxl import Workbook, load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

## Naive Analysis

In [74]:
# Params for the naive broker ratings export

# Destination workbook.
workbook_file_loc = '../exhibits/welfare/broker_ratings_naive.xlsx'

# Quarters in reverse chronological order: 2017Q4 ... 2017Q1, then 2016Q4 ... 2016Q1.
quarters_list = [
    '{}Q{}'.format(year, qtr)
    for year in ('2017', '2016')
    for qtr in (4, 3, 2, 1)
]

# Exchanges that each get their own worksheet per quarter.
exchange_list = ['NASDAQ', 'NYSE']

In [75]:
# Write one worksheet per (quarter, exchange) pair holding the naive broker ranking.
wb = Workbook()

sheet_keys = [(q, e) for q in quarters_list for e in exchange_list]

for quarter, exchange in sheet_keys:
    # Market orders for this quarter and exchange; incomplete rows are dropped.
    selector = 'Quarter == "{}" & OrderType == "Market" & Exchange == "{}"'.format(quarter, exchange)
    temp_data = (
        panel_data_sum[cols_temp]
        .query(selector)
        .dropna()
        # pandas' default rank: ascending, ties share the average rank
        .assign(Rank_1=lambda frame: frame['PrImp_ExpAmt_MktShWgtd'].rank())
    )

    ws = wb.create_sheet(title='{}_{}'.format(quarter, exchange))
    for sheet_row in dataframe_to_rows(temp_data, index=False, header=True):
        ws.append(sheet_row)

# Drop the default sheet that Workbook() creates, then write the file.
del wb['Sheet']
wb.save(workbook_file_loc)

## Coefficient Analysis

In [180]:
# Params

# Output path of the coefficient-based broker ratings workbook.
# This rebinds workbook_file_loc, which the naive export section also assigns, so
# the cell order determines which file a later wb.save(workbook_file_loc) writes.
workbook_file_loc = '../exhibits/welfare/broker_ratings_coefficients.xlsx'

In [184]:
# Export the per-broker regression results to an Excel workbook with two sheets:
# 'Summary' (free-text description) and 'Results' (the reg_results table).
wb = Workbook()

# Summary
ws = wb.create_sheet(title='Summary')

ws['A1'] = '''The coefficients in the results sheet are obtained by running a Fixed Effects (demeaned) regression on each broker'''
ws['A2'] = '''The program used to generate the results is in /notebooks/Welfare.ipynb'''

# Add results
ws = wb.create_sheet(title='Results')

# header row first, then one worksheet row per broker
for r in dataframe_to_rows(reg_results, index=False, header=True):
    ws.append(r)

# Drop the default sheet that Workbook() creates, then write the file.
del wb['Sheet']
wb.save(workbook_file_loc)

In [182]:
# Sanity check on the width of reg_results. The regression cell builds 5 base columns
# and then adds two t-stat columns plus Quality_Index, i.e. 8 columns in total.
# NOTE(review): a recorded output of 7 would predate one of those additions - re-run
# the notebook top to bottom to refresh it.
len(reg_results.columns)

7