## Imports

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import json

## Read data

In [54]:
# Tabular message log: a semicolon-separated CSV file.
data = pd.read_csv("chix.csv", sep=";")

# Second input: the JSON document parsed into plain Python objects.
with open('bankHack.json', 'r') as json_file:
    data2 = json.load(json_file)

## Brokers

In [3]:
# Distinct, non-missing ids seen in each role, in first-seen order.
c = list(pd.unique(data["Contra Broker"].dropna()))
b = list(pd.unique(data["Broker"].dropna()))

# Append the ids that only ever appear as contra broker.
for num in c:
    if num not in b:
        b.append(num)

# Integer broker ids.  Missing values are filtered with pd.notna because a
# comparison such as `num != np.nan` is always True and filters nothing.
# Id 1 is excluded -- presumably a placeholder id; TODO confirm.
brokers = [int(num) for num in b if pd.notna(num) and num != 1.0]

## Contra brokers
Count the number of times a broker is a contra broker

Contra brokers have an advantage: they only buy at a price lower than or equal to their limit, or sell at a price higher than or equal to their limit.

In [38]:
# For every known broker id, the number of rows of `data` in which that id
# appears in the "Contra Broker" column.
count_contrabro = {
    bro_num: len(data[data["Contra Broker"] == bro_num])
    for bro_num in brokers
}

## Notionals
Notional = number of shares $\times$ price of the share

A higher notional means the broker handled more money.

In [26]:
# orders:   order reference -> {"action": buy/sell indicator, "Price": price}
# notional: broker id -> sum of price * executed shares over its executions
orders = {}
notional = {}

# First pass: create a zeroed notional entry for every broker (and, on
# executions, every contra broker) and an empty slot for every order
# reference, so the second pass can accumulate with "+=".
for msg in data2:
    msg_type = msg["Message Type"]
    if msg_type != "X":
        notional[msg["Broker"]] = 0
        orders[msg["Order Reference"]] = {}
    if msg_type == "E":
        notional[msg["Contra Broker"]] = 0

# Second pass: remember the side and price of every added order ('A') and
# credit each execution ('E') to both parties at the order's price.
# NOTE(review): this uses the raw 'Price' field, not 'Price Decimal' --
# confirm this is the intended unit for the notional.
for msg in data2:
    msg_type = msg['Message Type']
    if msg_type == 'A':
        orders[msg['Order Reference']] = {
            "action": msg['Buy/Sell Indicator'],
            "Price": msg['Price'],
        }
    if msg_type == 'E':
        # An execution whose order was never added has no "Price" entry and
        # raises KeyError here.
        price = orders[msg['Order Reference']]["Price"]
        shares = msg["Executed Shares"]
        executed_value = price * shares

        notional[msg["Broker"]] += executed_value
        notional[msg["Contra Broker"]] += executed_value

# Drop broker id 1; pop() with a default does not fail when id 1 never
# occurred in the data, unlike a bare `del`.
notional.pop(1, None)

## Transaction scores
Score for the transactions of the brokers depending on the price variation around the time of transaction

This section uses `data2` and variables prefixed with `y_` because it was merged in from separately written code.

In [55]:
# Replace each message's nested time stamp by its '$numberLong' value with
# the last six characters removed.
for message in data2:
    raw_stamp = message['Time Stamp']['$numberLong']
    message['Time Stamp'] = raw_stamp[:-6]

In [56]:
# Turn every message's time stamp into an integer so time stamps can be
# subtracted from one another.
for message in data2:
    message['Time Stamp'] = int(message['Time Stamp'])

In [57]:
# Broker ids that receive a transaction score.
y_brokers = [
    1, 2, 7, 9, 14, 19, 33, 39, 53,
    65, 70, 72, 79, 80, 84, 85, 88, 124,
]
# One independent, initially empty list of per-transaction scores per broker.
y_scores = dict((broker, []) for broker in y_brokers)
# Order reference -> {"action", "Price"}; filled while scanning the messages.
offers = dict()

In [58]:
# sign = 1 if the scored broker sold, -1 if it bought
# Half-width of the comparison window, in the units of the 'Time Stamp' field.
window_size = 1e5


def update_score(idx: int, y_broker, price: float, sign: int) -> None:
    """Score the execution at ``data2[idx]`` for one broker.

    The execution price is compared with the 'Price Decimal' of every added
    order ('A' message) whose time stamp lies strictly within ``window_size``
    of the execution.  The mean of ``sign * (price - order price)`` times the
    executed shares is appended to ``y_scores[y_broker]``.

    Both scans stop at the first message outside the window, so they assume
    ``data2`` is ordered by time stamp -- TODO confirm.

    Args:
        idx: Index in ``data2`` of the execution message.
        y_broker: Broker id; must already be a key of ``y_scores``.
        price: Price of the executed order.
        sign: 1 if the broker sold, -1 if it bought.

    Raises:
        KeyError: If ``y_broker`` is not a key of ``y_scores``.
    """
    shares = data2[idx]['Executed Shares']
    trade_time = data2[idx]['Time Stamp']
    nb_scores = []

    # Walk backwards from the execution while still inside the window.
    i = idx
    while i >= 0 and (trade_time - data2[i]['Time Stamp'] < window_size):
        if data2[i]["Message Type"] == 'A':
            nb_scores.append(sign * (price - data2[i]['Price Decimal']))
        i -= 1

    # Walk forwards; start after idx because the backward scan has already
    # visited message idx itself.
    i = idx + 1
    while i < len(data2) and (data2[i]['Time Stamp'] - trade_time < window_size):
        if data2[i]["Message Type"] == 'A':
            nb_scores.append(sign * (price - data2[i]['Price Decimal']))
        i += 1

    # With no order in the window there is no reference price.  np.mean([])
    # would be NaN and would make the broker's mean score NaN, so the
    # execution is left unscored.
    if not nb_scores:
        return

    y_scores[y_broker].append(np.mean(nb_scores) * shares)

In [70]:
# Single pass over the messages: remember every added order, then score each
# execution for both of its parties.
for position, message in enumerate(data2):
    kind = message["Message Type"]
    if kind == 'A':
        offers[message["Order Reference"]] = {
            "action": message['Buy/Sell Indicator'],
            "Price": message['Price Decimal'],
        }
    if kind == 'E':
        executed_offer = offers[message["Order Reference"]]
        price = executed_offer["Price"]
        # The broker gets +1 when the executed order was a sell ('S') and -1
        # otherwise; the contra broker always gets the opposite sign.
        broker_sign = 1 if executed_offer["action"] == 'S' else -1
        update_score(position, message['Broker'], price, broker_sign)
        update_score(position, message['Contra Broker'], price, -broker_sign)

In [72]:
# Average score per transaction for every scored broker; a broker without
# any scored transaction gets 0.
scorePerTransaction = {
    y_broker: (np.mean(trade_scores) if trade_scores else 0)
    for y_broker, trade_scores in y_scores.items()
}

# Broker id 1 is left out of the ranking.
del scorePerTransaction[1]

## Number of trades
Total number of executed trades a broker took part in (as broker or as contra broker)

In [30]:
# Rows of `data` that are executions (message type "E").  The name `exec`
# is also read by the volume cell, so it is kept as is.
exec = data[data["Message Type"] == "E"]

# Trades per broker: executions where it is the broker plus the number of
# rows where it is the contra broker.
trade = {
    bro_num: len(exec[exec["Broker"] == bro_num]) + count_contrabro[bro_num]
    for bro_num in brokers
}

trade

{80: 806,
 79: 899,
 7: 29,
 2: 168,
 39: 65,
 9: 135,
 33: 11,
 85: 35,
 53: 73,
 72: 1,
 84: 0,
 19: 7,
 70: 1,
 14: 4,
 88: 1,
 124: 2,
 65: 5}

## Volume
Total number of executed shares (buy and sell)

In [31]:
# Executed shares per broker: shares of executions where it is the broker
# plus shares of executions where it is the contra broker.
executed_shares = exec["Executed Shares"]
volume = {
    bro_num: (
        executed_shares[exec["Broker"] == bro_num].sum()
        + executed_shares[exec["Contra Broker"] == bro_num].sum()
    )
    for bro_num in brokers
}

volume

{80: 1945900.0,
 79: 2140000.0,
 7: 32700.0,
 2: 716000.0,
 39: 298200.0,
 9: 491600.0,
 33: 11700.0,
 85: 13400.0,
 53: 47800.0,
 72: 4700.0,
 84: 0.0,
 19: 9700.0,
 70: 500.0,
 14: 18400.0,
 88: 500.0,
 124: 200.0,
 65: 1000.0}

## Ratio
Ratio of notional to number of trades

A higher ratio means the broker handled more money in fewer trades, so it is considered more efficient

In [32]:
# Notional handled per trade for every broker that has a notional entry.
ratio = {}

for broker_id, amount in notional.items():
    try:
        ratio[broker_id] = amount / trade[broker_id]
    except ZeroDivisionError:
        # A broker without any trade gets a ratio of 0.
        ratio[broker_id] = 0

ratio

{80: 601706042.1836228,
 79: 593176707.4527253,
 7: 280553448.2758621,
 2: 1064189761.9047619,
 39: 1146125692.3076923,
 9: 904398444.4444444,
 19: 346825714.28571427,
 70: 126100000.0,
 14: 1159200000.0,
 33: 264648181.8181818,
 53: 162911506.84931508,
 88: 125350000.0,
 124: 24965000.0,
 85: 95204857.14285715,
 65: 49678000.0,
 72: 1170300000.0,
 84: 0}

## Overall score calculation
Normalize the scores for each criterion and add them up

Use the z score

In [69]:
# The per-broker tables (broker id -> value) that feed the overall score.
criterias = [
    count_contrabro,
    notional,
    scorePerTransaction,
    trade,
    volume,
    ratio,
]

In [35]:
# Number of ranked brokers; also used as the scaling factor when the
# criteria are normalized.
size = len(brokers)
initial_scores = [0.0 for _ in range(size)]
# Every broker starts with an overall score of 0.0.
scores = {bk: start for bk, start in zip(brokers, initial_scores)}

In [36]:
# Standardize every criterion across brokers and add the result to each
# broker's overall score.
for crit in criterias:
    crit_val = list(crit.values())
    mean = np.mean(crit_val)
    std = np.std(crit_val)

    # If every broker has the same value, the criterion cannot rank anyone,
    # and dividing by its zero standard deviation would turn every score
    # into NaN.  Skip it.
    if std == 0:
        continue

    for bk, value in crit.items():
        # NOTE(review): this is the plain z-score multiplied by sqrt(size).
        # The factor is the same for every criterion, so it does not change
        # the ranking -- confirm whether the scaling is intended.
        z = (value - mean) * (size ** 0.5) / std
        scores[bk] += z

scores

{80: 46.99245772955349,
 79: 38.381089453568244,
 7: -9.107932030046442,
 2: 13.64924496448306,
 39: 6.82470530595992,
 9: 4.231285734507006,
 33: -7.772153009150365,
 85: -10.490762641322819,
 53: -8.6442482463384,
 72: 8.170158073982325,
 84: -12.38947759728503,
 19: -8.023212416774786,
 70: -12.00156676797289,
 14: -14.508505507178445,
 88: -11.028058172639556,
 124: -12.110557376258118,
 65: -12.1724674970872}