In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report,log_loss
from mlxtend.plotting import plot_decision_regions
import sys, math, os, json, re, random
import scipy.stats as stats
import seaborn as sns
import datetime as dt

from reticulum import AdaptiveBayesianReticulum

# jupyter magic to display plots directly in the notebook
%matplotlib inline

# use vector graphics format for nicer plots
%config Inline.Backend.figure_format = 'svg'

%config Completer.use_jedi = False

%load_ext autoreload
%autoreload 2

In [61]:
training = pd.read_excel('data/RfqData.xlsx',sheet_name = 'Training RFQs')
test = pd.read_excel('data/RfqData.xlsx',sheet_name = 'OOS RFQs')
competition = pd.read_excel('data/RfqData.xlsx',sheet_name = 'Competition RFQs')

In [3]:
target_mapping = {'MISSED': 0, 'DONE': 1}
side_mapping = {'Offer': -1, 'Bid': 1}

## Simple First

In [42]:
def simple_features(df):
    df['delta_from_mid'] = abs(df['MidPrice'] - df['QuotedPrice'])
    df['Traded'] = df['Traded'].map(target_mapping)
    df['Side'] = df['Side'].map(side_mapping)
    df['PnL']=((df['NextMidPrice']-df['QuotedPrice']>0).astype(float)*2-1)*df['Side']
    df = df[['delta_from_mid','Notional','Traded','PnL']]
    return df

In [62]:
simple_train = simple_features(training)
simple_test = simple_features(test)
simple_train.head()

Unnamed: 0,delta_from_mid,Notional,Traded,PnL
0,0.24,10000000,0,1.0
1,0.01,1000,1,1.0
2,0.1,1000,0,-1.0
3,0.02,20000,1,-1.0
4,0.04,1000,1,-1.0


#### First Model - Predict Traded

In [63]:
model_s = AdaptiveBayesianReticulum(
    prior=(1, 1),
    pruning_factor=1.01,
    n_iter=100,
    learning_rate_init=0.1,
    n_gradient_descent_steps=1,
    initial_relative_stiffness=20)

In [64]:
# extract input and target
X_train_s = simple_train[['delta_from_mid','Notional']].values
y_train_s = simple_train[['Traded']].values.ravel()
X_test_s = simple_test[['delta_from_mid','Notional']].values
y_test_s = simple_test[['Traded']].values.ravel()

In [65]:
model_s.fit(X_train_s, y_train_s, verbose=False)

#### Second Model - Predict + and - PnL

In [66]:
model_s2 = AdaptiveBayesianReticulum(
    prior=(1, 1),
    pruning_factor=1.01,
    n_iter=100,
    learning_rate_init=0.1,
    n_gradient_descent_steps=1,
    initial_relative_stiffness=20)

In [67]:
# extract input and target
y_train_s2 = simple_train[['PnL']].values.ravel()
y_test_s2 = simple_test[['PnL']].values.ravel()

In [68]:
model_s2.fit(X_train_s, y_train_s2, verbose=False)

#### Get Probability for each test set

In [69]:
column = simple_train.columns
column

Index(['delta_from_mid', 'Notional', 'Traded', 'PnL'], dtype='object')

In [56]:
def max_pnl_price(delta_range,trade,cols,model1,model2):
    delta_range =delta_range
    sample = pd.DataFrame()
    sample['delta_from_mid'] = delta_range[::-1]
    cols = cols.drop('delta_from_mid')
    sample[cols] = trade.drop('delta_from_mid')
    
    sample['P_trade'],sample['P_pospnl']=model1.predict_proba(sample.drop(columns=['Traded','PnL']).values)[:, 1],model2.predict_proba(sample.drop(columns=['Traded','PnL']).values)[:, 1]
    
    # 0 for missing trade, 1 for trading and positive pnl, -1 for trading and negative pnl
    sample['Exp_PnL'] = (sample['P_trade']*sample['P_pospnl'])-(sample['P_trade']*(1-sample['P_pospnl']))
    pnl_argmax = np.argmax(sample['Exp_PnL'])
    return sample, sample.iloc[pnl_argmax]['delta_from_mid']

In [70]:
sample,delta = max_pnl_price(np.arange(0,1.5,0.01),simple_test.iloc[0],column,model_s,model_s2)
sample

Unnamed: 0,delta_from_mid,Notional,Traded,PnL,P_trade,P_pospnl,Exp_PnL
0,1.49,320000.0,0.0,1.0,0.080131,0.873240,0.059816
1,1.48,320000.0,0.0,1.0,0.080131,0.873301,0.059826
2,1.47,320000.0,0.0,1.0,0.080131,0.873365,0.059836
3,1.46,320000.0,0.0,1.0,0.080131,0.873432,0.059847
4,1.45,320000.0,0.0,1.0,0.080131,0.873503,0.059858
...,...,...,...,...,...,...,...
145,0.04,320000.0,0.0,1.0,0.612385,0.587360,0.106996
146,0.03,320000.0,0.0,1.0,0.679221,0.530338,0.041213
147,0.02,320000.0,0.0,1.0,0.878341,0.495101,-0.008606
148,0.01,320000.0,0.0,1.0,0.891791,0.413775,-0.153789


In [58]:
delta

0.06

In [71]:
suggested_delta_simple = []
for idx,row in simple_test.iterrows():
    sample,delta = max_pnl_price(np.arange(0,1.5,0.01),row,column,model_s,model_s2)
    suggested_delta_simple.append(delta)

#### Check PnL on test set

In [None]:
target_mapping = {'MISSED': 0, 'DONE': 1}
side_mapping = {'Offer': -1, 'Bid': 1}

In [80]:
test.head(1)

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,NextMidPrice,delta_from_mid,PnL
0,30000,Bond_2,1,320000,Ctpy_3,108.2,107.89,2,0,108.32,0.31,1.0


In [88]:
test_perf = pd.DataFrame()
test_perf[['Traded','Side','QuotedPrice','NextMidPrice','PnL']]=test[['Traded','Side','QuotedPrice','NextMidPrice','PnL']]
test_perf['delta']=suggested_delta_simple
test_perf['My_Price']=test['MidPrice']-(test_perf['delta']*test_perf['Side'])

In [89]:
test_perf.head(3)

Unnamed: 0,Traded,Side,QuotedPrice,NextMidPrice,PnL,delta,My_Price
0,0,1,107.89,108.32,1.0,0.06,108.14
1,1,1,84.76,84.74,-1.0,0.04,84.75
2,1,1,84.73,84.61,-1.0,0.04,84.7


In [93]:
performance = 0
unknown_idx = []
for idx,row in test_perf.iterrows():
    if (row['Traded']==1) & (row['Side']==1) & (row['My_Price']<row['QuotedPrice']):
        if row['My_Price'] < row['NextMidPrice']:
            performance+=1
        else:
            performance -=1
    elif (row['Traded']==1) & (row['Side']==-1) & (row['My_Price']>row['QuotedPrice']):
        if row['My_Price'] > row['NextMidPrice']:
            performance+=1
        else:
            performance -=1
    elif (row['Traded']==0) & (row['Side']==1) & (row['My_Price']<row['QuotedPrice']):
        unknown_idx.append(idx)
    elif (row['Traded']==0) & (row['Side']==-1) & (row['My_Price']>row['QuotedPrice']):
        unknown_idx.append(idx)
performance

73

In [94]:
test_perf['PnL'].sum()

290.0

## All Features

In [5]:
def add_features(df):
    df['delta_from_mid'] = abs(df['MidPrice'] - df['QuotedPrice'])
    df['Traded'] = df['Traded'].map(target_mapping)
    df['Side'] = df['Side'].map(side_mapping)
    cols = ['Counterparty']    
    one_hot_encode = pd.get_dummies(df[cols])
    df = df.join(one_hot_encode)
    df['Notional_cuts'] = pd.cut(df['Notional'], [0, 100000, 1000000, 10000000, 50000000], labels=[0, 1, 2, 3], include_lowest=True, right=True)
    df['PnL']=((df['NextMidPrice']-df['QuotedPrice']>0).astype(float)*2-1)*df['Side']
    cols_to_drop = ['Time','Bond','Side','Counterparty','NextMidPrice','Notional','MidPrice','QuotedPrice']
    return df.drop(columns=cols_to_drop)

In [6]:
train_all = add_features(training)
test_all = add_features(test)
train_all.head()

Unnamed: 0,Competitors,Traded,delta_from_mid,Counterparty_Ctpy_0,Counterparty_Ctpy_1,Counterparty_Ctpy_2,Counterparty_Ctpy_3,Notional_cuts,PnL
0,1,0,0.24,1,0,0,0,2,1.0
1,1,1,0.01,0,1,0,0,0,1.0
2,1,0,0.1,0,1,0,0,0,-1.0
3,4,1,0.02,1,0,0,0,0,-1.0
4,2,1,0.04,0,0,0,1,0,-1.0


#### First Model - Predict Traded

In [10]:
# extract input and target
X_train = train_all.drop(['PnL','Traded'],axis=1).values
y_train = train_all[['Traded']].values.ravel()
X_test = test_all.drop(['PnL','Traded'],axis=1).values
y_test = test_all[['Traded']].values.ravel()

In [11]:
# train model
model = AdaptiveBayesianReticulum(
    prior=(1, 1),
    pruning_factor=1.01,
    n_iter=100,
    learning_rate_init=0.1,
    n_gradient_descent_steps=1,
    initial_relative_stiffness=20)

t0 = dt.datetime.utcnow()
model.fit(X_train, y_train, verbose=False)
t1 = dt.datetime.utcnow()

#### Second Model - Predict + and - PnL

In [14]:
# extract input and target
X_train_p = train_all.drop(['PnL','Traded'],axis=1).values
y_train_p = train_all[['PnL']].values.ravel()
X_test_p = test_all.drop(['PnL','Traded'],axis=1).values
y_test_p = test_all[['PnL']].values.ravel()

In [15]:
# train model
model_p = AdaptiveBayesianReticulum(
    prior=(1, 1),
    pruning_factor=1.01,
    n_iter=100,
    learning_rate_init=0.1,
    n_gradient_descent_steps=1,
    initial_relative_stiffness=20)

t0 = dt.datetime.utcnow()
model_p.fit(X_train_p, y_train_p, verbose=False)
t1 = dt.datetime.utcnow()

#### Get Probability for each test set

In [21]:
columns = test_all.columns
columns

Index(['Competitors', 'Traded', 'delta_from_mid', 'Counterparty_Ctpy_0',
       'Counterparty_Ctpy_1', 'Counterparty_Ctpy_2', 'Counterparty_Ctpy_3',
       'Notional_cuts', 'PnL'],
      dtype='object')

In [36]:
sample,delta = max_pnl_price(np.arange(0,1.5,0.01),test_all.iloc[0],columns,model,model_p)
sample

Unnamed: 0,delta_from_mid,Competitors,Traded,Counterparty_Ctpy_0,Counterparty_Ctpy_1,Counterparty_Ctpy_2,Counterparty_Ctpy_3,Notional_cuts,PnL,P_trade,P_pospnl,Exp_PnL
0,1.49,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
1,1.48,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
2,1.47,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
3,1.46,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
4,1.45,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
...,...,...,...,...,...,...,...,...,...,...,...,...
145,0.04,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
146,0.03,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
147,0.02,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772
148,0.01,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.02997,0.929963,0.025772


In [39]:
suggested_delta = []
for idx,row in test_all.iterrows():
    sample,delta = max_pnl_price(np.arange(0,1.5,0.01),row,columns,model,model_p)
    suggested_delta.append(delta)