In [47]:
import pandas as pd
# Prediction horizon. It is compared against `window_length` and divided by 5
# to get a row offset, so it is presumably in minutes on 5-minute candles
# -- TODO confirm units against the data set.
interval = 5


def get_target_value(df, interval=interval):
    """Return the target label for a single row.

    Parameters
    ----------
    df : row-like (pandas Series or mapping)
        Must provide 'window_length_shift' and 'arbitrage_opportunity_shift'.
        Despite the name, this is one row, as passed by
        ``DataFrame.apply(..., axis=1)``.
    interval : number
        Minimum 'window_length_shift' required for an arbitrage label to be
        kept.

    Returns
    -------
    str
        'arbitrage_exchange_1_to_exchange_2' or
        'arbitrage_exchange_2_to_exchange_1' when the shifted window is at
        least `interval` long and the shifted label is one of those two;
        'no_arbitrage' in every other case.
    """
    arbitrage_labels = (
        'arbitrage_exchange_1_to_exchange_2',
        'arbitrage_exchange_2_to_exchange_1',
    )

    # A NaN window (e.g. the rows emptied by the shift) compares False here
    # and therefore falls through to 'no_arbitrage'.
    if df['window_length_shift'] >= interval:
        label = df['arbitrage_opportunity_shift']
        if label in arbitrage_labels:
            return label

    # Covers short windows, an explicit 'no_arbitrage' label, and missing or
    # unexpected labels (which previously produced an implicit None).
    return 'no_arbitrage'
    
def get_target(df, interval=interval):
    """Return a copy of `df` with a forward-looking 'target' column added.

    For each row, the 'arbitrage_opportunity' and 'window_length' values from
    ``interval / 5`` rows later are looked up, and `get_target_value` turns
    them into the label stored in 'target'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'arbitrage_opportunity' and 'window_length' columns.
        The input frame is not modified.
    interval : number
        Look-ahead horizon. It sets both the row offset (``interval / 5``)
        and the minimum window length required by `get_target_value`.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns plus 'target'.
    """
    # Negative shift pulls values from later rows up to the current row.
    # The divisor 5 is presumably the candle period -- TODO confirm.
    rows_to_shift = int(-1*(interval/5))

    # Work on a copy so the helper columns and 'target' never leak into the
    # caller's frame.
    df = df.copy()

    df['arbitrage_opportunity_shift'] = df['arbitrage_opportunity'].shift(rows_to_shift)
    df['window_length_shift'] = df['window_length'].shift(rows_to_shift)

    # Forward `interval` explicitly; otherwise the row function silently
    # uses its own default threshold regardless of what was requested here.
    df['target'] = df.apply(get_target_value, axis=1, args=(interval,))

    return df.drop(columns=['window_length_shift', 'arbitrage_opportunity_shift'])
# Read the exchange-pair data set (first CSV column becomes the index) and
# attach the 'target' column in one step.
df = get_target(
    pd.read_csv('arbitrage_data/bitfinex_coinbase_pro_etc_usd.csv', index_col=0)
)

In [48]:
# Unix timestamps in seconds for 2018-10-01 and 2019-01-01, 00:00:00 UTC.
# Assumes 'closing_time' is stored in the same unit -- TODO confirm.
start_of_oct_2018 = 1538352000
start_of_jan_2019 = 1546300800

target = ['target']

# Rows strictly before October 2018 form the training set and rows strictly
# after the start of January 2019 form the test set; rows in between belong
# to neither split.
train = df.loc[df['closing_time'] < start_of_oct_2018]
test = df.loc[df['closing_time'] > start_of_jan_2019]

# `target` is a list, so the y_* objects are one-column DataFrames.
X_train, y_train = train.drop(columns=target), train[target]
X_test, y_test = test.drop(columns=target), test[target]

# Give every split a fresh 0..n-1 index.
X_train, y_train, X_test, y_test = (
    part.reset_index(drop=True)
    for part in (X_train, y_train, X_test, y_test)
)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((15327, 87), (15327, 1), (86989, 87), (86989, 1))

In [49]:
# 'arbitrage_opportunity' holds the current row's string label and is not
# used as a feature. errors='ignore' keeps this cell idempotent: re-running
# it after the column is already gone no longer raises KeyError.
X_train = X_train.drop(columns=['arbitrage_opportunity'], errors='ignore')
X_test = X_test.drop(columns=['arbitrage_opportunity'], errors='ignore')

In [50]:
#MODEL

# from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# model = XGBClassifier()
# random_state pins the forest's internal sampling so that a fresh
# "Restart & Run All" reproduces the same predictions and accuracies.
model = RandomForestClassifier(max_depth=50, n_estimators=100, n_jobs=-1, random_state=42)

# y_train is a one-column DataFrame; flatten it to a 1-D array, which is the
# shape the classifier expects (a column vector triggers DataConversionWarning).
model.fit(X_train, y_train.values.ravel())

pred_train = model.predict(X_train)
pred_test = model.predict(X_test)

train_accuracy = accuracy_score(y_train, pred_train)
test_accuracy = accuracy_score(y_test, pred_test)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

  if __name__ == '__main__':


Train Accuracy: 1.0
Test Accuracy: 0.5585188931933923


In [51]:
def binary_pred_exchange_1(df):
    """Flag a row whose prediction is an exchange-1-to-exchange-2 trade.

    `df` is a single row (Series or mapping) with a 'pred_test' entry.
    Returns 1 when that entry equals 'arbitrage_exchange_1_to_exchange_2',
    otherwise 0.
    """
    is_trade = df['pred_test'] == 'arbitrage_exchange_1_to_exchange_2'
    return 1 if is_trade else 0

def binary_pred_exchange_2(df):
    """Flag a row whose prediction is an exchange-2-to-exchange-1 trade.

    `df` is a single row (Series or mapping) with a 'pred_test' entry.
    Returns 1 when that entry equals 'arbitrage_exchange_2_to_exchange_1',
    otherwise 0.
    """
    is_trade = df['pred_test'] == 'arbitrage_exchange_2_to_exchange_1'
    return 1 if is_trade else 0

In [17]:
def trade_pct(X_test, pred_test):
    """Evaluate predicted trades by their fractional price difference (no fees).

    Parameters
    ----------
    X_test : pandas.DataFrame
        Must contain 'close_exchange_1' and 'close_exchange_2'. The frame is
        copied, not modified, so it can still be fed to the model afterwards.
    pred_test : array-like
        One predicted label per row of `X_test`.

    Returns
    -------
    pandas.DataFrame
        Copy of `X_test` with 'pred_test', the two 0/1 trade flags and the two
        'pct_diff_interval_*' columns appended.

    Side effects
    ------------
    Prints the prediction value counts and the summed fractional return over
    all flagged rows.
    """
    # Uses the module-level `interval`; a negative shift looks ahead in time.
    rows_to_shift = int(-1*(interval/5))

    # Copy first: assigning columns onto the caller's feature matrix would
    # change its schema and break any later model.predict(X_test).
    df_pred = X_test.copy()
    df_pred['pred_test'] = pred_test

    # Vectorised 0/1 flags (same values as a row-wise apply of the
    # binary_pred_* helpers).
    df_pred['binary_pred_exchange_1'] = (
        df_pred['pred_test'] == 'arbitrage_exchange_1_to_exchange_2'
    ).astype('int64')
    df_pred['binary_pred_exchange_2'] = (
        df_pred['pred_test'] == 'arbitrage_exchange_2_to_exchange_1'
    ).astype('int64')

    # Fractional difference between the current close on one exchange and
    # the close on the other exchange `interval` later.
    df_pred['pct_diff_interval_exchange_1_to_2'] = (
        df_pred['close_exchange_2'].shift(rows_to_shift) - df_pred['close_exchange_1']
    ) / df_pred['close_exchange_1']
    df_pred['pct_diff_interval_exchange_2_to_1'] = (
        df_pred['close_exchange_1'].shift(rows_to_shift) - df_pred['close_exchange_2']
    ) / df_pred['close_exchange_2']

    print(df_pred['pred_test'].value_counts())

    # Rows at the end have no future close (NaN); .sum() skips them.
    total_return = (
        df_pred['binary_pred_exchange_1'] * df_pred['pct_diff_interval_exchange_1_to_2']
        + df_pred['binary_pred_exchange_2'] * df_pred['pct_diff_interval_exchange_2_to_1']
    ).sum()
    print(total_return)

    return df_pred

In [19]:
# df_pred = trade_pct(X_test, pred_test)

no_arbitrage    87000
Name: pred_test, dtype: int64
0.0


In [52]:
def trade_pct_with_fees(X_test, pred_test, fee_rate_exchange_1=0.275, fee_rate_exchange_2=0.275):
    """Evaluate predicted trades by fractional price difference, net of fees.

    Parameters
    ----------
    X_test : pandas.DataFrame
        Must contain 'close_exchange_1' and 'close_exchange_2'. The frame is
        copied, not modified, so it can still be fed to the model afterwards.
    pred_test : array-like
        One predicted label per row of `X_test`.
    fee_rate_exchange_1, fee_rate_exchange_2 : float
        Per-trade fee on each exchange, in percent (they are divided by 100
        below). Every predicted trade is charged both fees.

    Returns
    -------
    pandas.DataFrame
        Copy of `X_test` with 'pred_test', the two 0/1 trade flags and the two
        'pct_diff_interval_*' columns appended.

    Side effects
    ------------
    Prints the prediction value counts and the summed fractional return of
    all flagged rows minus the total fees.
    """
    # Uses the module-level `interval`; a negative shift looks ahead in time.
    rows_to_shift = int(-1*(interval/5))

    # Copy first: assigning columns onto the caller's feature matrix would
    # change its schema and break any later model.predict(X_test).
    df_pred = X_test.copy()
    df_pred['pred_test'] = pred_test

    is_1_to_2 = df_pred['pred_test'] == 'arbitrage_exchange_1_to_exchange_2'
    is_2_to_1 = df_pred['pred_test'] == 'arbitrage_exchange_2_to_exchange_1'

    # Vectorised 0/1 flags (same values as a row-wise apply of the
    # binary_pred_* helpers).
    df_pred['binary_pred_exchange_1'] = is_1_to_2.astype('int64')
    df_pred['binary_pred_exchange_2'] = is_2_to_1.astype('int64')

    # Fractional difference between the current close on one exchange and
    # the close on the other exchange `interval` later.
    df_pred['pct_diff_interval_exchange_1_to_2'] = (
        df_pred['close_exchange_2'].shift(rows_to_shift) - df_pred['close_exchange_1']
    ) / df_pred['close_exchange_1']
    df_pred['pct_diff_interval_exchange_2_to_1'] = (
        df_pred['close_exchange_1'].shift(rows_to_shift) - df_pred['close_exchange_2']
    ) / df_pred['close_exchange_2']

    # Each trade touches both exchanges, so it pays both fee rates. This is
    # the same total the original four-term sum produced.
    # NOTE(review): a trade predicted on a trailing row with no future close
    # contributes no return (NaN is skipped) but is still charged fees.
    n_trades = int(is_1_to_2.sum()) + int(is_2_to_1.sum())
    total_fees = n_trades * (fee_rate_exchange_1 + fee_rate_exchange_2) / 100

    print(df_pred['pred_test'].value_counts())

    gross_return = (
        df_pred['binary_pred_exchange_1'] * df_pred['pct_diff_interval_exchange_1_to_2']
        + df_pred['binary_pred_exchange_2'] * df_pred['pct_diff_interval_exchange_2_to_1']
    ).sum()
    print(gross_return - total_fees)

    return df_pred

In [53]:
# Score the test-set predictions net of exchange fees; the function prints
# the label counts and the net return.
df_pred_with_fees = trade_pct_with_fees(X_test=X_test, pred_test=pred_test)

no_arbitrage                          86982
arbitrage_exchange_2_to_exchange_1        7
Name: pred_test, dtype: int64
-0.06164280949255871
