In [1]:
import os

# the three exchanges we are using; each name has to appear in that
# exchange's csv filenames and match its '<exchange>_300' subdirectory of
# ohlcv_data, because later cells locate files by those strings
exchanges = ['bitfinex', 'coinbase_pro', 'hitbtc']

# function to get pairs of ohlcv csvs from which to create arbitrage data
def get_file_pairs(exchanges):
    """Find pairs of 5-minute ohlcv csvs that cover the same trading pair.

    Every subdirectory of 'ohlcv_data' (relative to the working directory)
    is scanned for files ending in '300.csv'. Two files are paired when
    removing an exchange name from the first leaves a string contained in
    the second, e.g. 'bitfinex_eos_usdt_300.csv' and
    'hitbtc_eos_usdt_300.csv'.

    Parameters
    ----------
    exchanges : list of str
        Exchange names as they appear in the csv filenames.

    Returns
    -------
    list of [str, str]
        Filename pairs, in the order the files were listed by the OS; the
        first filename of a pair is always listed before the second.
    """
    # collecting the filenames of all the 5-minute ohlcv csvs
    filenames = []
    for directory in os.listdir('ohlcv_data'):
        subdirectory = os.path.join('ohlcv_data', directory)
        # skipping anything that is not a directory (.DS_Store, stray
        # files), since calling os.listdir on it would raise
        if not os.path.isdir(subdirectory):
            continue
        for filename in os.listdir(subdirectory):
            if filename.endswith('300.csv'):
                filenames.append(filename)

    # comparing each filename only with the filenames that come after it;
    # enumerate gives the position directly, so there is no index() scan
    # and duplicated names cannot select the wrong slice
    file_pairs = []
    for position, filename_1 in enumerate(filenames):
        for filename_2 in filenames[position + 1:]:
            for exchange in exchanges:
                # the exchange has to actually be in the first filename,
                # otherwise replace() is a no-op and nothing was compared
                if (exchange in filename_1
                        and filename_1.replace(exchange, '') in filename_2):
                    file_pairs.append([filename_1, filename_2])
                    # one match is enough; never add the same pair twice
                    break
    return file_pairs

# displaying the pairs of ohlcv csvs from which arbitrage data will be
# created; the result is only shown here, not stored in a variable
get_file_pairs(exchanges)

[['bitfinex_eos_usdt_300.csv', 'hitbtc_eos_usdt_300.csv'],
 ['bitfinex_bch_btc_300.csv', 'coinbase_pro_bch_btc_300.csv'],
 ['bitfinex_bch_btc_300.csv', 'hitbtc_bch_btc_300.csv'],
 ['bitfinex_etc_usd_300.csv', 'coinbase_pro_etc_usd_300.csv'],
 ['bitfinex_btc_usd_300.csv', 'coinbase_pro_btc_usd_300.csv'],
 ['bitfinex_ltc_btc_300.csv', 'coinbase_pro_ltc_btc_300.csv'],
 ['bitfinex_ltc_btc_300.csv', 'hitbtc_ltc_btc_300.csv'],
 ['bitfinex_dash_usd_300.csv', 'coinbase_pro_dash_usd_300.csv'],
 ['bitfinex_dash_btc_300.csv', 'coinbase_pro_dash_btc_300.csv'],
 ['bitfinex_dash_btc_300.csv', 'hitbtc_dash_btc_300.csv'],
 ['bitfinex_ltc_usd_300.csv', 'coinbase_pro_ltc_usd_300.csv'],
 ['bitfinex_bch_usdt_300.csv', 'hitbtc_bch_usdt_300.csv'],
 ['bitfinex_bch_usd_300.csv', 'coinbase_pro_bch_usd_300.csv'],
 ['bitfinex_eos_usd_300.csv', 'coinbase_pro_eos_usd_300.csv'],
 ['bitfinex_xrp_usd_300.csv', 'coinbase_pro_xrp_usd_300.csv'],
 ['bitfinex_eth_btc_300.csv', 'coinbase_pro_eth_btc_300.csv'],
 ['bitfinex_

In [2]:
import pandas as pd

# simple function to turn a csv into a dataframe
def get_df(filename):
    """Load an ohlcv csv into a dataframe.

    The first csv column is used as the index, since the csvs were saved
    with their index included.
    """
    return pd.read_csv(filename, index_col=0)

In [3]:
# this function resamples ohlcv csvs for a specified candle interval; while 
# this can be used to change the candle interval for the data, it can also be
# used to fill in gaps in the ohlcv data without changing the candle interval
def resample_ohlcv(df, period='5min'):
    """Resample ohlcv candles onto a regular grid of `period`-long bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a datetime 'date' column plus 'open', 'high', 'low',
        'close' and 'base_volume' columns.
    period : str, default '5min'
        pandas offset alias for the candle length. '5min' is the same
        interval as the older '5T' spelling, which newer pandas deprecates.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'date', one row per bin, containing only the five
        aggregated columns. Bins with no source rows are still present,
        which is what fills gaps in the data.
    """
    # set the date as the index; this is needed for resampling
    df = df.set_index(['date'])
    # how each column is combined when several rows fall in the same bin
    ohlc_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'base_volume': 'sum',
    }
    # resample(..., how=...) was deprecated and later removed from pandas;
    # aggregating on the resampler is the supported equivalent
    return df.resample(period, closed='left', label='left').agg(ohlc_dict)

In [4]:
from ta import add_all_ta_features

# function to handle nans in the data introduced by resampling
def fill_nan(df):
    """Fill the nans that resampling introduced for gap candles.

    The closing price is carried forward from the previous row (and is
    written back onto the dataframe passed in). Every nan that is left then
    takes the value of the nearest non-null column to its right in the same
    row, because the backfill runs across columns rather than down rows.
    """
    carried_close = df['close'].ffill()
    df['close'] = carried_close
    return df.bfill(axis='columns')

# function to engineer features that can be engineered pre-merge...
def engineer_features(df):
    """Engineer the single-exchange features that can be built pre-merge.

    Resamples the candles to fill gaps, flags the gap-filling rows in a
    'nan_ohlcv' column, fills their nans, adds the technical analysis
    features and drops the columns listed as highly correlated with others.
    Adds a 'date' column to the dataframe passed in as a side effect.
    """
    # resampling needs datetimes, and closing_time is in Unix seconds
    df['date'] = pd.to_datetime(df['closing_time'], unit='s')
    candles = resample_ohlcv(df).reset_index()

    # back to Unix time after resampling; dividing by 1e9 to get seconds,
    # which assumes the int64 view of the dates is in nanoseconds
    candles['date'] = candles['date'].astype('int64')//1e9
    candles = candles.rename(columns={'date': 'closing_time'})

    # flagging gap-filling rows before their nans are filled in
    candles['nan_ohlcv'] = candles['close'].isnull().astype('int64')
    candles = fill_nan(candles)

    # adding all the technical analysis features
    candles = add_all_ta_features(candles, 'open', 'high', 'low', 'close',
                                  'base_volume', fillna=True)

    # these two columns come out as floats at this point; changing back
    for int_column in ('closing_time', 'nan_ohlcv'):
        candles[int_column] = candles[int_column].astype('int64')

    # features that are highly correlated with other features
    redundant_columns = [
        'open', 'high', 'low', 'momentum_kama', 'momentum_stoch',
        'others_cr', 'others_dlr', 'trend_ema_fast', 'trend_ema_slow',
        'trend_ichimoku_a', 'trend_ichimoku_b', 'trend_kst', 'trend_macd',
        'trend_visual_ichimoku_a', 'trend_visual_ichimoku_b',
        'volatility_bbh', 'volatility_bbl', 'volatility_bbm',
        'volatility_dch', 'volatility_dcl', 'volatility_kcc',
        'volatility_kch', 'volatility_kcl',
    ]
    return candles.drop(columns=redundant_columns)

In [5]:
# the following functions are used in engineering features post-merge...

# function to create column showing which exchange has a higher closing price
def get_higher_closing_price(df):
    """Name the exchange with the higher closing price for one row.

    Returns 'exchange_1', 'exchange_2', or 'equivalent' when neither
    closing price is higher than the other.
    """
    spread = df['close_exchange_1'] - df['close_exchange_2']
    if spread > 0:
        return 'exchange_1'
    if spread < 0:
        return 'exchange_2'
    return 'equivalent'

# function to create column showing percentage by which higher price is higher
def get_pct_higher(df):
    """Percentage by which the higher closing price exceeds the lower one.

    Reads the row's 'higher_closing_price' label to decide which closing
    price goes in the numerator; returns 0 when the prices are equivalent.
    """
    leader = df['higher_closing_price']
    if leader == 'exchange_1':
        higher, lower = 'close_exchange_1', 'close_exchange_2'
    elif leader == 'exchange_2':
        higher, lower = 'close_exchange_2', 'close_exchange_1'
    else:
        # the closing prices are equivalent
        return 0
    return ((df[higher] / df[lower])-1)*100

# function to create column showing available arbitrage opportunities
def get_arbitrage_opportunity(df, fee_pct=.55):
    """Classify one row as an arbitrage opportunity or not.

    Parameters
    ----------
    df : row-like
        Needs 'pct_higher' and 'higher_closing_price'.
    fee_pct : float, default .55
        Total fees, in percent. The price gap has to be at least this large
        for the row to count as an opportunity.

    Returns
    -------
    int
        0 for no arbitrage, -1 for arbitrage from exchange 2 to exchange 1,
        1 for arbitrage from exchange 1 to exchange 2.
    """
    # the gap does not cover the fees, so there is no arbitrage
    if df['pct_higher'] < fee_pct:
        return 0
    # exchange 1 is higher: buy on exchange 2, sell on exchange 1
    if df['higher_closing_price'] == 'exchange_1':
        return -1
    # exchange 2 is higher: buy on exchange 1, sell on exchange 2
    if df['higher_closing_price'] == 'exchange_2':
        return 1
    # any other label used to fall off the end and return None, which
    # would put a non-numeric value in the column; no direction is known,
    # so report no arbitrage instead
    return 0
    
# function to create column showing how long arbitrage opportunity has lasted
def get_window_length(df, candle_minutes=5):
    """Add a 'window_length' column: minutes the current state has lasted.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs an 'arbitrage_opportunity' column. The new column is added to
        this dataframe in place.
    candle_minutes : int, default 5
        Length of one candle in minutes; a window grows by this much for
        every consecutive row with the same 'arbitrage_opportunity' value.

    Returns
    -------
    pandas.DataFrame
        The same dataframe, with 'window_length' added.
    """
    states = df['arbitrage_opportunity'].tolist()
    window_lengths = []
    window_length = candle_minutes
    for position, state in enumerate(states):
        # the first row has no previous row; comparing it with index -1
        # would wrap around to the LAST row and could start the first
        # window at two candles instead of one
        if position > 0 and state == states[position - 1]:
            # same state as the previous row, so the window keeps growing
            window_length += candle_minutes
        else:
            # the state changed (or this is the first row): new window
            window_length = candle_minutes
        window_lengths.append(window_length)
    # showing how long each arbitrage / no-arbitrage window has lasted
    df['window_length'] = window_lengths
    return df
        
# function to merge dataframes and create final features for arbitrage data
def merge_dfs(df1, df2):
    """Merge two engineered ohlcv dataframes into one arbitrage dataframe.

    The frames are joined on 'closing_time'; columns they share get
    '_exchange_1' / '_exchange_2' suffixes. Year, month and day columns are
    added, followed by the 'arbitrage_opportunity' and 'window_length'
    features. The helper columns used to derive those two are dropped again
    before returning.
    """
    merged = pd.merge(df1, df2, on='closing_time',
                      suffixes=('_exchange_1', '_exchange_2'))

    # calendar features, all taken from the same parsed closing times
    closing_dates = pd.to_datetime(merged['closing_time'], unit='s')
    for part in ('year', 'month', 'day'):
        merged[part] = getattr(closing_dates.dt, part)

    # each step reads the column created by the step before it
    merged['higher_closing_price'] = merged.apply(
        get_higher_closing_price, axis=1)
    merged['pct_higher'] = merged.apply(get_pct_higher, axis=1)
    merged['arbitrage_opportunity'] = merged.apply(
        get_arbitrage_opportunity, axis=1)
    merged = get_window_length(merged)

    # only needed to engineer arbitrage_opportunity and window_length
    helper_columns = ['higher_closing_price', 'pct_higher']
    return merged.drop(columns=helper_columns)

In [6]:
# creating target column...

# specifying arbitrage window length to target, in minutes; note that the
# functions below capture this value as their default when they are defined,
# so changing it later requires re-running their definitions
interval=30

# function to get target values; takes df and window length to target
def get_target_value(df, interval=interval):
    """Target for one row, from its shifted (i.e. upcoming) columns.

    Parameters
    ----------
    df : row-like
        Needs 'window_length_shift' and 'arbitrage_opportunity_shift'.
    interval : int
        Minimum window length, in minutes, for a window to count.

    Returns
    -------
    int
        1 for arbitrage from exchange 1 to 2, -1 for arbitrage from
        exchange 2 to 1, and 0 when the coming window is too short or holds
        no arbitrage opportunity.
    """
    # i.e., if the coming arbitrage window is as long as the targeted interval
    if df['window_length_shift'] >= interval:
        upcoming = df['arbitrage_opportunity_shift']
        if upcoming == 1:
            return 1
        if upcoming == -1:
            return -1
    # too short a window, no opportunity, or an unrecognised value; the last
    # case used to fall through and return None
    return 0

# function to create target column
def get_target(df, interval=interval):
    """Add a 'target' column saying which arbitrage window is forthcoming.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'arbitrage_opportunity' and 'window_length' columns. Columns
        are added to this dataframe in place before the trimmed result is
        returned.
    interval : int
        Targeted window length in minutes; assumes five-minute candles.

    Returns
    -------
    pandas.DataFrame
        The dataframe with 'target' added and without the trailing rows
        whose target could not be calculated because of the shift.
    """
    # used to shift rows; assumes candle length is five minutes, interval is
    # in minutes
    rows_to_shift = int(-1*(interval/5))
    # arbitrage_opportunity feature, shifted by length of targeted interval
    df['arbitrage_opportunity_shift'] = df['arbitrage_opportunity'].shift(
        rows_to_shift)
    # window_length feature, shifted by length of targeted interval
    df['window_length_shift'] = df['window_length'].shift(rows_to_shift)
    # forwarding interval is essential: without it get_target_value silently
    # compares against its own default, whatever was passed to this function
    df['target'] = df.apply(get_target_value, axis=1, interval=interval)
    # dropping unnecessary columns, which were only needed to engineer target
    df = df.drop(columns=['window_length_shift',
                          'arbitrage_opportunity_shift'])
    # dropping rows where target could not be calculated due to shift; with
    # a zero shift nothing was lost, and slicing [:0] would drop every row
    if rows_to_shift:
        df = df.iloc[:rows_to_shift]
    return df

In [7]:
# defining functions needed to calculate profit...
    
# function to create new features out of closing prices, shifting those
# prices by the targeted interval
def get_close_shift(df, interval=interval):
    """Add '<column>_shift' copies of both closing-price columns.

    Each copy holds the closing price from `interval` minutes later,
    assuming five-minute candles. The columns are added to the dataframe
    passed in, which is also returned.
    """
    offset = int(-1*(interval/5))
    for column in ('close_exchange_1', 'close_exchange_2'):
        df[column + '_shift'] = df[column].shift(offset)
    return df

# function to create profit feature
def get_profit(df):
    """Percent profit of trading towards the currently higher-priced exchange.

    Buys at the current closing price on the lower-priced exchange, sells
    at the shifted closing price on the higher-priced one, and subtracts
    0.55% for fees. Returns 0 when the closing prices are the same, since
    no trade should be made in that case.
    """
    leader = df['higher_closing_price']
    if leader == 'exchange_1':
        # buy on exchange 2, sell on exchange 1
        sell_column, buy_column = 'close_exchange_1_shift', 'close_exchange_2'
    elif leader == 'exchange_2':
        # buy on exchange 1, sell on exchange 2
        sell_column, buy_column = 'close_exchange_2_shift', 'close_exchange_1'
    else:
        return 0
    return (((df[sell_column] / df[buy_column])-1)*100)-.55

In [8]:
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# now the master function that creates models from ohlcv csvs...
def create_all_arbitrage_dfs_and_models(exchanges):
    """Build arbitrage data and train one model per pair of ohlcv csvs.

    For every pair returned by get_file_pairs, this loads and engineers
    both csvs, merges them, adds the target, splits into train and test
    sets with a two week gap between them, fits a random forest, pickles it
    under 'pickles/', and prints accuracy, a confusion matrix and the mean
    and median percent profit on rows where arbitrage was predicted.

    Parameters
    ----------
    exchanges : list of str
        Exchange names as they appear in the csv filenames and in the
        '<exchange>_300' subdirectories of 'ohlcv_data'.

    Returns
    -------
    None
        Results are printed and models are written to disk.
    """
    # one week in seconds; applied on both sides of the split row so that
    # there is a two week gap between train and test data
    one_week = 604800

    for pair in get_file_pairs(exchanges):
        # resetting for every pair so that a filename without a recognised
        # exchange cannot silently reuse the previous pair's exchange
        exchange_1 = exchange_2 = None
        for exchange in exchanges:
            if exchange in pair[0]:
                exchange_1 = exchange
            if exchange in pair[1]:
                exchange_2 = exchange
        if exchange_1 is None or exchange_2 is None:
            print('skipping pair with unrecognised exchange:', pair)
            continue

        # loading and engineering features for first ohlcv csv in pair...
        df1 = get_df('ohlcv_data/' + exchange_1 + '_300/' + pair[0])
        print('engineering df1...')
        df1 = engineer_features(df1)
        print('success!')

        # loading and engineering features for second ohlcv csv in pair...
        df2 = get_df('ohlcv_data/' + exchange_2 + '_300/' + pair[1])
        print('engineering df2...')
        df2 = engineer_features(df2)
        print('success!')

        # merging two ohlcv dataframes with their engineered features
        print('merging df1 and df2...')
        df = merge_dfs(df1, df2)
        print('success!')

        # assembling the filename for the eventual model; note that
        # model_name already ends in '.pkl'
        model_name = exchange_1 + '_' + pair[1].replace('_300.csv', '.pkl')
        # printing the model name to track progress...
        print(model_name.replace('.pkl', '').upper())

        # getting the target feature
        df = get_target(df)

        # where to split df for 70/30 train/test split...
        split_row = round(len(df)*.7)
        # an empty or one-row dataframe has no row at that position
        if split_row >= len(df):
            print('not enough data!')
            continue
        # positional lookup, so this does not depend on the index labels
        split_time = df['closing_time'].iloc[split_row]

        # training set ends one week before the 7/10th row in dataframe
        train = df[df['closing_time'] < split_time - one_week]
        # test set begins one week after the 7/10th row in dataframe
        test = df[df['closing_time'] > split_time + one_week]
        # printing shapes to track progress
        print('train and test shape:', train.shape, test.shape)

        # model uses all features; only dropping target
        features = df.drop(columns=['target']).columns.tolist()
        target = 'target'

        # separating features from target
        X_train = train[features]
        X_test = test[features]
        y_train = train[target]
        y_test = test[target]

        # i.e., if there are too few rows to train on, or none to test on
        # (predicting on an empty test set would raise)
        if X_train.shape[0] <= 1000 or X_test.shape[0] == 0:
            print('not enough data!')
            continue

        # defining and fitting the model...
        model = RandomForestClassifier(max_depth=75, n_estimators=100,
                                       n_jobs=-1, random_state=42)
        model.fit(X_train, y_train)
        print('model fitted!')
        # getting accuracy score for train set...
        train_score = model.score(X_train, y_train)
        print('train accuracy:', train_score)
        # making predictions...
        y_preds = model.predict(X_test)
        print('predictions made!')
        # getting accuracy score for test set...
        score = accuracy_score(y_test, y_preds)
        print('test accuracy:', score)

        # saving the model; the with-block closes the file even on error
        os.makedirs('pickles', exist_ok=True)
        # NOTE(review): because model_name already ends in '.pkl' the file
        # is written as '<name>.pkl.pkl'; kept unchanged in case something
        # loads the models by that name -- confirm before renaming
        pickle_path = 'pickles/{model}.pkl'.format(model=model_name)
        with open(pickle_path, 'wb') as pickle_file:
            pickle.dump(model, pickle_file)
        print('pickle saved!' + '\n')

        # getting labels for confusion matrix...
        labels = sorted(set(y_test.unique().tolist()) | set(y_preds))
        columns = [f'Predicted {label}' for label in labels]
        index = [f'Actual {label}' for label in labels]
        # creating and printing confusion matrix; labels are passed
        # explicitly so rows and columns are sure to match the headers
        confusion = pd.DataFrame(
            confusion_matrix(y_test, y_preds, labels=labels),
            columns=columns, index=index)
        print(model_name + ' confusion matrix:')
        print(confusion, '\n')

        # working on a copy, since X_test is a slice of df and adding
        # columns to a slice triggers pandas' chained-assignment warnings
        test_with_preds = X_test.copy()
        # adding column with higher closing price...
        test_with_preds['higher_closing_price'] = test_with_preds.apply(
            get_higher_closing_price, axis=1)
        # adding columns with shifted closing prices...
        test_with_preds = get_close_shift(test_with_preds)
        # adding column with predictions
        test_with_preds['pred'] = y_preds
        # adding column with profitability of predictions
        # NOTE(review): get_profit trades towards the currently higher
        # exchange rather than the predicted direction, and the result is
        # shifted by one more row here -- confirm both are intended
        test_with_preds['pct_profit'] = test_with_preds.apply(
            get_profit, axis=1).shift(-1)
        # filtering out rows where no arbitrage is predicted
        test_with_preds = test_with_preds[test_with_preds['pred'] != 0]
        # calculating mean and median profit where arbitrage predicted...
        pct_profit_mean = test_with_preds['pct_profit'].mean()
        pct_profit_median = test_with_preds['pct_profit'].median()
        print('percent profit mean:', pct_profit_mean)
        print('percent profit median:', pct_profit_median, '\n\n')

# creating all the arbitrage dfs and models from the ohlcv data; this trains
# one model per file pair, prints its metrics, and pickles it under pickles/
create_all_arbitrage_dfs_and_models(exchanges)

engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_EOS_USDT
train and test shape: (39566, 91) (15804, 91)
model fitted!
train accuracy: 0.99994945154931
predictions made!
test accuracy: 0.9500759301442673
pickle saved!

bitfinex_hitbtc_eos_usdt.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0          375            0
Actual 0              0        14986           21
Actual 1              0          393           29 

percent profit mean: 0.7177506399229494
percent profit median: 0.7650961163791663 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_BCH_BTC
train and test shape: (69788, 91) (28756, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9972179719015162
pickle saved!

bitfinex_coinbase_pro_bch_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0            7            0
Actual 0              0        28661            0
Actual 1              0           73           15 

percent profit mean: 13.172949014477654
percent profit median: 13.366326608443618 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_BCH_BTC
train and test shape: (69788, 91) (28756, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9983655584921408
pickle saved!

bitfinex_hitbtc_bch_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0            4            0
Actual 0              0        28709            0
Actual 1              0           43            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_ETC_USD
train and test shape: (89288, 91) (37113, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9682591005847008
pickle saved!

bitfinex_coinbase_pro_etc_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             4          177            0
Actual 0            254        35931            0
Actual 1              0          747            0 

percent profit mean: -0.14993537053564548
percent profit median: -0.15339088735787887 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_BTC_USD
train and test shape: (314049, 91) (133439, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.5456425782567316
pickle saved!

bitfinex_coinbase_pro_btc_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0        60000            0
Actual 0              0        72810            0
Actual 1              0          629            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_LTC_BTC
train and test shape: (234763, 91) (99460, 91)
model fitted!
train accuracy: 0.999991480769968
predictions made!
test accuracy: 0.9987633219384677
pickle saved!

bitfinex_coinbase_pro_ltc_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           56            0
Actual 0              0        99337            0
Actual 1              0           67            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_LTC_BTC
train and test shape: (354974, 91) (150979, 91)
model fitted!
train accuracy: 0.9999859144613409
predictions made!
test accuracy: 0.9993045390418535
pickle saved!

bitfinex_hitbtc_ltc_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           53            0
Actual 0              0       150874            0
Actual 1              0           52            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_DASH_USD
train and test shape: (7654, 91) (2128, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9981203007518797
pickle saved!

bitfinex_coinbase_pro_dash_usd.pkl confusion matrix:
           Predicted -1  Predicted 0
Actual -1             0            4
Actual 0              0         2124 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_DASH_BTC
train and test shape: (7651, 91) (2126, 91)
model fitted!
train accuracy: 0.9998692981309633
predictions made!
test accuracy: 0.9924741298212606
pickle saved!

bitfinex_coinbase_pro_dash_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0            2            0
Actual 0              0         2110            0
Actual 1              0           14            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_DASH_BTC
train and test shape: (194718, 91) (82298, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9971688254878612
pickle saved!

bitfinex_hitbtc_dash_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           96            0
Actual 0              0        82065            0
Actual 1              0          137            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...


  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_LTC_USD
train and test shape: (234763, 91) (99460, 91)
model fitted!
train accuracy: 0.999991480769968
predictions made!
test accuracy: 0.8667303438568269
pickle saved!

bitfinex_coinbase_pro_ltc_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1         43033         1701            0
Actual 0          11149        43172            0
Actual 1              4          401            0 

percent profit mean: 1.6875276407295765
percent profit median: 1.7078655151141324 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_BCH_USDT
train and test shape: (39565, 91) (15803, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.7171423147503638
pickle saved!

bitfinex_hitbtc_bch_usdt.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1           516         1316            0
Actual 0            265        10066          247
Actual 1              0         2642          751 

percent profit mean: 0.869184202297031
percent profit median: 0.6598744402257555 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...


  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_BCH_USD
train and test shape: (69801, 91) (28762, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9839371392809958
pickle saved!

bitfinex_coinbase_pro_bch_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1            16           54            0
Actual 0            372        28247            0
Actual 1              0           36           37 

percent profit mean: 1.064153360822368
percent profit median: -0.009200765733424321 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_EOS_USD
train and test shape: (40120, 91) (16041, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9977557508883486
pickle saved!

bitfinex_coinbase_pro_eos_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           14            0
Actual 0              0        16005            0
Actual 1              0           22            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_XRP_USD
train and test shape: (48580, 91) (19667, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9991864544668735
pickle saved!

bitfinex_coinbase_pro_xrp_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           11            0
Actual 0              0        19651            0
Actual 1              0            5            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_ETH_BTC
train and test shape: (253140, 91) (107335, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9989565379419574
pickle saved!

bitfinex_coinbase_pro_eth_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           19            0
Actual 0              0       107223            0
Actual 1              0           93            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_ETH_BTC
train and test shape: (267121, 91) (113328, 91)
model fitted!
train accuracy: 0.9999850255127826
predictions made!
test accuracy: 0.9991264294790343
pickle saved!

bitfinex_hitbtc_eth_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           20            0
Actual 0              0       113229            0
Actual 1              0           79            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_ETH_USDT
train and test shape: (46017, 91) (18569, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9988152296838817
pickle saved!

bitfinex_hitbtc_eth_usdt.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0            8            0
Actual 0              0        18547            0
Actual 1              0           14            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_ETH_USD
train and test shape: (253144, 91) (107337, 91)
model fitted!
train accuracy: 0.9999920993584679
predictions made!
test accuracy: 0.8047737499650633
pickle saved!

bitfinex_coinbase_pro_eth_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1         40885        11801            0
Actual 0           8705        45497            0
Actual 1              1          448            0 

percent profit mean: 1.7138864613767912
percent profit median: 1.6425733444950856 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_LTC_USDT
train and test shape: (39566, 91) (15804, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.8776259174892432
pickle saved!

bitfinex_hitbtc_ltc_usdt.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1            91          849            0
Actual 0            120        13681           63
Actual 1              0          902           98 

percent profit mean: 0.3715719524679307
percent profit median: 0.29333334384452114 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_ZRX_USD
train and test shape: (76224, 91) (31514, 91)
model fitted!
train accuracy: 0.9999868807724601
predictions made!
test accuracy: 0.9655073935393793
pickle saved!

bitfinex_coinbase_pro_zrx_usd.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             4          483            0
Actual 0            409        30423            0
Actual 1              5          190            0 

percent profit mean: -0.1881710421029622
percent profit median: -0.28025985792264296 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_XRP_BTC
train and test shape: (48578, 91) (19666, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9998474524560155
pickle saved!

bitfinex_coinbase_pro_xrp_btc.pkl confusion matrix:
          Predicted 0  Predicted 1
Actual 0        19663            0
Actual 1            3            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...


  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
merging df1 and df2...
success!
BITFINEX_HITBTC_XRP_BTC
train and test shape: (171603, 91) (72391, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9988396347612273
pickle saved!

bitfinex_hitbtc_xrp_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           17            0
Actual 0              0        72307            0
Actual 1              0           67            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_COINBASE_PRO_EOS_BTC
train and test shape: (40109, 91) (16037, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9985034607470226
pickle saved!

bitfinex_coinbase_pro_eos_btc.pkl confusion matrix:
          Predicted 0  Predicted 1
Actual 0        16013            0
Actual 1           24            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_EOS_BTC
train and test shape: (170547, 91) (71938, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9991520475965414
pickle saved!

bitfinex_hitbtc_eos_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           27            0
Actual 0              0        71877            0
Actual 1              0           34            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
BITFINEX_HITBTC_BTC_USDT
train and test shape: (46017, 91) (18568, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 1.0
pickle saved!

bitfinex_hitbtc_btc_usdt.pkl confusion matrix:
          Predicted 0
Actual 0        18568 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_DASH_BTC
train and test shape: (7654, 91) (2127, 91)
model fitted!
train accuracy: 0.9998693493598119
predictions made!
test accuracy: 0.999529854254819
pickle saved!

coinbase_pro_hitbtc_dash_btc.pkl confusion matrix:
           Predicted -1  Predicted 0
Actual -1             0            1
Actual 0              0         2126 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_ETH_BTC
train and test shape: (253140, 91) (107335, 91)
model fitted!
train accuracy: 0.9999960496168129
predictions made!
test accuracy: 1.0
pickle saved!

coinbase_pro_hitbtc_eth_btc.pkl confusion matrix:
          Predicted 0
Actual 0       107335 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...


  dip[i+n] = 100 * (dip_mio[i]/trs[i])
  din[i+n] = 100 * (din_mio[i]/float(trs[i]))


success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_XRP_BTC
train and test shape: (48579, 91) (19667, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9998983068083592
pickle saved!

coinbase_pro_hitbtc_xrp_btc.pkl confusion matrix:
           Predicted -1  Predicted 0
Actual -1             0            2
Actual 0              0        19665 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_EOS_BTC
train and test shape: (40109, 91) (16037, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.998752883955852
pickle saved!

coinbase_pro_hitbtc_eos_btc.pkl confusion matrix:
           Predicted -1  Predicted 0
Actual -1             0           20
Actual 0              0        16017 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_ETH_USDC
train and test shape: (61056, 91) (25014, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.6866554729351563
pickle saved!

coinbase_pro_hitbtc_eth_usdc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1          2695         2947            3
Actual 0            717        13368          382
Actual 1              1         3788         1113 

percent profit mean: 0.9274568055039196
percent profit median: 0.6741239661085368 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_BCH_BTC
train and test shape: (70505, 91) (29063, 91)
model fitted!
train accuracy: 1.0
predictions made!
test accuracy: 0.9991742077555655
pickle saved!

coinbase_pro_hitbtc_bch_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1            62            8            0
Actual 0              1        28977            0
Actual 1              0           15            0 

percent profit mean: 10.654035012799053
percent profit median: 12.301711026615974 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_LTC_BTC
train and test shape: (234764, 91) (99460, 91)
model fitted!
train accuracy: 0.9999957404031282
predictions made!
test accuracy: 0.999678262618138
pickle saved!

coinbase_pro_hitbtc_ltc_btc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0           29            0
Actual 0              0        99428            0
Actual 1              0            3            0 

percent profit mean: nan
percent profit median: nan 


engineering df1...


the new syntax is .resample(...)..apply(<func>)
  app.launch_new_instance()
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


success!
engineering df2...
success!
merging df1 and df2...
success!
COINBASE_PRO_HITBTC_BTC_USDC
train and test shape: (61080, 91) (25024, 91)
model fitted!
train accuracy: 0.9999672560576294
predictions made!
test accuracy: 0.9836157289002557
pickle saved!

coinbase_pro_hitbtc_btc_usdc.pkl confusion matrix:
           Predicted -1  Predicted 0  Predicted 1
Actual -1             0          226            0
Actual 0              0        24614            0
Actual 1              0          184            0 

percent profit mean: nan
percent profit median: nan 


