In [1]:
# Import the required libraries (a requirements.txt file is included as well).
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import dask.dataframe as dd
import warnings
warnings.filterwarnings('ignore')
import gc


import xgboost as xgb
import joblib
from datetime import datetime, timedelta
from multiprocessing import Pool
import multiprocessing as mp

In [2]:
## Downloading required data: one daily BTCUSDT spot-trades zip per day,
## 2022-02-28 through 2022-03-03, fetched from data.binance.vision into the
## current working directory (IPython `!` shell escape; needs `wget` and network access).
!wget "https://data.binance.vision/data/spot/daily/trades/BTCUSDT/BTCUSDT-trades-2022-02-28.zip"
!wget "https://data.binance.vision/data/spot/daily/trades/BTCUSDT/BTCUSDT-trades-2022-03-01.zip"
!wget "https://data.binance.vision/data/spot/daily/trades/BTCUSDT/BTCUSDT-trades-2022-03-02.zip"
!wget "https://data.binance.vision/data/spot/daily/trades/BTCUSDT/BTCUSDT-trades-2022-03-03.zip"

In [2]:
## Reading data using dask
def read_data():
    """Load every ``./BTCUSDT-trades*.zip`` file into a 100 ms trade grid.

    Processing order:
      1. Read the header-less CSV archives lazily with dask, name columns
         1-4 ``tradedPrice``, ``tradedSize``, ``tradedNotional`` and
         ``tradedTime``, and discard columns 0, 5 and 6.
      2. Parse ``tradedTime`` as epoch milliseconds, then materialise the
         result as an in-memory pandas frame.
      3. Collapse rows sharing a timestamp: size and notional become the
         per-timestamp sums, the price comes from the last such row.
      4. Index by ``tradedTime`` and convert to a fixed 100 ms frequency,
         forward-filling from the most recent earlier row.

    Returns:
        pandas.DataFrame indexed by ``tradedTime`` which also carries the
        same timestamps as an ordinary ``tradedTime`` column.
    """
    column_names = {1: 'tradedPrice', 2: 'tradedSize',
                    3: 'tradedNotional', 4: 'tradedTime'}

    lazy_trades = dd.read_csv('./BTCUSDT-trades*.zip', header=None, compression='zip')
    lazy_trades = lazy_trades.rename(columns=column_names)
    lazy_trades['tradedTime'] = dd.to_datetime(lazy_trades['tradedTime'], unit='ms')
    trades = lazy_trades.drop([0, 5, 6], axis=1).compute()

    # Replace each row's quantity with the total for its timestamp *before*
    # de-duplicating, so the surviving row carries the aggregated values.
    for summed_column in ('tradedSize', 'tradedNotional'):
        trades[summed_column] = (
            trades.groupby(['tradedTime'])[summed_column].transform('sum')
        )

    grid = (
        trades.drop_duplicates('tradedTime', keep='last')
              .set_index('tradedTime')
              .asfreq('100ms', method='ffill')
    )
    # Downstream code filters on a column rather than on the index.
    grid['tradedTime'] = grid.index
    return grid

In [3]:
## Dividing data into train and test

def get_data(ddf, start):
    """Split the trade grid into a training window and the following test day.

    Windows are half-open and anchored at midnight of ``start``'s date:

    * train: ``[start, start + 2 days)``
    * test:  ``[start + 2 days, start + 3 days)``

    The previous implementation selected ``(start + 1 day, start + 3 days)``
    as the test window, which re-used the second training day for evaluation
    (train/test leakage), and its strict ``>`` lower bounds dropped any row
    stamped exactly at midnight. The windows are now disjoint and contiguous.

    Args:
        ddf: DataFrame with a datetime ``tradedTime`` column.
        start: datetime-like first day of the training window; any
            time-of-day component is ignored.

    Returns:
        ``(train, test, actual_test)`` where ``actual_test`` is a second,
        independent selection of the same rows as ``test``.
    """
    first_day = pd.Timestamp(start).normalize()
    train_end = first_day + timedelta(days=2)
    test_end = first_day + timedelta(days=3)

    stamps = ddf['tradedTime']
    train_mask = (stamps >= first_day) & (stamps < train_end)
    test_mask = (stamps >= train_end) & (stamps < test_end)

    train = ddf.loc[train_mask]
    test = ddf.loc[test_mask]
    actual_test = ddf.loc[test_mask]
    return train, test, actual_test
    
## Using XGBoost for forecasting. (Also tried ARIMA and Prophet, but XGBoost gave the best results.)
def train_model(train, test, start):
    """Fit (or load from disk) a next-row price regressor and forecast the test set.

    Features are ``tradedNotional`` and ``tradedSize`` of row *t*; the target
    is ``tradedPrice`` of row *t + 1*, so the last feature row and the first
    target row of each frame are dropped.

    The model is cached as ``./<start + 2 days as YYYY-MM-DD>.pkl``. If that
    file loads, it is used as-is; otherwise a new model is trained and saved.
    A just-loaded model is no longer written back to disk, and a cache file
    that exists but cannot be loaded is reported instead of silently ignored.

    Args:
        train: DataFrame with ``tradedNotional``, ``tradedSize``, ``tradedPrice``.
        test: DataFrame with the same columns.
        start: datetime-like window start; only used to derive the cache name.

    Returns:
        Array of predictions, one per row of ``test`` except the last.
    """
    feature_columns = ['tradedNotional', 'tradedSize']
    X_train = train[feature_columns][:len(train)-1]
    y_train = train['tradedPrice'][1:]
    X_test = test[feature_columns][:len(test)-1]
    y_test = test['tradedPrice'][1:]

    model_path = './' + str(start + timedelta(days = 2))[:10] + '.pkl'

    model = None
    try:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code -- only keep .pkl files from a trusted source here.
        model = joblib.load(model_path)
        print('Existing Model Found')
    except FileNotFoundError:
        # No cache yet: the normal first-run path.
        pass
    except Exception as exc:
        # Best effort: fall back to retraining, but say why the cache was skipped.
        print('Could not load cached model ' + model_path + ' (' + repr(exc) + '); retraining')

    if model is None:
        model = xgb.XGBRegressor(n_estimators=1000,n_jobs = 4)
        # NOTE(review): the test set doubles as the early-stopping validation
        # set, so the stopping point is tuned on the data being forecast.
        # NOTE(review): newer xgboost releases expect early_stopping_rounds on
        # the estimator constructor rather than on fit() -- confirm against the
        # pinned version in requirements.txt.
        model.fit(X_train, y_train,
                  eval_set=[(X_train, y_train), (X_test, y_test)],
                  early_stopping_rounds=50,
                  verbose=False)
        joblib.dump(model, model_path)

    return model.predict(X_test)

In [4]:
## Train (or load) one model per window and collect, for each window, the
## forecast plus the raw test-set series consumed by the trading cell.
ddf = read_data()
test_forecast, test_price, test_notional, test_size = [], [], [], []
actual_test = []  # initialised here but never appended to in this cell

for i in range(3):
    # Each pass moves the window start one day forward from 2022-02-27.
    start = pd.to_datetime('2022-02-27') + timedelta(days = i)
    train, test_temp, actual_test_temp = get_data(ddf,start)

    # Store the test window as plain numpy arrays, one entry per pass.
    for store, column in ((test_price, 'tradedPrice'),
                          (test_notional, 'tradedNotional'),
                          (test_size, 'tradedSize')):
        store.append(test_temp[column].values)

    test_forecast.append(train_model(train,test_temp,start))
    gc.collect()


Existing Model Found
Existing Model Found
Existing Model Found


In [20]:
## Market strategy in which I use forecasted BTC price to decide whether to buy or sell
def market_strategy(test_forecast, test_price, test_notional, test_size,
                    bid_size=1000, max_position=-5000, threshold=0.03):
    """Simulate a long-only, fixed-notional strategy driven by the price forecast.

    For every step ``i`` (one per forecast difference) exactly one action is
    taken, checked in this order:

    1. Last step with BTC held: sell the entire holding.
    2. Forced unwind: when BTC is held and
       ``current_size // sell_amount + i + 2 == n``, sell one clip (at most
       what is held) so the inventory shrinks ahead of the last step.
    3. Buy one clip when the forecast's percentage change at ``i`` exceeds
       ``threshold`` and spending ``bid_size`` keeps the cash balance at or
       above ``max_position``.
    4. Take profit: when more than one clip is held and the current price is
       at least ``threshold`` percent above any recorded buy price, sell one clip.

    A clip is ``bid_size / price`` BTC at the current price.

    Fixes over the previous version:
      * returns a drawdown of ``0.0`` instead of raising ``ValueError`` when
        no position was ever held (including empty input);
      * the forced-unwind branch no longer fires with nothing held, and never
        sells more than is held, so the holding cannot go negative;
      * the inner scan no longer reuses the outer loop variable.

    Args:
        test_forecast: 1-D sequence of forecast prices.
        test_price: 1-D sequence of traded prices, indexed like the forecast.
        test_notional: accepted for call compatibility; not read.
        test_size: accepted for call compatibility; not read.
        bid_size: cash value of one clip.
        max_position: lowest cash balance the strategy may reach.
        threshold: percentage used for both the buy signal and take-profit.

    Returns:
        ``(bitcoin_bought, bitcoin_sold, bitcoin_buying_price,
        bitcoin_selling_price, drawdown)``: four lists plus a scalar.
        ``drawdown`` is the minimum, over steps with BTC held, of the summed
        percentage gap ``(buy - price) / buy * 100`` across the buy prices
        whose index is at or beyond the number of sells made so far.
    """
    test_forecast = np.asarray(test_forecast)
    test_price = np.asarray(test_price)

    bitcoin_bought = []
    bitcoin_sold = []
    bitcoin_buying_price = []
    bitcoin_selling_price = []
    current_position = 0   # running cash balance (negative while invested)
    current_size = 0       # BTC currently held
    drawdowns = []

    diff_forecast = np.diff(test_forecast) * 100 / test_forecast[:-1]
    n = len(diff_forecast)

    def _record_sale(amount, price):
        # Book one sell and update cash / holdings.
        nonlocal current_position, current_size
        bitcoin_sold.append(amount)
        bitcoin_selling_price.append(price)
        current_position += amount * price
        current_size -= amount

    for step in range(n):
        bitcoin_price = test_price[step]
        sell_amount = bid_size / bitcoin_price

        if current_size > 0:
            open_buys = np.asarray(bitcoin_buying_price[len(bitcoin_selling_price):])
            drawdowns.append(np.sum((open_buys - bitcoin_price) * 100 / open_buys))

        if current_size > 0 and step == n - 1:
            _record_sale(current_size, bitcoin_price)
        elif current_size > 0 and (current_size // sell_amount + step + 2) == n:
            _record_sale(min(sell_amount, current_size), bitcoin_price)
        elif diff_forecast[step] > threshold and (current_position - bid_size >= max_position):
            bitcoin_bought.append(sell_amount)
            bitcoin_buying_price.append(bitcoin_price)
            current_position -= sell_amount * bitcoin_price
            current_size += sell_amount
        elif current_size > sell_amount:
            buy_prices = np.asarray(bitcoin_buying_price)
            gains = (bitcoin_price - buy_prices) * 100 / buy_prices
            if np.any(gains >= threshold):
                _record_sale(sell_amount, bitcoin_price)

    drawdown = min(drawdowns) if drawdowns else 0.0
    return bitcoin_bought, bitcoin_sold, bitcoin_buying_price, bitcoin_selling_price, drawdown

In [6]:
## Slicing data into chunks for multiprocessing
def slice_data(data, nprocs):
    """Cut ``data`` into ``nprocs`` consecutive slices of near-equal length.

    With ``q, r = divmod(len(data), nprocs)``, the first ``r`` slices hold
    ``q + 1`` items and the remaining ones hold ``q`` items, so slices may be
    empty when ``nprocs`` exceeds ``len(data)``.

    Args:
        data: any sliceable sequence (list, numpy array, ...).
        nprocs: number of slices to produce.

    Returns:
        List of ``nprocs`` slices of ``data``, in original order.
    """
    base_len, extra = divmod(len(data), nprocs)
    chunks = []
    begin = 0
    for index in range(nprocs):
        end = begin + base_len + (1 if index < extra else 0)
        chunks.append(data[begin:end])
        begin = end
    return chunks

In [49]:
## Main function which uses multiprocessing to call market strategy

if __name__ == '__main__':
    # Back-test driver: for each of the three forecast windows, split the
    # series into one chunk per CPU, run market_strategy on every chunk in a
    # process pool, merge the chunk results, and write two CSV reports.
    #
    # Fixes over the previous version:
    #   * results of ALL chunks are merged; before, the flattened result list
    #     was indexed [0..4], which kept only the first chunk's output;
    #   * profit is a scalar cash figure (sell proceeds minus buy cost);
    #     before, `profit += array * list` turned it into an array (empty when
    #     there were no surplus sells) and unequal buy/sell counts raised;
    #   * buy and sell columns are padded to a common length in either direction.
    columns = ['Date','buySize','buyPrice','sellSize','sellPrice']
    columns2 = ['Date','Number_of_Trades', 'Profit/(Loss)$', 'Gross_PNL(BasisPoints)', 'Drawdown']

    # Per-day summary series.
    drawdown = []
    gross_pnlb = []
    num_trades = []
    profitLoss = []
    date_array = []

    # Per-trade series (one row per buy/sell slot).
    date_index = []
    buySize = []
    buyPrice = []
    sellSize = []
    sellPrice = []

    nprocs = mp.cpu_count()  # number of parallel processes, per local hardware
    for day in range(3):
        start = str(pd.to_datetime('2022-03-01') + timedelta(days = day))[:10]

        day_series = (test_forecast[day], test_price[day], test_notional[day], test_size[day])
        chunked = [slice_data(np.array(values), nprocs) for values in day_series]
        with Pool(nprocs) as pool:
            jobs = [pool.apply_async(market_strategy, chunk_args) for chunk_args in zip(*chunked)]
            chunk_results = [job.get() for job in jobs]

        # Merge every chunk's (bought, sold, buy prices, sell prices, drawdown).
        bitcoin_bought, bitcoin_sold = [], []
        bitcoin_buying_price, bitcoin_selling_price = [], []
        max_drawdown = []
        for bought, sold, buy_prices, sell_prices, chunk_drawdown in chunk_results:
            bitcoin_bought.extend(bought)
            bitcoin_sold.extend(sold)
            bitcoin_buying_price.extend(buy_prices)
            bitcoin_selling_price.extend(sell_prices)
            max_drawdown.append(chunk_drawdown)

        number_of_trades = len(bitcoin_buying_price) + len(bitcoin_selling_price)

        # Cash accounting: what all sells brought in minus what all buys cost.
        proceeds = np.sum(np.array(bitcoin_selling_price, dtype=float) * np.array(bitcoin_sold, dtype=float))
        cost = np.sum(np.array(bitcoin_buying_price, dtype=float) * np.array(bitcoin_bought, dtype=float))
        profit = float(proceeds - cost)
        # NOTE(review): factor 2 presumably converts dollars to basis points of
        # a 5000 capital limit (10000 / 5000) -- confirm.
        gross_pnl = profit*2

        # Pad the shorter side with zeros so every trade column has one value per row.
        n_rows = max(len(bitcoin_bought), len(bitcoin_sold))
        buy_pad = [0] * (n_rows - len(bitcoin_bought))
        sell_pad = [0] * (n_rows - len(bitcoin_sold))

        date_index += [start] * n_rows
        date_array.append(start)

        buySize += list(bitcoin_bought) + buy_pad
        sellSize += list(bitcoin_sold) + sell_pad
        buyPrice += [float(price) for price in bitcoin_buying_price] + buy_pad
        sellPrice += [float(price) for price in bitcoin_selling_price] + sell_pad

        num_trades.append(number_of_trades)
        profitLoss.append(profit)
        gross_pnlb.append(gross_pnl)
        # NOTE(review): the divisor 50 is kept from the original; its meaning
        # is not evident from this file -- confirm.
        drawdown.append(np.min(max_drawdown)/50)

    trades_df = pd.DataFrame(
        {'Date': date_index, 'buySize': buySize, 'buyPrice': buyPrice,
         'sellSize': sellSize, 'sellPrice': sellPrice},
        columns=columns)
    results_df = pd.DataFrame(
        {'Date': date_array, 'Number_of_Trades': num_trades,
         'Profit/(Loss)$': profitLoss, 'Gross_PNL(BasisPoints)': gross_pnlb,
         'Drawdown': drawdown},
        columns=columns2)

    trades_df.to_csv('./tradesDetails.csv',index = False, header = True)
    results_df.to_csv('./results.csv',index = False, header = True)