# Algorithmic Trading Model Development for BTC/USDT Crypto Market

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Datasets

In [3]:
df_3m = pd.read_csv('datasets/btc_3m.csv')
df_5m = pd.read_csv('datasets/btc_5m.csv')
df_15m = pd.read_csv('datasets/btc_15m.csv')
df_30m = pd.read_csv('datasets/btc_30m.csv')
df_30m = pd.read_csv('datasets/btc_30m.csv')
df_1h = pd.read_csv('datasets/btc_1h.csv')
df_2h = pd.read_csv('datasets/btc_2h.csv')
df_4h = pd.read_csv('datasets/btc_4h.csv')
df_6h = pd.read_csv('datasets/btc_6h.csv')

Adding technical indicators (MCDA-Signal Line, RSI, Bollinger bands, ATR, moving averages)

In [4]:
def add_indicators(df):
    # SMA
    df['SMA_14'] = df['close'].rolling(window=14).mean()
    df['SMA_20'] = df['close'].rolling(window=20).mean()

    # RSI
    delta = df['close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()

    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # Moving Average Convergence Divergence (MACD)
    short_window = 12
    long_window = 26
    signal_window = 9

    exp_short = df['close'].ewm(span=short_window, adjust=False).mean()
    exp_long = df['close'].ewm(span=long_window, adjust=False).mean()

    df['MACD'] = exp_short - exp_long
    df['Signal_Line'] = df['MACD'].ewm(span=signal_window, adjust=False).mean()

    # Bollinger Bands
    window = 20
    df['Upper_Band'] = df['SMA_20'] + 2 * df['close'].rolling(window=window).std()
    df['Lower_Band'] = df['SMA_20'] - 2 * df['close'].rolling(window=window).std()

    # ATR
    # Calculate True Range (TR)
    df['High-Low'] = df['high'] - df['low']
    df['High-PrevClose'] = abs(df['high'] - df['close'].shift(1))
    df['Low-PrevClose'] = abs(df['low'] - df['close'].shift(1))

    df['TrueRange'] = df[['High-Low', 'High-PrevClose', 'Low-PrevClose']].max(axis=1)

    # Choose the ATR period (e.g., 14 days)
    atr_period = 14
    df['EMA'] = df['close'].ewm(span=15, adjust=False).mean()
    # Calculate ATR using rolling mean
    df['ATR'] = df['TrueRange'].rolling(window=atr_period).mean()

    short_window = 20  # Example: 20-minute moving average
    long_window = 50   # Example: 50-minute moving average
    df['short_ma'] = df['close'].rolling(window=short_window, min_periods=1).mean()

    # Calculate long-term moving average (long_ma)
    df['long_ma'] = df['close'].rolling(window=long_window, min_periods=1).mean()

    # Drop intermediate columns used for calculation
    df.drop(['High-Low', 'High-PrevClose', 'Low-PrevClose', 'TrueRange'], axis=1, inplace=True)

    # Removing null rows
    df = df.dropna()
    return df

In [8]:
df_3m = add_indicators(df_3m)
df_5m = add_indicators(df_5m)
df_15m = add_indicators(df_15m)
df_30m = add_indicators(df_30m)
df_30m = add_indicators(df_30m)
df_1h = add_indicators(df_1h)
df_2h = add_indicators(df_2h)
df_4h = add_indicators(df_4h)
df_6h = add_indicators(df_6h)

In [12]:
print(f"Shape of df_3m : {df_3m.shape}")
print(f"Shape of df_5m : {df_5m.shape}")
print(f"Shape of df_15m: {df_15m.shape}")
print(f"Shape of df_30m: {df_30m.shape}")
print(f"Shape of df_30m: {df_30m.shape}")
print(f"Shape of df_1h : {df_1h.shape}")
print(f"Shape of df_2h : {df_2h.shape}")
print(f"Shape of df_4h : {df_4h.shape}")
print(f"Shape of df_6h : {df_6h.shape}")

Shape of df_3m : (712973, 17)
Shape of df_5m : (427790, 17)
Shape of df_15m: (142591, 17)
Shape of df_30m: (71273, 17)
Shape of df_30m: (71273, 17)
Shape of df_1h : (35645, 17)
Shape of df_2h : (17823, 17)
Shape of df_4h : (8912, 17)
Shape of df_6h : (5940, 17)


Filtering dataset and adding previous n timesteps of data as separate columns for training the model

In [7]:
def preprocessor(df):
  start_date = '2020-01-01'
  end_date = '2020-05-30'

  # Filter the data
  df = df[(df['datetime'] >= start_date) & (df['datetime'] <= end_date)]

  num_timestamps = 10

  # Create new columns for each day
  for i in range(1, num_timestamps + 1):
      df[f'openday{i}']   = df['open'].shift(i)
      df[f'highday{i}']   = df['high'].shift(i)
      df[f'lowday{i}']    = df['low'].shift(i)
      df[f'closeday{i}']  = df['close'].shift(i)
      df[f'volumeday{i}'] = df['volume'].shift(i)
      df[f'rsiday{i}']    = df['rsi'].shift(i)
      df[f'EMAday{i}']    = df['EMA'].shift(i)

  # Drop rows with NaN values created by shifting
  df = df.dropna()

  # Drop the high low volume and rsi columns
  df = df.drop(['high', 'low', 'volume'], axis=1)

  return df


In [None]:
df_3m=preprocessor(df_3m)
df_5m=preprocessor(df_5m)
df_15m=preprocessor(df_15m)
df_30m=preprocessor(df_30m)
df_1h=preprocessor(df_1h)
df_2h=preprocessor(df_2h)
df_4h=preprocessor(df_4h)
df_6h=preprocessor(df_6h)

Unnamed: 0,datetime,open,close,rsi,EMA,openday1,highday1,lowday1,closeday1,volumeday1,...,volumeday9,rsiday9,EMAday9,openday10,highday10,lowday10,closeday10,volumeday10,rsiday10,EMAday10
348390,2020-01-01 00:30:00,7169.32,7163.19,57.210464,7164.231590,7164.06,7170.59,7163.23,7168.86,15.076585,...,24.499807,49.891899,7162.907841,7168.22,7169.00,7157.80,7158.62,46.343030,53.100821,7162.853247
348391,2020-01-01 00:33:00,7161.77,7160.15,56.600338,7163.721391,7169.32,7170.41,7160.02,7163.19,180.417072,...,28.880407,44.946843,7162.508111,7158.05,7165.77,7158.00,7163.29,24.499807,49.891899,7162.907841
348392,2020-01-01 00:36:00,7160.15,7162.02,50.430778,7163.508718,7161.77,7163.76,7160.00,7160.15,34.999319,...,30.160449,49.033559,7163.005847,7163.29,7166.29,7157.75,7159.71,28.880407,44.946843,7162.508111
348393,2020-01-01 00:39:00,7162.96,7161.21,44.268710,7163.221378,7160.15,7163.27,7160.15,7162.02,18.167497,...,39.023847,57.995166,7163.971366,7158.35,7167.00,7158.35,7166.49,30.160449,49.033559,7163.005847
348394,2020-01-01 00:42:00,7160.87,7159.63,50.947645,7162.772456,7162.96,7166.32,7160.00,7161.21,23.535791,...,22.962410,63.001098,7164.853696,7165.14,7174.04,7165.14,7170.73,39.023847,57.995166,7163.971366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420145,2020-05-29 23:45:00,9412.90,9408.93,37.512656,9423.945880,9420.44,9421.50,9402.31,9412.90,131.110735,...,95.364586,53.592847,9432.598238,9433.86,9438.06,9427.22,9429.40,110.028078,55.657429,9432.030843
420146,2020-05-29 23:48:00,9408.93,9425.27,46.153846,9424.111395,9412.90,9416.13,9406.00,9408.93,62.352291,...,65.842809,47.400468,9432.799708,9429.39,9440.00,9424.89,9436.57,95.364586,53.592847,9432.598238
420147,2020-05-29 23:51:00,9425.25,9428.03,51.194539,9424.601221,9408.93,9427.04,9408.31,9425.27,73.968007,...,90.280471,40.415519,9432.102245,9436.43,9442.33,9433.42,9434.21,65.842809,47.400468,9432.799708
420148,2020-05-29 23:54:00,9428.04,9417.25,38.231966,9423.682318,9425.25,9434.13,9421.29,9428.03,120.602769,...,69.966933,44.927906,9431.470714,9433.79,9439.34,9424.13,9427.22,90.280471,40.415519,9432.102245


Dividing into features and target (current close price)

In [None]:

features= ['open','EMA','rsi','openday1', 'highday1', 'lowday1', 'closeday1', 'volumeday1', 'rsiday1',
       'EMAday1', 'openday2', 'highday2', 'lowday2', 'closeday2', 'volumeday2',
       'rsiday2', 'EMAday2', 'openday3', 'highday3', 'lowday3', 'closeday3',
       'volumeday3', 'rsiday3', 'EMAday3', 'openday4', 'highday4', 'lowday4',
       'closeday4', 'volumeday4', 'rsiday4', 'EMAday4', 'openday5', 'highday5',
       'lowday5', 'closeday5', 'volumeday5', 'rsiday5', 'EMAday5']

target = 'close'


In [None]:
x_3m = df_3m[features]
y_3m = df_3m[target]

x_5m = df_5m[features]
y_5m = df_5m[target]

x_15m = df_15m[features]
y_15m = df_15m[target]

x_30m = df_30m[features]
y_30m = df_30m[target]

x_1h = df_1h[features]
y_1h = df_1h[target]

x_2h = df_2h[features]
y_2h = df_2h[target]

x_4h = df_4h[features]
y_4h = df_4h[target]

x_6h = df_6h[features]
y_6h = df_6h[target]


## Training models

Training a random forest regressor on each frequency

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import pickle

def train(x,y,model_name=f"model_{datetime.datetime.now().timestamp()}"):
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.8)

  # Initialize the model
  model = RandomForestRegressor(max_features=None,n_estimators=250)

  # Train the model
  model.fit(X_train, y_train)

  # Predict on the test set
  y_pred = model.predict(X_test)

  # Saving models
  filename = f'models/{model_name}.h5'

  with open(filename, 'wb') as f:
      pickle.dump(model, f)

  return model

In [None]:
model1=train(x_3m,y_5m,"model_3m")
model2=train(x_5m,y_5m,"model_5m")
model3=train(x_15m,y_15m,"model_15m")
model4=train(x_30m,y_30m,"model_30m")
model5=train(x_1h,y_1h,"model_1h")
model6=train(x_2h,y_2h,"model_2h")
model7=train(x_4h,y_4h,"model_4h")
model8=train(x_6h,y_6h,"model_6h")

In [123]:
from datetime import datetime,timedelta

date = df_5m.iloc[1]['datetime']

date_object = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')

# Get the minutes from the datetime object
minutes = date_object.minute
hours=date_object.hour


a=int(minutes/3)
b=int(minutes/15)
c=int(minutes/30)


aa=a*3+3
bb=b*15+15
cc=c*30+30

a1=aa-minutes
b1=bb-minutes
c1=cc-minutes



if minutes%3==0:
  date_3m=date
else:
  date_3m=(date_object+timedelta(minutes=a1)).strftime('%Y-%m-%d %H:%M:%S')

if minutes%15==0:
  date_15m=date
else:
  date_15m=(date_object+timedelta(minutes=b1)).strftime('%Y-%m-%d %H:%M:%S')

if minutes%30==0:
  date_30m=date
else:
  date_30m=(date_object+timedelta(minutes=c1)).strftime('%Y-%m-%d %H:%M:%S')

if minutes==30:
  date_1h=date
else:
  date_1h=(date_object+timedelta(hours=1)).replace(minute=30, second=0).strftime('%Y-%m-%d %H:%M:%S')

if minutes==30 and hours%2==1:
  date_2h=date
else:
  date_2h=(date_object+timedelta(hours=2)).replace(minute=30, second=0).strftime('%Y-%m-%d %H:%M:%S')
'''
if minutes==30 and hours%4==1:
  date_4h=date
else:
  date_4h=(date_object+timedelta(hours=4)).strftime('%Y-%m-%d %H:%M:%S')

if minutes==30 and hours%6==1:
  date_6h=date
else:
  date_6h=(date_object+timedelta(hours=6)).strftime('%Y-%m-%d %H:%M:%S')'''


print(date_3m)
print(date_15m)
print(date_30m)
print(date_1h)
print(date_2h)
#print(date_4h)
#print(date_6h)


2018-01-01 05:36:00
2018-01-01 05:45:00
2018-01-01 06:00:00
2018-01-01 06:30:00
2018-01-01 07:30:00


In [16]:
start_date = '2020-01-01'
end_date = '2020-01-02'

# Filter the data
s1 = df_15m[(df_15m['datetime'] >= start_date) & (df_15m['datetime'] <= end_date)]
# For understanding the indicators

plot_df = s1.iloc[:,:]
plt.figure(figsize=(14, 6))

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(14, 12), sharex=True)

# Plotting RSI
axes[0].plot(plot_df.index, plot_df['RSI'], color='orange')
axes[0].axhline(90, color='grey', linestyle='--', label='EXTRA-Overbought (90)')
axes[0].axhline(70, color='r', linestyle='--', label='Overbought (70)')
axes[0].axhline(30, color='g', linestyle='--', label='Oversold (30)')
axes[0].axhline(10, color='grey', linestyle='--', label='EXTRA-Overbought (90)')
axes[0].legend()
axes[0].set_ylabel('RSI')

# Plotting Close values and SMAs
axes[1].plot(plot_df.index, plot_df['close'], label='Close', color='black')
# axes[1].plot(plot_df['SMA_14'],label="14 previous average")
# axes[1].plot(plot_df['SMA_20'],label="20 previous average")
axes[1].legend()
axes[1].set_ylabel('Close Price')

# Plotting MACD and Signal Line
axes[2].plot(plot_df.index, plot_df['MACD'], label='MACD', color='blue')
axes[2].plot(plot_df.index, plot_df['Signal_Line'], label='Signal Line', color='red')
axes[2].legend()
axes[2].set_ylabel('MACD')
axes[2].set_xlabel('Date')

# Adjust layout and display the plots
plt.tight_layout()
plt.show()

# plot_candlestick_chart(plot_df)

## Trading loop

In [None]:
balance_usdt = 10000
balance_btc = 0
stop_loss = balance_btc*(0.002)

for i in range(df_15m.shape[0]):
    # Current row
    current_row = df_15m[i]

    # get the date for the i th row
    date = df_15m.iloc[i]['datetime']

    date_object = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')

    # Get the minutes from the datetime object
    minutes = date_object.minute
    hours=date_object.hour

    a=int(minutes/3)
    b=int(minutes/15)
    c=int(minutes/30)

    aa=a*3+3
    bb=b*15+15
    cc=c*30+30

    a1=aa-minutes
    b1=bb-minutes
    c1=cc-minutes

    if minutes%3==0:
      date_3m=date
    else:
      date_3m=(date_object+timedelta(minutes=a1)).strftime('%Y-%m-%d %H:%M:%S')

    if minutes%15==0:
      date_15m=date
    else:
      date_15m=(date_object+timedelta(minutes=b1)).strftime('%Y-%m-%d %H:%M:%S')

    if minutes%30==0:
      date_30m=date
    else:
      date_30m=(date_object+timedelta(minutes=c1)).strftime('%Y-%m-%d %H:%M:%S')

    if minutes==30:
      date_1h=date
    else:
      date_1h=(date_object+timedelta(hours=1)).replace(minute=30, second=0).strftime('%Y-%m-%d %H:%M:%S')

    if minutes==30 and hours%2==1:
      date_2h=date
    else:
      date_2h=(date_object+timedelta(hours=2)).replace(minute=30, second=0).strftime('%Y-%m-%d %H:%M:%S')
        # Get the data for the date

    data_3m = df_15m[df_15m['datetime'] == date_3m]
    data_5m = df_15m[df_15m['datetime'] == date]
    data_15m = df_15m[df_15m['datetime'] == date_15m]
    data_30m = df_15m[df_15m['datetime'] == date_30m]
    data_1h = df_15m[df_15m['datetime'] == date_1h]
    data_2h = df_15m[df_15m['datetime'] == date_2h]

    # Get the feature vector for the date
    xp_3m = data_3m[features]
    xp_5m = data_5m[features]
    xp_15m = data_15m[features]
    xp_30m = data_30m[features]
    xp_1h = data_1h[features]
    xp_2h = data_2h[features]

    # Predict the price using the model
    y_pred_3m = model1.predict(xp_3m)
    y_pred_5m = model2.predict(xp_5m)
    y_pred_15m = model3.predict(xp_15m)
    y_pred_30m = model4.predict(xp_30m)
    y_pred_1h = model5.predict(xp_1h)
    y_pred_2h = model6.predict(xp_2h)
  

    # difference between the predicted close and actual open
    open=data_5m["open"].values[0]

    close_predicted=y_pred_3m[0]
    diff1 = close_predicted-open

    close_predicted=y_pred_5m[0]
    diff2 = close_predicted-open

    close_predicted=y_pred_15m[0]
    diff3 = close_predicted-open

    close_predicted=y_pred_30m[0]
    diff4 = close_predicted-open

    close_predicted=y_pred_1h[0]
    diff5 = close_predicted-open

    close_predicted=y_pred_2h[0]
    diff6 = close_predicted-open

    sum1=0

    if diff1>0:
        sum1=sum1+1
    if diff2>0:
        sum1=sum1+1
    if diff3>0:
        sum1=sum1+1
    if diff4>0:
        sum1=sum1+1

    sum2=0

    if diff5>0:
        sum2=sum2+1
    if diff6>0:
        sum2=sum2+1

    #  100% investment
    if sum1==4 and current_row['rsi']<30 and current_row['short_ma']>current_row['long_ma'] and  balance_usdt>0 and current_row['MACD'] > current_row['Signal_Line'] :
        btc=btc+((balance_usdt-0.0015*balance_usdt)/open)
        stop_loss = balance_usdt*(0.002)
        balance_btc=btc
        balance_usdt=0
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [buying]')

    #  75% investment
    elif sum1==3 and current_row['rsi']<30 and current_row['short_ma']>current_row['long_ma'] and balance_usdt>0 and current_row['MACD'] > current_row['Signal_Line'] :
        btc=btc + (((balance_usdt*(0.75))-0.0015*balance_usdt)/open)
        stop_loss = balance_usdt*0.75*(0.002)
        balance_btc=btc
        balance_usdt=balance_usdt*(0.25)
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [buying]')

    #  50% investment
    elif sum1==2 and current_row['rsi']<30 and current_row['short_ma'] > current_row['long_ma'] and sum2>=2 :
        btc=btc+(((balance_usdt*(0.50))-0.0015*balance_usdt)/open)
        stop_loss = balance_usdt*0.50*(0.002)
        balance_btc=btc
        balance_usdt=balance_usdt*(0.50)
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [buying]')

    #25% sell
    elif sum1>=0 and current_row['rsi']>70 and current_row['short_ma'] < current_row['long_ma'] and diff3 < (-stop_loss) and balance_btc>0 :  # Changed this line
        usdt=balance_usdt+(balance_btc*0.25*open)
        balance_usdt=usdt-(0.0015*usdt)
        balance_btc=balance_btc*0.75
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [selling]')

    #50% sell
    elif sum1>=-2 and sum2>=-1 and current_row['rsi']>70 and current_row['short_ma'] < current_row['long_ma'] and diff3 < (-stop_loss) and balance_btc>0 and current_row['MACD'] < current_row['Signal_Line'] :   # Changed this line
        usdt=balance_usdt+(balance_btc*0.5*open)
        balance_usdt=usdt-(0.0015*usdt)
        balance_btc=balance_btc*0.50
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [selling]')

    #75% sell
    elif sum1>=-3 and sum2>=-2 and current_row['rsi']>70 and current_row['short_ma'] < current_row['long_ma'] and diff3 < (-stop_loss) and balance_btc>0 and current_row['MACD'] < current_row['Signal_Line'] :  # Changed this line
        usdt=balance_usdt+(balance_btc*0.75*open)
        balance_usdt=usdt-(0.0015*usdt)
        balance_btc=balance_btc*0.25
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [selling]')

    # 100% sell
    elif sum1==-4 and current_row['rsi']>70 and current_row['short_ma'] < current_row['long_ma'] and diff3 < (-stop_loss) and balance_btc>0 and current_row['MACD'] < current_row['Signal_Line'] :  # Changed this line
        usdt=balance_btc*open
        balance_usdt=usdt-(0.0015*usdt)
        balance_btc=0
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [selling]')

    else:
        print(f'{date} -> : usdt : {balance_usdt}   btc : {balance_btc}   [holding]')

    # If loop reached the end convert all btc to usd
    if i==df_15m.shape[0]-1 and balance_usdt==0:
        usdt=balance_btc*open
        balance_usdt=usdt
        balance_btc=0

# Print the final balances after executing the trades
print(f"Final Balances: USDT {balance_usdt}, BTC {balance_btc}")