# Trade Strategy 2 with Machine Learning


## Outline:
* Set parameters of interest & Import Data

* Establish Trading Strategy with signals

* Identify Training & Testing dataset, Scale data if necessary

* Run model 1: Support Vector Classifier

    * Evaluate Performance
    * Backtest

* Run model 2: Random Forest Classififier

    * Evaluate Performance
    * Backtest
    
* Run model 2: k Nearest Neighbor Classifier

    * Evaluate Performance
    * Backtest

In [5]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
import talib as TA
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

### Set Parameters & Import the OHLCV dataset into a Pandas DataFrame.

In [6]:
# choose coin of interest: BTC,ETH,XRP,DOGE,ADA
# choose pair of interest: USD, USDT
coin = 'BTC'
pair = 'USD'

# choose exchange of interest Bitfinex, Binance, Coinbase
exchange = 'Bitfinex'

# choose data interval of interest 1h, 1d
time = '1h'

# get saved histoical csv data from Data folder, which was downloaded from cryptoDataOnline.com
df = pd.read_csv(Path(f"./Data/{exchange}/{pair}/{exchange}_{coin}_{time}.csv"), index_col= "Datetime", parse_dates= True, infer_datetime_format = True)
display (df.head())


Unnamed: 0_level_0,Exchange,Symbol_Pair,Open,High,Low,Close,Volume_USD,Volume_in_BTC
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-12 23:00:00,Bitfinex,BTC/USD,18913.0,18916.0,18856.0,18856.0,2015120.0,106.868894
2023-01-12 22:00:00,Bitfinex,BTC/USD,18834.0,18934.0,18785.0,18917.0,1909037.0,100.916485
2023-01-12 21:00:00,Bitfinex,BTC/USD,19064.0,19112.0,18775.0,18835.0,5595288.0,297.068657
2023-01-12 20:00:00,Bitfinex,BTC/USD,18889.0,19089.0,18828.0,19065.0,1969036.0,103.280162
2023-01-12 19:00:00,Bitfinex,BTC/USD,18803.0,19046.0,18771.0,18887.0,5353192.0,283.432631


In [7]:
dataframe = df.drop(columns=['Exchange','Symbol_Pair','Volume_USD']).copy()
dataframe = dataframe.rename(columns={'Volume_in_BTC': 'Volume'})
dataframe.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-12 23:00:00,18913.0,18916.0,18856.0,18856.0,106.868894
2023-01-12 22:00:00,18834.0,18934.0,18785.0,18917.0,100.916485
2023-01-12 21:00:00,19064.0,19112.0,18775.0,18835.0,297.068657
2023-01-12 20:00:00,18889.0,19089.0,18828.0,19065.0,103.280162
2023-01-12 19:00:00,18803.0,19046.0,18771.0,18887.0,283.432631


## Define trading strategy with entry and exit signals

In [9]:
#Strategy 1
minimal_roi = {
        "60":  0.01,
        "30":  0.03,
        "20":  0.04,
        "0":  0.05
    }
timeframe = '1h'


# ADX = Average Directional Movement Index (Momentum Indicator)
dataframe['adx'] = TA.ADX(dataframe, timeperiod=10)
dataframe['slowadx'] = TA.ADX(dataframe, timeperiod=35)

# Commodity Channel Index: values Oversold:<-100, Overbought:>100
dataframe['cci'] = TA.CCI(dataframe)

# Stoch
stoch = TA.STOCH(dataframe, 5)
dataframe['fastd'] = stoch['fastd']
dataframe['fastk'] = stoch['fastk']
dataframe['fastk-previous'] = dataframe.fastk.shift(1)
dataframe['fastd-previous'] = dataframe.fastd.shift(1)

# Slow Stoch
slowstoch = TA.STOCH(dataframe, 50)
dataframe['slowfastd'] = slowstoch['fastd']
dataframe['slowfastk'] = slowstoch['fastk']
dataframe['slowfastk-previous'] = dataframe.slowfastk.shift(1)
dataframe['slowfastd-previous'] = dataframe.slowfastd.shift(1)

# EMA - Exponential Moving Average
dataframe['ema5'] = TA.EMA(dataframe, 5)
        
# get the rolling volume mean for the last hour (12x5)
# Note: dataframe['volume'].mean() uses the whole dataframe in 
# backtesting hence will have lookahead, but would be fine for dry/live use
dataframe['mean-volume'] = dataframe['Volume'].rolling(12).mean()

# Review the DataFrame
display(dataframe.head())
display(dataframe.tail())

TypeError: ADX() takes at least 3 positional arguments (1 given)

In [None]:
# Entry Trend
dataframe['Signal'] = 0.0  #enter long trade signal =1, exit long trade signal = 0


dataframe.loc[
            (
                (
                    (dataframe['adx'] > 50) |
                    (dataframe['slowadx'] > 26)
                ) &
                (dataframe['cci'] < -100) &
                (
                    (dataframe['fastk-previous'] < 20) &
                    (dataframe['fastd-previous'] < 20)
                ) &
                (
                    (dataframe['slowfastk-previous'] < 30) &
                    (dataframe['slowfastd-previous'] < 30)
                ) &
                (dataframe['fastk-previous'] < dataframe['fastd-previous']) &
                (dataframe['fastk'] > dataframe['fastd']) &
                (dataframe['mean-volume'] > 0.75) &
                (dataframe['close'] > 0.00000100)
            ),
            'Signal'] = 1

#Exit Trend
dataframe.loc[
    (
        (dataframe['slowadx'] < 25) &
        ((dataframe['fastk'] > 70) | (dataframe['fastd'] > 70)) &
        (dataframe['fastk-previous'] < dataframe['fastd-previous']) &
        (dataframe['close'] > dataframe['ema5'])
    ),
    'Signal'] = 0

( how we usuall produce dataframe and plots for how well the strategy did on its own with the dataset)
this will need to be adjusted depending on the output of the strategy data

In [None]:
signals_df = dataframe.copy()

signals['Signal'].value_counts()
# Calculate the strategy returns and add them to the DataFrame
signals_df['Actual Returns'] = dataframe['Close'].pct_change()
signals_df['Strategy Returns'] = signals_df['Actual Returns'] * signals_df['Signal'].shift()

# Plot Strategy Returns to examine performance
(1 + signals_df['Strategy Returns']).cumprod().plot(title="Strategy Returns- Strategy 1")
(1 + signals_df['Actual Returns']).cumprod().sum()
(1 + signals_df['Strategy Returns']).cumprod().sum()

## Define training and testing datasets

In [None]:
# Assign a copy of the sma_fast and sma_slow columns to a features DataFrame called X
X = signals_df[['XXXXXXXXX', 'XXXXXXX']].shift().dropna().copy()
# Create the target set selecting the Signal column and assiging it to y
y = signals_df['Signal'].copy()

In [None]:
# Review the value counts
y.value_counts()

In [None]:
# Select the start of the training period
training_begin = X.index.min()
training_end = X.index.min() + DateOffset(months=6)

# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
display(X_train.head())
display(X_train.tail())

# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
display(X_test.head())
display(X_test.tail())

In [None]:
# Scale the features DataFrames
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Model #1: Support Vector Classifier (SVC) from sklearn library

In [None]:
# From SVM, instantiate SVC classifier model instance
svc_model = svm.SVC()
 
svc_model = svc_model.fit(X_train_scaled, y_train)
 
svc_pred = svc_model.predict(X_test_scaled)

display(svc_pred[:10])

### Evaluation metrics

In [None]:
svc_testing_report = classification_report(y_test, svc_pred)
print(svc_testing_report)

In [None]:
# Calculate R_Square and Adjusted R Square
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, svc_pred)
#n is the sample size and p is the number of independent variables
R2_adj = 1-(1-R2)*((len(y_test))-1)/((len(y_test))-1-1)
print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Calculate mean squared error and sqr rt of mean squared error
# The closer MSE & srMSE are to 0 the better the model fit
mse = (mean_squared_error(y_test, svc_pred))
sqr_mse = (math.sqrt(mean_squared_error(y_test, svc_pred)))
print(f"The mean square error is", mse)
print(f"The square root of the mse is", sqr_mse)

In [None]:

# Create a predictions DataFrame
predictions_df = pd.DataFrame(index=X_test.index)
# Add the SVM model predictions to the DataFrame
predictions_df['SVC Predicted'] = svm_pred

# Add the actual returns to the DataFrame
predictions_df['Actual Returns'] = dataframe["Actual Returns"]

# Add the strategy returns to the DataFrame
predictions_df['SVC Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['SVC Predicted']

# Review the DataFrame
display(predictions_df.head())
display(predictions_df.tail())

In [None]:
(1 + predictions_df[['SVC Strategy Returns']]).cumprod().sum()

In [None]:
(1 + predictions_df[['Actual Returns']]).cumprod().sum()

In [None]:
# Plot the actual returns versus the strategy returns
(1 + predictions_df[['SVC Strategy Returns','Actual Returns']]).cumprod().plot(title="SVC Strategy Returns vs. Actual Returns with Support Vecotr Classifier model, 2021-2022")


### Backtest the model 1: SVC to evaluate its performance.

## Model #2: Random Forest Classifier from sklearn library

In [None]:
# Import a new classifier from SKLearn
from sklearn.tree import RandomForestClassifier

# Initiate the model instance
RFC = RandomForestClassifier(max_depth=3, random_state=1)

# Fit the model using the training data
model = RFC.fit(X_train, y_train)

# Use the testing dataset to generate the predictions for the new model
forest_pred = RFC.predict(X_test)

# Review the model's predicted values
forest_pred[:10]

### Evaluation Metrics

In [None]:
# Use a classification report to evaluate the model using the predictions and testing data
forest_pred_report = classification_report(y_test, forest_pred)

# Print the classification report
print(forest_pred_report)


In [None]:
# Calculate R_Square and Adjusted R Square
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, forest_pred)
#n is the sample size and p is the number of independent variables
R2_adj = 1-(1-R2)*((len(y_test))-1)/((len(y_test))-1-1)
print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Calculate mean squared error and sqr rt of mean squared error
# The closer MSE & srMSE are to 0 the better the model fit
mse = (mean_squared_error(y_test, forest_pred))
sqr_mse = (math.sqrt(mean_squared_error(y_test, forest_pred)))
print(f"The mean square error is", mse)
print(f"The square root of the mse is", sqr_mse)

In [None]:
# Add RVR model predictions to predicitons dataframe
predictions_df['RFC Predictions'] = forest_pred

predictions_df['RFC Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['RFC Predictions']

# Review the DataFrame
predictions_df

In [None]:
(1 + predictions_df[['RFC Strategy Returns']]).cumprod().sum()

In [None]:
(1 + predictions_df[['Actual Returns']]).cumprod().sum()

In [None]:
# Plot the actual returns versus the strategy returns
(1 + predictions_df[['RFC Strategy Returns','Actual Returns']]).cumprod().plot(title="RFC Strategy Returns vs. Actual with Random Forest Classifier model, 2021-2022")

### Backtest the model 2: RVC to evaluate its performance. 

## Model #3: KNearest Neighbors Classifier (kNN) from sklearn library

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
kNN = KNeighborsClassifier(n_neighbors = 3)
kNN = kNN.fit(X_train_scaled, y_train)
kNN_pred = kNN.predict(X_test_scaled)


### Evaluation Metrics

In [None]:
# Use a classification report to evaluate the model using the predictions and testing data
kNN_pred_report = classification_report(y_test, kNN_pred)

# Print the classification report
print(kNN_pred_report)


In [None]:
# Calculate R_Square and Adjusted R Square
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, kNN_pred)
#n is the sample size and p is the number of independent variables
R2_adj = 1-(1-R2)*((len(y_test))-1)/((len(y_test))-1-1)
print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Calculate mean squared error and sqr rt of mean squared error
# The closer MSE & srMSE are to 0 the better the model fit
mse = (mean_squared_error(y_test, kNN_pred))
sqr_mse = (math.sqrt(mean_squared_error(y_test, kNN_pred)))
print(f"The mean square error is", mse)
print(f"The square root of the mse is", sqr_mse)

In [None]:

# Add the kNN model predictions to the DataFrame
predictions_df['kNN Predictions'] = kNN_pred

# Add the strategy returns to the DataFrame
predictions_df['kNN Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['kNN Predictions']


In [None]:
# Plot the actual returns versus the strategy returns
(1 + kNN_predictions_df[['kNN Strategy Returns','Actual Returns']]).cumprod().plot(title="kNN Strategy Returns vs. Actual with kNearest Neighbor Classifier model, 2021-2022")

### Backtest the model 3: kNN to evaluate its performance. 