# Trade Strategy 1 with Machine Learning


## Outline:
* Set parameters of interest & Import Data

* Establish Trading Strategy with signals

* Identify Training & Testing dataset, Scale data if necessary

* Run model 1: Support Vector Classifier

    * Evaluate Performance
    * Backtest

* Run model 2: Random Forest Classifier

    * Evaluate Performance
    * Backtest
    
* Run model 3: k Nearest Neighbor Classifier

    * Evaluate Performance
    * Backtest

In [None]:
# Imports
import math
from pathlib import Path

import pandas as pd
import numpy as np
import hvplot.pandas
import matplotlib.pyplot as plt
import talib as ta
from talib import abstract as ta_abstract
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error, r2_score

### Set Parameters & Import the OHLCV dataset into a Pandas DataFrame.

In [None]:
# Coin of interest: BTC, ETH, XRP, DOGE or ADA
coin = 'BTC'

# Exchange of interest
exchange = 'Bitfinex'

# Load the saved historical hourly CSV from the Data folder (originally
# downloaded from cryptoDataOnline.com). parse_dates=True already parses the
# "Datetime" index, so the deprecated infer_datetime_format flag is not passed.
df = pd.read_csv(
    Path(f"./Data/{exchange}/hourly/21-22{exchange}_{coin}_h.csv"),
    index_col="Datetime",
    parse_dates=True,
)
display(df.head())

# Working copy without the symbol column. The volume column name is built from
# `coin` so that changing the coin above does not silently skip the rename.
# NOTE(review): assumes every coin's CSV names the column Volume_in_<COIN>,
# as the BTC file does — confirm for the other coins.
dataframe = df.drop(columns=['Symbol_Pair']).copy()
dataframe = dataframe.rename(columns={f'Volume_in_{coin}': 'Volume'})
# dataframe.head()

In [None]:
# Minimum-ROI table and candle timeframe for the strategy.
# NOTE(review): keys look like minutes-in-trade mapped to a minimum ROI
# fraction (freqtrade convention) — confirm; nothing in this notebook reads them.
minimal_roi = dict(
    zip(
        ("60", "30", "20", "0"),
        (0.01, 0.03, 0.04, 0.05),
    )
)
timeframe = "1h"

## Define trading strategy with entry and exit signals

In [None]:
# Strategy 1: technical indicators
#
# `import talib as ta` exposes TA-Lib's function API, which takes price arrays
# and returns plain arrays/tuples. Passing a whole DataFrame and reading the
# result by name (macd['macd']) requires the abstract API, which looks up
# lowercase 'open'/'high'/'low'/'close'/'volume' columns. A lowercase-named
# view of the frame is therefore used as indicator input; results are written
# back to `dataframe`.
ohlcv = dataframe.rename(columns=str.lower)

# MACD
macd = ta_abstract.MACD(ohlcv)
dataframe['macd'] = macd['macd']
dataframe['macdsignal'] = macd['macdsignal']

# Minus Directional Indicator / Movement
dataframe['minus_di'] = ta_abstract.MINUS_DI(ohlcv)

# RSI
dataframe['rsi'] = ta_abstract.RSI(ohlcv)

# Inverse Fisher transform on RSI, values [-1.0, 1.0] (https://goo.gl/2JGGoy)
# (numpy is imported as `np`; the original `numpy.exp` raised NameError.)
rsi = 0.1 * (dataframe['rsi'] - 50)
exp_2rsi = np.exp(2 * rsi)
dataframe['fisher_rsi'] = (exp_2rsi - 1) / (exp_2rsi + 1)
# Inverse Fisher transform on RSI normalized, value [0.0, 100.0] (https://goo.gl/2JGGoy)
dataframe['fisher_rsi_norma'] = 50 * (dataframe['fisher_rsi'] + 1)

# Stoch fast - sensitive momentum indicator
stoch_fast = ta_abstract.STOCHF(ohlcv)
dataframe['fastd'] = stoch_fast['fastd']
dataframe['fastk'] = stoch_fast['fastk']

# Overlap Studies
# ------------------------------------

# SAR Parabol
dataframe['sar'] = ta_abstract.SAR(ohlcv)

# SMA - Simple Moving Average
dataframe['sma'] = ta_abstract.SMA(ohlcv, timeperiod=40)

In [None]:
# Entry & exit signals for Strategy 1.
# Writes dataframe['signal']: 1 = entry, -1 = exit, 0 = no signal. Exit is
# assigned last, so it wins on rows where both masks are true.

# Thresholds. The original cell read these from `self.<name>.value`, but there
# is no strategy object at notebook scope (NameError), so they are constants.
# NOTE(review): the values below are placeholders — confirm them against the
# strategy this cell was ported from.
buy_volume_avg_window = 70
buy_rsi = 30
buy_fastd = 30
buy_fisher_rsi_norma = 30
sell_trigger = 'rsi-macd-minusdi'  # or 'sar-fisherRsi'
sell_rsi = 70
sell_minus_di = 50
sell_fisher_rsi_norma = 50

# The rest of the notebook uses capitalised price columns ('Close', 'Volume');
# the lowercase names in the original cell do not match them.
close = dataframe['Close']
volume = dataframe['Volume']

dataframe['signal'] = 0.0

# Entry conditions (all must hold)
entry_mask = (
    (close > 0.00000200)
    & (volume > volume.rolling(buy_volume_avg_window).mean() * 4)
    & (close < dataframe['sma'])
    & (dataframe['fastd'] > dataframe['fastk'])
    & (dataframe['rsi'] > buy_rsi)
    & (dataframe['fastd'] > buy_fastd)
    & (dataframe['fisher_rsi_norma'] < buy_fisher_rsi_norma)
)
dataframe.loc[entry_mask, 'signal'] = 1

# Exit conditions (all conditions of the selected trigger must hold)
conditions = []
if sell_trigger == 'rsi-macd-minusdi':
    # RSI crossing above the threshold: above it now, at or below it one bar
    # earlier (pandas replacement for the un-imported qtpylib.crossed_above).
    rsi_crossed_above = (dataframe['rsi'] > sell_rsi) & (dataframe['rsi'].shift(1) <= sell_rsi)
    conditions.append(rsi_crossed_above)
    conditions.append(dataframe['macd'] < 0)
    conditions.append(dataframe['minus_di'] > sell_minus_di)
if sell_trigger == 'sar-fisherRsi':
    conditions.append(dataframe['sar'] > close)
    # NOTE(review): this compares 'fisher_rsi' (range [-1, 1]) with a threshold
    # named for the normalised [0, 100] series — kept as in the original; confirm.
    conditions.append(dataframe['fisher_rsi'] > sell_fisher_rsi_norma)

if conditions:
    # np.logical_and.reduce ANDs all condition series element-wise.
    dataframe.loc[np.logical_and.reduce(conditions), 'signal'] = -1


TODO: Figure out evaluation metrics that compute the strategy returns,
then graph them.


In [None]:
# Calculate the actual and strategy returns and add them to the DataFrame.
# Later cells read 'Actual Returns' and 'Strategy Returns', so this code must
# run (it was previously commented out).
dataframe['Actual Returns'] = df['Close'].pct_change()
# The signal is shifted one bar so each bar's return is multiplied by the
# signal of the previous bar. The column is 'signal' (lowercase), as created
# by the entry/exit cell.
dataframe['Strategy Returns'] = dataframe['Actual Returns'] * dataframe['signal'].shift()

# Review the DataFrame
# display(dataframe.head())
# display(dataframe.tail())

In [None]:
# # Plot Strategy Returns to examine performance
# (1 + dataframe['Strategy Returns']).cumprod().hvplot(title="Strategy Returns- Strategy 1, 2021-2022")

In [None]:
# Sum of the cumulative-product curve of (1 + actual returns).
# NOTE(review): if the final cumulative return is what is wanted, that would
# be the last value of the curve rather than its sum — confirm intent.
dataframe['Actual Returns'].add(1).cumprod().sum()

In [None]:
# Sum of the cumulative-product curve of (1 + strategy returns).
# NOTE(review): if the final cumulative return is what is wanted, that would
# be the last value of the curve rather than its sum — confirm intent.
dataframe['Strategy Returns'].add(1).cumprod().sum()

In [None]:
# Layout options shared by every line overlay in this chart
line_layout = dict(ylabel='Price in $', width=1000, height=400)

# Close price of the investment
security_close = dataframe[["Close"]].hvplot(line_color='lightgray', **line_layout)

# Band overlays.
# NOTE(review): the BB_UPPER / BB_MIDDLE / BB_LOWER columns are not created
# anywhere in the visible notebook — confirm they exist before running.
bb_upper = dataframe[["BB_UPPER"]].hvplot(line_color='purple', **line_layout)
bb_middle = dataframe[["BB_MIDDLE"]].hvplot(line_color='orange', **line_layout)
bb_lower = dataframe[["BB_LOWER"]].hvplot(line_color='blue', **line_layout)

# Entry / exit markers are currently disabled; kept for reference.
# entry = dataframe[dataframe["Signal"] == 1.0]["Close"].hvplot.scatter(
#     color='purple', marker='^', size=200, legend=False, **line_layout
# )
# exit = dataframe[dataframe["Signal"] == -1.0]["Close"].hvplot.scatter(
#     color='orange', marker='v', size=200, legend=False, **line_layout
# )

# Overlay the plots (hvplot objects compose with `*`)
bbands_plot = security_close * bb_upper * bb_middle * bb_lower
# bbands_plot = security_close * bb_upper * bb_middle * bb_lower * entry * exit
bbands_plot

## Define training and testing datasets

In [None]:
# Features: Strategy 1 indicator columns, shifted one bar and with the
# resulting NaN rows dropped.
# NOTE(review): the original cell used 'XXXXXXXXX' placeholder names and an
# undefined `signals_df`; this feature list is a proposal built from the
# indicator columns computed above — confirm the intended features.
feature_columns = [
    'macd',
    'macdsignal',
    'minus_di',
    'rsi',
    'fisher_rsi_norma',
    'fastd',
    'fastk',
    'sar',
    'sma',
]
X = dataframe[feature_columns].shift().dropna().copy()
# Target: the signal column, restricted to the rows kept in X so features and
# labels stay aligned row-for-row after dropna().
y = dataframe['signal'].loc[X.index].copy()

In [None]:
# Review how many rows fall into each distinct value of the target y
y.value_counts()

In [None]:
# Training window: from the first feature timestamp to six months later
training_begin = X.index.min()
training_end = training_begin + DateOffset(months=6)

# Slice the training features and labels over that window
train_window = slice(training_begin, training_end)
X_train = X.loc[train_window]
y_train = y.loc[train_window]

# Preview both ends of the training features
for preview in (X_train.head(), X_train.tail()):
    display(preview)

# Testing data starts one hour after the end of the training window
test_start = training_end + DateOffset(hours=1)
X_test = X.loc[test_start:]
y_test = y.loc[test_start:]

# Preview both ends of the testing features
for preview in (X_test.head(), X_test.tail()):
    display(preview)

In [None]:
# Fit a StandardScaler on the training features only, then apply the same
# fitted scaler to both the training and the testing features.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

## Model #1: Support Vector Classifier (SVC) from sklearn library

In [None]:
# Build an SVC classifier and fit it on the scaled training data
svc_model = svm.SVC().fit(X_train_scaled, y_train)

# Predict the signal for the scaled testing data
svc_pred = svc_model.predict(X_test_scaled)

# Preview the first ten predictions
display(svc_pred[:10])

### Evaluation metrics

In [None]:
# Classification report of the SVC predictions against the test labels
svc_testing_report = classification_report(y_true=y_test, y_pred=svc_pred)
print(svc_testing_report)

In [None]:
# Calculate R-squared and adjusted R-squared for the SVC predictions.
# The closer R2 and R2_adj are to 1, the better the model fit.
R2 = r2_score(y_test, svc_pred)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n is the sample
# size and p is the number of independent variables. p is taken from the
# feature matrix instead of being hard-coded to 1.
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)
print("The R squared is", R2)
print("The R squared adjusted is", R2_adj)

In [None]:
# Calculate the mean squared error and its square root for the SVC predictions.
# The closer both are to 0, the better the model fit.
mse = mean_squared_error(y_test, svc_pred)
# Reuse the value computed above instead of computing the MSE a second time.
sqr_mse = math.sqrt(mse)
print("The mean square error is", mse)
print("The square root of the mse is", sqr_mse)

In [None]:

# Create a predictions DataFrame indexed like the testing features
predictions_df = pd.DataFrame(index=X_test.index)
# Add the SVC model predictions (the variable is `svc_pred`; the original
# `svm_pred` was never defined).
predictions_df['SVC Predicted'] = svc_pred

# Add the actual returns; assignment aligns on the index, so only the testing
# rows are kept.
predictions_df['Actual Returns'] = dataframe["Actual Returns"]

# Strategy returns: actual return multiplied by the predicted signal
predictions_df['SVC Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['SVC Predicted']

# Review the DataFrame
display(predictions_df.head())
display(predictions_df.tail())

In [None]:
# Sum of the cumulative-product curve of (1 + SVC strategy returns)
predictions_df[['SVC Strategy Returns']].add(1).cumprod().sum()

In [None]:
# Sum of the cumulative-product curve of (1 + actual returns), testing period
predictions_df[['Actual Returns']].add(1).cumprod().sum()

In [None]:
# Plot the cumulative product of (1 + returns) for the SVC strategy and the
# actual returns. The title's "Vecotr" typo is corrected to "Vector".
(1 + predictions_df[['SVC Strategy Returns', 'Actual Returns']]).cumprod().plot(
    title="SVC Strategy Returns vs. Actual Returns with Support Vector Classifier model, 2021-2022"
)


### Backtest the model 1: SVC to evaluate its performance.

## Model #2: Random Forest Classifier from sklearn library

In [None]:
# Import a new classifier from scikit-learn.
# RandomForestClassifier lives in sklearn.ensemble; importing it from
# sklearn.tree raises ImportError.
from sklearn.ensemble import RandomForestClassifier

# Initiate the model instance
RFC = RandomForestClassifier(max_depth=3, random_state=1)

# Fit the model using the (unscaled) training data
model = RFC.fit(X_train, y_train)

# Use the testing dataset to generate the predictions for the new model
forest_pred = RFC.predict(X_test)

# Review the model's first ten predicted values
forest_pred[:10]

### Evaluation Metrics

In [None]:
# Classification report of the random-forest predictions against the test labels
forest_pred_report = classification_report(y_true=y_test, y_pred=forest_pred)

# Show the report
print(forest_pred_report)


In [None]:
# Calculate R-squared and adjusted R-squared for the random-forest predictions.
# The closer R2 and R2_adj are to 1, the better the model fit.
R2 = r2_score(y_test, forest_pred)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n is the sample
# size and p is the number of independent variables. p is taken from the
# feature matrix instead of being hard-coded to 1.
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)
print("The R squared is", R2)
print("The R squared adjusted is", R2_adj)

In [None]:
# Calculate the mean squared error and its square root for the random-forest
# predictions. The closer both are to 0, the better the model fit.
mse = mean_squared_error(y_test, forest_pred)
# Reuse the value computed above instead of computing the MSE a second time.
sqr_mse = math.sqrt(mse)
print("The mean square error is", mse)
print("The square root of the mse is", sqr_mse)

In [None]:
# Add the RFC model predictions to the predictions DataFrame
predictions_df['RFC Predictions'] = forest_pred

# Strategy returns: actual return multiplied by the predicted signal
predictions_df['RFC Strategy Returns'] = predictions_df['Actual Returns'].mul(
    predictions_df['RFC Predictions']
)

# Review the DataFrame
predictions_df

In [None]:
# Sum of the cumulative-product curve of (1 + RFC strategy returns)
predictions_df[['RFC Strategy Returns']].add(1).cumprod().sum()

In [None]:
# Sum of the cumulative-product curve of (1 + actual returns), testing period
predictions_df[['Actual Returns']].add(1).cumprod().sum()

In [None]:
# Cumulative product of (1 + returns) for the RFC strategy and the actual returns
rfc_growth = predictions_df[['RFC Strategy Returns','Actual Returns']].add(1).cumprod()
rfc_growth.plot(title="RFC Strategy Returns vs. Actual with Random Forest Classifier model, 2021-2022")

### Backtest the model 2: RFC to evaluate its performance.

## Model #3: k-Nearest Neighbors Classifier (kNN) from sklearn library

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Fit a 3-nearest-neighbours classifier on the scaled training data and
# predict the signal for the scaled testing data.
kNN = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
kNN_pred = kNN.predict(X_test_scaled)


### Evaluation Metrics

In [None]:
# Classification report of the kNN predictions against the test labels
kNN_pred_report = classification_report(y_true=y_test, y_pred=kNN_pred)

# Show the report
print(kNN_pred_report)


In [None]:
# Calculate R-squared and adjusted R-squared for the kNN predictions.
# The closer R2 and R2_adj are to 1, the better the model fit.
R2 = r2_score(y_test, kNN_pred)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n is the sample
# size and p is the number of independent variables. p is taken from the
# feature matrix instead of being hard-coded to 1.
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)
print("The R squared is", R2)
print("The R squared adjusted is", R2_adj)

In [None]:
# Calculate the mean squared error and its square root for the kNN predictions.
# The closer both are to 0, the better the model fit.
mse = mean_squared_error(y_test, kNN_pred)
# Reuse the value computed above instead of computing the MSE a second time.
sqr_mse = math.sqrt(mse)
print("The mean square error is", mse)
print("The square root of the mse is", sqr_mse)

In [None]:

# Store the kNN predictions alongside the other models' predictions
predictions_df['kNN Predictions'] = kNN_pred

# Strategy returns: actual return multiplied by the predicted signal
predictions_df['kNN Strategy Returns'] = predictions_df['Actual Returns'].mul(
    predictions_df['kNN Predictions']
)


In [None]:
# Plot the cumulative product of (1 + returns) for the kNN strategy and the
# actual returns. The kNN columns are stored in `predictions_df`; the original
# `kNN_predictions_df` was never defined.
(1 + predictions_df[['kNN Strategy Returns', 'Actual Returns']]).cumprod().plot(
    title="kNN Strategy Returns vs. Actual with kNearest Neighbor Classifier model, 2021-2022"
)

### Backtest the model 3: kNN to evaluate its performance. 