# Trade Strategy 1 with Machine Learning


## Outline:
* Set parameters of interest & Import Data

* Establish Trading Strategy with signals

* Identify Training & Testing dataset, Scale data if necessary

* Run model 1: Support Vector Classifier

    * Evaluate Performance
    * Backtest

* Run model 2: Random Forest Classifier

    * Evaluate Performance
    * Backtest
    
* Run model 3: k Nearest Neighbor Classifier

    * Evaluate Performance
    * Backtest

In [None]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
import pandas_ta as ta
from datetime import datetime
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error, r2_score
import math

### Set Parameters & Import the OHLCV dataset into a Pandas DataFrame.

In [None]:
# Market selection
#   coin:     BTC, ETH, XRP, DOGE or ADA
#   pair:     USD or USDT
#   exchange: Bitfinex, Binance or Coinbase
#   time:     data interval, 1h or 1d
coin, pair = 'BTC', 'USD'
exchange = 'Bitfinex'
time = '1h'

# Load the saved historical CSV from the Data folder (downloaded from
# cryptoDataOnline.com), indexed by its parsed "Datetime" column.
csv_path = Path(f"./Data/{exchange}/{pair}/{exchange}_{coin}_{time}.csv")
df = pd.read_csv(
    csv_path,
    index_col="Datetime",
    parse_dates=True,
    infer_datetime_format=True,
)
display(df.head())

In [None]:
# Drop the descriptive columns, give the USD volume a generic name and put the
# rows in chronological order.
unused_columns = ['Exchange', 'Symbol_Pair', 'Volume_in_BTC']
dataframe = (
    df.drop(columns=unused_columns)
    .copy()
    .rename(columns={'Volume_USD': 'Volume'})
    .sort_index(ascending=True)
)

print(f"The timeframe contained in this dataset is: {dataframe.index.min()} to {dataframe.index.max()}")

# Choose the start and end times of the data to analyze
start = '2019-01-01 00:00:00'
end = '2023-01-12 00:00:00'
dataframe = dataframe.loc[start:end]

print(f"Timeframe to be evaluated in strategy and model: {start} to {end}")

## Define trading strategy with entry and exit signals

In [None]:
# Strategy 1 parameters
ema_length = 200  # span of the long-term EMA
ema_fast = 12     # span of the fast EMA
ema_slow = 26     # span of the slow EMA
macd_signal = 9

# Long-term EMA and ATR (pandas_ta default settings) from the pandas_ta library
dataframe['EMA'] = ta.ema(dataframe['Close'], length=ema_length)
dataframe['ATR'] = dataframe.ta.atr()

# Fast and slow EMAs of the close; min_periods keeps each series NaN until a
# full span of observations is available.
close_prices = dataframe['Close']
EMA_fast = close_prices.ewm(span=ema_fast, adjust=False, min_periods=ema_fast).mean()
EMA_slow = close_prices.ewm(span=ema_slow, adjust=False, min_periods=ema_slow).mean()

dataframe['EMA_fast'] = EMA_fast
dataframe['EMA_slow'] = EMA_slow
dataframe.head()

In [None]:
# Overlay the close price with the long-term, fast and slow EMAs
plotted_columns = ['Close', 'EMA', 'EMA_fast', 'EMA_slow']
data = dataframe.hvplot(x='Datetime', y=plotted_columns, value_label='USD')
data

In [None]:
# Entry and Exit Signals
signals_df = dataframe.loc[:, ['Close', 'EMA', 'EMA_fast', 'EMA_slow']].copy()

# Every row starts flat (0); the first `ema_slow` rows are never re-evaluated.
signals_df['Signal'] = 0

# Signal is 1 while the fast EMA sits below both the long-term EMA and the slow EMA.
fast = signals_df['EMA_fast'].iloc[ema_slow:]
below_long_term = fast < signals_df['EMA'].iloc[ema_slow:]
below_slow = fast < signals_df['EMA_slow'].iloc[ema_slow:]

# Assign through one .iloc call on the frame itself. The previous chained form
# (signals_df['Signal'][ema_slow:] = ...) writes into a temporary Series, which
# raises SettingWithCopyWarning and has no effect under pandas copy-on-write.
signal_position = signals_df.columns.get_loc('Signal')
signals_df.iloc[ema_slow:, signal_position] = np.where(below_long_term & below_slow, 1, 0)

# +1 where the signal switches on, -1 where it switches off, 0 otherwise
signals_df["Entry/Exit"] = signals_df["Signal"].diff()

# for i in range(len(signals_df)):
#     if signals_df['Close'][i] < signals_df['EMA'][i] & 
#     (signals_df['EMA_fast'] == signals_df['EMA_slow']): #buy signal
#         signals_df['Signal'][i] = 1
        
#     elif dataframe['Close'][i] > signals_df['EMA'][i] & 
#     (signals_df['EMA_fast'] == signals_df['EMA_slow']):  #sell signal
#         signals_df['Signal'][i] = -1
          
#     else:
#         signals_df['Signal'][i] = 0

In [None]:
# Count how many rows carry each Signal value (1 = condition met, 0 = not met)
signals_df['Signal'].value_counts()

In [None]:
# Period-over-period percentage change of the close price
signals_df['Actual Returns'] = signals_df['Close'].pct_change()

# A row's return is earned only if the signal was on in the previous row,
# hence the one-row shift of the signal before multiplying.
previous_signal = signals_df['Signal'].shift()
signals_df['Strategy Returns'] = signals_df['Actual Returns'].mul(previous_signal)

In [None]:
# Plot Strategy Returns to examine performance.
# The return columns live in signals_df (not dataframe), and both curves are
# drawn on one set of axes by plotting the two columns together; matplotlib
# Axes objects cannot be overlaid with `*`.
cumulative_growth = (1 + signals_df[['Strategy Returns', 'Actual Returns']]).cumprod()
cumulative_growth.plot(title="Strategy Returns vs. Actual Returns - EMA Strategy")

In [None]:
# Final value of the buy-and-hold growth curve (growth of 1 unit invested).
# Summing the curve would add up every intermediate value and scale with the
# number of rows, so the last non-NaN point is reported instead.
(1 + signals_df['Actual Returns']).cumprod().dropna().iloc[-1]

In [None]:
# Final value of the strategy growth curve (growth of 1 unit invested).
# Summing the curve would add up every intermediate value and scale with the
# number of rows, so the last non-NaN point is reported instead.
(1 + signals_df['Strategy Returns']).cumprod().dropna().iloc[-1]

In [None]:
# Chart the close price, the three EMAs and the strategy's entry/exit points.

# Options shared by every layer of the chart
layer_opts = dict(ylabel='Price in $', width=1000, height=400)

signal_change = signals_df['Entry/Exit']

# Exit points (Entry/Exit == -1) at the close price, in red
sell = signals_df.loc[signal_change == -1.0, 'Close'].hvplot.scatter(
    color='red',
    legend=False,
    **layer_opts,
)

# Entry points (Entry/Exit == 1) at the close price, in blue
buy = signals_df.loc[signal_change == 1.0, 'Close'].hvplot.scatter(
    color='blue',
    legend=False,
    **layer_opts,
)

# Close price of the investment
close = signals_df[['Close']].hvplot(line_color='lightgray', **layer_opts)

# Moving averages
moving_avgs = signals_df[['EMA', 'EMA_fast', 'EMA_slow']].hvplot(**layer_opts)

# Overlay all layers and show the plot
entry_exit_plot = moving_avgs * close * buy * sell

entry_exit_plot

## Define training and testing datasets

In [None]:
# dataframe = dataframe.sort_values(by=["Datetime"], ascending=True)

In [None]:
# Features X: every column of signals_df except the target, lagged by one row
# so that each row only contains values known before that row's signal.
# Rows left incomplete by the lag or by indicator warm-up are dropped.
X = signals_df.drop(columns=['Signal']).shift().dropna().copy()
# Target y: the Signal column restricted to exactly the rows kept in X, so the
# two always share the same index and length.
y = signals_df.loc[X.index, 'Signal'].copy()

In [None]:
# Show the first rows of the features, then of the target
for preview in (X, y):
    display(preview.head())


In [None]:
# Sizes of the features and the target
display(X.shape)
display(y.shape)

# Latest and earliest timestamp of each
for dataset in (X, y):
    display(dataset.index.max())
    display(dataset.index.min())

In [None]:
# Review the class balance of the target: how many rows carry each Signal value
y.value_counts()

In [None]:
# Training window: the first three months of feature rows
training_begin = X.index.min()
training_end = training_begin + DateOffset(months=3)

# Slice features and target over the same training window
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review both ends of the training features
display(X_train.head(2))
display(X_train.tail(2))

# Testing window: everything from one hour after the end of training onwards
testing_begin = training_end + DateOffset(hours=1)
X_test = X.loc[testing_begin:]
y_test = y.loc[testing_begin:]

# Review the start of the testing features and target
display(X_test.head(2))
display(y_test.head(2))

In [None]:
# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both sets.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# from imblearn.over_sampling import RandomOverSampler
# Randomly oversample the scaled training set; random_state fixes the seed so
# the resampling is repeatable.
# NOTE(review): the model cells shown in this notebook fit on X_train_scaled /
# X_train, not on X_resampled / y_resampled - confirm whether the resampled
# sets were meant to be used.
ros = RandomOverSampler(random_state = 1)
X_resampled, y_resampled = ros.fit_resample(X_train_scaled, y_train)

## Model #1: Support Vector Classifier (SVC) from sklearn library

In [None]:
# Support vector classifier with default settings, fitted on the scaled
# training features
svc_model = svm.SVC().fit(X_train_scaled, y_train)

# Predict the signal for every row of the scaled test features
svc_pred = svc_model.predict(X_test_scaled)

# Peek at the first ten predictions
display(svc_pred[:10])

### Evaluation metrics

In [None]:
# Shape of the SVC prediction array (one prediction per test row)
svc_pred.shape

In [None]:
# Classification report of the SVC predictions against the true test signals
svc_testing_report = classification_report(y_true=y_test, y_pred=svc_pred)
print(svc_testing_report)

In [None]:
# Calculate R_Square and Adjusted R Square
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, svc_pred)
# n is the sample size and p is the number of independent variables, i.e. the
# number of feature columns the model was given (previously hard-coded to 1).
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)
print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Mean squared error of the SVC predictions and its square root
# The closer both values are to 0 the better the model fit
mse = mean_squared_error(y_test, svc_pred)
sqr_mse = math.sqrt(mse)
print(f"The mean square error is", mse)
print(f"The square root of the mse is", sqr_mse)

In [None]:
# Create a predictions DataFrame indexed like the test set
predictions_df = pd.DataFrame(index=X_test.index)

# Add the SVC model predictions to the DataFrame
predictions_df['SVC Predicted'] = svc_pred

# X holds the features lagged by one row, so X['Actual Returns'] in a given row
# is the return of the row before it - a value the model already saw as an
# input. Shifting it back by one recovers the return realised in the row the
# prediction refers to; the final row has no such value and stays NaN.
predictions_df['Actual Returns'] = X['Actual Returns'].shift(-1)

# Return earned when the position follows the prediction (1 = invested, 0 = flat)
predictions_df['SVC Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['SVC Predicted']

# Review the DataFrame
display(predictions_df.head())
display(predictions_df.tail())

In [None]:
# Final value of the SVC strategy growth curve (growth of 1 unit invested).
# Summing the curve would add up every intermediate value, so the last
# non-NaN point is reported instead.
(1 + predictions_df[['SVC Strategy Returns']]).cumprod().dropna().iloc[-1]

In [None]:
# Final value of the buy-and-hold growth curve over the test rows.
# Summing the curve would add up every intermediate value, so the last
# non-NaN point is reported instead.
(1 + predictions_df[['Actual Returns']]).cumprod().dropna().iloc[-1]

In [None]:
# Cumulative growth of the SVC strategy next to the actual returns
predictions_df[['SVC Strategy Returns', 'Actual Returns']].add(1).cumprod().plot(
    title="SVC Strategy Returns vs. Actual Returns with Support Vecotr Classifier model"
)


### Backtest the model 1: SVC to evaluate its performance.

In [None]:
# Backtest settings
initial_capital = 1000.0  # starting cash
share_size = 50           # units held while the model signals a position

# Backtesting window; signals_df is cut down to these dates
start, end = '2019-05-01', '2020-02-01'
signals_df = signals_df.loc[start:end].copy()

In [None]:
# Hold `share_size` units while the SVC predicts 1, nothing while it predicts 0
signals_df['Position'] = share_size * predictions_df['SVC Predicted']

# Units bought (+) or sold (-) in each row. diff() leaves the first row NaN,
# which would let an opening position show up in the holdings without its cost
# ever leaving cash; the first row is therefore treated as a trade from flat.
position_change = signals_df['Position'].diff()
if not position_change.empty:
    position_change.iloc[0] = signals_df['Position'].iloc[0]
signals_df['Entry/Exit Position'] = position_change

# Market value of the units held: close price times position
signals_df['Portfolio Holdings'] = signals_df['Close'] * signals_df['Position']
# Cash left after the running total of trade costs (+) and proceeds (-)
signals_df['Portfolio Cash'] = initial_capital - (signals_df['Close'] * signals_df['Entry/Exit Position']).cumsum()
# Total portfolio value: cash plus holdings
signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']
# Row-over-row portfolio return (one row per data interval)
signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()
# Cumulative portfolio return since the start of the window
signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1
signals_df.tail(10)

In [None]:
# Mark the trades the backtest actually executed. 'Entry/Exit Position' is the
# change in the model-driven position, whereas 'Entry/Exit' comes from the
# rule-based signal and need not line up with the SVC trades.
position_change = signals_df['Entry/Exit Position']

# Exits (position reduced) relative to total portfolio value.
# Named `exits` rather than `exit` to avoid shadowing the builtin.
exits = signals_df.loc[position_change < 0, 'Portfolio Total'].hvplot.scatter(
    color='yellow',
    marker='v',
    legend=False,
    ylabel='Total Portfolio Value',
    width=1000,
    height=400
)

# Entries (position increased) relative to total portfolio value
entries = signals_df.loc[position_change > 0, 'Portfolio Total'].hvplot.scatter(
    color='purple',
    marker='^',
    ylabel='Total Portfolio Value',
    width=1000,
    height=400
)

# Value of the total portfolio
total_portfolio_value = signals_df[['Portfolio Total']].hvplot(
    line_color='lightgray',
    ylabel='Total Portfolio Value',
    xlabel='Date',
    width=1000,
    height=400
)

# Overlay the plots
portfolio_entry_exit_plot = total_portfolio_value * entries * exits
portfolio_entry_exit_plot.opts(
    title="BTC Algorithm with SVC Preditions- Total Portfolio Value",
    yformatter='%.0f'
)

## Model #2: Random Forest Classifier from sklearn library

In [None]:
# Random forest classifier from scikit-learn
from sklearn.ensemble import RandomForestClassifier

# Trees limited to depth 3; random_state fixes the seed for repeatable fits
RFC = RandomForestClassifier(max_depth=3, random_state=1)

# fit() returns the fitted estimator itself, so `model` and `RFC` are the same
# object. The forest is trained on the unscaled features.
model = RFC.fit(X_train, y_train)

# Predict the signal for every row of the (unscaled) test features
forest_pred = model.predict(X_test)

# Peek at the first ten predictions
forest_pred[:10]

### Evaluation Metrics

In [None]:
# Classification report of the random forest predictions against the true
# test signals
forest_pred_report = classification_report(y_true=y_test, y_pred=forest_pred)
print(forest_pred_report)


In [None]:
# Calculate R_Square and Adjusted R Square
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, forest_pred)
# n is the sample size and p is the number of independent variables, i.e. the
# number of feature columns the model was given (previously hard-coded to 1).
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)
print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Mean squared error of the random forest predictions and its square root
# The closer both values are to 0 the better the model fit
mse = mean_squared_error(y_test, forest_pred)
sqr_mse = math.sqrt(mse)
print(f"The mean square error is", mse)
print(f"The square root of the mse is", sqr_mse)

In [None]:
# Add the RFC model predictions to the predictions DataFrame
predictions_df['RFC Predictions'] = forest_pred

# X holds the features lagged by one row, so X['Actual Returns'] in a given row
# is the return of the row before it - a value the model already saw as an
# input. Shifting it back by one recovers the return realised in the row the
# prediction refers to; the final row has no such value and stays NaN.
predictions_df['Actual Returns'] = X['Actual Returns'].shift(-1)

# Return earned when the position follows the prediction (1 = invested, 0 = flat)
predictions_df['RFC Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['RFC Predictions']

# Review the DataFrame
predictions_df

In [None]:
# Final value of the RFC strategy growth curve (growth of 1 unit invested).
# Summing the curve would add up every intermediate value, so the last
# non-NaN point is reported instead.
(1 + predictions_df[['RFC Strategy Returns']]).cumprod().dropna().iloc[-1]

In [None]:
# Final value of the buy-and-hold growth curve over the test rows.
# Summing the curve would add up every intermediate value, so the last
# non-NaN point is reported instead.
(1 + predictions_df[['Actual Returns']]).cumprod().dropna().iloc[-1]

In [None]:
# Cumulative growth of the RFC strategy next to the actual returns
predictions_df[['RFC Strategy Returns', 'Actual Returns']].add(1).cumprod().plot(
    title="RFC Strategy Returns vs. Actual with Random Forest Classifier model, 2021-2022"
)

### Backtest the model 2: RFC to evaluate its performance.

In [None]:
# Backtest settings for the random forest model
initial_capital = 1000.0  # starting cash
share_size = 50           # units held while the model signals a position

# Backtesting window; signals_df is cut down to these dates
start, end = '2019-05-01', '2020-02-01'
signals_df = signals_df.loc[start:end].copy()

In [None]:
# Hold `share_size` units while the RFC predicts 1, nothing while it predicts 0
signals_df['Position'] = share_size * predictions_df['RFC Predictions']

# Units bought (+) or sold (-) in each row. diff() leaves the first row NaN,
# which would let an opening position show up in the holdings without its cost
# ever leaving cash; the first row is therefore treated as a trade from flat.
position_change = signals_df['Position'].diff()
if not position_change.empty:
    position_change.iloc[0] = signals_df['Position'].iloc[0]
signals_df['Entry/Exit Position'] = position_change

# Market value of the units held: close price times position
signals_df['Portfolio Holdings'] = signals_df['Close'] * signals_df['Position']
# Cash left after the running total of trade costs (+) and proceeds (-)
signals_df['Portfolio Cash'] = initial_capital - (signals_df['Close'] * signals_df['Entry/Exit Position']).cumsum()
# Total portfolio value: cash plus holdings
signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']
# Row-over-row portfolio return (one row per data interval)
signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()
# Cumulative portfolio return since the start of the window
signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1
signals_df.tail(10)

In [None]:
# Mark the trades the backtest actually executed. 'Entry/Exit Position' is the
# change in the model-driven position, whereas 'Entry/Exit' comes from the
# rule-based signal and need not line up with the RFC trades.
position_change = signals_df['Entry/Exit Position']

# Exits (position reduced) relative to total portfolio value.
# Named `exits` rather than `exit` to avoid shadowing the builtin.
exits = signals_df.loc[position_change < 0, 'Portfolio Total'].hvplot.scatter(
    color='yellow',
    marker='v',
    legend=False,
    ylabel='Total Portfolio Value',
    width=1000,
    height=400
)

# Entries (position increased) relative to total portfolio value
entries = signals_df.loc[position_change > 0, 'Portfolio Total'].hvplot.scatter(
    color='purple',
    marker='^',
    ylabel='Total Portfolio Value',
    width=1000,
    height=400
)

# Value of the total portfolio
total_portfolio_value = signals_df[['Portfolio Total']].hvplot(
    line_color='lightgray',
    ylabel='Total Portfolio Value',
    xlabel='Date',
    width=1000,
    height=400
)

# Overlay the plots. The title names the RFC model, whose predictions drive
# this backtest (it previously said SVC).
portfolio_entry_exit_plot = total_portfolio_value * entries * exits
portfolio_entry_exit_plot.opts(
    title="BTC Algorithm with RFC Preditions- Total Portfolio Value",
    yformatter='%.0f'
)

## Model #3: KNearest Neighbors Classifier (kNN) from sklearn library

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier voting over the 3 closest training rows,
# fitted on the scaled training features
kNN = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)

# Predict the signal for every row of the scaled test features
kNN_pred = kNN.predict(X_test_scaled)


### Evaluation Metrics

In [None]:
# Classification report of the kNN predictions against the true test signals
kNN_pred_report = classification_report(y_true=y_test, y_pred=kNN_pred)
print(kNN_pred_report)


In [None]:
# Calculate R_Square and Adjusted R Square
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, kNN_pred)
# n is the sample size and p is the number of independent variables, i.e. the
# number of feature columns the model was given (previously hard-coded to 1).
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)
print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Mean squared error of the kNN predictions and its square root
# The closer both values are to 0 the better the model fit
mse = mean_squared_error(y_test, kNN_pred)
sqr_mse = math.sqrt(mse)
print(f"The mean square error is", mse)
print(f"The square root of the mse is", sqr_mse)

In [None]:

# Add the kNN model predictions to the DataFrame
predictions_df['kNN Predictions'] = kNN_pred

# X holds the features lagged by one row, so X['Actual Returns'] in a given row
# is the return of the row before it - a value the model already saw as an
# input. Shifting it back by one recovers the return realised in the row the
# prediction refers to; the final row has no such value and stays NaN.
predictions_df['Actual Returns'] = X['Actual Returns'].shift(-1)

# Return earned when the position follows the prediction (1 = invested, 0 = flat)
predictions_df['kNN Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['kNN Predictions']


In [None]:
# Plot the actual returns versus the strategy returns.
# The kNN columns are stored in predictions_df; no kNN_predictions_df is ever
# created, so referencing it raised NameError.
(1 + predictions_df[['kNN Strategy Returns','Actual Returns']]).cumprod().plot(title="kNN Strategy Returns vs. Actual with kNearest Neighbor Classifier model, 2021-2022")

### Backtest the model 3: kNN to evaluate its performance. 

In [None]:
# Backtest settings for the kNN model
initial_capital = 1000.0  # starting cash
share_size = 50           # units held while the model signals a position

# Backtesting window; signals_df is cut down to these dates
start, end = '2019-05-01', '2020-02-01'
signals_df = signals_df.loc[start:end].copy()

In [None]:
# Hold `share_size` units while the kNN predicts 1, nothing while it predicts 0
signals_df['Position'] = share_size * predictions_df['kNN Predictions']

# Units bought (+) or sold (-) in each row. diff() leaves the first row NaN,
# which would let an opening position show up in the holdings without its cost
# ever leaving cash; the first row is therefore treated as a trade from flat.
position_change = signals_df['Position'].diff()
if not position_change.empty:
    position_change.iloc[0] = signals_df['Position'].iloc[0]
signals_df['Entry/Exit Position'] = position_change

# Market value of the units held: close price times position
signals_df['Portfolio Holdings'] = signals_df['Close'] * signals_df['Position']
# Cash left after the running total of trade costs (+) and proceeds (-)
signals_df['Portfolio Cash'] = initial_capital - (signals_df['Close'] * signals_df['Entry/Exit Position']).cumsum()
# Total portfolio value: cash plus holdings
signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']
# Row-over-row portfolio return (one row per data interval)
signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()
# Cumulative portfolio return since the start of the window
signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1
signals_df.tail(10)

In [None]:
# Mark the trades the backtest actually executed. 'Entry/Exit Position' is the
# change in the model-driven position, whereas 'Entry/Exit' comes from the
# rule-based signal and need not line up with the kNN trades.
position_change = signals_df['Entry/Exit Position']

# Exits (position reduced) relative to total portfolio value.
# Named `exits` rather than `exit` to avoid shadowing the builtin.
exits = signals_df.loc[position_change < 0, 'Portfolio Total'].hvplot.scatter(
    color='yellow',
    marker='v',
    legend=False,
    ylabel='Total Portfolio Value',
    width=1000,
    height=400
)

# Entries (position increased) relative to total portfolio value
entries = signals_df.loc[position_change > 0, 'Portfolio Total'].hvplot.scatter(
    color='purple',
    marker='^',
    ylabel='Total Portfolio Value',
    width=1000,
    height=400
)

# Value of the total portfolio
total_portfolio_value = signals_df[['Portfolio Total']].hvplot(
    line_color='lightgray',
    ylabel='Total Portfolio Value',
    xlabel='Date',
    width=1000,
    height=400
)

# Overlay the plots
portfolio_entry_exit_plot = total_portfolio_value * entries * exits
portfolio_entry_exit_plot.opts(
    title="BTC Algorithm with kNN Preditions- Total Portfolio Value",
    yformatter='%.0f'
)