# Trade Strategy 1 with Machine Learning


## Outline:
* Set parameters of interest & Import Data

* Establish Trading Strategy with signals

* Identify Training & Testing dataset, Scale data if necessary

* Run model 1: Support Vector Classifier

    * Evaluate Performance
    * Backtest

* Run model 2: Random Forest Classifier

    * Evaluate Performance
    * Backtest
    
* Run model 3: k Nearest Neighbor Classifier

    * Evaluate Performance
    * Backtest

In [None]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
from finta import TA
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error, r2_score
import math

### Set Parameters & Import the OHLCV dataset into a Pandas DataFrame.

In [None]:
# Market selection (edit these four values to load a different dataset)
#   coin:     BTC, ETH, XRP, DOGE, ADA
#   pair:     USD, USDT
#   exchange: Bitfinex, Binance, Coinbase
#   time:     data interval, 1h or 1d
coin, pair = 'BTC', 'USD'
exchange = 'Bitfinex'
time = '1h'

# Load the saved historical CSV for the selected market from the Data folder
# (per the original note, downloaded from cryptoDataOnline.com), using the
# parsed "Datetime" column as the index
csv_path = Path(f"./Data/Formatted_Data/{exchange}/{pair}/{exchange}_{coin}_{time}.csv")
df = pd.read_csv(
    csv_path,
    index_col="Datetime",
    parse_dates=True,
    infer_datetime_format=True,
)
display(df.head())


In [None]:

# Work on a copy without the 'Exchange' and 'Symbol_Pair' columns, and expose
# 'Volume_in_BTC' under the generic name 'Volume'
dataframe = (
    df.drop(columns=['Exchange', 'Symbol_Pair'])
    .copy()
    .rename(columns={'Volume_in_BTC': 'Volume'})
)
dataframe.head(2)

In [None]:
# Row and column counts of the working DataFrame
dataframe.shape

## Define trading strategy with entry and exit signals

In [None]:
# Strategy 1: Bollinger Bands
# Compute the bands with finta and append the resulting columns to the
# price data, side by side on the shared index
bb_df = TA.BBANDS(dataframe)
dataframe = pd.concat((dataframe, bb_df), axis='columns')

# TODO: add a volume-based feature (average? standard deviation?)

dataframe.tail()

In [None]:
# Plot the close price together with the three Bollinger Band columns
dataframe.hvplot(x='Datetime', y=['Close', 'BB_UPPER','BB_MIDDLE', 'BB_LOWER'], value_label='USD')

In [None]:
# Entry & Exit Signals
# Start flat (0.0), then mark bars whose close breaks out of the bands.
# Boolean masks replace the row-by-row iterrows() loop: one vectorised pass
# instead of a Python-level .loc write per row.
dataframe['Signal'] = 0.0

below_lower_band = dataframe['Close'] < dataframe['BB_LOWER']
above_upper_band = dataframe['Close'] > dataframe['BB_UPPER']

# Comparisons against NaN band values are False, so those bars stay at 0.0
dataframe.loc[below_lower_band, 'Signal'] = 1.0   # Buy Signal
# Applied last so that, as in the original loop, a sell overrides a buy
dataframe.loc[above_upper_band, 'Signal'] = -1.0  # Sell Signal
        


In [None]:
# Count how many bars carry each signal value (1.0 buy, -1.0 sell, 0.0 none)
dataframe['Signal'].value_counts()

In [None]:
# Per-bar percentage change of the close price
close_returns = dataframe['Close'].pct_change()
dataframe['Actual Returns'] = close_returns

# Strategy return: each bar's return weighted by the signal of the bar
# before it (the signal series is shifted by one row)
previous_signal = dataframe['Signal'].shift()
dataframe['Strategy Returns'] = close_returns * previous_signal

In [None]:
# Plot Strategy Returns to examine performance
# Series.plot() returns a matplotlib Axes, and Axes objects cannot be
# combined with `*` (that overlay operator belongs to hvplot/holoviews).
# Draw both cumulative-return curves on the same Axes instead.
strat = (1 + dataframe['Strategy Returns']).cumprod().plot(
    title="Strategy Returns- Bollinger Bands",
    label='Strategy Returns',
    legend=True,
)
act = (1 + dataframe['Actual Returns']).cumprod().plot(
    ax=strat,
    label='Actual Returns',
    legend=True,
)

In [None]:
# Sum of the cumulative-growth series of the actual returns
# NOTE(review): this adds up every intermediate cumulative value; if the final
# cumulative return was intended, confirm and take the last element instead.
(1 + dataframe['Actual Returns']).cumprod().sum()

In [None]:
# Sum of the cumulative-growth series of the strategy returns
# NOTE(review): this adds up every intermediate cumulative value; if the final
# cumulative return was intended, confirm and take the last element instead.
(1 + dataframe['Strategy Returns']).cumprod().sum()

In [None]:
#Plot
# Layout options shared by every layer of the overlay
layer_opts = dict(ylabel='Price in $', width=1000, height=400)
marker_opts = dict(size=200, legend=False, **layer_opts)

# Visualize entry position relative to close price
entry = dataframe[dataframe["Signal"] == 1.0]["Close"].hvplot.scatter(
    color='green',
    marker='^',
    **marker_opts
)

# Visualize exit position relative to close price
# (named exit_points so the builtin `exit` is not shadowed)
exit_points = dataframe[dataframe["Signal"] == -1.0]["Close"].hvplot.scatter(
    color='red',
    marker='v',
    **marker_opts
)

# Visualize close price for the investment
security_close = dataframe[["Close"]].hvplot(line_color='lightgray', **layer_opts)

# Visualize the upper, middle and lower Bollinger Bands
bb_upper = dataframe[["BB_UPPER"]].hvplot(line_color='purple', **layer_opts)
bb_middle = dataframe[["BB_MIDDLE"]].hvplot(line_color='orange', **layer_opts)
bb_lower = dataframe[["BB_LOWER"]].hvplot(line_color='blue', **layer_opts)

# Overlay plots
bbands_plot = security_close * bb_upper * bb_middle * bb_lower * entry * exit_points
bbands_plot

## Define training and testing datasets

In [None]:
# Order the rows by "Datetime", oldest first, before the date-based split
dataframe = dataframe.sort_values(by=["Datetime"], ascending=True)

In [None]:
# Features X: every column except 'Signal', shifted by one row so each row
# only holds values from the previous bar; rows with any missing value
# (the first shifted row, indicator warm-up) are dropped.
lagged_features = dataframe.drop(['Signal'], axis=1).shift()
complete_rows = lagged_features.notna().all(axis=1)
X = lagged_features[complete_rows].copy()

# Target y: the 'Signal' column restricted to exactly the rows kept in X, so
# that X and y always have the same length and index.
y = dataframe.loc[complete_rows, 'Signal'].copy()

In [None]:
# Preview the first and last two feature rows, then the first target values
display(X.head(2))
display(X.tail(2))
y.head()

In [None]:
# Show the shapes of the features and the target ...
for frame in (X, y):
    display(frame.shape)

# ... followed by the first and last timestamp covered by each
for frame in (X, y):
    display(frame.index.min())
    display(frame.index.max())

In [None]:
# Review how many samples fall into each target class
y.value_counts()

In [None]:
# Training window: the first three months of the available features
training_begin = X.index.min()
training_end = training_begin + DateOffset(months=3)
train_window = slice(training_begin, training_end)

# Testing window: everything from one hour after the training window onwards
test_window = slice(training_end + DateOffset(hours=1), None)

# Slice features and target with the same label-based windows
X_train, y_train = X.loc[train_window], y.loc[train_window]
X_test, y_test = X.loc[test_window], y.loc[test_window]

# Review the first and last rows of the training, then the testing features
for features in (X_train, X_test):
    display(features.head(2))
    display(features.tail(2))

In [None]:
# Fit a StandardScaler on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# from imblearn.over_sampling import RandomOverSampler
# ros = RandomOverSampler(random_state = 1)
# X_resampled, y_resampled = ros.fit_resample(X_train_scaled, y_train)

## Model #1: Support Vector Classifier (SVC) from sklearn library

In [None]:
# RBF-kernel support vector classifier: build, fit on the scaled training
# data, then predict the testing period
rbf_svc_model = svm.SVC(kernel='rbf', gamma=0.5, C=0.1).fit(X_train_scaled, y_train)
rbf_svc_pred = rbf_svc_model.predict(X_test_scaled)

# Degree-3 polynomial-kernel support vector classifier, same procedure
poly_svc_model = svm.SVC(kernel='poly', degree=3, C=1).fit(X_train_scaled, y_train)
poly_svc_pred = poly_svc_model.predict(X_test_scaled)

### Evaluation metrics

In [None]:
# Show the shape of each SVC prediction array
display(rbf_svc_pred.shape)
display(poly_svc_pred.shape)

In [None]:
# Classification report for each SVC kernel on the testing data.
# Each report is printed under its own name; the original printed an
# undefined `svc_testing_report` and raised NameError.
rbf_svc_testing_report = classification_report(y_test, rbf_svc_pred)
print(rbf_svc_testing_report)
poly_svc_testing_report = classification_report(y_test, poly_svc_pred)
print(poly_svc_testing_report)

In [None]:
# Calculate R_Square and Adjusted R Square for both SVC kernels
# The closer R2 and R2_adj are to 1 the better the model fit
R2rbf = r2_score(y_test, rbf_svc_pred)
R2poly = r2_score(y_test, poly_svc_pred)

# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n is the sample
# size and p is the number of independent variables. p is taken from the
# feature matrix instead of being hard-coded to 1.
n_samples = len(y_test)
n_features = X_test.shape[1]
adjustment = (n_samples - 1) / (n_samples - n_features - 1)
R2_adj_rbf = 1 - (1 - R2rbf) * adjustment
R2_adj_poly = 1 - (1 - R2poly) * adjustment

print("The R squared using the RBF kernel in SVC is", R2rbf)
print ("The R squared adjusted is", R2_adj_rbf)
print("The R squared using the Polynomial kernel in SVC is", R2poly)
print ("The R squared adjusted is", R2_adj_poly)

In [None]:
# Calculate mean squared error and sqr rt of mean squared error
# The closer MSE & srMSE are to 0 the better the model fit
mse_rbf = mean_squared_error(y_test, rbf_svc_pred)
mse_poly = mean_squared_error(y_test, poly_svc_pred)

# Take the root of the values already computed instead of recomputing the MSE
sqr_mse_rbf = math.sqrt(mse_rbf)
sqr_mse_poly = math.sqrt(mse_poly)

print("The mean square error using the RBF kernel in SVC is", mse_rbf)
print("The square root of the mse is", sqr_mse_rbf)
print("The mean square error using the Polynomial kernel in SVC is", mse_poly)
# The original referenced the misspelled `sqr_mse_ploy` here (NameError)
print("The square root of the mse is", sqr_mse_poly)

In [None]:
# Create a predictions DataFrame indexed like the testing features
predictions_df = pd.DataFrame(index=X_test.index)

# Add the SVC model predictions to the DataFrame
# (the prediction arrays are named *_svc_pred; the original referenced the
# undefined *_svm_pred names and raised NameError)
predictions_df['rbfSVC Predicted'] = rbf_svc_pred
predictions_df['polySVC Predicted'] = poly_svc_pred

# Add the actual returns to the DataFrame
# NOTE(review): X was built with .shift(), so X["Actual Returns"] holds the
# previous bar's return; confirm whether dataframe['Actual Returns'] was
# intended here.
predictions_df['Actual Returns'] = X["Actual Returns"]

# Add the strategy returns to the DataFrame: actual return times predicted signal
predictions_df['rbfSVC Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['rbfSVC Predicted']
predictions_df['polySVC Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['polySVC Predicted']

# Review the DataFrame
display(predictions_df.head())
display(predictions_df.tail())

In [None]:
# Sum of the cumulative-growth series for each SVC strategy.
# A notebook cell only echoes its last expression, so both results are
# displayed explicitly; the RBF figure was silently discarded before.
# NOTE(review): this adds up every intermediate cumulative value; if the final
# cumulative return was intended, confirm and take the last row instead.
display((1 + predictions_df[['rbfSVC Strategy Returns']]).cumprod().sum())
display((1 + predictions_df[['polySVC Strategy Returns']]).cumprod().sum())

In [None]:
# Sum of the cumulative-growth series of the actual returns over the test rows
# NOTE(review): sums every intermediate cumulative value; confirm the metric.
(1 + predictions_df[['Actual Returns']]).cumprod().sum()

In [None]:
# Plot the cumulative growth of both SVC strategies against the actual returns
(1 + predictions_df[['rbfSVC Strategy Returns','polySVC Strategy Returns','Actual Returns']]).cumprod().plot(title="SVC Strategy Returns vs. Actual Returns with Support Vecotr Classifier model")


### Backtest the model 1: SVC to evaluate its performance.

In [None]:
# Initial Capital Investment
initial_capital = float(1000)

# Share size of each trade
share_size = 50

# Select timeframe of Backtesting
start = '2019-05-01'
end = '2020-02-01'

# Build the backtest frame from the price/indicator data. `signals_df` did
# not exist before this cell, so slicing it from itself raised NameError.
# NOTE(review): the window must overlap the testing period covered by
# predictions_df, otherwise the positions assigned next will all be NaN.
signals_df = dataframe.loc[start:end, :].copy()

In [None]:
# Position held on each bar: share_size times the predicted signal
# (+share_size on 1, -share_size on -1, flat on 0). The RBF-kernel column is
# used; the original 'SVC Predicted' column does not exist (KeyError).
signals_df['Position'] = share_size * predictions_df['rbfSVC Predicted']
# Change in position from the previous bar (positive = bought, negative = sold)
signals_df['Entry/Exit Position'] = signals_df['Position'].diff()
# Multiply the close price by the number of shares held, or the Position
signals_df['Portfolio Holdings'] = signals_df['Close'] * signals_df['Position']
# Subtract the amount of either the cost or proceeds of the trade from the initial capital invested
signals_df['Portfolio Cash'] = initial_capital - (signals_df['Close'] * signals_df['Entry/Exit Position']).cumsum()
# Calculate the total portfolio value by adding the portfolio cash to the portfolio holdings (or investments)
signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']
# Calculate the portfolio returns from one bar to the next
signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()
# Calculate the portfolio cumulative returns
signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1
signals_df.tail(10)

In [None]:
# Layout options shared by every layer of the overlay
layout_opts = dict(ylabel='Total Portfolio Value', width=1000, height=400)

# Per-bar change in position. The column is 'Entry/Exit Position' (there is
# no 'Entry/Exit' column) and its values are multiples of share_size, so
# trades are detected by sign rather than by comparing against +/-1.0.
position_change = signals_df['Entry/Exit Position']

# Visualize exit position relative to total portfolio value
# (named exit_points so the builtin `exit` is not shadowed)
exit_points = signals_df.loc[position_change < 0, 'Portfolio Total'].hvplot.scatter(
    color='yellow',
    marker='v',
    legend=False,
    **layout_opts
)

# Visualize entry position relative to total portfolio value
entry = signals_df.loc[position_change > 0, 'Portfolio Total'].hvplot.scatter(
    color='purple',
    marker='^',
    **layout_opts
)

# Visualize the value of the total portfolio
total_portfolio_value = signals_df[['Portfolio Total']].hvplot(
    line_color='lightgray',
    xlabel='Date',
    **layout_opts
)

# Overlay the plots
portfolio_entry_exit_plot = total_portfolio_value * entry * exit_points
portfolio_entry_exit_plot.opts(
    title="BTC Algorithm with SVC Preditions- Total Portfolio Value",
    yformatter='%.0f'
)

## Model #2: Random Forest Classifier from sklearn library

In [None]:
# Import a new classifier from SKLearn
# RandomForestClassifier lives in sklearn.ensemble; importing it from
# sklearn.tree raises ImportError.
from sklearn.ensemble import RandomForestClassifier

# Initiate the model instance (shallow trees, fixed seed for reproducibility)
RFC = RandomForestClassifier(max_depth=3, random_state=1)

# Fit the model using the (unscaled) training data
model = RFC.fit(X_train, y_train)

# Use the testing dataset to generate the predictions for the new model
forest_pred = RFC.predict(X_test)

# Review the model's predicted values
forest_pred[:10]

### Evaluation Metrics

In [None]:
# Build the classification report comparing the Random Forest predictions
# with the true testing labels, then print it
forest_pred_report = classification_report(y_true=y_test, y_pred=forest_pred)
print(forest_pred_report)


In [None]:
# Calculate R_Square and Adjusted R Square for the Random Forest predictions
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, forest_pred)

# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n is the sample
# size and p is the number of independent variables. p is taken from the
# feature matrix instead of being hard-coded to 1.
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)

print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Mean squared error of the Random Forest predictions and its square root
# The closer both values are to 0 the better the model fit
mse = mean_squared_error(y_test, forest_pred)
sqr_mse = math.sqrt(mean_squared_error(y_test, forest_pred))
for label, value in (
    ("The mean square error is", mse),
    ("The square root of the mse is", sqr_mse),
):
    print(label, value)

In [None]:
# Add the Random Forest (RFC) predictions to the predictions DataFrame
predictions_df['RFC Predictions'] = forest_pred

# (Re)assign the actual returns taken from the feature set
# NOTE(review): X was built with .shift(), so these are the previous bar's
# returns; confirm whether dataframe['Actual Returns'] was intended.
predictions_df['Actual Returns'] = X['Actual Returns']

# Strategy return: actual return times the predicted signal
predictions_df['RFC Strategy Returns'] = predictions_df['Actual Returns'].mul(
    predictions_df['RFC Predictions']
)

# Review the DataFrame
predictions_df

In [None]:
# Sum of the cumulative-growth series of the RFC strategy returns
# NOTE(review): sums every intermediate cumulative value; confirm the metric.
(1 + predictions_df[['RFC Strategy Returns']]).cumprod().sum()

In [None]:
# Sum of the cumulative-growth series of the actual returns over the test rows
# NOTE(review): sums every intermediate cumulative value; confirm the metric.
(1 + predictions_df[['Actual Returns']]).cumprod().sum()

In [None]:
# Plot the cumulative growth of the RFC strategy against the actual returns
# NOTE(review): the title hard-codes "2021-2022"; confirm it matches the data.
(1 + predictions_df[['RFC Strategy Returns','Actual Returns']]).cumprod().plot(title="RFC Strategy Returns vs. Actual with Random Forest Classifier model, 2021-2022")

### Backtest the model 2: RFC to evaluate its performance.

In [None]:
# Initial Capital Investment
initial_capital = float(1000)

# Share size of each trade
share_size = 50

# Select timeframe of Backtesting
start = '2019-05-01'
end = '2020-02-01'

# Rebuild the backtest frame from the price/indicator data so this backtest
# neither depends on a previously created `signals_df` nor inherits its
# portfolio columns.
# NOTE(review): the window must overlap the testing period covered by
# predictions_df, otherwise the positions assigned next will all be NaN.
signals_df = dataframe.loc[start:end, :].copy()

In [None]:
# Position held on each bar: share_size times the RFC predicted signal
signals_df['Position'] = share_size * predictions_df['RFC Predictions']
held = signals_df['Position']

# Change in position from the previous bar (the traded quantity)
signals_df['Entry/Exit Position'] = held.diff()
traded = signals_df['Entry/Exit Position']

# Market value of the position at the close price
close_price = signals_df['Close']
signals_df['Portfolio Holdings'] = close_price * held

# Cash left after the running cost (or proceeds) of every trade so far
signals_df['Portfolio Cash'] = initial_capital - (close_price * traded).cumsum()

# Total value = cash + holdings
signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']

# Bar-over-bar return of the total value, and its cumulative compounding
signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()
signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1

signals_df.tail(10)

In [None]:
# Layout options shared by every layer of the overlay
layout_opts = dict(ylabel='Total Portfolio Value', width=1000, height=400)

# Per-bar change in position. The column is 'Entry/Exit Position' (there is
# no 'Entry/Exit' column) and its values are multiples of share_size, so
# trades are detected by sign rather than by comparing against +/-1.0.
position_change = signals_df['Entry/Exit Position']

# Visualize exit position relative to total portfolio value
# (named exit_points so the builtin `exit` is not shadowed)
exit_points = signals_df.loc[position_change < 0, 'Portfolio Total'].hvplot.scatter(
    color='yellow',
    marker='v',
    legend=False,
    **layout_opts
)

# Visualize entry position relative to total portfolio value
entry = signals_df.loc[position_change > 0, 'Portfolio Total'].hvplot.scatter(
    color='purple',
    marker='^',
    **layout_opts
)

# Visualize the value of the total portfolio
total_portfolio_value = signals_df[['Portfolio Total']].hvplot(
    line_color='lightgray',
    xlabel='Date',
    **layout_opts
)

# Overlay the plots; the title names the model backtested in this section
# (RFC), where the original still said SVC
portfolio_entry_exit_plot = total_portfolio_value * entry * exit_points
portfolio_entry_exit_plot.opts(
    title="BTC Algorithm with RFC Preditions- Total Portfolio Value",
    yformatter='%.0f'
)

## Model #3: KNearest Neighbors Classifier (kNN) from sklearn library

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# 3-nearest-neighbours classifier: fit on the scaled training data, then
# predict the scaled testing period (fit() returns the estimator itself)
kNN = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
kNN_pred = kNN.predict(X_test_scaled)


### Evaluation Metrics

In [None]:
# Build the classification report comparing the kNN predictions with the
# true testing labels, then print it
kNN_pred_report = classification_report(y_true=y_test, y_pred=kNN_pred)
print(kNN_pred_report)


In [None]:
# Calculate R_Square and Adjusted R Square for the kNN predictions
# The closer R2 and R2_adj are to 1 the better the model fit
R2 = r2_score(y_test, kNN_pred)

# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n is the sample
# size and p is the number of independent variables. p is taken from the
# feature matrix instead of being hard-coded to 1.
n_samples = len(y_test)
n_features = X_test.shape[1]
R2_adj = 1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)

print("The R squared is", R2)
print ("The R squared adjusted is", R2_adj)

In [None]:
# Mean squared error of the kNN predictions and its square root
# The closer both values are to 0 the better the model fit
mse = mean_squared_error(y_test, kNN_pred)
sqr_mse = math.sqrt(mean_squared_error(y_test, kNN_pred))
for label, value in (
    ("The mean square error is", mse),
    ("The square root of the mse is", sqr_mse),
):
    print(label, value)

In [None]:

# Store the kNN predictions alongside the other models' predictions
predictions_df['kNN Predictions'] = kNN_pred

# (Re)assign the actual returns taken from the feature set
# NOTE(review): X was built with .shift(), so these are the previous bar's
# returns; confirm whether dataframe['Actual Returns'] was intended.
predictions_df['Actual Returns'] = X['Actual Returns']

# Strategy return: actual return times the predicted signal
predictions_df['kNN Strategy Returns'] = predictions_df['Actual Returns'].mul(
    predictions_df['kNN Predictions']
)


In [None]:
# Plot the cumulative growth of the kNN strategy against the actual returns.
# The kNN columns live in `predictions_df`; `kNN_predictions_df` was never
# defined and raised NameError.
(1 + predictions_df[['kNN Strategy Returns','Actual Returns']]).cumprod().plot(title="kNN Strategy Returns vs. Actual with kNearest Neighbor Classifier model, 2021-2022")

### Backtest the model 3: kNN to evaluate its performance. 

In [None]:
# Initial Capital Investment
initial_capital = float(1000)

# Share size of each trade
share_size = 50

# Select timeframe of Backtesting
start = '2019-05-01'
end = '2020-02-01'

# Rebuild the backtest frame from the price/indicator data under the name
# the following cells use (`signals_df`). The original stored the slice in
# an unused `back_df` and sliced an undefined or already-mutated frame.
# NOTE(review): the window must overlap the testing period covered by
# predictions_df, otherwise the positions assigned next will all be NaN.
signals_df = dataframe.loc[start:end, :].copy()

In [None]:
# Position held on each bar: share_size times the kNN predicted signal
signals_df['Position'] = share_size * predictions_df['kNN Predictions']
held = signals_df['Position']

# Change in position from the previous bar (the traded quantity)
signals_df['Entry/Exit Position'] = held.diff()
traded = signals_df['Entry/Exit Position']

# Market value of the position at the close price
close_price = signals_df['Close']
signals_df['Portfolio Holdings'] = close_price * held

# Cash left after the running cost (or proceeds) of every trade so far
signals_df['Portfolio Cash'] = initial_capital - (close_price * traded).cumsum()

# Total value = cash + holdings
signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']

# Bar-over-bar return of the total value, and its cumulative compounding
signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()
signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1

signals_df.tail(10)

In [None]:
# Layout options shared by every layer of the overlay
layout_opts = dict(ylabel='Total Portfolio Value', width=1000, height=400)

# Per-bar change in position. The column is 'Entry/Exit Position' (there is
# no 'Entry/Exit' column) and its values are multiples of share_size, so
# trades are detected by sign rather than by comparing against +/-1.0.
position_change = signals_df['Entry/Exit Position']

# Visualize exit position relative to total portfolio value
# (named exit_points so the builtin `exit` is not shadowed)
exit_points = signals_df.loc[position_change < 0, 'Portfolio Total'].hvplot.scatter(
    color='yellow',
    marker='v',
    legend=False,
    **layout_opts
)

# Visualize entry position relative to total portfolio value
entry = signals_df.loc[position_change > 0, 'Portfolio Total'].hvplot.scatter(
    color='purple',
    marker='^',
    **layout_opts
)

# Visualize the value of the total portfolio
total_portfolio_value = signals_df[['Portfolio Total']].hvplot(
    line_color='lightgray',
    xlabel='Date',
    **layout_opts
)

# Overlay the plots
portfolio_entry_exit_plot = total_portfolio_value * entry * exit_points
portfolio_entry_exit_plot.opts(
    title="BTC Algorithm with kNN Preditions- Total Portfolio Value",
    yformatter='%.0f'
)