In [1]:
import pandas as pd
from pathlib import Path
import hvplot.pandas
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from finta import TA

In [2]:
# Import the btc dataset into a Pandas Dataframe
btc_df = pd.read_csv(
    Path("BTC-USD-2020-2024.csv"), 
    index_col="Date", 
    infer_datetime_format=True, 
    parse_dates=True
)

  btc_df = pd.read_csv(


In [3]:
btc_df = btc_df.drop(columns=['Adj Close', 'Volume'])

btc_df

# Display sample data
btc_df.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01,7194.89209,7254.330566,7174.944336,7200.174316
2020-01-02,7202.55127,7212.155273,6935.27002,6985.470215
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277
2020-01-04,7345.375488,7427.385742,7309.51416,7410.656738
2020-01-05,7410.45166,7544.49707,7400.535645,7411.317383


In [4]:
# Import the lunar dataset into a Pandas Dataframe
moon_df = pd.read_csv(
    Path("Moon_Data - Sheet1.csv"), 
    index_col="Date", 
    infer_datetime_format=True, 
    parse_dates=True
)

# Display sample data
moon_df.head()

  moon_df = pd.read_csv(


Unnamed: 0_level_0,Moon Phase
Date,Unnamed: 1_level_1
2020-01-10,Full Moon
2020-01-24,New Moon
2020-02-08,Full Moon
2020-02-23,New Moon
2020-03-09,Full Moon


In [5]:
#Get the Bollinger Bands for the Dataset
bbands = TA.BBANDS(btc_df)

bbands.head()

Unnamed: 0_level_0,BB_UPPER,BB_MIDDLE,BB_LOWER
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-01,,,
2020-01-02,,,
2020-01-03,,,
2020-01-04,,,
2020-01-05,,,


In [6]:
#Concat Bollinger Bands to the dataframe
df = pd.concat([btc_df, bbands, moon_df], axis=1)

df.head()

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Moon Phase
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01,7194.89209,7254.330566,7174.944336,7200.174316,,,,
2020-01-02,7202.55127,7212.155273,6935.27002,6985.470215,,,,
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,,,,
2020-01-04,7345.375488,7427.385742,7309.51416,7410.656738,,,,
2020-01-05,7410.45166,7544.49707,7400.535645,7411.317383,,,,


In [7]:
# Visualize plots

# Plot Bitcoin price as a line graph
btc_plot = df.hvplot.line(
    x='Date', 
    y='Close', 
    xlabel='Date', 
    ylabel='Bitcoin Closing Price', 
    title='Bitcoin Closing Price by Moon Phase/Bollinger Bands',
    color='black',
    width=1100,
    height=500
)

# Plot lunar cycles as a scatter plot
moon_plot = df.hvplot.scatter(
    x='Date', 
    y='Close', 
    by='Moon Phase', 
    xlabel='Date', 
    ylabel='Bitcoin Closing Price',
    width=1100,
    height=500
)

bb_upper = df[["BB_UPPER"]].hvplot(
    line_color="red",
    ylabel="Price in $",
    width=1100,
    height=500
)


bb_middle = df[["BB_MIDDLE"]].hvplot(
    line_color="green",
    ylabel="Price in $",
    width=1100,
    height=500
)

bb_lower = df[["BB_LOWER"]].hvplot(
    line_color="red",
    ylabel="Price in $",
    width=1100,
    height=500
)

# Overlay plots
bbands_plot = btc_plot * moon_plot * bb_upper * bb_middle * bb_lower
bbands_plot

# Add  legend to left of the plot
bbands_plot.opts(legend_position='top_left')

In [8]:
# create trading signals df
trade_signals_df = df.copy()

trade_signals_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Moon Phase
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01,7194.89209,7254.330566,7174.944336,7200.174316,,,,
2020-01-02,7202.55127,7212.155273,6935.27002,6985.470215,,,,
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,,,,
2020-01-04,7345.375488,7427.385742,7309.51416,7410.656738,,,,
2020-01-05,7410.45166,7544.49707,7400.535645,7411.317383,,,,


In [9]:
# Iterate through DataFrame and generate combined signals
Signals = []
for index, row in trade_signals_df.iterrows():
    if row['Moon Phase'] == 'New Moon' or row['Close'] <= row['BB_LOWER']:
        Signals.append(1)  # Buy signal
    elif row['Moon Phase'] == 'Full Moon' or row['Close'] >= row['BB_UPPER']:
        Signals.append(-1)  # Sell signal
    elif row['Moon Phase'] == 'New Moon' or row['Low'] < row['BB_MIDDLE']:
        Signals.append(1)  # Buy signal
    else:
        Signals.append(0)  # No action

# Add combined signals to a new column
trade_signals_df['Signal'] = Signals

# Review the DataFrame
trade_signals_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Moon Phase,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-01,7194.89209,7254.330566,7174.944336,7200.174316,,,,,0
2020-01-02,7202.55127,7212.155273,6935.27002,6985.470215,,,,,0
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,,,,,0
2020-01-04,7345.375488,7427.385742,7309.51416,7410.656738,,,,,0
2020-01-05,7410.45166,7544.49707,7400.535645,7411.317383,,,,,0


In [10]:
#count value of signals
trade_signals_df['Signal'].value_counts()

Signal
 1    852
 0    534
-1    167
Name: count, dtype: int64

In [11]:
# Visualize plot with signals

# Plot Bitcoin price as a line graph
btc_close = trade_signals_df.hvplot.line(
    x='Date', 
    y='Close', 
    xlabel='Date', 
    ylabel='Bitcoin Closing Price', 
    title='Bitcoin Closing Price/signals',
    color='black',
    width=1100,
    height=500
)

# Visualize entry positions relative to close price
entry = trade_signals_df[trade_signals_df['Signal'] == 1.0]['Close'].hvplot.scatter(
    color='green',
    marker='^',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

# Visualize exit positions relative to close price
exit = trade_signals_df[trade_signals_df['Signal'] == -1.0]['Close'].hvplot.scatter(
    color='red',
    marker='v',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

# upper band
bb_upper = trade_signals_df[["BB_UPPER"]].hvplot(
    line_color="red",
    ylabel="Price in $",
    width=1100,
    height=500
)

# middle band
bb_middle = df[["BB_MIDDLE"]].hvplot(
    line_color="green",
    ylabel="Price in $",
    width=1100,
    height=500
)

# lower band
bb_lower = trade_signals_df[["BB_LOWER"]].hvplot(
    line_color="red",
    ylabel="Price in $",
    width=1100,
    height=500
)

# Overlay plots
signals_plot = btc_close * entry * exit  * bb_upper * bb_middle * bb_lower
signals_plot


In [12]:
# Calculate the points in time when the Signal value changes
# Identify trade entry (1) and exit (-1) points
trade_signals_df['Entry/Exit'] = trade_signals_df['Signal']
trade_signals_df

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Moon Phase,Signal,Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-01,7194.892090,7254.330566,7174.944336,7200.174316,,,,,0,0
2020-01-02,7202.551270,7212.155273,6935.270020,6985.470215,,,,,0,0
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,,,,,0,0
2020-01-04,7345.375488,7427.385742,7309.514160,7410.656738,,,,,0,0
2020-01-05,7410.451660,7544.497070,7400.535645,7411.317383,,,,,0,0
...,...,...,...,...,...,...,...,...,...,...
2024-11-01,,,,,,,,New Moon,1,1
2024-11-15,,,,,,,,Full Moon,-1,-1
2024-11-30,,,,,,,,New Moon,1,1
2024-12-15,,,,,,,,Full Moon,-1,-1


In [13]:
#count value of signals
trade_signals_df['Entry/Exit'].value_counts()

Entry/Exit
 1    852
 0    534
-1    167
Name: count, dtype: int64

In [14]:
# Visualize plot with signals

# Plot Bitcoin price as a line graph
btc_close = trade_signals_df.hvplot(
    x='Date', 
    y='Close', 
    xlabel='Date', 
    ylabel='Bitcoin Closing Price', 
    title='Bitcoin Closing Price/signals',
    color='black',
    width=1100,
    height=500
)

# Visualize entry positions relative to close price
entry = trade_signals_df[trade_signals_df['Entry/Exit'] == 1.0]['Close'].hvplot.scatter(
    color='green',
    marker='^',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

# Visualize exit positions relative to close price
exit = trade_signals_df[trade_signals_df['Entry/Exit'] == -1.0]['Close'].hvplot.scatter(
    color='red',
    marker='v',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

# upper band
bb_upper = trade_signals_df[["BB_UPPER"]].hvplot(
    line_color="red",
    ylabel="Price in $",
    width=1100,
    height=500
)

# middle band
bb_middle = df[["BB_MIDDLE"]].hvplot(
    line_color="green",
    ylabel="Price in $",
    width=1100,
    height=500
)

# lower band
bb_lower = trade_signals_df[["BB_LOWER"]].hvplot(
    line_color="red",
    ylabel="Price in $",
    width=1100,
    height=500
)

# Overlay plots
signals_plot_diff = btc_close * entry * exit  * bb_upper * bb_middle * bb_lower
signals_plot_diff

In [15]:
#calculate daily returns
trade_signals_df['Daily Returns'] = trade_signals_df['Close'].pct_change()
trade_signals_df

  trade_signals_df['Daily Returns'] = trade_signals_df['Close'].pct_change()


Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Moon Phase,Signal,Entry/Exit,Daily Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01,7194.892090,7254.330566,7174.944336,7200.174316,,,,,0,0,
2020-01-02,7202.551270,7212.155273,6935.270020,6985.470215,,,,,0,0,-0.029819
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,,,,,0,0,0.051452
2020-01-04,7345.375488,7427.385742,7309.514160,7410.656738,,,,,0,0,0.008955
2020-01-05,7410.451660,7544.497070,7400.535645,7411.317383,,,,,0,0,0.000089
...,...,...,...,...,...,...,...,...,...,...,...
2024-11-01,,,,,,,,New Moon,1,1,0.000000
2024-11-15,,,,,,,,Full Moon,-1,-1,0.000000
2024-11-30,,,,,,,,New Moon,1,1,0.000000
2024-12-15,,,,,,,,Full Moon,-1,-1,0.000000


In [16]:
# Calculate the mean of the 'BB_UPPER', 'BB_MIDDLE', and 'BB_LOWER' columns
bb_upper_mean = trade_signals_df['BB_UPPER'].mean()
bb_middle_mean = trade_signals_df['BB_MIDDLE'].mean()
bb_lower_mean = trade_signals_df['BB_LOWER'].mean()

# Fill NaN values with the mean values
trade_signals_df['BB_UPPER'].fillna(bb_upper_mean, inplace=True)
trade_signals_df['BB_MIDDLE'].fillna(bb_middle_mean, inplace=True)
trade_signals_df['BB_LOWER'].fillna(bb_lower_mean, inplace=True)

In [17]:
ml_df = trade_signals_df.copy()
#ml_df = ml_df.dropna()
ml_df

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Moon Phase,Signal,Entry/Exit,Daily Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01,7194.892090,7254.330566,7174.944336,7200.174316,33192.348552,29757.822302,26323.296053,,0,0,
2020-01-02,7202.551270,7212.155273,6935.270020,6985.470215,33192.348552,29757.822302,26323.296053,,0,0,-0.029819
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,33192.348552,29757.822302,26323.296053,,0,0,0.051452
2020-01-04,7345.375488,7427.385742,7309.514160,7410.656738,33192.348552,29757.822302,26323.296053,,0,0,0.008955
2020-01-05,7410.451660,7544.497070,7400.535645,7411.317383,33192.348552,29757.822302,26323.296053,,0,0,0.000089
...,...,...,...,...,...,...,...,...,...,...,...
2024-11-01,,,,,33192.348552,29757.822302,26323.296053,New Moon,1,1,0.000000
2024-11-15,,,,,33192.348552,29757.822302,26323.296053,Full Moon,-1,-1,0.000000
2024-11-30,,,,,33192.348552,29757.822302,26323.296053,New Moon,1,1,0.000000
2024-12-15,,,,,33192.348552,29757.822302,26323.296053,Full Moon,-1,-1,0.000000


## Getting features and target sets for ML Model

In [19]:
# Create X features
X = ml_df.drop(columns=['Signal', 'Moon Phase']).copy()#.dropna(inplace=False)
X

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Entry/Exit,Daily Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-01,7194.892090,7254.330566,7174.944336,7200.174316,33192.348552,29757.822302,26323.296053,0,
2020-01-02,7202.551270,7212.155273,6935.270020,6985.470215,33192.348552,29757.822302,26323.296053,0,-0.029819
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,33192.348552,29757.822302,26323.296053,0,0.051452
2020-01-04,7345.375488,7427.385742,7309.514160,7410.656738,33192.348552,29757.822302,26323.296053,0,0.008955
2020-01-05,7410.451660,7544.497070,7400.535645,7411.317383,33192.348552,29757.822302,26323.296053,0,0.000089
...,...,...,...,...,...,...,...,...,...
2024-11-01,,,,,33192.348552,29757.822302,26323.296053,1,0.000000
2024-11-15,,,,,33192.348552,29757.822302,26323.296053,-1,0.000000
2024-11-30,,,,,33192.348552,29757.822302,26323.296053,1,0.000000
2024-12-15,,,,,33192.348552,29757.822302,26323.296053,-1,0.000000


In [20]:
# Create target set


# Create a new column in the trading_df called signal setting its value to zero.
ml_df["Signals"] = 0.0

# Create the signal to buy
ml_df.loc[(ml_df["Daily Returns"] >= 0), "signal"] = 1

# Create the signal to sell
ml_df.loc[(ml_df["Daily Returns"] < 0), "signal"] = -1

# Copy the new signal column to a new Series called y.
y = ml_df["Signals"].copy()
y.head()

Date
2020-01-01    0.0
2020-01-02    0.0
2020-01-03    0.0
2020-01-04    0.0
2020-01-05    0.0
Name: Signals, dtype: float64

### Split the Data into Training and Testing Sets

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split


# Drop rows with NaN values from both X and y
df_clean = ml_df.dropna(subset=['BB_UPPER', 'BB_MIDDLE', 'BB_LOWER', 'Signal', 'Entry/Exit', 'Daily Returns'], inplace=False)

# Separate features (X) and target variable (y)
X = df_clean.drop(columns=['Signal', 'Entry/Exit', 'Moon Phase'])
y = df_clean['Signal']

# Split data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


# Split data into training and testing datasets
#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [23]:
y_train.value_counts()

Signal
 1    639
 0    395
-1    130
Name: count, dtype: int64

## Select the start and end time of the training period

In [25]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2020-01-02 00:00:00


In [26]:
# Select the ending period for the training data with an offset of 9 months
training_end = X.index.min() + DateOffset(months=9)

# Display the training end date
print(training_end)

2020-10-02 00:00:00


In [27]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Daily Returns,Signals,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-02,7202.55127,7212.155273,6935.27002,6985.470215,33192.348552,29757.822302,26323.296053,-0.029819,0.0,-1.0
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,33192.348552,29757.822302,26323.296053,0.051452,0.0,1.0
2020-01-04,7345.375488,7427.385742,7309.51416,7410.656738,33192.348552,29757.822302,26323.296053,0.008955,0.0,1.0
2020-01-05,7410.45166,7544.49707,7400.535645,7411.317383,33192.348552,29757.822302,26323.296053,8.9e-05,0.0,1.0
2020-01-06,7410.452148,7781.867188,7409.292969,7769.219238,33192.348552,29757.822302,26323.296053,0.048291,0.0,1.0


In [28]:
 # Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Display sample data
X_test.head()

Unnamed: 0_level_0,Open,High,Low,Close,BB_UPPER,BB_MIDDLE,BB_LOWER,Daily Returns,Signals,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-10-02,10619.821289,10657.837891,10416.689453,10575.974609,11158.603173,10723.126904,10287.650636,-0.004094,0.0,-1.0
2020-10-03,10575.100586,10598.94043,10511.129883,10549.329102,11136.754534,10734.405566,10332.056599,-0.002519,0.0,-1.0
2020-10-04,10550.44043,10686.0,10534.391602,10669.583008,11136.538586,10733.842822,10331.147059,0.011399,0.0,1.0
2020-10-05,10676.529297,10793.507813,10634.600586,10793.339844,11136.242094,10733.662256,10331.082417,0.011599,0.0,1.0
2020-10-06,10796.306641,10797.578125,10528.890625,10604.40625,11104.868777,10715.137305,10325.405833,-0.017505,0.0,-1.0


In [29]:
y_test.value_counts()

Signal
 1    714
 0    421
-1    143
Name: count, dtype: int64

In [30]:
# Create a StandardScaler instance
scaler = StandardScaler()
 
#Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Import HistGradientBoostingClassifier Model

In [32]:
from sklearn.experimental import enable_hist_gradient_boosting 
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.utils.class_weight import compute_class_weight

# Calculate class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)

# Create classifier with adjusted class weights
classifier = HistGradientBoostingClassifier(class_weight={-1: class_weights[0], 0: class_weights[1], 1: class_weights[2]}, 
                                            max_iter=200, max_depth=5, learning_rate=0.5)
# Make pipeline
pipeline = make_pipeline(SimpleImputer(), HistGradientBoostingClassifier())

pipeline.fit(X_train_scaled, y_train)



In [33]:
# Transform the testing data using the same preprocessing steps
X_test_transformed = pipeline.named_steps['simpleimputer'].transform(X_test_scaled)

In [34]:
# Make predictions
predictions = pipeline.predict(X_test_transformed)

In [35]:
# Generate classification report
report = classification_report(y_test, predictions)
print(report)

              precision    recall  f1-score   support

          -1       0.24      0.46      0.32       143
           0       0.34      0.81      0.48       421
           1       1.00      0.01      0.02       714

    accuracy                           0.32      1278
   macro avg       0.53      0.43      0.27      1278
weighted avg       0.70      0.32      0.20      1278



In [36]:
# Create a new empty predictions DataFrame
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df["predicted_signal"] = predictions
predictions_df["actual_returns"] = ml_df["Daily Returns"].copy()
predictions_df["trading_algorithm_returns"] = predictions_df["actual_returns"] * predictions_df["predicted_signal"]
predictions_df.head()

Unnamed: 0_level_0,predicted_signal,actual_returns,trading_algorithm_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-10-02,1,-0.004094,-0.004094
2020-10-03,1,-0.002519,-0.002519
2020-10-04,1,0.011399,0.011399
2020-10-05,1,0.011599,0.011599
2020-10-06,1,-0.017505,-0.017505


In [37]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[["actual_returns", "trading_algorithm_returns"]]).cumprod().hvplot(width=1500,
    height=500).opts(title="Cumulative Returns Comparison")

## Fix data imbalance by using SMOTE to oversample the minority class

In [39]:
from imblearn.over_sampling import SMOTE

#Instantiate the SMOTE instance 
smote_sampler = SMOTE(random_state=1, sampling_strategy='auto')

In [40]:
# Fit the training data to the cluster centroids model
X_resampled, y_resampled =  smote_sampler.fit_resample(X_train, y_train)

# BalancedRandomForestClassifier

In [42]:
# Import BalancedRandomForestClassifier from imblearn
from imblearn.ensemble import BalancedRandomForestClassifier

In [43]:
# Instantiate a BalancedRandomForestClassifier instance
brf = BalancedRandomForestClassifier()

In [44]:
# Fit the model to the training data
brf.fit(X_resampled, y_resampled)

  warn(
  warn(
  warn(


In [45]:
# Predict labels for testing features
y_pred = brf.predict(X_test_scaled)

# Display the predictions
y_pred



array([ 1,  1,  1, ..., -1, -1, -1], dtype=int64)

In [46]:
# Generate a classification report using the training data and the logistic regression model's predications
y_pred_training_report = classification_report(y_test, y_pred)

# Review the classification report
print(y_pred_training_report)

              precision    recall  f1-score   support

          -1       0.48      0.07      0.12       143
           0       0.00      0.00      0.00       421
           1       0.56      0.99      0.71       714

    accuracy                           0.56      1278
   macro avg       0.35      0.35      0.28      1278
weighted avg       0.37      0.56      0.41      1278



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [47]:
# Create a new empty predictions DataFrame
brf_predictions_df = pd.DataFrame(index=X_test.index)
brf_predictions_df["predicted_signal"] = y_pred
brf_predictions_df["actual_returns"] = ml_df["Daily Returns"].copy()
brf_predictions_df["trading_algorithm_returns"] = brf_predictions_df["actual_returns"] * brf_predictions_df["predicted_signal"]
brf_predictions_df.head()

Unnamed: 0_level_0,predicted_signal,actual_returns,trading_algorithm_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-10-02,1,-0.004094,-0.004094
2020-10-03,1,-0.002519,-0.002519
2020-10-04,1,0.011399,0.011399
2020-10-05,1,0.011599,0.011599
2020-10-06,1,-0.017505,-0.017505


In [48]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[["actual_returns", "trading_algorithm_returns"]]).cumprod().hvplot(width=1500,
    height=500).opts(title="Cumulative Returns Comparison")