In [1]:
import pandas as pd
from pathlib import Path
import hvplot.pandas
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Import the btc dataset into a Pandas Dataframe
btc_df = pd.read_csv(
    Path("BTC-USD-2020-2024.csv"), 
    index_col="Date", 
    infer_datetime_format=True, 
    parse_dates=True
)

# Display sample data
btc_df.head()

  btc_df = pd.read_csv(


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01,7194.89209,7254.330566,7174.944336,7200.174316,7200.174316,18565660000.0
2020-01-02,7202.55127,7212.155273,6935.27002,6985.470215,6985.470215,20802080000.0
2020-01-03,6984.428711,7413.715332,6914.996094,7344.884277,7344.884277,28111480000.0
2020-01-04,7345.375488,7427.385742,7309.51416,7410.656738,7410.656738,18444270000.0
2020-01-05,7410.45166,7544.49707,7400.535645,7411.317383,7411.317383,19725070000.0


In [3]:
btc_df = btc_df.drop(columns=['Open','High','Low', 'Adj Close', 'Volume'])

btc_df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2020-01-01,7200.174316
2020-01-02,6985.470215
2020-01-03,7344.884277
2020-01-04,7410.656738
2020-01-05,7411.317383
...,...
2024-03-08,68300.093750
2024-03-09,68498.882813
2024-03-10,69019.789063
2024-03-11,72123.906250


In [4]:
btc_df = btc_df.dropna()
btc_df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2020-01-01,7200.174316
2020-01-02,6985.470215
2020-01-03,7344.884277
2020-01-04,7410.656738
2020-01-05,7411.317383
...,...
2024-03-07,66925.484375
2024-03-08,68300.093750
2024-03-09,68498.882813
2024-03-10,69019.789063


In [5]:
btc_df["Close"].hvplot()

In [6]:
# Import the lunar dataset into a Pandas Dataframe
moon_df = pd.read_csv(
    Path("Moon_Data - Sheet1.csv"), 
    index_col="Date", 
    infer_datetime_format=True, 
    parse_dates=True
)

# Display sample data
moon_df.head()

  moon_df = pd.read_csv(


Unnamed: 0_level_0,Moon Phase
Date,Unnamed: 1_level_1
2020-01-10,Full Moon
2020-01-24,New Moon
2020-02-08,Full Moon
2020-02-23,New Moon
2020-03-09,Full Moon


In [7]:
moon_df["Moon Phase"].hvplot()

In [8]:
# Merge dataframes on index (Date)
merged_df = btc_df.join(moon_df)


# Plot Bitcoin price as a line graph
btc_plot = merged_df.hvplot.line(
    x='Date', 
    y='Close', 
    xlabel='Date', 
    ylabel='Bitcoin Closing Price', 
    title='Bitcoin Closing Price by moon phase',
    color='black'
)

# Plot lunar cycles as a scatter plot
moon_plot = merged_df.hvplot.scatter(
    x='Date', 
    y='Close', 
    by='Moon Phase', 
    xlabel='Date', 
    ylabel='Bitcoin Closing Price'
)

# Overlay plots
overlay_plot = btc_plot * moon_plot

overlay_plot

In [9]:
# create trading signals df
moon_signals_df = merged_df.copy()

moon_signals_df.head()

Unnamed: 0_level_0,Close,Moon Phase
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,7200.174316,
2020-01-02,6985.470215,
2020-01-03,7344.884277,
2020-01-04,7410.656738,
2020-01-05,7411.317383,


In [10]:
# create buy and sell signals

# 1 = Buy signal
# -1 = Sell signal
# 0 = none

signals = []
for date, row in merged_df.iterrows():
    if row['Moon Phase'] == 'Full Moon':
        signals.append(1)  
    elif row['Moon Phase'] == 'New Moon':
        signals.append(-1)  
    else:
        signals.append(0)  

# Add signals to DataFrame
moon_signals_df['Signal'] = signals

moon_signals_df

Unnamed: 0_level_0,Close,Moon Phase,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-01,7200.174316,,0
2020-01-02,6985.470215,,0
2020-01-03,7344.884277,,0
2020-01-04,7410.656738,,0
2020-01-05,7411.317383,,0
...,...,...,...
2024-03-07,66925.484375,,0
2024-03-08,68300.093750,,0
2024-03-09,68498.882813,,0
2024-03-10,69019.789063,New Moon,-1


In [11]:
# Calculate the points in time at which a position should be taken, 1 or -1
moon_signals_df['Entry/Exit'] = moon_signals_df['Signal'].diff()
moon_signals_df

Unnamed: 0_level_0,Close,Moon Phase,Signal,Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01,7200.174316,,0,
2020-01-02,6985.470215,,0,0.0
2020-01-03,7344.884277,,0,0.0
2020-01-04,7410.656738,,0,0.0
2020-01-05,7411.317383,,0,0.0
...,...,...,...,...
2024-03-07,66925.484375,,0,0.0
2024-03-08,68300.093750,,0,0.0
2024-03-09,68498.882813,,0,0.0
2024-03-10,69019.789063,New Moon,-1,-1.0


In [12]:
# Visualize entry positions relative to close price
entry = moon_signals_df[moon_signals_df['Entry/Exit'] == 1.0]['Close'].hvplot.scatter(
    color='green',
    marker='^',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

In [13]:
# Visualize exit positions relative to close price
exit = moon_signals_df[moon_signals_df['Entry/Exit'] == -1.0]['Close'].hvplot.scatter(
    color='red',
    marker='v',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

In [14]:
# Visualize close price for the investment
btc_close = moon_signals_df[["Close"]].hvplot(
    line_color='grey',
    ylabel='Price in $',
    width=1000,
    height=400
)

In [15]:
# Plot the entry and exit points

signals_plot = entry * exit * btc_close

signals_plot

In [16]:
moon_signals_df['daily returns'] = moon_signals_df['Close'].pct_change()
moon_signals_df

Unnamed: 0_level_0,Close,Moon Phase,Signal,Entry/Exit,daily returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01,7200.174316,,0,,
2020-01-02,6985.470215,,0,0.0,-0.029819
2020-01-03,7344.884277,,0,0.0,0.051452
2020-01-04,7410.656738,,0,0.0,0.008955
2020-01-05,7411.317383,,0,0.0,0.000089
...,...,...,...,...,...
2024-03-07,66925.484375,,0,0.0,0.012384
2024-03-08,68300.093750,,0,0.0,0.020539
2024-03-09,68498.882813,,0,0.0,0.002911
2024-03-10,69019.789063,New Moon,-1,-1.0,0.007605


# Generate features and target sets for ML

In [18]:
#create x features
start_date = "2020-01-02"

# Slice the DataFrame to get X features starting from January 2nd, 2020
X = moon_signals_df.loc[start_date:, ["Close", "daily returns"]].copy()
X

Unnamed: 0_level_0,Close,daily returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-02,6985.470215,-0.029819
2020-01-03,7344.884277,0.051452
2020-01-04,7410.656738,0.008955
2020-01-05,7411.317383,0.000089
2020-01-06,7769.219238,0.048291
...,...,...
2024-03-07,66925.484375,0.012384
2024-03-08,68300.093750,0.020539
2024-03-09,68498.882813,0.002911
2024-03-10,69019.789063,0.007605


In [19]:
# create y target set
start_date = "2020-01-02"

# Slice the DataFrame to get y target set and adjust the start date
y = moon_signals_df.loc[start_date:, "Signal"].copy()
y

Date
2020-01-02    0
2020-01-03    0
2020-01-04    0
2020-01-05    0
2020-01-06    0
             ..
2024-03-07    0
2024-03-08    0
2024-03-09    0
2024-03-10   -1
2024-03-11    0
Name: Signal, Length: 1531, dtype: int64

### Split the Data into Training and Testing Sets

In [21]:
# Split data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [22]:
y_train.value_counts()

Signal
 0    1072
 1      41
-1      35
Name: count, dtype: int64

In [23]:
# Create a StandardScaler instance
scaler = StandardScaler()
 
#Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [24]:
# Import required libraries
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

In [25]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2020-01-02 00:00:00


In [26]:
# Select the ending period for the training data with an offset of 24 months
training_end = X.index.min() + DateOffset(months=24)

# Display the training end date
print(training_end)

2022-01-02 00:00:00


In [27]:
# Generate the X_train and y_train DataFrames
X_train_scaled = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

In [28]:
X_train_scaled

Unnamed: 0_level_0,Close,daily returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-02,6985.470215,-0.029819
2020-01-03,7344.884277,0.051452
2020-01-04,7410.656738,0.008955
2020-01-05,7411.317383,0.000089
2020-01-06,7769.219238,0.048291
...,...,...
2021-12-29,46444.710938,-0.024042
2021-12-30,47178.125000,0.015791
2021-12-31,46306.445313,-0.018476
2022-01-01,47686.812500,0.029809


In [29]:
# Generate the X_test and y_test DataFrames
X_test_scaled = X.loc[training_end:]
y_test= y.loc[training_end:]

In [30]:
y_test.value_counts()

Signal
 0    745
-1     28
 1     27
Name: count, dtype: int64

In [31]:
# Import SMOTE from imblearn
from imblearn.over_sampling import SMOTE

# Instantiate the SMOTE instance 
# Set the sampling_strategy parameter equal to auto
smote_sampler = SMOTE(random_state=1, sampling_strategy='auto')

In [32]:
# Fit the training data to the cluster centroids model
X_resampled, y_resampled =  smote_sampler.fit_resample(X_train_scaled, y_train)

In [33]:
# Count distinct values for the resampled target data
y_resampled.value_counts()

Signal
 0    682
 1    682
-1    682
Name: count, dtype: int64

## Import BalancedRandomForestClassifier from imblearn

In [35]:
# Import BalancedRandomForestClassifier from imblearn
from imblearn.ensemble import BalancedRandomForestClassifier

In [36]:
# Instantiate a BalancedRandomForestClassifier instance
brf = BalancedRandomForestClassifier()

In [37]:
# Fit the model to the training data
brf.fit(X_resampled, y_resampled)

  warn(
  warn(
  warn(


In [38]:
# Predict labels for testing features
y_pred = brf.predict(X_test_scaled)

# Display the predictions
y_pred

array([-1,  0, -1,  1, -1,  1, -1, -1,  1,  1,  0,  0,  0, -1, -1, -1,  0,
       -1,  0,  0, -1, -1,  1,  0,  0,  0,  0, -1,  0,  0,  0,  0,  1,  0,
       -1,  1, -1,  0, -1, -1,  0,  1,  1,  0, -1, -1,  1, -1,  0,  0, -1,
        0, -1,  1,  1,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  1,  0,  0,
       -1,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0, -1, -1,  0,  0,
        1,  1,  0,  1,  1, -1,  1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  1,
        0,  0,  1,  0,  0,  1,  1, -1,  0,  0,  1,  1,  0,  0,  0,  1,  0,
        1, -1, -1,  0,  0,  0,  0, -1,  0,  1,  0,  0,  0,  1,  1,  0,  1,
        0,  1,  0,  0,  1,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0,  1,  0,
        0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  1,  0,
       -1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  1,
       -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,
        0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  1,  0,  0,  0

In [39]:
# Generate a classification report using the training data and the logistic regression model's predications
y_pred_training_report = classification_report(y_test, y_pred)

# Review the classification report
print(y_pred_training_report)

              precision    recall  f1-score   support

          -1       0.06      0.25      0.09        28
           0       0.94      0.67      0.79       745
           1       0.05      0.30      0.09        27

    accuracy                           0.65       800
   macro avg       0.35      0.41      0.32       800
weighted avg       0.88      0.65      0.74       800



In [40]:
# Create a new empty predictions DataFrame
predictions_df = pd.DataFrame(index=X_test_scaled.index)
predictions_df["predicted_signal"] = y_pred
predictions_df["actual_returns"] = moon_signals_df["daily returns"].copy()
predictions_df["trading_algorithm_returns"] = predictions_df["actual_returns"] * predictions_df["predicted_signal"]
predictions_df.head()

Unnamed: 0_level_0,predicted_signal,actual_returns,trading_algorithm_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-02,-1,-0.007163,0.007163
2022-01-03,0,-0.018737,-0.0
2022-01-04,-1,-0.012066,0.012066
2022-01-05,1,-0.050734,-0.050734
2022-01-06,-1,-0.009366,0.009366


In [41]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[["actual_returns", "trading_algorithm_returns"]]).cumprod().hvplot(width=1000,
    height=400).opts(title="Cumulative Returns Comparison")