## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, recall_score, precision_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from bayes_opt import BayesianOptimization
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# import cupy as cp

## Data

In [None]:
# Load the BTC/USD technical-analysis table and use its 'Datetime' column as the index
data = pd.read_csv('/kaggle/input/btcusd-ta/BTCUSD_TA.csv').set_index('Datetime')

# Forward return: relative change from this row's close to the next row's close
next_close = data['Close'].shift(-1)
data['Return'] = next_close / data['Close'] - 1

# Binary target: 0 when the forward return is <= 0, otherwise 1.
# A missing return (the last row has no next close) is not <= 0, so it maps to 1.
data['Label'] = (~data['Return'].le(0)).astype('int64')

# Restrict the table to the 2014-01-01 .. 2024-01-01 index range
data = data.loc['2014-01-01':'2024-01-01']

In [None]:
# Features are every column except the forward return and the label derived from it
X = data.drop(columns=['Return', 'Label'])
y = data['Label']

# Standardise each feature column, then project onto 50 principal components.
# NOTE(review): both transformers are fitted on every row, including rows that
# later land in the test split, so test-set statistics leak into the features.
# Consider fitting them on the training rows only.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Chronological hold-out: the rows form a time series, so they must stay in order.
# With shuffle=False the first 70% of rows train the model and the last 30% are the
# test set. A shuffled split would let the model train on rows that come after the
# ones it is tested on, and the test predictions would no longer be a contiguous,
# ordered run of days, which the position/portfolio loops below assume when they
# walk through y_pred step by step.
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.3, shuffle=False)

## Classification

In [None]:
# Train an XGBoost classifier (trees limited to depth 11) and predict the held-out rows
model = XGBClassifier(max_depth=11)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Tabulate true labels against predicted labels
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Wrap the table in a display object and draw it with integer cell counts
disp = ConfusionMatrixDisplay(cm)
disp.plot(values_format='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Closing prices for the back-test window
# NOTE(review): assumes y_pred holds one prediction per row of this window, in
# chronological order -- confirm against how the test split is built.
y_true = data['Close'].loc['2020-12-31':'2023-12-31']

# Raw signal: predicted class 0 becomes a short (-1), class 1 stays a long (+1)
position1 = np.where(y_pred == 0, -1, y_pred)

# Smoothed position: when the signal flips against an open position, go flat (0)
# for one step instead of reversing immediately; from flat, follow the signal.
curr = position1[0]
position2 = [curr]
for signal in position1[1:]:
    curr = signal if curr in (0, signal) else 0
    position2.append(curr)

In [None]:
# Compound the strategy's value over the price window, starting from 1.
# The position held at step i-1 is applied to the price change from step i-1 to i.
#
# y_true is indexed by 'Datetime' labels, so integer keys such as y_true[i] only
# work through pandas' deprecated positional fallback. Indexing a plain NumPy
# array is unambiguously positional and gives the same values.
prices = y_true.to_numpy()
val = 1
portf = [1]
for i in range(1, len(prices)):
    val = val * (1 + position2[i-1] * (prices[i] - prices[i-1]) / prices[i-1])
    portf.append(val)

In [None]:
# Benchmark: price path rescaled so that it starts at 1.
# .iloc[0] takes the first row by position; y_true is label-indexed, and y_true[0]
# would rely on a deprecated positional fallback for integer keys.
buy_and_hold = y_true / y_true.iloc[0]

# Daily x-axis for both curves; its length must equal len(y_true) and len(portf)
dates = pd.date_range(start='2021-01-01', end='2024-01-01', freq='D')

# Create a new figure
plt.figure(figsize=(10, 6))

# Plot the benchmark and the strategy's equity curve
plt.plot(dates, buy_and_hold, color='red', label='Buy and Hold')
plt.plot(dates, portf, color='black', label='XG Boost')

# Label the x-axis as month/year with one tick every three months
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%Y'))
plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=3))

# Add a legend
plt.legend()

# Automatically format x-axis labels for better readability
plt.gcf().autofmt_xdate()

# Show the plot
plt.show()