In [38]:
%%bash
pip install xgboost



In [39]:
import pandas as pd

technical_indicators_binary = pd.read_csv('/home/jovyan/technical_indicators_binary.csv', index_col=0)

In [40]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import KFold, RandomizedSearchCV
from sklearn.feature_selection import RFECV
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, cross_val_predict

# Assuming 'technical_indicators_binary' is the DataFrame containing the features and the binary label

# Extract the features and the target variable
X = technical_indicators_binary.drop('Label', axis=1)
y = technical_indicators_binary['Label']

# Define the hyperparameter search space
param_dist = {
    'n_estimators': np.arange(50, 1000, 50),
    'learning_rate': np.logspace(-3, 0, 10),
    'max_depth': np.arange(3, 10),
    'min_child_weight': np.arange(1, 6),
    'subsample': np.linspace(0.5, 1, 6),
    'colsample_bytree': np.linspace(0.5, 1, 6),
    'gamma': np.logspace(-3, 0, 10),
}

# Initialize the XGBoost classifier
xgb_classifier = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

# Create a RandomizedSearchCV object with the XGBoost classifier
random_search = RandomizedSearchCV(xgb_classifier, param_distributions=param_dist, n_iter=100, scoring='accuracy', cv=KFold(10), n_jobs=-1, random_state=42)

# Fit the RandomizedSearchCV object to the data
random_search.fit(X, y)

# Train the final XGBoost classifier with the best hyperparameters
best_xgb_classifier = random_search.best_estimator_

# Create the RFE object with cross-validation and the best XGBoost classifier as the estimator
rfecv = RFECV(estimator=best_xgb_classifier, step=1, cv=KFold(10), scoring='accuracy', n_jobs=-1)

# Fit the RFE object to the data
rfecv.fit(X, y)

# Print the optimal number of features
print("Optimal number of features : %d" % rfecv.n_features_)

# Get the selected features
selected_features = X.columns[rfecv.support_]
print("Selected features:", selected_features)

# Perform cross-validation with the selected features
cv_scores = cross_val_score(best_xgb_classifier, X[selected_features], y, cv=KFold(10))

# Print the mean cross-validation score
print("Mean cross-validation score: ", np.mean(cv_scores))

# Get the classification report using cross_val_predict
y_pred = cross_val_predict(best_xgb_classifier, X[selected_features], y, cv=KFold(10))
print(classification_report(y, y_pred))




Optimal number of features : 3
Selected features: Index(['MACD_signal', 'ADX', 'Price_ROC'], dtype='object')
Mean cross-validation score:  0.6107564973227113
              precision    recall  f1-score   support

           0       0.58      0.30      0.39      1051
           1       0.62      0.84      0.71      1428

    accuracy                           0.61      2479
   macro avg       0.60      0.57      0.55      2479
weighted avg       0.60      0.61      0.58      2479

