In [19]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd

technical_indicators_binary = pd.read_csv('/home/jovyan/technical_indicators_binary.csv', index_col=0)

In [18]:
# Drop rows with NaN values
technical_indicators_binary.dropna(inplace=True)

# Separate features and labels
X = technical_indicators_binary.drop('Label', axis=1)
y = technical_indicators_binary['Label']

# Create the logistic regression model
logistic_regression_model = LogisticRegression()

# Initialize the RFE selector
selector = RFE(logistic_regression_model, n_features_to_select=5, step=1)

# Fit the selector to the data
selector = selector.fit(X, y)

# Get the selected features
selected_features = X.columns[selector.support_]

print("Selected features:")
print(selected_features)

# Get the feature subset based on the selected features
X_selected = selector.transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

# Train the logistic regression model with the selected features
logistic_regression_model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = logistic_regression_model.predict(X_test)

# Print the classification report
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

# Perform 10-fold cross-validation
cv_scores = cross_val_score(logistic_regression_model, X_selected, y, cv=10)

# Calculate the average accuracy across the 10 folds
mean_accuracy = np.mean(cv_scores)

print("10-fold cross-validation accuracy: {:.2f}".format(mean_accuracy))


Selected features:
Index(['RSI', 'ADX', 'Stochastic_Oscillator', 'Williams_R', 'OBV'], dtype='object')
Classification Report:
              precision    recall  f1-score   support

           0       0.25      0.00      0.01       321
           1       0.57      0.99      0.72       423

    accuracy                           0.57       744
   macro avg       0.41      0.50      0.36       744
weighted avg       0.43      0.57      0.41       744

10-fold cross-validation accuracy: 0.58
