In [1]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import uniform, randint
import pandas as pd

In [2]:
# Read the four preprocessed CSV files from the working directory.
# The files are read in the order listed, and each becomes a DataFrame.
X_train, X_test, y_train, y_test = map(
    pd.read_csv,
    (
        'X_train_preprocessed.csv',
        'X_test_preprocessed.csv',
        'y_train_preprocessed.csv',
        'y_test_preprocessed.csv',
    ),
)

In [None]:
# One-step pipeline. The LogisticRegression here is only a placeholder:
# the search space in `param_dist` overrides the 'classifier' step with
# each candidate model family.
pipeline = Pipeline(steps=[('classifier', LogisticRegression())])

In [3]:
# Seed for the estimators that use randomness while fitting. The
# `random_state` given to RandomizedSearchCV only fixes which candidates are
# sampled; without seeding the estimators themselves, the cross-validation
# scores (and therefore the selected model) can change from run to run.
ESTIMATOR_SEED = 42

# Search space for RandomizedSearchCV: one dict per model family. Each dict
# swaps the pipeline's 'classifier' step for that estimator and lists the
# distributions its hyper-parameters are drawn from.
#
# Range conventions (scipy.stats):
#   uniform(loc, scale) draws from [loc, loc + scale]
#   randint(low, high)  draws integers from [low, high - 1]
param_dist = [
    {
        # liblinear uses random_state when shuffling the data.
        'classifier': [LogisticRegression(random_state=ESTIMATOR_SEED)],
        'classifier__C': uniform(0.1, 10),           # C in [0.1, 10.1]
        'classifier__solver': ['liblinear', 'lbfgs']
    },
    {
        'classifier': [RandomForestClassifier(random_state=ESTIMATOR_SEED)],
        'classifier__n_estimators': randint(50, 300),  # 50..299 trees
        'classifier__max_depth': randint(5, 30)        # depth 5..29
    },
    {
        'classifier': [GradientBoostingClassifier(random_state=ESTIMATOR_SEED)],
        'classifier__n_estimators': randint(50, 200),     # 50..199 stages
        'classifier__learning_rate': uniform(0.01, 0.2),  # [0.01, 0.21]
        'classifier__max_depth': randint(3, 7)            # depth 3..6
    },
    {
        # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
        # releases and may only trigger an "unused parameter" warning there;
        # confirm the installed version before removing it.
        'classifier': [XGBClassifier(use_label_encoder=False,
                                     eval_metric='logloss',
                                     random_state=ESTIMATOR_SEED)],
        'classifier__n_estimators': randint(50, 200),     # 50..199 rounds
        'classifier__learning_rate': uniform(0.01, 0.2),  # [0.01, 0.21]
        'classifier__max_depth': randint(3, 7)            # depth 3..6
    },
    {
        'classifier': [RidgeClassifier()],
        'classifier__alpha': uniform(0.1, 10)             # alpha in [0.1, 10.1]
    },
    {
        'classifier': [SVC()],
        'classifier__kernel': ['linear', 'rbf'],
        'classifier__C': uniform(0.1, 10)                 # C in [0.1, 10.1]
    }
]

In [5]:
# Randomised hyper-parameter search over `param_dist`, applied to `pipeline`.
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    # Sampling: 100 candidates, seeded so the same candidates are drawn each run.
    n_iter=100,
    random_state=42,
    # Evaluation: 3-fold cross-validation scored by accuracy.
    cv=3,
    scoring='accuracy',
    # Execution: n_jobs=-1 requests all available processors; verbose=2 logs
    # progress for each fit.
    n_jobs=-1,
    verbose=2,
)

In [None]:
# Run the search. The label frame is flattened to a 1-D array before fitting.
random_search.fit(X_train, y=np.ravel(y_train.values))

In [None]:
# Report the winning configuration and its mean cross-validation score.
# Attributes are looked up one at a time so each line is printed as soon as
# its value is available.
for heading, attribute in (
    ("Best parameters:", "best_params_"),
    ("Best cross-validation score:", "best_score_"),
):
    print(heading, getattr(random_search, attribute))

In [None]:
# Evaluate the fitted search object on the held-out test data.
# NOTE(review): y_test is passed exactly as loaded (a DataFrame), whereas the
# training labels were flattened before fitting; this presumably relies on
# y_test having a single column. Confirm.
y_pred = random_search.predict(X_test)

test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test set accuracy:", test_accuracy)

print("\nClassification Report:")
test_report = classification_report(y_true=y_test, y_pred=y_pred)
print(test_report)