In [5]:
%matplotlib inline

# import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from  sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from scipy.stats import uniform

In [6]:
from sklearn.datasets import make_classification

In [7]:
### Create the dataset
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=15)

In [8]:
X

array([[-0.3779567 ,  1.04389498,  1.04349443, ..., -0.0671922 ,
         0.17547148, -1.04964564],
       [-0.32525851,  1.27626282, -0.68612327, ...,  1.00663329,
        -0.83369182,  0.95774417],
       [ 0.73901891, -0.60090284, -0.17729436, ..., -0.21898072,
         0.87864296, -1.25774001],
       ...,
       [ 0.67556288, -0.53841971, -1.29950008, ...,  2.04333597,
         0.94738793,  0.79035376],
       [ 2.62971021, -2.45289885, -1.35978523, ...,  0.37889809,
        -1.97189411, -0.2522504 ],
       [-1.79149103, -0.12190773,  0.53515332, ..., -1.94135733,
         0.58900166, -1.00748218]])

In [27]:
# Scale the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [28]:
X

array([[-0.35529194,  1.00852517,  0.99206944, ..., -0.12716169,
         0.19107744, -0.94483281],
       [-0.30369468,  1.24001827, -0.72653825, ...,  0.95517885,
        -0.81792129,  0.82447397],
       [ 0.73834878, -0.63008083, -0.22094833, ..., -0.28015384,
         0.89413423, -1.1282465 ],
       ...,
       [ 0.67621842, -0.56783292, -1.33601056, ...,  2.0001022 ,
         0.96286799,  0.67693662],
       [ 2.58954139, -2.47510503, -1.39591197, ...,  0.32246594,
        -1.95593796, -0.24201126],
       [-1.73929594, -0.15288889,  0.48696421, ..., -2.01618825,
         0.60454017, -0.90767007]])

In [29]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
# Define parameter distributions for RandomizedSearchCV
param_distributions = {
    'C': uniform(loc=0.001, scale=10),  # Randomly sample C between 0.001 and 10
    'penalty': ['l1', 'l2'],           # L1 and L2 regularization
    'solver': ['liblinear']            # Required for L1 penalty
}

In [31]:
## Create a Logistic Regression Model
log_reg = LogisticRegression()

In [32]:
# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=log_reg,
    param_distributions=param_distributions,
    n_iter=20,             # Number of random combinations to try
    scoring='accuracy',    # Metric for evaluation
    cv=5,                  # 5-fold cross-validation
    random_state=42,       # For reproducibility
    verbose=1
)

In [33]:
# Fit RandomizedSearchCV
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [34]:
# Display the best parameters and best score
print(f"Best Parameters: {random_search.best_params_}")
print(f"Best Cross-validation Accuracy: {random_search.best_score_:.2f}")

Best Parameters: {'C': 0.5818361216819946, 'penalty': 'l2', 'solver': 'liblinear'}
Best Cross-validation Accuracy: 0.91


In [35]:
## Evaluate the best model on the test set
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

In [36]:
y_pred

array([0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0])

In [38]:
## calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")

Test Accuracy: 0.93


In [41]:
## Display the confusion metrix 
conf_matrix = confusion_matrix(y_test, y_pred)
print("confision Matrix: ")
print(conf_matrix)

confision Matrix: 
[[96  5]
 [ 9 90]]


In [42]:
## Display the classification report
report = classification_report(y_test, y_pred)
print("Classification Report: ")
print(report)

Classification Report: 
              precision    recall  f1-score   support

           0       0.91      0.95      0.93       101
           1       0.95      0.91      0.93        99

    accuracy                           0.93       200
   macro avg       0.93      0.93      0.93       200
weighted avg       0.93      0.93      0.93       200

