In [18]:
# 🔧 Step 1: Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 📊 Step 2: Generate Synthetic Binary Classification Data
# Creating 1000 samples with 10 features and 2 classes (binary)
X, y = make_classification(n_samples=1000, 
                           n_features=10, 
                           n_classes=2, 
                           random_state=10)

# ✂️ Step 3: Train-Test Split (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# ✅ Step 4: Define Hyperparameters for Grid Search

# Different values for C (inverse of regularization strength)
C_values = [100, 10, 1.0, 0.1, 0.01]

# Grid of parameters with only valid combinations (critical!)
# Each dictionary contains compatible combinations of penalty, solver, and optional l1_ratio
param_grid = {'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100, 10, 1.0, 0.1, 0.01],
 'solver': ['liblinear', 'saga', 'newton-cg', 'lbfgs', 'sag']
 }

# ⚙️ Step 5: Initialize Model
model = LogisticRegression()

# 🔁 Step 6: Cross-Validation Strategy
# StratifiedKFold ensures equal class distribution in each fold
cv = StratifiedKFold()

# 🔍 Step 7: Perform Grid Search
grid = GridSearchCV(
    estimator=model,          # Our base model
    param_grid=param_grid,    # Hyperparameter combinations to try
    scoring='accuracy',       # Metric to evaluate model performance
    cv=cv,                    # Cross-validation strategy
    n_jobs=-1,                # Run in parallel (all CPUs)
    verbose=1                 # Print progress
)

# 🚂 Step 8: Fit the model with training data and find best parameters
grid.fit(X_train, y_train)

# 🏆 Step 9: Get Best Parameters and Accuracy
print("Best Parameters:", grid.best_params_)
print("Best Cross-Validation Accuracy:", grid.best_score_)

# 🔮 Step 10: Make Predictions with Best Model
y_pred = grid.predict(X_test)

# 🧮 Step 11: Evaluate Model on Test Data
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Fitting 5 folds for each of 75 candidates, totalling 375 fits
# Best Parameters: {'C': 0.1, 'penalty': 'l1', 'solver': 'saga'}
# Best Cross-Validation Accuracy: 0.8571428571428571
# Test Accuracy: 0.8766666666666667
# Confusion Matrix:
#  [[128  20]
#  [ 17 135]]
# Classification Report:
#                precision    recall  f1-score   support

#            0       0.88      0.86      0.87       148
#            1       0.87      0.89      0.88       152

#     accuracy                           0.88       300
#    macro avg       0.88      0.88      0.88       300
# weighted avg       0.88      0.88      0.88       300


# •	GridSearchCV: Tries every combination of parameters and tells you the best one.
# •	StratifiedKFold: Ensures all folds in cross-validation have equal distribution of classes.



Fitting 5 folds for each of 75 candidates, totalling 375 fits
Best Parameters: {'C': 0.1, 'penalty': 'l1', 'solver': 'saga'}
Best Cross-Validation Accuracy: 0.8571428571428571
Test Accuracy: 0.8766666666666667
Confusion Matrix:
 [[128  20]
 [ 17 135]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.86      0.87       148
           1       0.87      0.89      0.88       152

    accuracy                           0.88       300
   macro avg       0.88      0.88      0.88       300
weighted avg       0.88      0.88      0.88       300



200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\sahus\OneDrive\Desktop\Complete Data Science\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\sahus\OneDrive\Desktop\Complete Data Science\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\sahus\OneDrive\Desktop\Complete Data Science\venv\lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
 

## GGGG