In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by the libraries
# used below (e.g. convergence or failed-fit warnings) - confirm that is intended.
warnings.simplefilter('ignore')

# Build a four-row sample dataset (two numeric features, one categorical
# feature and a binary target) and persist it to disk as a CSV file
data = dict(
    age=[25, 30, 35, 40],
    income=[50000, 60000, 70000, 80000],
    education=['Bachelors', 'Masters', 'PhD', 'Bachelors'],
    target=[0, 1, 0, 1],
)
pd.DataFrame(data).to_csv('data.csv', index=False)  # index=False: no row-index column

# Read the dataset back from the CSV file that was just written
df = pd.read_csv('data.csv')

# One-hot encode the categorical 'education' column; drop_first=True omits the
# first category so the remaining indicator columns are not redundant
df = pd.get_dummies(data=df, columns=['education'], drop_first=True)

# Separate the feature matrix (every column except 'target') from the label vector
X, y = df.drop(columns=['target']), df['target']

# Hold out 25% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Define the parameter grid as a list of sub-grids so that each penalty is only
# paired with a solver that supports it:
#   - 'l2'         -> 'lbfgs'
#   - 'l1'         -> 'liblinear'
#   - 'elasticnet' -> 'saga' (requires l1_ratio)
#   - no penalty   -> 'lbfgs'
# The unpenalized model is requested with None (scikit-learn >= 1.2); the old
# string spelling 'none' was deprecated in 1.2 and is rejected from 1.4 on.
# C is the inverse regularization strength and is ignored when there is no
# penalty, so it is not swept for that entry (it would only repeat the same fit).
param_grid = [
    {'penalty': ['l2'], 'solver': ['lbfgs'], 'C': [0.1, 1, 10, 100]},
    {'penalty': ['l1'], 'solver': ['liblinear'], 'C': [0.1, 1, 10, 100]},
    {'penalty': ['elasticnet'], 'solver': ['saga'], 'C': [0.1, 1, 10, 100], 'l1_ratio': [0.5]},
    {'penalty': [None], 'solver': ['lbfgs']},
]

# Run an exhaustive, cross-validated search over param_grid and keep the fitted
# search object. fit() returns the search object itself, so construction and
# fitting are chained.
grid_search = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    param_grid,
    scoring='accuracy',  # candidates are ranked by mean accuracy across folds
    cv=2,  # only two folds because the training split is very small
    n_jobs=-1,  # use all available processors
).fit(X_train, y_train)

# Retrieve the winning hyper-parameters and the estimator that GridSearchCV
# refit on the whole training split with those parameters
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

print(f'Best Parameters: {best_params}')

# Evaluate the best model on the held-out test split
y_pred_best = best_estimator.predict(X_test)

# Report against every class the model was trained on. Without explicit labels
# the confusion matrix and report only cover classes that happen to occur in
# y_test / y_pred_best, so with a very small test split their shape would
# change from run to run (e.g. collapse to a 1x1 matrix).
class_labels = best_estimator.classes_

accuracy_best = accuracy_score(y_test, y_pred_best)
conf_matrix_best = confusion_matrix(y_test, y_pred_best, labels=class_labels)
# zero_division=0 makes the value used for undefined precision/recall (a class
# with no true or no predicted samples) explicit instead of relying on a warning
class_report_best = classification_report(
    y_test,
    y_pred_best,
    labels=class_labels,
    zero_division=0,
)

print(f'Best Model Accuracy: {accuracy_best}')
print(f'Best Model Confusion Matrix:\n{conf_matrix_best}')
print(f'Best Model Classification Report:\n{class_report_best}')

Best Parameters: {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}
Best Model Accuracy: 0.0
Best Model Confusion Matrix:
[[0 0]
 [1 0]]
Best Model Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0

