In [8]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score

In [9]:
data10 = pd.read_csv(r'C:\Users\ddaga\Desktop\Assorted\Projects\HIGGSBosonCOMP4730\Dataset\higgs10k.csv')

In [10]:
X = data10.iloc[:, 1:]
y = data10.iloc[:, 0]

In [11]:
kf = KFold(n_splits=10, shuffle=True, random_state=42)

param_grid = {
    'penalty': ['l1', 'l2', 'elasticnet', None],
    'solver': ['liblinear', 'newton-cg', 'newton-cholesky', 'lbfgs', 'sag', 'saga']  
}

lr = LogisticRegression(random_state=42)

grid_search = GridSearchCV(estimator=lr, param_grid=param_grid, cv=kf, scoring='accuracy')

accuracies = []
recalls = []
auc_scores = []

In [12]:
for train_index, test_index in kf.split(X):

    X_train, X_test, y_train, y_test = X.iloc[train_index], X.iloc[test_index], y.iloc[train_index], y.iloc[test_index]

    grid_search.fit(X_train, y_train)

    best_lr = grid_search.best_estimator_
    
    y_scores = best_lr.decision_function(X_test)

    y_pred = best_lr.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_scores)
    
    accuracies.append(accuracy)
    recalls.append(recall)
    auc_scores.append(auc)



170 fits failed out of a total of 240.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\ddaga\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\ddaga\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1091, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\ddaga\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 61, in _check_solver
    raise ValueError(
ValueError: Solver new

In [14]:
print("Average Accuracy:", np.mean(accuracies))
print("Average Recall:", np.mean(recalls))
print("Average AUC:", np.mean(auc_scores))

print("Best Hyperparameters:", grid_search.best_params_)

Average Accuracy: 0.6432
Average Recall: 0.7294082365358013
Average AUC: 0.6838490603408139
Best Hyperparameters: {'penalty': 'l1', 'solver': 'saga'}
