In [3]:
sklearn.__version__

'1.5.2'

In [207]:
import sklearn
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import warnings

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

warnings.filterwarnings('ignore')

In [149]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
car_evaluation = fetch_ucirepo(id=19) 
  
# data (as pandas dataframes) 
X = car_evaluation.data.features 
y = car_evaluation.data.targets 



In [150]:
y['class'].value_counts()

class
unacc    1210
acc       384
good       69
vgood      65
Name: count, dtype: int64

In [151]:
y['class'] = y['class'].apply(lambda x : 0 if x == "unacc" else 1) # np.where(y['class']=='unacc',0,1)


In [152]:
y['class'].value_counts(normalize=True)*100

class
0    70.023148
1    29.976852
Name: proportion, dtype: float64

In [153]:
X_train, X_test, y_train, y_test = train_test_split(X,y, 
                                                    test_size=0.3, 
                                                    random_state=24,
                                                   stratify = y['class'])

In [154]:
y_train['class'].value_counts(normalize=True)

class
0    0.700579
1    0.299421
Name: proportion, dtype: float64

In [155]:
y_test['class'].value_counts(normalize=True)

class
0    0.699422
1    0.300578
Name: proportion, dtype: float64

In [156]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1209, 6), (519, 6), (1209, 1), (519, 1))

In [157]:
ohe = OneHotEncoder(drop='first', 
                   handle_unknown='ignore',
                   sparse_output=False
                   ).set_output(transform='pandas')

X_trn_ohe = ohe.fit_transform(X_train)
X_tst_ohe = ohe.transform(X_test)

In [167]:
log = LogisticRegression(solver='saga')
log.fit(X_trn_ohe, y_train)
y_pred = log.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9633911368015414

In [169]:
y_train.value_counts()

class
0        847
1        362
Name: count, dtype: int64

In [171]:
518*0.7002

362.70360000000005

In [173]:
518 - 363

155

In [176]:
lr = LogisticRegression(solver = 'lbfgs', penalty = 'l2')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9633911368015414

In [178]:
lr = LogisticRegression(solver = 'lbfgs', penalty = None)
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9653179190751445

In [180]:
lr = LogisticRegression(solver = 'liblinear', penalty = 'l1')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9653179190751445

In [182]:
lr = LogisticRegression(solver = 'liblinear', penalty = 'l2')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9595375722543352

In [184]:
lr = LogisticRegression(solver = 'newton-cg', penalty = 'l2')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9633911368015414

In [186]:
lr = LogisticRegression(solver = 'newton-cg', penalty = None)
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9653179190751445

In [188]:
lr = LogisticRegression(solver = 'newton-cholesky', penalty = 'l2')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9633911368015414

In [190]:
lr = LogisticRegression(solver = 'newton-cholesky', penalty = None)
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9653179190751445

In [192]:
lr = LogisticRegression(solver = 'sag', penalty = 'l2')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9633911368015414

In [194]:
lr = LogisticRegression(solver = 'sag', penalty = None)
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9653179190751445

In [196]:
lr = LogisticRegression(solver = 'saga', penalty = None)
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9653179190751445

In [198]:
lr = LogisticRegression(solver = 'saga', penalty = 'l2')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9633911368015414

In [200]:
lr = LogisticRegression(solver = 'saga', penalty = 'l1')
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9672447013487476

In [202]:
sagalr = LogisticRegression(solver = 'saga', penalty = "elasticnet")
lr.fit(X_trn_ohe, y_train)
y_pred = lr.predict(X_tst_ohe)
accuracy_score(y_test, y_pred)

0.9672447013487476

In [231]:
ohe = OneHotEncoder( drop = 'first',
                   handle_unknown = 'ignore',
                   sparse_output = False)

lr = LogisticRegression()
pipe = Pipeline([("OHE", ohe), ("LR", lr)])
params = {"LR__solver" : ["liblinear", "lbfgs", "newton-cg", "newton-cholesky", "sag", "saga"]}
gcv = GridSearchCV(pipe, param_grid = params)
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)



{'LR__solver': 'newton-cg'}
0.829761246544358
