In [1]:
# Make project-local packages importable: the directory two levels above the
# current working directory is appended to sys.path (only once, so re-running
# this cell does not add duplicate entries).
import sys
from pathlib import Path

project_path = str(Path.cwd().parent.parent.resolve())
if project_path not in sys.path:
    sys.path.append(project_path)

# imports
from common.utils import get_datasets, X_TRAIN, Y_TRAIN, X_TEST, Y_TEST

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the datasets through the project helper. Each item is iterated below and
# indexed with the X_TRAIN / Y_TRAIN / X_TEST / Y_TEST keys from common.utils.
datasets = get_datasets()

In [3]:
# Model under tuning: polynomial feature expansion followed by logistic regression.
# The step names ("poly", "clf") are the prefixes used by the search grid keys.
pipeline = Pipeline([
    # include_bias=False: no constant column is generated by the expansion.
    ("poly", PolynomialFeatures(include_bias=False)),
    # The liblinear solver (used in the search grid) shuffles the data internally,
    # so the seed is fixed to make repeated runs of the search comparable.
    ("clf", LogisticRegression(random_state=42))
])

# Two sub-grids, because the solvers support different penalties:
#   * liblinear is searched with L1 and L2 regularisation, whose strength is C;
#   * lbfgs is searched without any penalty.
param_grid = [
    {
        "poly__degree": [1, 2],
        "clf__penalty": ["l1", "l2"],
        "clf__C": [0.1, 1],
        "clf__solver": ["liblinear"],
        "clf__max_iter": [200, 400, 600]
    },
    {
        # No "clf__C" entry here: with penalty=None the regularisation strength
        # is ignored, so varying it only duplicates identical candidates (and
        # makes scikit-learn warn for every C != 1).
        "poly__degree": [1, 2],
        "clf__penalty": [None],
        "clf__solver": ["lbfgs"],
        "clf__max_iter": [200, 400, 600]
    }
]

# Exhaustive search over param_grid, scored by accuracy with 5-fold
# cross-validation; n_jobs=-1 lets the search use all available cores.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

# The same search object is re-fitted for every dataset, so after the loop
# `grid` only holds the results of the last one.
for dataset in datasets:
    train_X, train_y = dataset[X_TRAIN], dataset[Y_TRAIN]
    grid.fit(train_X, train_y)

    print(f"Best params: {grid.best_params_}")
    print(f"Train accuracy: {grid.score(train_X, train_y)}")

    test_X, test_y = dataset[X_TEST], dataset[Y_TEST]
    print(f"Test accuracy: {grid.score(test_X, test_y)}")
    print()
    

Best params: {'clf__C': 1, 'clf__max_iter': 200, 'clf__penalty': 'l1', 'clf__solver': 'liblinear', 'poly__degree': 1}
Train accuracy: 0.8023058663953883
Test accuracy: 0.7803389830508475

Best params: {'clf__C': 1, 'clf__max_iter': 200, 'clf__penalty': 'l2', 'clf__solver': 'liblinear', 'poly__degree': 1}
Train accuracy: 0.800949474398101
Test accuracy: 0.7783050847457628

Best params: {'clf__C': 1, 'clf__max_iter': 400, 'clf__penalty': 'l1', 'clf__solver': 'liblinear', 'poly__degree': 1}
Train accuracy: 0.8016949152542373
Test accuracy: 0.7659430122116689

