In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score
import pandas as pd
import numpy as np

In [6]:
cancer = load_breast_cancer()
df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
scaled_df = StandardScaler().fit_transform(df)

X_train, X_test, y_train, y_test = train_test_split(scaled_df, cancer.target, test_size=0.2)

lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print("acc : {0:.3f}".format(accuracy_score(y_test, y_pred)))
print("roc auc : {0:.3f}".format(roc_auc_score(y_test, y_pred)))

acc : 0.974
roc auc : 0.971


In [9]:
params = {
    "penalty" : ["l2", "l1"],
    "C" : [0.01, 0.1, 1, 5, 10]
}

lr = LogisticRegression()
gs = GridSearchCV(lr, param_grid=params, cv=5, n_jobs=-1, scoring="accuracy")
gs.fit(scaled_df, cancer.target)
print("best param : {}".format(gs.best_params_))
print("best acc : {}".format(gs.best_score_))

best param : {'C': 1, 'penalty': 'l2'}
best acc : 0.9806862288464524
