In [1]:
# ==============================================================================
# Import modules
# ==============================================================================

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ==============================================================================
# Load data and select features; split into training and test data
# ==============================================================================
# Load the breast-cancer dataset; every available feature column is used.
data = datasets.load_breast_cancer()
X = data.data
y = data.target

# Experiment settings, gathered in one place so the loop bounds and the
# report text below cannot drift apart.
N_SPLITS = 100  # number of random train/test partitions (seeds 1..N_SPLITS)
TEST_SIZE = 0.3  # fraction of the samples held out as test data
C = 1e-5  # value passed as LogisticRegression's C (inverse regularization strength)

test_accuracies = []
train_accuracies = []
for rs in range(1, N_SPLITS + 1):
    # Stratified split: `rs` seeds the shuffle, so each iteration yields a
    # different but reproducible partition.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=rs, stratify=y
    )

    # Standardize features. The scaler is fitted on the training part only and
    # then applied unchanged to the test part.
    sc = StandardScaler()
    X_train_sc = sc.fit_transform(X_train)
    X_test_sc = sc.transform(X_test)

    # L2-penalized logistic regression; `random_state` is fixed so the model
    # itself is the same from run to run.
    clf = LogisticRegression(penalty="l2", solver="liblinear", random_state=1, C=C)
    clf.fit(X_train_sc, y_train)

    # Record accuracy on both partitions for this split.
    test_score = clf.score(X_test_sc, y_test)
    train_score = clf.score(X_train_sc, y_train)
    test_accuracies.append(test_score)
    train_accuracies.append(train_score)

# Report mean +/- standard deviation (np.std default, i.e. ddof=0) over all splits.
for label, accuracies in (("test", test_accuracies), ("train", train_accuracies)):
    print(
        f"Average {label} accuracy across {N_SPLITS} splits for C={C}"
        f" is {np.mean(accuracies):.3f} +/- {np.std(accuracies):.3f}"
    )

Average test accuracy across 100 splits for C=1e-05 is 0.933 +/- 0.018
Average train accuracy across 100 splits for C=1e-05 is 0.933 +/- 0.006
