In [5]:
from typing import Optional

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier

In [6]:
# Load the breast-cancer dataset as pandas objects (as_frame=True):
# the feature table goes into X and the class labels into y.
data = datasets.load_breast_cancer(as_frame=True)
X: pd.DataFrame = data.data
y: pd.Series = data.target

In [14]:
def evaluate_selection(
    X: pd.DataFrame,
    y: pd.Series,
    cv: int = 10,
    random_state: Optional[int] = None,
):
    """Cross-validate a decision tree on ``X``/``y`` and return its mean scores.

    The data is split with a shuffled ``StratifiedKFold``. For every fold a
    fresh ``DecisionTreeClassifier`` is fitted on the training part and scored
    on the held-out part with ``accuracy_score`` and ``f1_score`` (both with
    their default settings).

    Args:
        X: Feature table; rows are selected positionally with ``.iloc``.
        y: Target labels aligned row-by-row with ``X``.
        cv: Number of folds passed to ``StratifiedKFold`` as ``n_splits``.
        random_state: Seed forwarded to both the fold splitter and every
            per-fold tree. ``None`` (the default) keeps the original unseeded
            behaviour; pass an integer to get repeatable results.

    Returns:
        A ``(mean_accuracy, mean_f1)`` tuple: the unweighted averages of the
        per-fold scores.
    """
    splitter = StratifiedKFold(
        n_splits=cv, shuffle=True, random_state=random_state
    )

    fold_accuracies = []
    fold_f1_scores = []

    for train_index, test_index in splitter.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # A new, untrained model per fold so nothing leaks between folds.
        model = DecisionTreeClassifier(random_state=random_state)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        fold_accuracies.append(accuracy_score(y_test, y_pred))
        fold_f1_scores.append(f1_score(y_test, y_pred))

    average_accuracy = sum(fold_accuracies) / len(fold_accuracies)
    average_f1 = sum(fold_f1_scores) / len(fold_f1_scores)

    return average_accuracy, average_f1

In [76]:
# Baseline: repeated cross-validation on the full feature set.
#
# Seed NumPy's global RNG first so this cell prints the same numbers on every
# run: with random_state left unset, scikit-learn's splitter and tree draw
# from that global generator.
np.random.seed(0)

n_repeats = 100  # number of independent shuffled cross-validation runs
scores = [evaluate_selection(X, y) for _ in range(n_repeats)]

avg_acc = sum(acc for acc, _ in scores) / n_repeats
avg_f1 = sum(f1 for _, f1 in scores) / n_repeats

print(avg_acc, avg_f1)

0.9270178571428568 0.9414571724742632


In [78]:
# Subset of 11 feature columns to evaluate against the full feature set.
columns = [
    'mean texture',
    'mean perimeter',
    'mean smoothness',
    'mean symmetry',
    'mean fractal dimension',
    'compactness error',
    'symmetry error',
    'fractal dimension error',
    'worst area',
    'worst smoothness',
    'worst fractal dimension',
]
X_small = X[columns]

In [79]:
# Repeated cross-validation on the reduced feature set (X_small).
#
# Seed NumPy's global RNG first so this cell prints the same numbers on every
# run: with random_state left unset, scikit-learn's splitter and tree draw
# from that global generator.
np.random.seed(0)

n_repeats = 100  # number of independent shuffled cross-validation runs
scores = [evaluate_selection(X_small, y) for _ in range(n_repeats)]

avg_acc = sum(acc for acc, _ in scores) / n_repeats
avg_f1 = sum(f1 for _, f1 in scores) / n_repeats

print(avg_acc, avg_f1)

0.932750939849624 0.9461452708571712
