In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Columns of X with a numeric dtype; only these are standardized.
numerical_features = X.select_dtypes(include=np.number).columns

# Standardize the numeric columns and hand every other column through
# unchanged (remainder="passthrough"), so no feature of X is dropped.
preprocessor = ColumnTransformer(
    [("num", StandardScaler(), numerical_features)],
    remainder="passthrough",
)

# Create one preprocessing + classifier pipeline per model.
#
# Pipeline keeps the step objects it is given rather than copying them, so
# passing the same `preprocessor` instance to both pipelines would make them
# share one transformer: fitting either pipeline directly would overwrite the
# scaling statistics the other one relies on. Each pipeline therefore gets its
# own unfitted clone of the transformer.
from sklearn.base import clone

clf1_pipeline = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('classifier', DecisionTreeClassifier(random_state=42)),
])

clf2_pipeline = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    # max_iter is raised to 1000 to give the solver more iterations to converge
    ('classifier', LogisticRegression(max_iter=1000)),
])

# 10-fold cross-validation splitter; shuffling with a fixed seed keeps the
# fold assignment reproducible between runs.
k_folds = KFold(shuffle=True, n_splits=10, random_state=42)

# Score both pipelines with the same splitter configuration. The scaler is
# part of each pipeline, so it is fitted as part of the estimator on every
# fold. `scoring` is left at cross_val_score's default.
scores1_scaled, scores2_scaled = (
    cross_val_score(pipeline, X, y, cv=k_folds)
    for pipeline in (clf1_pipeline, clf2_pipeline)
)

# Report the per-fold scores first, then the mean score of each model.
# Each heading keeps its trailing space, so print() emits two spaces
# between the colon and the value, as before.
_report = [
    ("Decision Tree (Scaled Data) - Cross Validation Scores: ", scores1_scaled),
    ("Logistic Regression (Scaled Data) - Cross Validation Scores: ", scores2_scaled),
    ("Decision Tree (Scaled Data) - Average CV Score: ", scores1_scaled.mean()),
    ("Logistic Regression (Scaled Data) - Average CV Score: ", scores2_scaled.mean()),
]
for _heading, _value in _report:
    print(_heading, _value)

Decision Tree (Scaled Data) - Cross Validation Scores:  [0.63861386 0.60199005 0.6318408  0.6318408  0.64179104 0.55721393
 0.62686567 0.59701493 0.64676617 0.62189055]
Logistic Regression (Scaled Data) - Cross Validation Scores:  [0.60891089 0.53731343 0.6119403  0.60696517 0.64676617 0.6119403
 0.52736318 0.58706468 0.60696517 0.6318408 ]
Decision Tree (Scaled Data) - Average CV Score:  0.6195827791734396
Logistic Regression (Scaled Data) - Average CV Score:  0.5977070095069208
