In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [None]:
# Load the breast-cancer dataset bundle. Despite the name `df`, later cells
# read it through attributes (.data, .target, .feature_names, .target_names).
df = load_breast_cancer()

In [None]:
# Feature matrix and label vector pulled out of the dataset bundle.
x = df.data
y = df.target

In [None]:
# Inspect the raw feature matrix.
x

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [None]:
# Inspect the label vector (integer class codes).
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [None]:
# Human-readable names for the feature columns and for the class codes.
f_names, t_names = df.feature_names, df.target_names

In [None]:
# Show the feature names in column order.
f_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [None]:
# Show the class names; position in this array is the class code used in `y`.
t_names

array(['malignant', 'benign'], dtype='<U9')

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Base learner 1: standardise the features, then fit a liblinear logistic regression.
p1 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression(solver="liblinear", random_state=42)),
    ]
)

In [None]:
# Base learner 2: standardise the features, then fit a depth-limited decision tree.
p2 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("dt", DecisionTreeClassifier(max_depth=4, random_state=42)),
    ]
)


In [None]:
# Base learner 3: scaled SVC. probability=True makes predict_proba available.
p3 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svc", SVC(random_state=42, probability=True)),
    ]
)

# Base learner 4: scaled k-nearest-neighbours with k = 5.
p4 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("knn", KNeighborsClassifier(n_neighbors=5)),
    ]
)

In [None]:
# (name, pipeline) pairs; the names become the parameter prefixes used in the
# grid search further down (e.g. "lr__lr__C").
base_models = [
    ("lr", p1),
    ("dt", p2),
    ("svc", p3),
    ("knn", p4),
]

In [None]:
# Baseline: fit every base pipeline on the training split and score it on the
# held-out test split, printing accuracy and the per-class report.
for name, model in base_models:
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    print(f"Accuracy of {name}: {accuracy:.3f}")
    print(f"Classification Report of {name}:\n{report}")

Accuracy of lr: 0.974
Classification Report of lr:
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Accuracy of dt: 0.947
Classification Report of dt:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

Accuracy of svc: 0.982
Classification Report of svc:
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy               

In [None]:
# Meta-learner that combines the base-model predictions.
# NOTE(review): seed is 44 here while every other estimator uses 42 - confirm this is intentional.
meta_model = LogisticRegression(solver="liblinear", random_state=44)


In [None]:
# Stack the four base pipelines under the logistic-regression meta-learner.
# passthrough=False: the raw features are not forwarded to the meta-learner.
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
    passthrough=False,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# Train the stacked ensemble on the training split only.
stacked_model.fit(x_train, y_train)

In [None]:
# Score the fitted ensemble on the held-out test split.
y_pred_stacked = stacked_model.predict(x_test)
accuracy_stacked = accuracy_score(y_true=y_test, y_pred=y_pred_stacked)


In [None]:
# Test-set accuracy of the untuned stacked model.
accuracy_stacked

0.9736842105263158

In [None]:
# Per-class precision/recall for the untuned ensemble, labelled with the class names.
stacked_report = classification_report(
    y_test,
    y_pred_stacked,
    target_names=t_names,
)
print(f"Stacked Model Accuracy on test set: {accuracy_stacked:.4f}")
print(f"Stacked Model Classification Report:\n{stacked_report}")


Stacked Model Accuracy on test set: 0.9737
Stacked Model Classification Report:
              precision    recall  f1-score   support

   malignant       0.98      0.95      0.96        43
      benign       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



In [None]:
# 5-fold cross-validated accuracy of each base pipeline. Note this runs on the
# full dataset (x, y), not just the training split.
for name, model in base_models:
    scores = cross_val_score(
        model,
        x,
        y,
        cv=5,
        scoring="accuracy",
        n_jobs=-1,
    )
    print(f"Model: {name}, CV Accuracy: {scores.mean():.4f} (+/- {scores.std():.4f})")


Model: lr, CV Accuracy: 0.9789 (+/- 0.0070)
Model: dt, CV Accuracy: 0.9209 (+/- 0.0202)
Model: svc, CV Accuracy: 0.9736 (+/- 0.0147)
Model: knn, CV Accuracy: 0.9649 (+/- 0.0096)


In [None]:
# Same 5-fold protocol for the stacked ensemble, again on the full dataset.
stacked_scores = cross_val_score(
    stacked_model,
    x,
    y,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
)
print(f"Stacked Model CV Accuracy: {stacked_scores.mean():.4f} (+/- {stacked_scores.std():.4f})")


Stacked Model CV Accuracy: 0.9737 (+/- 0.0078)


In [None]:
# Search space for the stacked model. Keys follow the
# "<estimator name>__<pipeline step>__<parameter>" naming of the nested estimators.
param_grid = {
    # Base learners
    "lr__lr__C": [0.1, 1.0, 10.0],
    "dt__dt__max_depth": [3, 5, 7],
    "svc__svc__C": [0.1, 1.0, 10.0],
    "knn__knn__n_neighbors": [3, 5, 7],
    # Meta-model regularisation strength
    "final_estimator__C": [0.1, 1.0, 10.0],
    # StackingClassifier option: also feed the raw features to the meta-model or not
    "passthrough": [True, False],
}

In [None]:
# Exhaustive 5-fold grid search over `param_grid`. No `scoring` is passed, so
# candidates are ranked by the estimator's own default score.
grid = GridSearchCV(
    estimator=stacked_model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# List every parameter name exposed by the stacked model; these are the keys
# that are valid in a GridSearchCV param_grid for it.
print("Tunable parameters of the stacked model:")
for param_name in stacked_model.get_params():
    print(param_name)

Tunable parameters of the stacked model:
cv
estimators
final_estimator__C
final_estimator__class_weight
final_estimator__dual
final_estimator__fit_intercept
final_estimator__intercept_scaling
final_estimator__l1_ratio
final_estimator__max_iter
final_estimator__multi_class
final_estimator__n_jobs
final_estimator__penalty
final_estimator__random_state
final_estimator__solver
final_estimator__tol
final_estimator__verbose
final_estimator__warm_start
final_estimator
n_jobs
passthrough
stack_method
verbose
lr
dt
svc
knn
lr__memory
lr__steps
lr__transform_input
lr__verbose
lr__scaler
lr__lr
lr__scaler__copy
lr__scaler__with_mean
lr__scaler__with_std
lr__lr__C
lr__lr__class_weight
lr__lr__dual
lr__lr__fit_intercept
lr__lr__intercept_scaling
lr__lr__l1_ratio
lr__lr__max_iter
lr__lr__multi_class
lr__lr__n_jobs
lr__lr__penalty
lr__lr__random_state
lr__lr__solver
lr__lr__tol
lr__lr__verbose
lr__lr__warm_start
dt__memory
dt__steps
dt__transform_input
dt__verbose
dt__scaler
dt__dt
dt__scaler__copy
d

In [None]:
  # Run the grid search on the training split only; the test split stays held out.
  # NOTE(review): this cell carries a stray two-space indent. It ran in the
  # notebook (see the recorded output), but would presumably be an
  # IndentationError if exported to a plain .py script - consider dedenting.
  grid.fit(x_train,y_train)

Fitting 5 folds for each of 486 candidates, totalling 2430 fits


In [None]:
# Keep a handle on the best estimator found by the grid search.
best_model = grid.best_estimator_

In [None]:
# Display the selected estimator.
best_model

In [None]:
# Score the tuned ensemble on the held-out test split.
y_pred_best_stacked = best_model.predict(x_test)
accuracy_best_stacked = accuracy_score(y_true=y_test, y_pred=y_pred_best_stacked)


In [None]:
# Test-set accuracy of the tuned stacked model.
accuracy_best_stacked

0.9649122807017544

In [None]:
# Per-class precision/recall for the tuned ensemble, labelled with the class names.
best_stacked_report = classification_report(
    y_test,
    y_pred_best_stacked,
    target_names=t_names,
)
print(f"\nBest Stacked Model Accuracy on test set (from GridSearchCV): {accuracy_best_stacked:.4f}")
print(f"Best Stacked Model Classification Report:\n{best_stacked_report}")



Best Stacked Model Accuracy on test set (from GridSearchCV): 0.9649
Best Stacked Model Classification Report:
              precision    recall  f1-score   support

   malignant       0.95      0.95      0.95        43
      benign       0.97      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.96      0.96       114
weighted avg       0.96      0.96      0.96       114



In [None]:
# Interactive single-sample prediction with the tuned stacked model.
# Only the features listed below are prompted for; every other feature keeps
# its training-set mean so the sample has the same width as the training data.
user_input_features_indices = [0, 1, 2, 3, 4]
user_input_feature_names = [
    "mean radius", "mean texture", "mean perimeter", "mean area", "mean smoothness"
]

# Start from the per-feature training means; prompted entries are overwritten below.
new_sample = x_train.mean(axis=0)

# Prompt for each selected feature, re-asking until a usable number is entered.
for i, feature_idx in enumerate(user_input_features_indices):
    prompt = f"Enter value for '{f_names[feature_idx]}' ({user_input_feature_names[i]}): "
    while True:
        try:
            value = float(input(prompt))
            # float() also parses "nan", "inf" and "-inf". Treat those like any
            # other bad entry so a non-finite feature never reaches predict().
            if not np.isfinite(value):
                raise ValueError(f"non-finite value: {value}")
        except ValueError:
            print("Invalid input. Please enter a numeric value.")
        else:
            new_sample[feature_idx] = value
            break

# predict() is called with a 2D array, so turn the single sample into one row.
new_sample_reshaped = new_sample.reshape(1, -1)

print("\nMaking prediction with the best stacked model...")
# Predicted class code for the new sample, and its human-readable label.
predicted_class = best_model.predict(new_sample_reshaped)[0]
predicted_class_label = t_names[predicted_class]

print(f"\nPredicted class: {predicted_class} ({predicted_class_label})")
print("--- Prediction Complete ---")

Enter value for 'mean radius' (mean radius): 11.5
Enter value for 'mean texture' (mean texture): 15.0
Enter value for 'mean perimeter' (mean perimeter): 85.0
Enter value for 'mean area' (mean area): 449.0
Enter value for 'mean smoothness' (mean smoothness): 0.12

Making prediction with the best stacked model...

Predicted class: 1 (benign)
--- Prediction Complete ---
