# Breast Cancer Dataset from scikit-learn

### Import necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
print("Done!")

### Loading dataset

In [27]:
# Load the dataset object once, then unpack the feature matrix (X) and the labels (y) from it.
breast_cancer_data = load_breast_cancer()
X, y = breast_cancer_data.data, breast_cancer_data.target


### Splitting data into training and testing sets

In [28]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

### Training the RandomForestClassifier (Bagging)

In [19]:
# Scoring helpers used at the end of this cell.
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Build a 100-tree random forest with a fixed seed and fit it on the training split.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict labels for the held-out test split.
predictions = rf.predict(X_test)

# Score the predictions against the true test labels, expressed as percentages.
accuracy = 100 * accuracy_score(y_test, predictions)
precision = 100 * precision_score(y_test, predictions)
recall = 100 * recall_score(y_test, predictions)
f1 = 100 * f1_score(y_test, predictions)

# Report, in order:
#   accuracy  - share of all test samples that were classified correctly
#   precision - share of predicted positives that are truly positive
#   recall    - share of actual positives that were predicted positive
#   F1        - single score that combines precision and recall
print("Accuracy Results using different metrics")
print(f"Accuracy_score: {accuracy:.2f}%")
print(f"Precision_score: {precision:.2f}%")
print(f"Recall_score: {recall:.2f}%")
print(f"F1_score: {f1:.2f}%")

[1 0 0 1 1 0 0 0 0 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1]
Accuracy Results using different metrics
Accuracy_score: 97.08%
Precision_score: 96.40%
Recall_score: 99.07%
F1_score: 97.72%


### Boosting Implementation

In [25]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier

# Weak learner: a depth-1 decision tree (a "stump").
base_model = DecisionTreeClassifier(max_depth=1)

# Boost 50 copies of the weak learner with a fixed seed.
ada_clf = AdaBoostClassifier(
    estimator=base_model,
    n_estimators=50,
    random_state=42,
)

# Fit on the training split, then predict labels for the test split.
ada_clf.fit(X_train, y_train)
predictions = ada_clf.predict(X_test)

# Score the predictions against the true test labels, expressed as percentages.
accuracy = 100 * accuracy_score(y_test, predictions)
precision = 100 * precision_score(y_test, predictions)
recall = 100 * recall_score(y_test, predictions)
f1 = 100 * f1_score(y_test, predictions)

# Report, in order:
#   accuracy  - share of all test samples that were classified correctly
#   precision - share of predicted positives that are truly positive
#   recall    - share of actual positives that were predicted positive
#   F1        - single score that combines precision and recall
print("Accuracy Results using different metrics")
print(f"Accuracy_score: {accuracy:.2f}%")
print(f"Precision_score: {precision:.2f}%")
print(f"Recall_score: {recall:.2f}%")
print(f"F1_score: {f1:.2f}%")



Accuracy Results using different metrics
Accuracy_score: 97.66%
Precision_score: 98.15%
Recall_score: 98.15%
F1_score: 98.15%


### Stacking Implementation

In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Base (level-0) models. Both are seeded with the same value used by the other
# models in this notebook so that re-running the cell reproduces the same scores.
base_models = [
    ('dt', DecisionTreeClassifier(max_depth=3, random_state=42)),  # Decision Tree
    # probability=True enables probability estimates on the SVC; per the sklearn
    # docs that step shuffles data internally, hence the explicit random_state.
    ('svc', SVC(kernel='linear', probability=True, random_state=42)),  # Support Vector Classifier
]

# Meta-learner (level-1 model) that combines the base models' outputs.
meta_model = LogisticRegression()

# Stack the base models under the meta-learner, using 5-fold cross-validation
# on the training data to produce the meta-learner's inputs.
stack_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)

# Train the stacking model
stack_clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred_stack = stack_clf.predict(X_test)

# Evaluate the stacking model's own predictions. The metrics must be computed
# from `y_pred_stack`; scoring the `predictions` variable left behind by an
# earlier cell would silently report another model's results.
# NOTE(review): precision/recall/F1 use sklearn's default positive label (1);
# confirm which diagnosis label 1 denotes in this dataset before interpreting them.
accuracy = accuracy_score(y_test, y_pred_stack) * 100
precision = precision_score(y_test, y_pred_stack) * 100
recall = recall_score(y_test, y_pred_stack) * 100
f1 = f1_score(y_test, y_pred_stack) * 100

print("Accuracy Results using different metrics")
print(f"Accuracy_score: {accuracy:.2f}%")

# Of the samples predicted positive, the share that are actually positive.
print(f"Precision_score: {precision:.2f}%")

# Of the samples that are actually positive, the share predicted positive.
print(f"Recall_score: {recall:.2f}%")

# Single score that combines precision and recall.
print(f"F1_score: {f1:.2f}%")