In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer


In [11]:
# Load the breast cancer dataset into a single DataFrame: one column per
# feature name, with the class label appended as the final column, 'target'.
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)



In [3]:
# Count NaN cells across the whole frame (feature columns and target alike).
total_missing = df.isna().sum().sum()
print("Missing values:", total_missing)


Missing values: 0


In [4]:
# Mean-impute the feature columns, i.e. every column except the last one
# ('target'), and write the result back into df in place.
# NOTE(review): the imputer is fitted on all rows, before the train/test split,
# so test rows would contribute to the column means. The missing-value check
# reports 0, so nothing is actually filled in on this dataset.
imputer = SimpleImputer(strategy='mean')
feature_values = df.iloc[:, :-1]
df.iloc[:, :-1] = imputer.fit_transform(feature_values)



In [5]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='target'),
    df['target'],
    test_size=0.2,
    random_state=42,
)



In [6]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



Preprocessing Explanation

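- Loaded the breast cancer dataset and checked for missing values (none were found).
- Applied mean imputation to the feature columns as a safeguard; with no missing values it leaves the data unchanged.
- Split the data into training (80%) and testing (20%) sets.
- Applied StandardScaler (fitted on the training set only) so that all features are on a comparable scale and carry equal weight, which improves model performance.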



Classification Algorithm Implementation 

Logistic Regression


In [7]:

from sklearn.linear_model import LogisticRegression

# Fit logistic regression (max_iter=1000) on the scaled training data; fit()
# returns the estimator itself. Then predict labels for the scaled test set.
lr_model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
lr_pred = lr_model.predict(X_test_scaled)




Decision Tree Classifier


In [12]:
from sklearn.tree import DecisionTreeClassifier

# Seeded decision tree, trained on the same scaled features as the other
# models; predictions are made for the scaled test set.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
dt_pred = dt_model.predict(X_test_scaled)



Random Forest Classifier


In [13]:
from sklearn.ensemble import RandomForestClassifier

# Seeded random forest of 100 trees, trained on the scaled training data;
# predictions are made for the scaled test set.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)
rf_pred = rf_model.predict(X_test_scaled)

 Support Vector Machine (SVM)


In [16]:
from sklearn.svm import SVC

# Support vector classifier with library defaults apart from the seed,
# trained on the scaled training data and evaluated on the scaled test set.
svm_model = SVC(random_state=42).fit(X_train_scaled, y_train)
svm_pred = svm_model.predict(X_test_scaled)


k-Nearest Neighbors (k-NN)


In [14]:
from sklearn.neighbors import KNeighborsClassifier

# k-NN classifier with 5 neighbours, fitted on the scaled training data and
# used to predict labels for the scaled test set.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
knn_pred = knn_model.predict(X_test_scaled)


Model Comparison 

In [17]:
from sklearn.metrics import accuracy_score, classification_report

# Pair each display name with its test-set predictions once, so the accuracy
# table and the per-model reports are guaranteed to use the same ordering.
predictions_by_model = {
    'Logistic Regression': lr_pred,
    'Decision Tree': dt_pred,
    'Random Forest': rf_pred,
    'SVM': svm_pred,
    'k-NN': knn_pred,
}
models = list(predictions_by_model)
accuracy = [accuracy_score(y_test, y_hat) for y_hat in predictions_by_model.values()]

# Accuracy summary, one line per model, rounded to three decimals.
print("Model Accuracy:")
for model, acc in zip(models, accuracy):
    print(f"{model}: {acc:.3f}")

# Full precision / recall / F1 breakdown for each model.
print("\nClassification Reports:")
for model, pred in predictions_by_model.items():
    print(f"\n{model}:")
    print(classification_report(y_test, pred))



Model Accuracy:
Logistic Regression: 0.974
Decision Tree: 0.947
Random Forest: 0.965
SVM: 0.982
k-NN: 0.947

Classification Reports:

Logistic Regression:
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114


Decision Tree:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114


Random Forest:
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97       

Model Comparison Explanation

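- Calculated accuracy scores for each model on the held-out test set (114 samples).
- Generated classification reports showing precision, recall, and F1-score per class.
- On this split, SVM scored highest (0.982), followed by Logistic Regression (0.974) and Random Forest (0.965); Decision Tree and k-NN tied at 0.947.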

