# Breast Cancer Classification Assessment

## Objective
The objective of this assessment is to evaluate the understanding and application of supervised learning techniques to the breast cancer dataset.

## 1. Loading and Preprocessing 


In [8]:
#Load the Dataset
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Convert to DataFrame for easier manipulation
df = pd.DataFrame(X, columns=data.feature_names)
df['target'] = y


In [5]:
from sklearn.preprocessing import StandardScaler
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

## 2.Classification Algorithm Implementation

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#split the data set
X_train,X_test,y_train,y_test=train_test_split(X_scaled,y,test_size=0.2,random_state=42)

#Logistic regression
log_reg=LogisticRegression()
log_reg.fit(X_train,y_train)
y_pred_log_reg=log_reg.predict(X_test)
accuracy_log_reg=accuracy_score(y_test,y_pred_log_reg)

In [14]:
from sklearn.tree import DecisionTreeClassifier

#Decision Tree Classifier
Dt_classifier=DecisionTreeClassifier()
Dt_classifier.fit(X_train,y_train)
y_pred_Dt=Dt_classifier.predict(X_test)
accuracy_dt=accuracy_score(y_pred_Dt,y_test)

In [15]:
from sklearn.ensemble import RandomForestClassifier

#Random Forest Classifier
rf_classifier=RandomForestClassifier()
rf_classifier=rf_classifier.fit(X_train,y_train)
y_pred_rf=rf_classifier.predict(X_test)
accuracy_rf=accuracy_score(y_pred_rf,y_test)

In [17]:
from sklearn.svm import SVC

#Support Vector Classifier
svm_classifier=SVC()
svm_classifier=svm_classifier.fit(X_train,y_train)
y_pred_svm=svm_classifier.predict(X_test)
accuracy_svm=accuracy_score(y_pred_svm,y_test)

In [21]:
from sklearn.neighbors import KNeighborsClassifier

#KNeighbors Classifier
knn_Classifier=KNeighborsClassifier()
knn_Classifier.fit(X_train,y_train)
y_pred_knn=knn_Classifier.predict(X_test)
accuracy_knn=accuracy_score(y_test,y_pred_knn)

In [22]:
# Compare the accuracies
accuracies = {
    'Logistic Regression': accuracy_log_reg,
    'Decision Tree': accuracy_dt,
    'Random Forest': accuracy_rf,
    'SVM': accuracy_svm,
    'k-NN': accuracy_knn
}

# Display the results
for model, accuracy in accuracies.items():
    print(f"{model}: {accuracy:.4f}")

# Identify the best and worst performing models

Logistic Regression: 0.9737
Decision Tree: 0.9386
Random Forest: 0.9649
SVM: 0.9737
k-NN: 0.9474
