<a href="https://colab.research.google.com/github/223016012-cyber/-Breast-Cancer-Diagnosis-Predictions/blob/main/assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report


# --- Load and prepare the data ---------------------------------------------
df = pd.read_csv("Dataset.csv")

# Encode the target: "M" -> 1, "B" -> 0.
df["diagnosis_encoded"] = df["diagnosis"].map({"M": 1, "B": 0})

# Series.map yields NaN for any value missing from the mapping. Fail loudly
# here instead of letting NaN targets flow into the split and the classifier.
unmapped_rows = df["diagnosis_encoded"].isna()
if unmapped_rows.any():
    bad_labels = sorted(df.loc[unmapped_rows, "diagnosis"].astype(str).unique())
    raise ValueError(f"Unexpected values in 'diagnosis' column: {bad_labels}")

# Drop the non-feature columns when they are present in the file.
columns_to_drop = [col for col in ["id", "Unnamed: 32"] if col in df.columns]
df = df.drop(columns=columns_to_drop)

feature_cols = [col for col in df.columns if col not in ["diagnosis", "diagnosis_encoded"]]

y = df["diagnosis_encoded"]

# --- Supervised path: split FIRST, then scale -------------------------------
# Fitting the scaler on the full dataset before splitting would leak the test
# rows' min/max into the training features and make the held-out metrics
# optimistic. The split itself (rows, seed, stratification) is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df[feature_cols], y, test_size=0.20, random_state=42, stratify=y
)

scaler = MinMaxScaler()
# Learn min/max from the training fold only ...
X_train = pd.DataFrame(
    scaler.fit_transform(X_train_raw),
    columns=feature_cols,
    index=X_train_raw.index,
)
# ... and reuse those training statistics on the test fold. Test values may
# therefore fall slightly outside [0, 1]; that is expected.
X_test = pd.DataFrame(
    scaler.transform(X_test_raw),
    columns=feature_cols,
    index=X_test_raw.index,
)

# --- Unsupervised path: full scaled matrix ----------------------------------
# Clustering below is fit on every row with no held-out evaluation, so scaling
# all rows together is fine. A separate scaler is used so the train-only
# `scaler` above is not refit. `df` itself is left unscaled.
cluster_scaler = MinMaxScaler()
X = pd.DataFrame(
    cluster_scaler.fit_transform(df[feature_cols]),
    columns=feature_cols,
    index=df.index,
)


# Unsupervised check: partition all samples in X into two clusters, then
# tabulate cluster membership against the known labels in y.
kmeans = KMeans(
    n_clusters=2,
    n_init=10,
    random_state=42,
)
cluster_labels = kmeans.fit_predict(X)

# Rows are cluster ids, columns are the encoded diagnosis values.
cluster_vs_actual = pd.crosstab(
    index=cluster_labels,
    columns=y,
    rownames=["Cluster"],
    colnames=["Actual"],
)

print("\n--- K-Means Cluster vs True Labels ---")
print(cluster_vs_actual)


# Train a 5-nearest-neighbours classifier on the training fold and predict
# the held-out fold.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Headline scores; zero_division=0 makes an undefined precision/recall/F1
# report 0 instead of emitting a warning.
print("\n--- KNN Classification Results ---")
headline_metrics = (
    ("Accuracy  :", accuracy_score(y_test, y_pred)),
    ("Precision :", precision_score(y_test, y_pred, zero_division=0)),
    ("Recall    :", recall_score(y_test, y_pred, zero_division=0)),
    ("F1-score  :", f1_score(y_test, y_pred, zero_division=0)),
)
for caption, value in headline_metrics:
    print(caption, value)

# Per-class breakdown followed by the raw confusion matrix.
per_class_report = classification_report(y_test, y_pred)
print("\n--- Classification Report ---")
print(per_class_report)

confusion = confusion_matrix(y_test, y_pred)
print("\n--- Confusion Matrix ---")
print(confusion)



--- K-Means Cluster vs True Labels ---
Actual     0    1
Cluster          
0          9  180
1        348   32

--- KNN Classification Results ---
Accuracy  : 0.9649122807017544
Precision : 1.0
Recall    : 0.9047619047619048
F1-score  : 0.95

--- Classification Report ---
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        72
           1       1.00      0.90      0.95        42

    accuracy                           0.96       114
   macro avg       0.97      0.95      0.96       114
weighted avg       0.97      0.96      0.96       114


--- Confusion Matrix ---
[[72  0]
 [ 4 38]]
