In [1]:
import os
os.environ['KAGGLE_CONFIG_DIR'] ='/content'
!kaggle datasets download -d yasserh/breast-cancer-dataset
!unzip \*.zip && rm *.zip

Downloading breast-cancer-dataset.zip to /content
  0% 0.00/48.6k [00:00<?, ?B/s]
100% 48.6k/48.6k [00:00<00:00, 1.48MB/s]
Archive:  breast-cancer-dataset.zip
  inflating: breast-cancer.csv       


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KernelDensity, KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Step 1: Load Data
data = pd.read_csv('/content/breast-cancer.csv')

# Assuming 'diagnosis' is the target variable
X = data[['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
          'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean']]
y = data['diagnosis']  # Target variable

# Step 2: Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: Density Estimation using KDE
kde = KernelDensity(bandwidth=1.0, kernel='gaussian')
kde.fit(X_train_scaled)

# Step 5: Adaptive Neighbor Selection
local_densities = np.exp(kde.score_samples(X_train_scaled))
adaptive_k_values = np.ceil(local_densities * len(X_train_scaled) / np.sum(local_densities)).astype(int)

# Step 6: Model Training and Evaluation
y_pred = []
for i, x_test_point in enumerate(X_test_scaled):
    # Find K nearest neighbors using adaptive K value
    knn_model = KNeighborsClassifier(n_neighbors=adaptive_k_values[i])
    knn_model.fit(X_train_scaled, y_train)
    y_pred.append(knn_model.predict([x_test_point])[0])

# Step 7: Model Evaluation
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9385964912280702
Precision: 0.939058795637743
Recall: 0.9385964912280702
F1 Score: 0.9381219979737462
Classification Report:
               precision    recall  f1-score   support

           B       0.93      0.97      0.95        71
           M       0.95      0.88      0.92        43

    accuracy                           0.94       114
   macro avg       0.94      0.93      0.93       114
weighted avg       0.94      0.94      0.94       114

