In [1]:
import os
os.environ['KAGGLE_CONFIG_DIR'] ='/content'
!kaggle datasets download -d erdemtaha/cancer-data
!unzip \*.zip && rm *.zip

Downloading cancer-data.zip to /content
  0% 0.00/48.6k [00:00<?, ?B/s]
100% 48.6k/48.6k [00:00<00:00, 34.5MB/s]
Archive:  cancer-data.zip
  inflating: Cancer_Data.csv         


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Step 1: Load Data
data = pd.read_csv('/content/Cancer_Data.csv')  # Replace '/path/to/your/dataset.csv' with the actual path

# Step 2: Prepare Data
X = data[['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
          'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean']]
y = data['diagnosis']  # Target variable

# Step 3: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Find Mutual Nearest Neighbors
def find_mutual_neighbors(X_train):
    n_samples = X_train.shape[0]
    mutual_indices = np.zeros((n_samples, n_samples), dtype=bool)
    for i, x in enumerate(X_train):
        distances = np.linalg.norm(X_train - x, axis=1)
        sorted_indices = np.argsort(distances)
        for idx in sorted_indices[:k]:
            if np.argwhere(sorted_indices == i)[0][0] in np.argwhere(sorted_indices[:k] == idx):
                mutual_indices[i][idx] = True
    return mutual_indices

k = 5  # Number of neighbors
mutual_neighbors = find_mutual_neighbors(X_train_scaled)

# Step 6: Train the MKNN model
mknn_models = []
for i in range(len(mutual_neighbors)):
    mutual_indices = np.where(mutual_neighbors[i])[0]
    X_train_mutual = X_train_scaled[mutual_indices]
    y_train_mutual = y_train.iloc[mutual_indices]
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(X_train_mutual, y_train_mutual)
    mknn_models.append(knn_model)

# Step 7: Make predictions using the MKNN model
y_pred = []
for x_test_point in X_test_scaled:
    mutual_index = np.argmin(np.linalg.norm(X_train_scaled - x_test_point, axis=1))
    if isinstance(mutual_index, np.ndarray):
        mutual_index = mutual_index[0]  # Extracting the scalar index
    knn_model = mknn_models[mutual_index]
    # Reduce the number of neighbors to the minimum between 5 and the number of samples in the training set
    n_neighbors = min(5, len(X_train_scaled))
    knn_model.n_neighbors = n_neighbors  # Update the number of neighbors
    pred_label = knn_model.predict([x_test_point])[0]
    y_pred.append(pred_label)

# Step 8: Evaluate the model (same as before)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Display evaluation metrics (same as before)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Display classification report (same as before)
print("\nClassification Report:\n", classification_report(y_test, y_pred))



ValueError: Expected n_neighbors <= n_samples,  but n_samples = 1, n_neighbors = 5