In [1]:
import os
os.environ['KAGGLE_CONFIG_DIR'] ='/content'
!kaggle datasets download -d harshghadiya/kidneystone
!unzip \*.zip && rm *.zip

Downloading kidneystone.zip to /content
  0% 0.00/1.89k [00:00<?, ?B/s]
100% 1.89k/1.89k [00:00<00:00, 4.18MB/s]
Archive:  kidneystone.zip
  inflating: kidney-stone-dataset.csv  


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Step 1: Load Data
# Read the CSV that the download cell extracted into /content.
data = pd.read_csv('/content/kidney-stone-dataset.csv')

# Step 2: Prepare Data
# Six columns are used as predictors; the 'target' column is the label.
X = data.loc[:, ['gravity', 'ph', 'osmo', 'cond', 'urea', 'calc']]
y = data.loc[:, 'target']

# Step 3: Split the dataset into training and testing sets
# 80/20 hold-out split; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 4: Standardize the features
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both partitions, so nothing from the test set leaks
# into the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Train multiple KNN models (bagging)
# Each ensemble member is a KNN classifier fitted on its own bootstrap
# resample of the scaled training set.
n_models = 5  # Number of models in the ensemble
k = 5  # Number of neighbors for each model

# A dedicated, seeded generator makes the bootstrap draws (and therefore every
# downstream metric) identical across "Restart & Run All". The unseeded global
# np.random state produced a different ensemble on every run.
bootstrap_seed = 42
rng = np.random.default_rng(bootstrap_seed)

n_train = len(X_train_scaled)  # loop-invariant, so computed once
models = []
for i in range(n_models):
    # Bootstrap: draw n_train row positions with replacement, so some rows
    # repeat and others are left out of this member's training data.
    indices = rng.choice(n_train, size=n_train, replace=True)
    X_train_subset = X_train_scaled[indices]
    # y_train is a pandas Series; .iloc selects by position so the labels stay
    # aligned with the rows picked from the NumPy feature array.
    y_train_subset = y_train.iloc[indices]
    # Train a KNN model on the resample
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(X_train_subset, y_train_subset)
    models.append(knn_model)

# Step 6: Make predictions with each model
# One array of predicted labels per ensemble member, in the same order as `models`.
y_preds = [model.predict(X_test_scaled) for model in models]

# Step 7: Aggregate predictions
# Simple majority voting, assuming binary classification with labels 0 and 1:
# the mean of the votes for a sample is the fraction of models predicting 1.
# A sample is labelled 1 when at least half of the models vote 1, so with an
# even number of models a tie resolves to class 1.
# The comparison yields a boolean mask; it is cast to int so the ensemble
# output consists of class labels (0/1) like y_test, not True/False.
y_pred_ensemble = (np.mean(y_preds, axis=0) >= 0.5).astype(int)

# Step 8: Evaluate the ensemble model
# Precision, recall and F1 are averaged across classes weighted by each
# class's support in y_test; accuracy needs no averaging.
weighted = {'average': 'weighted'}
accuracy = accuracy_score(y_test, y_pred_ensemble)
precision = precision_score(y_test, y_pred_ensemble, **weighted)
recall = recall_score(y_test, y_pred_ensemble, **weighted)
f1 = f1_score(y_test, y_pred_ensemble, **weighted)

# Display evaluation metrics, one "label value" line per metric
metric_lines = (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
)
for label, value in metric_lines:
    print(label, value)

# Display classification report (per-class precision / recall / F1 / support)
report = classification_report(y_test, y_pred_ensemble)
print("\nClassification Report:\n", report)


Accuracy: 0.6666666666666666
Precision: 0.7000000000000001
Recall: 0.6666666666666666
F1 Score: 0.6753246753246753

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.67      0.73        12
           1       0.50      0.67      0.57         6

    accuracy                           0.67        18
   macro avg       0.65      0.67      0.65        18
weighted avg       0.70      0.67      0.68        18

