In [3]:
import os
os.environ['KAGGLE_CONFIG_DIR'] ='/content'
!kaggle datasets download -d jillanisofttech/brain-stroke-dataset


Downloading brain-stroke-dataset.zip to /content
  0% 0.00/47.2k [00:00<?, ?B/s]
100% 47.2k/47.2k [00:00<00:00, 70.2MB/s]


In [4]:
!unzip \*.zip && rm *.zip

Archive:  brain-stroke-dataset.zip
  inflating: brain_stroke.csv        


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class KNNEnsemble:
    """Majority-vote ensemble of k-nearest-neighbour classifiers.

    Parameters
    ----------
    n_estimators : int, default=10
        Number of ``KNeighborsClassifier`` models to train.
    n_neighbors : int, default=5
        ``n_neighbors`` value handed to every member classifier.
    bootstrap : bool, default=False
        When False (the default), every member is fitted on the
        full training data, so all members are trained identically and the
        vote adds nothing over a single model. When True, each member is
        fitted on rows drawn with replacement from the training data, which
        gives the members different training sets.
    random_state : int, numpy.random.Generator or None, default=None
        Seed for the bootstrap row sampling. Ignored when ``bootstrap`` is
        False.

    Attributes
    ----------
    models : list
        The fitted member classifiers. Replaced on every call to ``fit``.
    """

    def __init__(self, n_estimators=10, n_neighbors=5, bootstrap=False, random_state=None):
        self.n_estimators = n_estimators
        self.n_neighbors = n_neighbors
        self.bootstrap = bootstrap
        self.random_state = random_state
        self.models = []

    def fit(self, X, y):
        """Train ``n_estimators`` KNN classifiers and return ``self``.

        Any models left over from a previous ``fit`` call are discarded, so
        refitting never mixes classifiers trained on different data.
        """
        fitted = []
        if self.bootstrap:
            # Convert once to plain arrays so positional row sampling works
            # even when y is a pandas Series with a non-contiguous index.
            X_rows = np.asarray(X)
            y_rows = np.asarray(y)
            n_samples = len(y_rows)
            rng = np.random.default_rng(self.random_state)

        for _ in range(self.n_estimators):
            knn = KNeighborsClassifier(n_neighbors=self.n_neighbors)
            if self.bootstrap:
                # Same indices for X and y keep features and labels aligned.
                picked = rng.integers(0, n_samples, size=n_samples)
                knn.fit(X_rows[picked], y_rows[picked])
            else:
                knn.fit(X, y)
            fitted.append(knn)

        # Assign only after every member trained, replacing any old models.
        self.models = fitted
        return self

    def predict(self, X):
        """Return the majority-vote label for each row of ``X``.

        Works for any label type the members emit (negative integers,
        strings, ...). Ties go to the smallest label in sorted order, which
        matches ``argmax(bincount(...))`` for non-negative integer labels.

        Raises
        ------
        ValueError
            If the ensemble holds no fitted models.
        """
        if not self.models:
            raise ValueError("KNNEnsemble has no fitted models; call fit() before predict().")

        # One row of votes per member, one column per sample.
        votes = np.array([model.predict(X) for model in self.models])
        # np.unique returns the labels sorted, which fixes the tie-break order.
        labels = np.unique(votes)
        # tally[i, j] = number of members that voted labels[i] for sample j.
        tally = np.stack([(votes == label).sum(axis=0) for label in labels])
        return labels[tally.argmax(axis=0)]


# Read the brain-stroke CSV that the unzip cell extracted into /content
data = pd.read_csv('/content/brain_stroke.csv')

# NOTE: no missing-value handling is performed; the frame is used as loaded.

# One-hot encode the categorical columns so every feature is numeric
categorical_columns = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
data = pd.get_dummies(data, columns=categorical_columns)

# Separate the target ('stroke') from the feature columns
y = data['stroke']  # Target variable
X = data.drop(columns='stroke')  # Features

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise features: statistics come from the training rows only and are
# then reused, unchanged, for the test rows
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the KNN ensemble and train it in one step (fit returns the ensemble)
knn_ensemble = KNNEnsemble(n_estimators=10, n_neighbors=5).fit(X_train_scaled, y_train)

# Majority-vote predictions for the held-out rows
ensemble_predictions = knn_ensemble.predict(X_test_scaled)

# Score the predictions with each metric, in the order the names are listed
ensemble_accuracy, ensemble_precision, ensemble_recall, ensemble_f1 = (
    score(y_test, ensemble_predictions)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report the four metrics, one per line
for metric_label, metric_value in (
    ("Ensemble Accuracy:", ensemble_accuracy),
    ("Ensemble Precision:", ensemble_precision),
    ("Ensemble Recall:", ensemble_recall),
    ("Ensemble F1-score:", ensemble_f1),
):
    print(metric_label, metric_value)


Ensemble Accuracy: 0.9428284854563691
Ensemble Precision: 0.2
Ensemble Recall: 0.018518518518518517
Ensemble F1-score: 0.03389830508474576
