In [1]:
import os
os.environ['KAGGLE_CONFIG_DIR'] ='/content'
!kaggle datasets download -d jillanisofttech/brain-stroke-dataset
!unzip \*.zip && rm *.zip

Downloading brain-stroke-dataset.zip to /content
  0% 0.00/47.2k [00:00<?, ?B/s]
100% 47.2k/47.2k [00:00<00:00, 2.60MB/s]
Archive:  brain-stroke-dataset.zip
  inflating: brain_stroke.csv        


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier

# Load the dataset from the CSV file
data = pd.read_csv('/content/brain_stroke.csv')

# Encode categorical variables
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])
data['ever_married'] = label_encoder.fit_transform(data['ever_married'])
data['work_type'] = label_encoder.fit_transform(data['work_type'])
data['Residence_type'] = label_encoder.fit_transform(data['Residence_type'])
data['smoking_status'] = label_encoder.fit_transform(data['smoking_status'])

# Split the dataset into features (X) and target variable (y)
X = data.drop(columns=['stroke'])
y = data['stroke']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize individual KNN classifiers
knn1 = KNeighborsClassifier(n_neighbors=5)
knn2 = KNeighborsClassifier(n_neighbors=7)
knn3 = KNeighborsClassifier(n_neighbors=9)

# Create an ensemble of KNN classifiers
ensemble = VotingClassifier(estimators=[('knn1', knn1), ('knn2', knn2), ('knn3', knn3)], voting='hard')

# Train the ensemble classifier
ensemble.fit(X_train_scaled, y_train)

# Make predictions on the test set
ensemble_predictions = ensemble.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, ensemble_predictions)
precision = precision_score(y_test, ensemble_predictions)
recall = recall_score(y_test, ensemble_predictions)
f1 = f1_score(y_test, ensemble_predictions)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Accuracy: 0.9438314944834504
Precision: 0.25
Recall: 0.018518518518518517
F1 Score: 0.034482758620689655
