In [1]:
import os
os.environ['KAGGLE_CONFIG_DIR'] ='/content'
!kaggle datasets download -d jillanisofttech/brain-stroke-dataset
!unzip \*.zip && rm *.zip

Downloading brain-stroke-dataset.zip to /content
  0% 0.00/47.2k [00:00<?, ?B/s]
100% 47.2k/47.2k [00:00<00:00, 51.2MB/s]
Archive:  brain-stroke-dataset.zip
  inflating: brain_stroke.csv        


In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from scipy import stats

# Load the dataset
data = pd.read_csv('/content/brain_stroke.csv')

# Handle missing values if any
data = data.dropna()

# Convert categorical variables to numeric
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])
data['ever_married'] = label_encoder.fit_transform(data['ever_married'])
data['work_type'] = label_encoder.fit_transform(data['work_type'])
data['Residence_type'] = label_encoder.fit_transform(data['Residence_type'])
data['smoking_status'] = label_encoder.fit_transform(data['smoking_status'])

# Split the dataset into features and target variable
X = data.drop(columns=['stroke'])  # Features
y = data['stroke']  # Target variable

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Generalized Mean Distance KNN implementation
# Generalized Mean Distance KNN implementation
class GMDKNN:
    def __init__(self, n_neighbors=5, p=2):
        self.n_neighbors = n_neighbors
        self.p = p

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y
        return self

    def predict(self, X):
        y_pred = []
        for i in range(X.shape[0]):
            distances = np.linalg.norm(X[i] - self.X_train, ord=self.p, axis=1)
            nearest_indices = np.argsort(distances)[:self.n_neighbors]
            nearest_labels = self.y_train.iloc[nearest_indices]  # Use iloc to access labels by index
            mode, _ = stats.mode(nearest_labels)
            y_pred.append(mode[0])  # Corrected this line
        return np.array(y_pred)


# Initialize and train the Generalized Mean Distance KNN classifier
gmd_knn = GMDKNN(n_neighbors=5, p=2)
gmd_knn.fit(X_train_scaled, y_train)

# Make predictions on the test set using Generalized Mean Distance KNN classifier
gmd_knn_predictions = gmd_knn.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, gmd_knn_predictions)
precision = precision_score(y_test, gmd_knn_predictions)
recall = recall_score(y_test, gmd_knn_predictions)
f1 = f1_score(y_test, gmd_knn_predictions)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


IndexError: invalid index to scalar variable.