In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import warnings
 
# Load the dataset from the file
data = pd.read_csv('ml_dataset.csv')
 
# Handle missing values in the 'bmi' column
data['bmi'] = data['bmi'].replace('N/A', np.nan).astype(float)
 
# Encode categorical variables using label encoding
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])
data['ever_married'] = label_encoder.fit_transform(data['ever_married'])
data['work_type'] = label_encoder.fit_transform(data['work_type'])
data['Residence_type'] = label_encoder.fit_transform(data['Residence_type'])
data['smoking_status'] = label_encoder.fit_transform(data['smoking_status'])
 
# Define the features (X) and target variable (y)
X = data.drop(columns=['id', 'stroke'])
y = data['stroke']
 
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
# Impute missing values in the 'bmi' column with the mean
imputer = SimpleImputer(strategy='mean')
X_train['bmi'] = imputer.fit_transform(X_train[['bmi']])
X_test['bmi'] = imputer.transform(X_test[['bmi']])
 
# Create an SVM classifier
clf = svm.SVC()
 
# Fit the SVM model on the training data
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=UserWarning)
    clf.fit(X_train, y_train)
 
# Get the support vectors
support_vectors = clf.support_vectors_
print("Support Vectors:\n", support_vectors)
 
# Test the accuracy of the SVM classifier on the test set
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=UserWarning)
    y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
 
# Generate a classification report
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=UserWarning)
    classification_rep = classification_report(y_test, y_pred)
print("Classification Report:\n", classification_rep)

Support Vectors:
 [[  0.          62.           0.         ...  88.32        36.3
    0.        ]
 [  0.          79.           0.         ...  93.89        30.4
    2.        ]
 [  0.          81.           0.         ...  71.91        19.2
    0.        ]
 ...
 [  1.          57.           0.         ... 197.28        34.5
    1.        ]
 [  1.          61.           0.         ... 209.86        28.95231434
    0.        ]
 [  0.          73.           1.         ... 190.14        36.5
    2.        ]]
Accuracy: 0.9393346379647749
Classification Report:
               precision    recall  f1-score   support

           0       0.94      1.00      0.97       960
           1       0.00      0.00      0.00        62

    accuracy                           0.94      1022
   macro avg       0.47      0.50      0.48      1022
weighted avg       0.88      0.94      0.91      1022

