In [298]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

In [299]:
# Load the stroke dataset; the path is relative to the notebook's working directory.
csv_path = 'stroke.csv'
df = pd.read_csv(csv_path)

In [300]:
# Preview the first five rows of the raw data.
df.head(n=5)

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


In [301]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

id                     0
gender                 0
age                    0
hypertension           0
heart_disease          0
ever_married           0
work_type              0
Residence_type         0
avg_glucose_level      0
bmi                  201
smoking_status         0
stroke                 0
dtype: int64

In [302]:
# Mean of the bmi column; pandas skips NaN entries by default.
bmi_mean = df.bmi.mean()

In [303]:
# Display the mean BMI (used below to impute the missing bmi values).
bmi_mean

28.893236911794666

In [304]:
# Median of the bmi column; pandas skips NaN entries by default.
bmi_median = df.bmi.median()

In [305]:
# Display the median BMI (computed for reference; the imputation below uses the mean).
bmi_median

28.1

In [306]:
# Impute missing BMI values with the column mean.
# The fill is restricted to the 'bmi' column: a bare df.fillna(bmi_mean) would
# also write the BMI mean into any other column that happened to contain NaN.
stroke_1 = df.fillna({'bmi': bmi_mean})

In [307]:
# Display the imputed frame to check the filled bmi values.
# NOTE(review): this renders the whole frame (pandas truncates it); stroke_1.head() would be enough.
stroke_1

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.600000,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,28.893237,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.500000,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.400000,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.000000,never smoked,1
...,...,...,...,...,...,...,...,...,...,...,...,...
5105,18234,Female,80.0,1,0,Yes,Private,Urban,83.75,28.893237,never smoked,0
5106,44873,Female,81.0,0,0,Yes,Self-employed,Urban,125.20,40.000000,never smoked,0
5107,19723,Female,35.0,0,0,Yes,Self-employed,Rural,82.99,30.600000,never smoked,0
5108,37544,Male,51.0,0,0,Yes,Private,Rural,166.29,25.600000,formerly smoked,0


In [308]:
# Remove the 'id' column; `columns=` already implies the column axis.
stroke = stroke_1.drop(columns=['id'])

In [309]:
# Re-check missing values per column after imputation (isna is the same check as isnull).
stroke.isna().sum()

gender               0
age                  0
hypertension         0
heart_disease        0
ever_married         0
work_type            0
Residence_type       0
avg_glucose_level    0
bmi                  0
smoking_status       0
stroke               0
dtype: int64

In [310]:
# Display the frame after the 'id' column has been dropped.
stroke

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,67.0,0,1,Yes,Private,Urban,228.69,36.600000,formerly smoked,1
1,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,28.893237,never smoked,1
2,Male,80.0,0,1,Yes,Private,Rural,105.92,32.500000,never smoked,1
3,Female,49.0,0,0,Yes,Private,Urban,171.23,34.400000,smokes,1
4,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.000000,never smoked,1
...,...,...,...,...,...,...,...,...,...,...,...
5105,Female,80.0,1,0,Yes,Private,Urban,83.75,28.893237,never smoked,0
5106,Female,81.0,0,0,Yes,Self-employed,Urban,125.20,40.000000,never smoked,0
5107,Female,35.0,0,0,Yes,Self-employed,Rural,82.99,30.600000,never smoked,0
5108,Male,51.0,0,0,Yes,Private,Rural,166.29,25.600000,formerly smoked,0


In [311]:
# Instantiate a LabelEncoder (maps each distinct label to an integer code).
label_encode = LabelEncoder()

In [312]:
# Integer-encode the categorical columns.
# Each column gets its own fitted encoder. Refitting one shared encoder would
# overwrite its classes_ on every call, leaving no way to decode the codes or
# to encode new records consistently at prediction time.
# NOTE(review): work_type and smoking_status look nominal; integer codes impose
# an arbitrary order on them. Consider one-hot encoding for those features.
categorical_columns = [
    'gender',
    'ever_married',
    'work_type',
    'Residence_type',
    'smoking_status',
]
label_encoders = {
    column: LabelEncoder().fit(df[column]) for column in categorical_columns
}

label_gender = label_encoders['gender'].transform(df.gender)
label_marital = label_encoders['ever_married'].transform(df.ever_married)
label_work = label_encoders['work_type'].transform(df.work_type)
label_residence = label_encoders['Residence_type'].transform(df.Residence_type)
label_smoking = label_encoders['smoking_status'].transform(df.smoking_status)

In [313]:
# Overwrite each categorical column of `stroke` with its integer-encoded values.
encoded_columns = {
    'gender': label_gender,
    'ever_married': label_marital,
    'work_type': label_work,
    'Residence_type': label_residence,
    'smoking_status': label_smoking,
}
for column_name, codes in encoded_columns.items():
    stroke[column_name] = codes

In [314]:
# Preview the last five rows to confirm the categorical columns are now numeric.
stroke.tail(n=5)

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
5105,0,80.0,1,0,1,2,1,83.75,28.893237,2,0
5106,0,81.0,0,0,1,3,1,125.2,40.0,2,0
5107,0,35.0,0,0,1,3,0,82.99,30.6,2,0
5108,1,51.0,0,0,1,2,0,166.29,25.6,1,0
5109,0,44.0,0,0,1,0,1,85.28,26.2,0,0


In [315]:
# Support-vector classifier with scikit-learn's default settings.
# NOTE(review): no feature scaling is visible in this notebook; SVMs are usually
# sensitive to feature scale, so consider a StandardScaler pipeline — confirm.
model = svm.SVC()

In [316]:
# Split the frame into the target (the 'stroke' column) and the feature matrix (everything else).
Y = stroke['stroke']
X = stroke.drop(columns=['stroke'])

In [317]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle for reproducibility.
# NOTE(review): the split is not stratified on Y; if the target is imbalanced,
# consider stratify=Y — confirm.
split_parts = train_test_split(X, Y, test_size=0.2, random_state=2)
X_train, X_test, Y_train, Y_test = split_parts

In [318]:
# Show (rows, columns) for the training and test feature matrices.
(X_train.shape, X_test.shape)

((4088, 10), (1022, 10))

In [319]:
# Train the classifier on the training split.
model.fit(X=X_train, y=Y_train)

In [320]:
# Predict the stroke label for every row of the held-out test split.
test_data_prediction = model.predict(X=X_test)

In [321]:
# Inspect the predicted labels for the test split.
test_data_prediction

array([0, 0, 0, ..., 0, 0, 0])

In [322]:
# Fraction of test rows whose predicted label matches the true label.
# accuracy_score takes (y_true, y_pred): ground-truth labels first, predictions second.
# NOTE(review): if the target is heavily imbalanced, accuracy alone can look good
# for a model that rarely predicts the positive class; consider recall or a
# confusion matrix as well — confirm against the class counts.
score = accuracy_score(Y_test, test_data_prediction)

In [323]:
# Display the test-set accuracy.
score

0.9500978473581213

In [324]:
# Predictive system: classify one hand-entered patient record with the fitted model.

# Feature values in the same order as the columns of X:
# gender, age, hypertension, heart_disease, ever_married, work_type,
# Residence_type, avg_glucose_level, bmi, smoking_status
# (categorical fields are given as their integer-encoded codes).
# Named input_data rather than `input` so the builtin input() is not shadowed.
input_data = (0,80.0,1,0,1,2,1,83.75,8.1,2)

# Wrap the record in a one-row DataFrame that reuses X's column names, so the
# model receives the same feature names it was fitted with.
input_frame = pd.DataFrame([input_data], columns=X.columns)

predict = model.predict(input_frame)

# predict is a length-1 array; test its single element rather than the array.
if predict[0] == 1:
    print("Chance of heart stroke")
else:
    print("no chance of heart stroke")

no chance of heart stroke


