## Importing the packages

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

: 

## Loading the dataset

In [None]:
brain_data=pd.read_csv("brain_stroke_new.csv")

: 

In [None]:
brain_data

: 

In [None]:
brain_data.shape

: 

In [None]:
brain_data["stroke"].value_counts()

: 

### Checking the data types

In [None]:
brain_data.info()

: 

In [None]:
brain_data.describe()

: 

## Data pre-processing

In [None]:
brain_data["stroke"].value_counts()

: 

>Since the `stroke` classes are not balanced, we create a smaller dataset with a roughly equal number of rows from each class.

In [None]:
# Keep the first 400 rows (in file order, not a random sample) labelled stroke == 0.
resampled_0 = brain_data.loc[brain_data["stroke"].eq(0)].iloc[:400]
resampled_0

: 

In [None]:
# Keep at most the first 407 rows (in file order) labelled stroke == 1.
resampled_1 = brain_data.loc[brain_data["stroke"].eq(1)].iloc[:407]
resampled_1

: 

In [None]:
# Stack the two per-class subsets and renumber the rows from 0.
new_df = pd.concat((resampled_0, resampled_1), axis=0).reset_index(drop=True)
new_df

: 

In [None]:
new_df.isnull().sum()

: 

In [None]:
# Discard every row that has a missing value in any column.
new_df = new_df.dropna(axis=0, how="any")
new_df

: 

In [None]:
# Give the `bmi` column a descriptive name (rebinding instead of mutating in place).
new_df = new_df.rename(columns={"bmi": "body_mass_index"})

: 

In [None]:
# Store `age` as an integer column; if it was read as float, any fractional
# part is dropped by the cast.
new_df = new_df.assign(age=new_df["age"].astype("int"))
new_df

: 

In [None]:
# Integer code for each category label, keyed by column name.
category_codes = {
    'gender': {'Female': 0, 'Male': 1},
    'ever_married': {'No': 0, 'Yes': 1},
    'work_type': {'children': 0, 'Govt_job': 1, 'Private': 2, 'Self-employed': 3},
    'Residence_type': {'Rural': 0, 'Urban': 1},
    'smoking_status': {'formerly smoked': 0, 'never smoked': 1, 'smokes': 2, 'Unknown': 3},
}
# Labels that are not listed above are left unchanged by `replace`.
new_df = new_df.replace(category_codes)

: 

In [None]:
new_df.head()

: 

In [None]:
# Separate the target column (`stroke`) from the predictor columns.
Y = new_df['stroke']
X = new_df.drop(columns='stroke')

: 

In [None]:
X.head()

: 

In [None]:
Y.head()

: 

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for testing; `stratify=Y` keeps the class ratio the
# same in both splits and `random_state` makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=2,
    stratify=Y,
)

: 

In [None]:
X_test

: 

In [None]:
Y_test

: 

In [None]:
print(X.shape,X_train.shape,X_test.shape)

: 

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

: 

In [None]:
# Candidate classifiers; `models_name` below lists their display labels in the
# same order, so the two lists must be kept in sync.
models = [
    LogisticRegression(random_state = 0),
    KNeighborsClassifier(n_neighbors=30),
    ExtraTreesClassifier(n_estimators=15, random_state=42,max_features="log2"),
    GaussianNB(),
    DecisionTreeClassifier(random_state = 0),
    RandomForestClassifier(random_state = 0),
]
models_name = [
    "Logistic Regression",
    "KNeighbors Classifier",
    "ExtraTrees Classifier",
    "GaussianNB",
    "DecisionTree Classifier",
    "RandomForest Classifier",
]

: 

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
score=[]
def compare_models(models):
    """Fit each model on the training split and print its test-set metrics.

    Every estimator in ``models`` is fitted on ``X_train``/``Y_train`` and used
    to predict ``X_test``. The accuracy, confusion matrix and classification
    report are printed, and the accuracy is stored, in order, in the
    module-level ``score`` list.

    Parameters
    ----------
    models : iterable of estimators
        Objects exposing ``fit(X, y)`` and ``predict(X)``. The i-th model is
        labelled with ``models_name[i]`` in the printed output.

    Returns
    -------
    None
        Results are printed and collected in ``score``.
    """
    # Start from an empty list on every call: re-running the call cell must not
    # append a second set of accuracies, because the bar plot pairs `score`
    # with `models_name` one-to-one.
    score.clear()

    for i, model in enumerate(models):
        model.fit(X_train, Y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(Y_test, y_pred)
        classification_rep = classification_report(Y_test, y_pred)
        conf_matrix = confusion_matrix(Y_test, y_pred)
        score.append(accuracy)

        print(f"Accuracy of {models_name[i]}")
        print(f"Accuracy: {accuracy:.4f}")
        print("\nConfusion Matrix:\n", conf_matrix)
        print("\nClassification Report:\n", classification_rep)
        print("------------------------------------------------------")

: 

In [None]:
compare_models(models)

: 

In [None]:
score

: 

In [None]:
sns.set(context='notebook', style=None)
# One colour per bar: six models need six colours, otherwise the palette cycles.
ax = sns.barplot(x=models_name, y=score, palette=['#C05780','#FF828B','#E7C582','#FF5C77','#0065A2','#00A5E3'])
ax.set_title('Classification Accuracy Comparison of Models', fontsize=18)
ax.set_ylabel('Accuracy')
for item in ax.get_xticklabels():
    item.set_rotation(80)
for p in ax.patches:
    # `score` holds accuracy fractions in [0, 1], so format them as a percentage
    # and anchor each label at the top-centre of its bar. The previous `+1`
    # y-offset assumed a 0-100 scale and put the anchor far outside the axes.
    ax.annotate('{:.1%}'.format(p.get_height()),
                (p.get_x() + p.get_width() / 2, p.get_height()),
                xytext=(0, 3), textcoords='offset points',
                ha='center', va='bottom')

: 

In [None]:
model=ExtraTreesClassifier(n_estimators=15, random_state=42,max_features="log2")

: 

In [None]:
model.fit(X_train,Y_train)

: 

In [None]:
X_test

: 

In [None]:
Y_test

: 

In [None]:
#making a predictive system
input_data=(0,53,0,0,1,1,1,64.17,41.5,1)

#changing the input data to numpy array
input_data_as_numpy_array=np.asarray(input_data)

#reshape to a single row and attach the training column names: the model was
#fitted on a DataFrame, so passing a bare numpy array drops the feature names
#(scikit-learn warns about this) and gives no protection against column order
input_data_reshaped=pd.DataFrame(input_data_as_numpy_array.reshape(1,-1),columns=X_train.columns)

prediction=model.predict(input_data_reshaped)
print(prediction)

if(prediction[0]==0):
    print("The person doesn\'t suffer brain stroke.")
else:
    print("The person suffers brain stroke.")

: 

In [None]:
#making a predictive system
input_data=(1,25,0,0,0,2,0,113.80,35.3,0)

#changing the input data to numpy array
input_data_as_numpy_array=np.asarray(input_data)

#reshape to a single row and attach the training column names: the model was
#fitted on a DataFrame, so passing a bare numpy array drops the feature names
#(scikit-learn warns about this) and gives no protection against column order
input_data_reshaped=pd.DataFrame(input_data_as_numpy_array.reshape(1,-1),columns=X_train.columns)

prediction=model.predict(input_data_reshaped)
print(prediction)

if(prediction[0]==0):
    print("The person doesn\'t suffer brain stroke.")
else:
    print("The person suffers brain stroke.")

: 

In [None]:
#making a predictive system
# NOTE(review): the 8th and 9th values here (1.264925, 0.916983) look like
# standardized numbers, unlike the raw-looking values used in the other
# examples (e.g. 64.17, 41.5). The model in this notebook is fitted on
# unscaled columns - confirm this input is intended.
input_data=(0,72,1,0,1,2,0,1.264925,0.916983,1)

#changing the input data to numpy array
input_data_as_numpy_array=np.asarray(input_data)

#reshape to a single row and attach the training column names: the model was
#fitted on a DataFrame, so passing a bare numpy array drops the feature names
#(scikit-learn warns about this) and gives no protection against column order
input_data_reshaped=pd.DataFrame(input_data_as_numpy_array.reshape(1,-1),columns=X_train.columns)

prediction=model.predict(input_data_reshaped)
print(prediction)

if(prediction[0]==0):
    print("The person doesn\'t suffer brain stroke.")
else:
    print("The person suffers brain stroke.")

: 

In [None]:
#making a predictive system
input_data=(1,25,0,0,0,2,0,113.80,35.3,0)

#changing the input data to numpy array
input_data_as_numpy_array=np.asarray(input_data)

#reshape to a single row and attach the training column names: the model was
#fitted on a DataFrame, so passing a bare numpy array drops the feature names
#(scikit-learn warns about this) and gives no protection against column order
input_data_reshaped=pd.DataFrame(input_data_as_numpy_array.reshape(1,-1),columns=X_train.columns)

prediction=model.predict(input_data_reshaped)
print(prediction)

if(prediction[0]==0):
    print("The person doesn\'t suffer brain stroke.")
else:
    print("The person suffers brain stroke.")

: 

In [None]:
import pickle

: 

In [None]:
filename='brainstroke_model.pkl'
# Write through a context manager so the file is flushed and closed as soon as
# the dump finishes, instead of leaving an open handle behind.
with open(filename,'wb') as model_file:
    pickle.dump(model,model_file)

: 

In [None]:
# Read the pickled model back; the context manager closes the file afterwards.
# Unpickling can execute arbitrary code, so only load files from a trusted
# source (here, the file written earlier in this notebook).
with open("brainstroke_model.pkl",'rb') as model_file:
    loaded_model=pickle.load(model_file)

: 

In [None]:
#making a predictive system
input_data=(1,25,0,0,0,2,0,113.80,35.3,0)

#changing the input data to numpy array
input_data_as_numpy_array=np.asarray(input_data)

#reshape to a single row and attach the training column names: the model was
#fitted on a DataFrame, so passing a bare numpy array drops the feature names
#(scikit-learn warns about this) and gives no protection against column order
input_data_reshaped=pd.DataFrame(input_data_as_numpy_array.reshape(1,-1),columns=X_train.columns)

prediction=loaded_model.predict(input_data_reshaped)
print(prediction)

if(prediction[0]==0):
    print("The person doesn\'t suffer brain stroke.")
else:
    print("The person suffers brain stroke.")

: 