In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [3]:
data = pd.read_csv("stroke.csv", index_col=0)

In [4]:
data.head()

Unnamed: 0_level_0,gender,age,hypertension,alcohol_intake,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
9046,Male,67.0,147,2,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
51676,Female,61.0,135,4,0,Yes,Self-employed,Rural,202.21,,never smoked,1
31112,Male,80.0,115,3,1,Yes,Private,Rural,105.92,32.5,never smoked,1
60182,Female,49.0,125,2,0,Yes,Private,Urban,171.23,34.4,smokes,1
1665,Female,79.0,130,3,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


In [5]:
data['hypertension'] = data['hypertension'].astype(float) 
data['alcohol_intake'] = data['alcohol_intake'].astype(float) 

In [6]:
data = data.drop("ever_married" , axis=1)
data = data.drop("work_type" , axis=1)

In [7]:
for column in data.columns:
    if data[column].dtype == np.number:
        continue
    data[column] = LabelEncoder().fit_transform( data[column] )

In [8]:
data = data.fillna(data.mean())

In [9]:
X = data.drop(["stroke"], axis=1)
Y = data["stroke"]

In [10]:
x_scaler = MinMaxScaler()
x_scaler.fit(X)
column_names = X.columns
X[column_names]=x_scaler.transform(X)

In [11]:
X.head()

Unnamed: 0_level_0,gender,age,hypertension,alcohol_intake,heart_disease,Residence_type,avg_glucose_level,bmi,smoking_status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
9046,1.0,0.816895,0.858974,0.333333,1.0,1.0,0.801173,0.30126,0.333333
51676,0.0,0.743652,0.782051,0.666667,0.0,0.0,0.678875,0.21538,0.666667
31112,1.0,0.975586,0.653846,0.5,1.0,0.0,0.234159,0.254296,0.666667
60182,0.0,0.597168,0.717949,0.333333,0.0,1.0,0.535793,0.27606,1.0
1665,0.0,0.963379,0.75,0.5,0.0,0.0,0.549141,0.15693,0.666667


In [12]:
X=np.array(X)
Y= np.array(Y)

In [13]:
from imblearn.over_sampling import SMOTE 
sm = SMOTE(random_state = 2) 
X_res, Y_res = sm.fit_sample(X,Y.ravel())

In [14]:
print("After OverSampling, counts of label '1': {}".format(sum(Y_res == 1))) 
print("After OverSampling, counts of label '0': {}".format(sum(Y_res == 0))) 

After OverSampling, counts of label '1': 2751
After OverSampling, counts of label '0': 2751


In [15]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation,Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.metrics import binary_crossentropy

In [16]:
model = Sequential([
    Dense(32, input_shape=(X_res.shape[1],), activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(2, activation='softmax'),    
])

In [17]:
model.compile(optimizer=Adam(learning_rate = 0.0001),
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

In [18]:
model.fit(X_res,Y_res,validation_split=0.1,batch_size=10,epochs=145,shuffle=True,verbose=2)

Train on 4951 samples, validate on 551 samples
Epoch 1/145
4951/4951 - 1s - loss: 0.7126 - accuracy: 0.4676 - val_loss: 0.6355 - val_accuracy: 0.9165
Epoch 2/145
4951/4951 - 0s - loss: 0.6707 - accuracy: 0.6599 - val_loss: 0.6412 - val_accuracy: 0.7604
Epoch 3/145
4951/4951 - 1s - loss: 0.6460 - accuracy: 0.7334 - val_loss: 0.6175 - val_accuracy: 0.7804
Epoch 4/145
4951/4951 - 1s - loss: 0.6190 - accuracy: 0.7497 - val_loss: 0.6109 - val_accuracy: 0.7132
Epoch 5/145
4951/4951 - 1s - loss: 0.5872 - accuracy: 0.7649 - val_loss: 0.5795 - val_accuracy: 0.7677
Epoch 6/145
4951/4951 - 1s - loss: 0.5521 - accuracy: 0.7742 - val_loss: 0.5291 - val_accuracy: 0.8076
Epoch 7/145
4951/4951 - 1s - loss: 0.5219 - accuracy: 0.7796 - val_loss: 0.5126 - val_accuracy: 0.8040
Epoch 8/145
4951/4951 - 1s - loss: 0.5000 - accuracy: 0.7841 - val_loss: 0.4828 - val_accuracy: 0.8203
Epoch 9/145
4951/4951 - 1s - loss: 0.4859 - accuracy: 0.7813 - val_loss: 0.5120 - val_accuracy: 0.7931
Epoch 10/145
4951/4951 - 1

Epoch 80/145
4951/4951 - 1s - loss: 0.4139 - accuracy: 0.8152 - val_loss: 0.4817 - val_accuracy: 0.8276
Epoch 81/145
4951/4951 - 1s - loss: 0.4131 - accuracy: 0.8138 - val_loss: 0.4603 - val_accuracy: 0.8348
Epoch 82/145
4951/4951 - 1s - loss: 0.4122 - accuracy: 0.8158 - val_loss: 0.4414 - val_accuracy: 0.8457
Epoch 83/145
4951/4951 - 1s - loss: 0.4121 - accuracy: 0.8162 - val_loss: 0.4820 - val_accuracy: 0.8312
Epoch 84/145
4951/4951 - 1s - loss: 0.4117 - accuracy: 0.8160 - val_loss: 0.4555 - val_accuracy: 0.8403
Epoch 85/145
4951/4951 - 1s - loss: 0.4114 - accuracy: 0.8150 - val_loss: 0.4117 - val_accuracy: 0.8621
Epoch 86/145
4951/4951 - 1s - loss: 0.4108 - accuracy: 0.8202 - val_loss: 0.4659 - val_accuracy: 0.8348
Epoch 87/145
4951/4951 - 1s - loss: 0.4104 - accuracy: 0.8142 - val_loss: 0.4763 - val_accuracy: 0.8330
Epoch 88/145
4951/4951 - 1s - loss: 0.4105 - accuracy: 0.8142 - val_loss: 0.4484 - val_accuracy: 0.8457
Epoch 89/145
4951/4951 - 1s - loss: 0.4096 - accuracy: 0.8180 - 

<tensorflow.python.keras.callbacks.History at 0x122ec4a6a20>

In [19]:
#user_input=input("Enter the values one by one")
#user_input=user_input.split(",")


#for i in range(len(user_input)):
    # convert each item to int type
 #   user_input[i] = float(user_input[i])
    

Enter the values one by one1,67,147,2,1,1,228.69,36.60,1


In [20]:
#user_input = np.array(user_input)
#user_input = user_input.reshape(1,-1)
#user_input = x_scaler.transform(user_input)
#pred = model.predict(user_input)
#print(np.argmax(pred[0]))

1


In [19]:
model.save("final.h5")

In [20]:
new_model = tf.keras.models.load_model('final.h5')

In [21]:
new_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                320       
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 18        
Total params: 1,002
Trainable params: 1,002
Non-trainable params: 0
_________________________________________________________________


In [22]:
X_Testing5= np.array([[1, 6,85,0,0,0,100,22.0,2]])

In [23]:
X_Testing5=X_Testing5.reshape(1,-1)
X_Testing5=x_scaler.transform(X_Testing5)
Y_pred5=new_model.predict(X_Testing5)
print(np.argmax(Y_pred5[0]))

0


In [24]:
import joblib

In [25]:
joblib.dump(x_scaler,'sc_scaler.pkl')

['sc_scaler.pkl']