<a href="https://colab.research.google.com/github/TarekHasan011/Pattern-Recognition/blob/main/Pattern_Laboratory_Project_NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.python.client import device_lib

# Only the last expression of a cell is displayed, so the GPU name has to be
# printed explicitly. gpu_device_name() returns an empty string when
# TensorFlow cannot see a GPU.
gpu_name = tf.test.gpu_device_name()
print(f"GPU device: {gpu_name or 'none detected'}")

# Full list of devices (CPU and any accelerators) visible to TensorFlow.
device_lib.list_local_devices()

In [None]:
import pandas as pd
from pandas.api.types import is_string_dtype

# Load the raw stroke dataset; the literal 'N/A' marks a missing value.
data = pd.read_csv('healthcare-dataset-stroke-data.csv', na_values='N/A')

# Trim leading/trailing whitespace in every string-typed column so that
# category values compare equal regardless of stray padding.
text_columns = [name for name in data.columns if is_string_dtype(data[name].dtype)]
for name in text_columns:
    data[name] = data[name].str.strip()

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/validation/test partition is identical on every
# Restart & Run All; without it every run evaluates on a different test set.
SEED = 42

# Features: everything except the target and the row identifier.
X = data.loc[:, data.columns != 'stroke']
X = X.loc[:, X.columns != 'id']

# Target. XOR with 1 flips each 0/1 label, so after this line a value of 1
# means the original 'stroke' column was 0 and vice versa.
# NOTE(review): downstream precision/recall are computed for label 1, i.e.
# the flipped class - confirm this inversion is intended.
y = data['stroke']
y = y ^ 1

# 80/20 split first, then 12.5% of the remaining 80% as validation,
# giving 70% train - 10% validation - 20% test. Both splits are stratified
# on the target so the class ratio is preserved in every partition.
train_val_X, test_X, train_val_y, test_y = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED)
train_X, val_X, train_y, val_y = train_test_split(
    train_val_X, train_val_y, test_size=0.125, stratify=train_val_y, random_state=SEED)

In [None]:
from sklearn.impute import SimpleImputer
from pandas.api.types import is_string_dtype

# Imputed copies of the three splits, filled in column by column below.
si_train_X = pd.DataFrame()
si_val_X = pd.DataFrame()
si_test_X = pd.DataFrame()

for column in train_X.columns:
  # String columns get the most frequent value, all other columns the median.
  strategy = 'most_frequent' if is_string_dtype(train_X[column].dtype) else 'median'
  # The imputer is fitted on the training split only and then applied
  # unchanged to validation and test.
  si = SimpleImputer(strategy=strategy).fit(train_X[[column]])
  for source, target in ((train_X, si_train_X), (val_X, si_val_X), (test_X, si_test_X)):
    # transform() returns a 2-D single-column array; flatten it to a 1-D column.
    target[column] = si.transform(source[[column]]).ravel()

In [None]:
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder

# Encode the target with a LabelEncoder fitted on the training labels only.
le = LabelEncoder()
le.fit(train_y)
train_y = le.transform(train_y)
val_y = le.transform(val_y)
test_y = le.transform(test_y)

categorical_feature = ['gender','ever_married','work_type','Residence_type','smoking_status']
l_train_X = pd.DataFrame()
l_val_X = pd.DataFrame()
l_test_X = pd.DataFrame()

for column in train_X.columns:
  if column in categorical_feature:
    # LabelEncoder.transform raises ValueError for a category that is absent
    # from the training split, so a rare value landing only in validation or
    # test would crash this cell. OrdinalEncoder assigns the same sorted
    # integer codes to known categories and maps unseen ones to -1 instead.
    oe = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1, dtype=int)
    oe.fit(si_train_X[[column]])
    l_train_X[column] = oe.transform(si_train_X[[column]]).ravel()
    l_val_X[column] = oe.transform(si_val_X[[column]]).ravel()
    l_test_X[column] = oe.transform(si_test_X[[column]]).ravel()
  else:
    # Non-categorical columns are carried over unchanged.
    l_train_X[column] = si_train_X[column].copy()
    l_val_X[column] = si_val_X[column].copy()
    l_test_X[column] = si_test_X[column].copy()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature using statistics learned from the training split
# only; validation and test reuse that same fitted scaler.
ss = StandardScaler()
ss_train_X = ss.fit_transform(l_train_X)
ss_val_X, ss_test_X = ss.transform(l_val_X), ss.transform(l_test_X)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Width of the input layer follows the scaled feature matrix instead of a
# hard-coded 10, so the model stays in sync if the feature set changes.
n_features = ss_train_X.shape[1]

# Fully connected ReLU stack that widens to 10000 units and narrows again,
# ending in a single sigmoid unit for binary classification.
hidden_units = [100, 1000, 10000, 1000, 100, 10]

model = Sequential()
model.add(Dense(hidden_units[0], input_dim=n_features, activation='relu'))
for units in hidden_units[1:]:
    model.add(Dense(units, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

In [None]:
# Import from the public keras.utils namespace: keras.utils.vis_utils is an
# internal module path that is not available in newer Keras releases.
from keras.utils import plot_model

# Writes the architecture diagram to model.png and displays it in the cell.
plot_model(model,to_file='model.png',show_shapes=True,show_layer_names=True)

In [None]:
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once val_loss has failed to improve for 200 consecutive epochs.
es = EarlyStopping(monitor='val_loss',mode='min',verbose=1,patience=200)
# Save the model to best_model.h5 every time val_loss reaches a new minimum.
mc = ModelCheckpoint('best_model.h5',monitor='val_loss',mode='min',verbose=1,save_best_only=True)

hist = model.fit(ss_train_X,train_y,validation_data=(ss_val_X,val_y), epochs=2000,batch_size=512,callbacks=[es,mc])

# Learning curves. The second curve is the validation loss (val_loss), not the
# test loss, so it is labelled accordingly.
fig, ax = plt.subplots()
ax.plot(hist.history['loss'], label='train')
ax.plot(hist.history['val_loss'], label='validation')
ax.set_xlabel('epoch')
ax.set_ylabel('binary cross-entropy loss')
ax.set_title('Training vs. validation loss')
ax.legend()
plt.show()

In [None]:
import pickle as pkl
from keras.models import load_model

# ModelCheckpoint was created without save_weights_only, so best_model.h5
# holds the complete model (architecture, weights and compile settings).
# Loading it directly avoids keeping a second hand-copied definition of the
# network in sync with the one that was trained.
model = load_model('best_model.h5')

In [None]:
# Score the restored model on the held-out test split. With the compiled
# loss and the single 'accuracy' metric the result is [loss, accuracy].
test_scores = model.evaluate(x=ss_test_X, y=test_y)
test_scores



[0.1728934347629547, 0.951076328754425]

In [None]:
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score

# The final Dense(1, sigmoid) layer yields one probability per row, shape (n_samples, 1).
p = model.predict(ss_test_X)

# Threshold the whole array at once. Calling int() on each one-element row
# relies on array-to-scalar conversion, which NumPy has deprecated.
predictions = (p.ravel() > 0.5).astype(int).tolist()

# These scores describe label 1 only.
# NOTE(review): because the target was flipped with `y ^ 1`, label 1 is the
# class whose original 'stroke' value was 0 - confirm that is the class of interest.
print(f'Accuracy: {accuracy_score(test_y,predictions)}')
print(f'Precision: {precision_score(test_y,predictions)}')
print(f'Recall: {recall_score(test_y,predictions)}')
print(f'F1 Score: {f1_score(test_y,predictions)}')

# Per-class breakdown: a recall of 1.0 with precision equal to accuracy is what
# a model that predicts label 1 for every row would produce, and the report
# makes the performance on label 0 visible. zero_division=0 reports 0 instead
# of warning when a class is never predicted.
print(classification_report(test_y, predictions, zero_division=0))

Accuracy: 0.9510763209393346
Precision: 0.9510763209393346
Recall: 1.0
F1 Score: 0.9749247743229689
