In [0]:
#importing the libraries:
import pandas as pd

In [0]:
# Read the heart-attack CSV into a Spark DataFrame (the first row holds the
# column names; column types are inferred from the data) and render it.
df = spark.read.csv(
    path='/FileStore/tables/heart_attack_prediction_dataset.csv',
    inferSchema=True,
    header=True,
)
display(df)

In [0]:
#creating a function to preprocess the data:
import pandas as pd
import sklearn
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.model_selection import train_test_split

def data_preprocessor(df, target_col='Heart Attack Risk'):
    """Turn the raw heart-attack dataset into a model-ready pandas DataFrame.

    Steps:
      1. Convert a Spark DataFrame to pandas (a pandas DataFrame is copied).
      2. Split 'Blood Pressure' ("systolic/diastolic") into two integer columns.
      3. Drop 'Patient ID' and the original 'Blood Pressure' column.
      4. Standard-scale the numeric feature columns and one-hot encode the
         remaining ones (first category of each column dropped).

    The target column is excluded from both transforms, so it appears exactly
    once in the result (unscaled, as the last column) and never leaks into the
    feature matrix.

    Note: the scaler and encoder are fit on every row passed in. Fit them on
    the training split only if strict train/test isolation is required.

    Args:
        df: Spark DataFrame (anything exposing ``toPandas()``) or a pandas
            DataFrame containing 'Patient ID', 'Blood Pressure' and
            ``target_col``.
        target_col: Name of the label column. Defaults to 'Heart Attack Risk'.

    Returns:
        pandas.DataFrame with the scaled numeric features (original column
        names), the one-hot encoded features (``<column>_<category>`` names)
        and the untouched target column.

    Raises:
        KeyError: If a required column is missing from ``df``.
    """
    # Accept Spark or pandas input; copy pandas input so the caller's frame
    # is not mutated by the column assignments below.
    pdf = df.toPandas() if hasattr(df, 'toPandas') else df.copy()

    # Split the blood-pressure string once and reuse both halves.
    bp_parts = pdf['Blood Pressure'].str.split('/', n=2, expand=True)
    pdf['Systolic'] = bp_parts[0].astype('int')
    pdf['Diastolic'] = bp_parts[1].astype('int')

    # Keep only the modelling columns.
    new_df = pdf.drop(columns=['Patient ID', 'Blood Pressure'])

    # Separate the label BEFORE choosing columns to transform: the target is
    # numeric and would otherwise be scaled and returned as a feature.
    features = new_df.drop(columns=[target_col])
    num_cols = features.select_dtypes(include='number').columns.to_list()
    cat_cols = features.select_dtypes(exclude='number').columns.to_list()

    parts = []

    if num_cols:
        sc = StandardScaler()
        scaled = sc.fit_transform(features[num_cols])
        # Reuse the source index so the concat below aligns row-for-row.
        parts.append(pd.DataFrame(scaled, columns=num_cols, index=features.index))

    if cat_cols:
        # The dense-output switch was renamed across scikit-learn releases
        # (sparse -> sparse_output); densify the result explicitly instead so
        # the code does not depend on either spelling.
        ohe = OneHotEncoder(drop='first', handle_unknown='ignore')
        encoded = ohe.fit_transform(features[cat_cols])
        if hasattr(encoded, 'toarray'):
            encoded = encoded.toarray()
        parts.append(pd.DataFrame(encoded,
                                  columns=ohe.get_feature_names_out(),
                                  index=features.index))

    parts.append(new_df[target_col])
    final_df = pd.concat(parts, axis=1)
    return final_df

In [0]:
# Run the preprocessing function on the loaded dataset; the bare expression on
# the last line renders the resulting frame as the cell output.
data = data_preprocessor(df)
data

In [0]:
# Split the data into train / validation / test sets (60% / 20% / 20%).
from sklearn.model_selection import train_test_split

X = data.drop(columns='Heart Attack Risk')
y = data['Heart Attack Risk']

# A fixed seed makes the split (and every metric derived from it) reproducible
# across kernel restarts; stratifying keeps the class ratio the same in all
# three subsets.
X_train, X_rem, y_train, y_rem = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(
    X_rem, y_rem, test_size=0.5, random_state=42, stratify=y_rem)

print(X_train.shape)
print(X_val.shape)
print(X_test.shape)


In [0]:
#creating the model:
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

def create_model(hpo):
    """Build an uncompiled feed-forward binary classifier.

    Args:
        hpo: Mapping with "dense_l1" and "dense_l2", the widths of the two
            hidden layers. Values are cast to int before use.

    Returns:
        A Sequential model: two ReLU hidden layers followed by a single
        sigmoid output unit.
    """
    hidden_widths = (int(hpo["dense_l1"]), int(hpo["dense_l2"]))

    network = Sequential()
    for width in hidden_widths:
        network.add(Dense(width, activation="relu"))
    network.add(Dense(1, activation='sigmoid'))
    return network

In [0]:
#setting the hyperparameters:
from hyperopt import tpe,fmin,hp,SparkTrials

def run_nn(hpo):
    """Hyperopt objective: train one candidate network and score it.

    Builds a model from ``hpo``, trains it for 10 epochs on the notebook-level
    ``X_train``/``y_train`` and validates on ``X_val``/``y_val``.

    Args:
        hpo: Sampled hyperparameters with keys "dense_l1", "dense_l2",
            "learning_rate" and "optimizer" (the name of a class in
            ``tf.keras.optimizers``).

    Returns:
        float: Validation loss after the final epoch. Lower is better, which
        matches ``fmin``'s minimisation.
    """
    model = create_model(hpo)

    # Look up the optimizer class by name and instantiate it.
    optimizer_cls = getattr(tf.keras.optimizers, hpo["optimizer"])
    optimizer = optimizer_cls(learning_rate=hpo["learning_rate"])

    # Compile and train; metrics is passed as a list and validation_data as a
    # tuple, the forms documented by Keras.
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                  optimizer=optimizer,
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        epochs=10)

    # fmin MINIMISES the returned value, so return the loss itself. Negating
    # it would make the search prefer the configuration with the worst
    # validation loss.
    metric = history.history['val_loss'][-1]
    return float(metric)


In [0]:
# Define the hyperparameter search space and run the TPE search, with trials
# dispatched through SparkTrials.
space = dict(
    dense_l1=hp.quniform("dense_l1", 10, 30, 1),
    dense_l2=hp.quniform("dense_l2", 10, 30, 1),
    learning_rate=hp.loguniform("learning_rate", -5, 0),
    optimizer=hp.choice("optimizer", ["Adadelta", "Adam"]),
)

trials = SparkTrials(parallelism=4)

best_hyperparam = fmin(run_nn, space, algo=tpe.suggest, max_evals=4, trials=trials)

best_hyperparam

In [0]:
#creating the final model with best hyperparams:
import mlflow

def create_model(best_hyperparam):
    """Build the uncompiled final network from the selected hyperparameters.

    NOTE: this re-definition shadows the earlier ``create_model`` in this
    notebook, so the two must behave the same way.

    Args:
        best_hyperparam: Mapping with "dense_l1" and "dense_l2", the widths
            of the two hidden layers. ``fmin`` reports ``hp.quniform`` values
            as floats (e.g. 17.0), so they are cast to int here.

    Returns:
        A Sequential model: two ReLU hidden layers followed by a single
        sigmoid output unit.
    """
    model = Sequential()
    # Dense expects an integer unit count; cast the float-valued search result.
    model.add(Dense(int(best_hyperparam['dense_l1']), activation="relu"))
    model.add(Dense(int(best_hyperparam['dense_l2']), activation="relu"))
    model.add(Dense(1, activation='sigmoid'))
    return model

# fmin reports hp.choice parameters as the INDEX of the chosen option (0 or 1
# here), not the option itself. space_eval maps the raw result back onto the
# search space so "optimizer" becomes the optimizer's name again.
from hyperopt import space_eval

with mlflow.start_run(run_name="DL_Model_Heart_Attack") as run:
    mlflow.tensorflow.autolog()
    best_params = space_eval(space, best_hyperparam)
    model = create_model(best_params)

    # Use the optimizer the search actually selected rather than a fixed one:
    # the tuned learning rate only makes sense together with that optimizer.
    optimizer_call = getattr(tf.keras.optimizers, best_params['optimizer'])
    optimizer = optimizer_call(learning_rate=best_params['learning_rate'])

    # Compile the model (metrics as a list, the form documented by Keras).
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                  optimizer=optimizer,
                  metrics=['accuracy'])
    # Log the resolved values so the run records the optimizer name, not its
    # index in the hp.choice list.
    mlflow.log_params(best_params)
    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        epochs=10)

    # Report the validation loss after the final epoch.
    metric = history.history['val_loss'][-1]
    print(metric)


In [0]:
import mlflow
from sklearn.metrics import accuracy_score

# Load the model logged by the MLflow run started in this notebook (`run`). A
# hard-coded run ID only exists in the workspace where it was created and would
# evaluate a stale model after retraining.
logged_model = f'runs:/{run.info.run_id}/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame. Wrapping the result in a DataFrame and taking
# its first column handles both a DataFrame and an array return value.
raw_preds = loaded_model.predict(X_test)
probabilities = pd.DataFrame(raw_preds).iloc[:, 0]

# Threshold the sigmoid outputs at 0.5 in one vectorised step. Element-wise
# chained assignment (preds[0][i] = ...) is not guaranteed to write back to
# the frame.
preds = (probabilities >= 0.5).astype(int)

# accuracy_score expects (y_true, y_pred); compare by position, not by index.
score = accuracy_score(y_test.to_numpy(), preds.to_numpy())
print(score)

In [0]:
import flask
from flask import Flask

app = Flask("HeartPred")


@app.route("/", methods=["GET"])
def welcome():
    """Serve the landing page for GET requests to "/"."""
    return "Welcome to the Home page"


# The previous guard compared the Flask object itself to the string
# 'HeartPred', which is never true, so the server could not start. The
# standard entry-point check is used instead.
if __name__ == '__main__':
    # The auto-reloader re-executes the process, which does not work inside a
    # notebook kernel, so it is disabled. This call blocks until the server
    # is stopped.
    # NOTE(review): debug=True enables the interactive debugger; do not
    # expose this server beyond a development environment.
    app.run(debug=True, use_reloader=False)