# Import Modules

In [None]:
# Prepare dataset
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
# Modelling
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.model_selection import KFold , cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.multiclass import OneVsOneClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, recall_score
from sklearn.ensemble import GradientBoostingClassifier
# Saving weights
import joblib
#---------------------------------------
import warnings
warnings.filterwarnings("ignore")


# Prepare the Data

In [None]:
# Load the dataset into the notebook-wide DataFrame used by every later cell.
# NOTE(review): the absolute /content/ path looks environment-specific — confirm before running elsewhere.
train_data = pd.read_csv("/content/data_fin.csv")

In [None]:
# Preview the first rows.
train_data.head()

In [None]:
# Summary statistics as produced by DataFrame.describe().
print(train_data.describe())

In [None]:
# (number of rows, number of columns)
print(train_data.shape)

In [None]:
# Column names, dtypes and non-null counts.
train_data.info()

# Feature Engineering

### Check Null Data



In [None]:
# Count null entries per column.
print(train_data.isnull().sum())
# Author's observation: no null data (the cell output is not part of this file — re-check if the data changes).

### Check Missing Data

In [None]:
# Same check as the isnull() cell: pandas documents isnull as an alias of isna.
print(train_data.isna().sum())
# Author's observation: no missing data.

### Check Duplicated Data

In [None]:
# Flag rows that exactly repeat an earlier row, then count the flags.
duplicated = train_data.duplicated()
print('Number of duplicated rows:', duplicated.sum())

# Exploring the Data

In [None]:
sns.set()   # apply seaborn's default theme to all later plots (author's note: this adds a grid)

In [None]:
def bar_chart(feature):
  """Plot, per diagnosis, how often each value of `feature` occurs.

  Reads the notebook-level ``train_data`` DataFrame, counts the values of
  ``feature`` separately for the rows of every diagnosis label, and draws a
  grouped (non-stacked) bar chart with one group of bars per diagnosis.

  Args:
    feature: name of the ``train_data`` column whose values are counted.
  """
  # Every diagnosis label that gets its own group of bars. 'Anemia' was
  # previously counted but never added to the chart; it is included now.
  diagnoses = ['Healthy', 'Angina', 'Asthma', 'COPD', 'Pneumonia',
               'Heart Attack', 'Hypertension', 'Cardiac Arrest',
               'Arrhythmia', 'Anemia', 'Cardiogenic Shock']
  counts = [
      train_data[train_data["diagnose"] == label][feature].value_counts()
      for label in diagnoses
  ]
  # One row per diagnosis, one column per distinct value of the feature.
  df = pd.DataFrame(counts)
  df.index = diagnoses
  df.plot(kind='bar', stacked=False, figsize=(10, 5))

In [None]:
# Diagnosis-wise value counts for each categorical feature, one chart per cell.
bar_chart('sex')

In [None]:
bar_chart('geneticHeartDiseases')

In [None]:
bar_chart('geneticDiabetes')

In [None]:
bar_chart('faint')

In [None]:
bar_chart('sleep')

In [None]:
# Pair plot of the columns, coloured by diagnosis. np.errstate suppresses
# NumPy's divide-by-zero / invalid-value floating-point warnings while plotting.
with np.errstate(divide='ignore',invalid='ignore'):
    sns.pairplot(train_data, hue="diagnose", palette="husl")
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'age',fill=True)
fecet.set(xlim=(0,train_data['age'].max()))
fecet.add_legend()
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'HR',fill=True)
fecet.set(xlim=(0,train_data['HR'].max()))
fecet.add_legend()
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'HRV',fill=True)
fecet.set(xlim=(0,train_data['HRV'].max()))
fecet.add_legend()
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'RR',fill=True)
fecet.set(xlim=(0,train_data['RR'].max()))
fecet.add_legend()
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'SpO2',fill=True)
fecet.set(xlim=(0,train_data['SpO2'].max()))
fecet.add_legend()
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'Systolic_BP',fill=True)
fecet.set(xlim=(0,train_data['Systolic_BP'].max()))
fecet.add_legend()
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'Diastolic_BP',fill=True)
fecet.set(xlim=(0,train_data['Diastolic_BP'].max()))
fecet.add_legend()
plt.show()

In [None]:
fecet = sns.FacetGrid(train_data,hue='diagnose',aspect=4)
fecet.map(sns.kdeplot,'temperature',fill=True)
fecet.set(xlim=(30,train_data['temperature'].max()))
fecet.add_legend()
plt.show()

In [None]:
train_data.info()

In [None]:
plt.figure(figsize=(20,20))
sns.heatmap(train_data.corr(),annot=True,fmt='.2f')

In [None]:
train_data.head(10)

# Encode Categorical Variables

In [None]:
def encode_variables(Target_data):
  """Label-encode the categorical feature columns of `Target_data` in place.

  Each listed column is replaced by the integer codes produced by
  ``LabelEncoder.fit_transform``. The encoder is re-fitted for every column,
  so the codes of one column are independent of the others. Nothing is
  returned; the DataFrame passed in is modified.
  """
  categorical_columns = (
      'sex',
      'geneticDiabetes',
      'geneticHeartDiseases',
      'smoker',
      'faint',
      'sleep',
  )
  encoder = LabelEncoder()
  for column in categorical_columns:
    Target_data[column] = encoder.fit_transform(Target_data[column])


def encode_diagnose(Diagnosis):
  """Replace the 'diagnose' column of `Diagnosis` with integer class codes, in place.

  The codes come from a freshly fitted ``LabelEncoder``; the encoder itself
  is not kept, and nothing is returned.
  """
  Diagnosis['diagnose'] = LabelEncoder().fit_transform(Diagnosis['diagnose'])

In [None]:
# Inspect the dtypes and a sample of rows before encoding.
train_data.info()

In [None]:
train_data.head()


In [None]:
# Both helpers overwrite columns of train_data in place with integer codes.
encode_variables(train_data)
encode_diagnose(train_data)

# Splitting the Data into Training and Testing Sets

In [None]:
# Split the data into training and testing sets
x_train = train_data.drop("diagnose",axis=1)
y_train = train_data["diagnose"]

x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.20, random_state=1)

In [None]:
train_data.head()

# Training the Models

In [None]:
def _fit_and_report(label, model):
    """Fit `model` on the training split and report its hold-out performance.

    Reads the notebook-level x_train / y_train / x_test / y_test. Prints the
    classification report and the accuracy (prefixed with `label`), followed
    by a blank separator.

    Args:
        label: human-readable model name used in the accuracy line.
        model: unfitted scikit-learn estimator; it is fitted in place.

    Returns:
        The weighted recall on the test split, rounded to 3 decimals.
    """
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(classification_report(y_test, y_pred, digits=3))
    print("%s Accuracy: %.3f" % (label, accuracy_score(y_test, y_pred)))
    print("\n")
    return round(recall_score(y_test, y_pred, average='weighted'), 3)


# Every fitted estimator keeps its own name because later cells persist
# several of them with joblib. The LDA model used to be stored in `lr` and
# was overwritten by Logistic Regression straight away; it now has its own
# name, so its recall is no longer lost.
lda = LinearDiscriminantAnalysis()
lda_recall = _fit_and_report("Linear Discriminant", lda)

lr = LogisticRegression(max_iter=10000)
lr_recall = _fit_and_report("Logistic Regression", lr)

dt = DecisionTreeClassifier()
dt_recall = _fit_and_report("Decision Tree", dt)

rf = RandomForestClassifier()
rf_recall = _fit_and_report("Random Forest", rf)

knn = KNeighborsClassifier()
knn_recall = _fit_and_report("K-Nearest Neighbors", knn)

svc = SVC()
svc_recall = _fit_and_report("Support Vector Machine", svc)

gnb = GaussianNB()
gnb_recall = _fit_and_report("Naive Bayes", gnb)

mlp = MLPClassifier(max_iter=10000)
mlp_recall = _fit_and_report("Multi-Layer Perceptron", mlp)

gbc = GradientBoostingClassifier()
gbc_recall = _fit_and_report("Gradient Boosting Classifier", gbc)
# --------------------------------------------------------------------------
print("\n\n")
# Spot-check the same model families with 10-fold stratified cross-validation
# on the training split.
models = [
    ('LR Logistic Regression Accuracy', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ('LDA Linear Discriminat Analysis Accuracy:', LinearDiscriminantAnalysis()),
    ('KNN Accurcy:', KNeighborsClassifier()),
    ('CenterART Decision Tree Accuracy:', DecisionTreeClassifier()),
    ('NB Naive Bayes Accuracy:', GaussianNB()),
    ('Multi-Layer Perceptron Accuracy:', MLPClassifier(max_iter=10000)),
    ('Random Forest Accuracy:', RandomForestClassifier()),
    ('SVM', SVC(gamma='auto')),
    ('GradientBoostingClassifier Accuracy:', GradientBoostingClassifier()),
]
# The splitter is configured identically for every model (fixed integer
# seed), so it is built once instead of once per iteration.
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, x_train, y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    # mean accuracy over the folds, with the standard deviation in parentheses
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))



### Save the weights as JOBLIB

In [None]:
filename = 'heart_model_KNN.joblib'
joblib.dump(knn, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_LogisticRegression.joblib'
joblib.dump(lr, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_DecisionTree.joblib'
joblib.dump(dt, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_RandomForest.joblib'
joblib.dump(rf, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_SVM.joblib'
joblib.dump(svc, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_GaussianNB.joblib'
joblib.dump(gnb, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_Multi_LayerPerceptron.joblib'
joblib.dump(mlp, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

# Normalization

Normalize the features, then split them into training and testing sets

In [None]:
x_data = train_data.drop(['diagnose'], axis=1)
# normalization
X = (x_data - np.min(x_data)) / (np.max(x_data) - np.min(x_data)).values
Y = train_data['diagnose']
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)


In [None]:
train_data.head(15)

### Training

In [None]:
def _fit_and_report(label, model):
    """Fit `model` on the training split and report its hold-out performance.

    Reads the notebook-level x_train / y_train / x_test / y_test. Prints the
    classification report and the accuracy (prefixed with `label`), followed
    by a blank separator.

    Args:
        label: human-readable model name used in the accuracy line.
        model: unfitted scikit-learn estimator; it is fitted in place.

    Returns:
        The weighted recall on the test split, rounded to 3 decimals.
    """
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(classification_report(y_test, y_pred, digits=3))
    print("%s Accuracy: %.3f" % (label, accuracy_score(y_test, y_pred)))
    print("\n")
    return round(recall_score(y_test, y_pred, average='weighted'), 3)


# Every fitted estimator keeps its own name because later cells persist
# several of them with joblib. The first model is Linear Discriminant
# Analysis (the old comment called it Logistic Regression); it used to be
# stored in `lr` and was overwritten immediately, so it now has its own name.
lda = LinearDiscriminantAnalysis()
lda_recall = _fit_and_report("Linear Discriminant", lda)

lr = LogisticRegression(max_iter=10000)
lr_recall = _fit_and_report("Logistic Regression", lr)

dt = DecisionTreeClassifier()
dt_recall = _fit_and_report("Decision Tree", dt)

rf = RandomForestClassifier()
rf_recall = _fit_and_report("Random Forest", rf)

knn = KNeighborsClassifier()
knn_recall = _fit_and_report("K-Nearest Neighbors", knn)

svc = SVC()
svc_recall = _fit_and_report("Support Vector Machine", svc)

gnb = GaussianNB()
gnb_recall = _fit_and_report("Naive Bayes", gnb)

mlp = MLPClassifier(max_iter=10000)
mlp_recall = _fit_and_report("Multi-Layer Perceptron", mlp)
# ----------------------------------------------------------
print("\n\n")
# Spot-check the same model families with 10-fold stratified cross-validation
# on the training split.
models = [
    ('LR Logistic Regression Accuracy', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ('LDA Linear Discriminat Analysis Accuracy:', LinearDiscriminantAnalysis()),
    ('KNN Accurcy:', KNeighborsClassifier()),
    ('CenterART Decision Tree Accuracy:', DecisionTreeClassifier()),
    ('NB Naive Bayes Accuracy:', GaussianNB()),
    ('Multi-Layer Perceptron Accuracy:', MLPClassifier(max_iter=10000)),
    ('Random Forest Accuracy:', RandomForestClassifier()),
    ('SVM', SVC(gamma='auto')),
]
# The splitter is configured identically for every model (fixed integer
# seed), so it is built once instead of once per iteration.
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, x_train, y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    # mean accuracy over the folds, with the standard deviation in parentheses
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))



### Saving weights as Joblib

In [None]:
filename = 'heart_model_KNN_normalization.joblib'
joblib.dump(knn, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_LogisticRegression_normalization.joblib'
joblib.dump(lr, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_DecisionTree_normalization.joblib'
joblib.dump(dt, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_RandomForest_normalization.joblib'
joblib.dump(rf, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_SVM_normalization.joblib'
joblib.dump(svc, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_GaussianNB_normalization.joblib'
joblib.dump(gnb, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

In [None]:
filename = 'heart_model_Multi_LayerPerceptron_normalization.joblib'
joblib.dump(mlp, open(filename, 'wb'))

In [None]:
loaded_model = joblib.load(open(filename, 'rb'))
model_pred = loaded_model.predict(x_test)
# storing recall_score for later comparision
model_recall = round(recall_score(y_test,model_pred,average='weighted'),3)
print (classification_report(y_test, model_pred, labels=None, target_names=None, sample_weight=None, digits=3, output_dict=False))
acc = accuracy_score(y_test, model_pred)
print ("Accuracy: %.3f" % acc)

# Predict model

In [None]:
# Compare the most recently reloaded model's predictions with the true labels
# for the first seven hold-out rows, then print the head of the encoded data.
preview_rows = slice(0, 7)
model_pred = loaded_model.predict(x_test[preview_rows])
print(model_pred)
print(y_test[preview_rows])
print(train_data.head(35))