Importing the Dependencies

In [None]:
pip uninstall tensorflow

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import DBSCAN
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

ImportError: cannot import name 'keras' from 'tensorflow' (unknown location)

Data Collection and Processing

In [None]:
# read the heart-disease CSV from disk into a Pandas DataFrame
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere
heart_csv_path = 'C:/Users/dasta/nltk/PAD/heart.csv'
heart_data = pd.read_csv(heart_csv_path)

In [None]:
# preview the opening five records of the dataset
heart_data.head(n=5)

In [None]:
# preview the closing five records of the dataset
heart_data.tail(n=5)

In [None]:
# (row count, column count) of the dataset
(len(heart_data.index), len(heart_data.columns))

In [None]:
# print a concise summary of the DataFrame (columns, non-null counts, dtypes)
heart_data.info()

In [None]:
# count the missing entries in each column
heart_data.isna().sum()

In [None]:
# descriptive statistics for the dataset's columns
summary_stats = heart_data.describe()
summary_stats

In [None]:
# how many records fall into each class of the target column
target_column = heart_data['target']
target_column.value_counts()

1 --> Defective Heart

0 --> Healthy Heart

Splitting the Features and Target

In [None]:
# Y holds the label column; X holds every remaining column as predictors
Y = heart_data['target']
X = heart_data.drop(columns=['target'])

In [None]:
# Show the feature matrix as the cell's last expression so the notebook renders
# it with pandas' rich (HTML) display instead of a plain-text print dump.
X

In [None]:
# Show the target column as the cell's last expression (notebook display)
# rather than printing it, consistent with how other frames are inspected.
Y

Splitting the Data into Training data & Test Data

In [None]:
# hold out 20% of the rows for testing; stratifying on Y and fixing the seed
# are passed through to train_test_split unchanged
split_options = dict(test_size=0.2, stratify=Y, random_state=2)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, **split_options)

In [None]:
# shapes of the full, training and test feature matrices, in that order
print(*(frame.shape for frame in (X, X_train, X_test)))

Model Training

Logistic Regression

In [None]:
# The features are fed to the solver unscaled, and the default iteration cap
# (max_iter=100) can stop the default lbfgs solver before it converges,
# producing a ConvergenceWarning. A higher cap lets the optimisation finish.
model = LogisticRegression(max_iter=1000)

In [None]:
# fit the logistic-regression classifier on the training split
model.fit(X=X_train, y=Y_train)

Model Evaluation

Accuracy Score

In [None]:
# accuracy on training data
X_train_prediction = model.predict(X_train)
# accuracy_score's signature is (y_true, y_pred): pass the ground truth first,
# matching the call order used for the One-Class SVM evaluation further down.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# report the training-set accuracy computed in the previous cell
training_report = ('Accuracy on Training data : ', training_data_accuracy)
print(*training_report)

In [None]:
# accuracy on test data
X_test_prediction = model.predict(X_test)
# accuracy_score's signature is (y_true, y_pred): pass the ground truth first,
# matching the call order used for the One-Class SVM evaluation further down.
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# report the held-out accuracy computed in the previous cell
test_report = ('Accuracy on Test data : ', test_data_accuracy)
print(*test_report)

# Anomaly Detection Techniques (KMeans, Local Outlier Factor (LOF), ROC curve for LOF, One-Class SVM)

In [None]:
# KMeans Clustering
# Min-max rescale every feature column before clustering: learn the per-column
# range from X first, then apply the transformation to the same data.
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Fitting the model
# n_init is pinned to 10 (the long-standing default) so the clustering result
# does not change with the installed scikit-learn version and newer releases
# do not emit a FutureWarning about the changing default.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X_scaled)

In [None]:
# Scatter plot of KMeans Clusters
fig = plt.figure(figsize=(10, 6))
# Create the 3-D axes through the figure: constructing Axes3D(fig, ...) directly
# no longer attaches the axes to the figure on recent matplotlib releases,
# which leaves the rendered plot empty.
ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)
# Plot the first three scaled feature columns, coloured by cluster label.
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], X_scaled[:, 2], c=kmeans.labels_.astype(float), edgecolor='k')
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_zlabel("Feature 3")
ax.set_title("KMeans Clustering")
plt.show()

In [None]:
# Local Outlier Factor (LOF)
# Configure the detector, then fit it and label every scaled sample in one call
lof_settings = {'n_neighbors': 20}
lof = LocalOutlierFactor(**lof_settings)
y_pred = lof.fit_predict(X_scaled)

In [None]:
# Plot scaled columns 0 and 4 against each other, coloured by the LOF label
lof_fig, lof_ax = plt.subplots(figsize=(10, 6))
lof_ax.scatter(X_scaled[:, 0], X_scaled[:, 4], c=y_pred, cmap='coolwarm')
lof_ax.set_title('Outlier Detection using LOF')
lof_ax.set_xlabel('Age')
lof_ax.set_ylabel('Cholesterol')
plt.show()


In [None]:
# ROC curve for LOF
# Compute the curve's points and the area under it
from sklearn.metrics import roc_curve, auc

# flip the sign of the fitted negative outlier factors before scoring them
# against the target labels
lof_scores = -lof.negative_outlier_factor_
fpr, tpr, _ = roc_curve(Y, lof_scores)
roc_auc = auc(fpr, tpr)


In [None]:
# Draw the LOF ROC curve together with the diagonal chance line
roc_fig, roc_ax = plt.subplots(figsize=(10, 6))
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (AUC = %0.2f)' % roc_auc)
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (LOF)')
roc_ax.legend(loc="lower right")
plt.show()

In [None]:
#One-Class SVM
from sklearn.svm import OneClassSVM

# build and fit the detector on the training features (labels are not used)
one_class_svm = OneClassSVM(gamma='auto').fit(X_train)

# predict on the training data, then recode the 1 / -1 outputs as 0 / 1
raw_training_flags = one_class_svm.predict(X_train)
training_predictions = (raw_training_flags == -1).astype(raw_training_flags.dtype)

In [None]:
# Score the One-Class SVM on the training split.
# NOTE: training_data_accuracy / test_data_accuracy are the same names used for
# the logistic-regression scores earlier, so those values are overwritten here.
training_data_accuracy = accuracy_score(Y_train, training_predictions)
print('Accuracy on training data: ', training_data_accuracy)

# predict on the test split and recode the 1 / -1 outputs as 0 / 1
raw_test_flags = one_class_svm.predict(X_test)
test_predictions = (raw_test_flags == -1).astype(raw_test_flags.dtype)

# score the recoded test predictions
test_data_accuracy = accuracy_score(Y_test, test_predictions)
print('Accuracy on test data: ', test_data_accuracy)

In [None]:
# Plot test-set columns 0 and 4 against each other, coloured by the SVM label
svm_fig, svm_ax = plt.subplots(figsize=(10, 6))
svm_ax.scatter(X_test.iloc[:, 0], X_test.iloc[:, 4], c=test_predictions, cmap='coolwarm')
svm_ax.set_title('Outlier Detection using One-Class SVM')
svm_ax.set_xlabel('Age')
svm_ax.set_ylabel('Cholesterol')
plt.show()

Building a Predictive System

In [None]:
# one sample, with its values listed in the same order as the columns of X
input_data = (62,0,0,140,268,0,0,160,0,3.6,0,2,2)

# change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array to a single row as we are predicting for only one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so give the sample the same column names;
# predicting on a bare ndarray makes scikit-learn warn about missing feature
# names. Building the frame also fails loudly if the value count is wrong.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_data_frame)
print(prediction)

# class 0 is reported as healthy, anything else as heart disease
if prediction[0] == 0:
    print('The Person does not have a Heart Disease')
else:
    print('The Person has Heart Disease')

Saving the trained model

In [None]:
import pickle

In [None]:
# Serialise the trained model to disk.
filename = 'heart_disease_model.sav'
# The context manager closes (and therefore flushes) the file even if
# pickle.dump raises; the bare open() call left the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# loading the saved model
# NOTE: unpickling can execute arbitrary code; only load files you created or trust.
# The context manager closes the file handle that the bare open() call leaked.
with open('heart_disease_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# list the feature column names, one per line
for feature_name in X.columns.tolist():
    print(feature_name)