# importing libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams 
from sklearn.metrics import precision_recall_curve, auc, roc_curve, roc_auc_score
from sklearn.metrics import average_precision_score


import warnings
warnings.filterwarnings("ignore")



# reading dataset

In [None]:
heart_data = pd.read_csv('heart.csv')


# print head of datset

In [None]:
heart_data.head()

In [None]:
heart_data.describe()


In [None]:
heart_data.shape

# Taking Care of Missing Values

In [None]:
heart_data.isnull().sum()

In [None]:
print(heart_data)

# Taking Care of Duplicate Values

In [None]:
heart_data_dup = heart_data.duplicated().any()
heart_data_dup

In [None]:
# Number of duplicate rows in the original DataFrame
print("Number of duplicate rows in original DataFrame:", heart_data.duplicated().sum())


In [None]:
#REMOVING DUPLICATES
heart_data = heart_data.drop_duplicates()


In [None]:
heart_data_dup =heart_data.duplicated().any()
heart_data_dup

In [None]:

# Number of duplicate rows in the new DataFrame
print("Number of duplicate rows in new DataFrame:", heart_data.duplicated().sum())


# coreleation matrix

In [None]:
correlation_matrix = heart_data.corr()

# Plotting the correlation matrix using a heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='plasma', fmt=".2f", linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Set up a 4x4 grid for subplots (or adjust as needed)
fig, axes = plt.subplots(4, 4, figsize=(15, 15))


# Flatten the axes for easy iteration
axes = axes.flatten()

# Define customization options
hist_kwargs = {
    'bins': 6,
    'alpha': 1,  # Transparency
    'edgecolor': 'black',
    'color': 'mediumblue'
}

# Iterate through each column and create a histogram (limit to 14 columns)
for i, column in enumerate(heart_data.columns[:14]):
    axes[i].hist(heart_data[column], **hist_kwargs)
    axes[i].set_title(column)
    axes[i].set_xlabel(column)
    axes[i].set_ylabel('Frequency')

# Remove empty subplots
for j in range(i+1, len(axes)):
    fig.delaxes(axes[j])

# Adjust layout for better spacing
plt.tight_layout(w_pad=0, h_pad=0)

# Show the plot
plt.show()







# model train

In [None]:
# Separating features (X) and target variable (Y)
X = heart_data.drop(columns='target', axis=1)
Y = heart_data['target']

In [None]:
# Splitting the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)


# Logistic Regression

In [None]:
 
# Logistic Regression
model = LogisticRegression()
model.fit(X_train, Y_train)




In [None]:
# Accuracy on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)


In [None]:
# Accuracy on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Displaying Logistic Regression accuracies
print("Logistic Regression Accuracy:")
print("Training Accuracy:", training_data_accuracy)
print("Test Accuracy:", test_data_accuracy)



In [None]:
# Get predicted probabilities on the test set
Y_prob = model.predict_proba(X_test)[:, 1]
# Precision-Recall Curve
precision, recall, _ = precision_recall_curve(Y_test, Y_prob)
pr_auc = auc(recall, precision)
average_precision = average_precision_score(Y_test, Y_prob)
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, color='blue', label=f'Precision-Recall Curve (AUC = {pr_auc:.2f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.text(0.5, 0.2, f'Average Precision: {average_precision:.2f}', ha='center', va='center')
plt.show()

# ROC Curve
fpr, tpr, _ = roc_curve(Y_test, Y_prob)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='red', label=f'ROC Curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend()
plt.text(0.5, 0.2, f'ROC AUC: {roc_auc:.2f}', ha='center', va='center')
plt.show()

# Print the scores
print(f'Average Precision: {average_precision:.2f}')
print(f'ROC AUC: {roc_auc:.2f}')

# K-Nearest Neighbors

In [None]:
# K-Nearest Neighbors
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, Y_train)



In [None]:
# Accuracy on training data for KNN
knn_train_prediction = knn_model.predict(X_train)
knn_training_accuracy = accuracy_score(Y_train, knn_train_prediction)



In [None]:
# Accuracy on test data for KNN
knn_test_prediction = knn_model.predict(X_test)
knn_test_accuracy = accuracy_score(Y_test, knn_test_prediction)


In [None]:

# Displaying K-Nearest Neighbors accuracies
print("\nK-Nearest Neighbors Accuracy:")
print("Training Accuracy:", knn_training_accuracy)
print("Test Accuracy:", knn_test_accuracy)


In [None]:

# Assuming knn_scores is defined for each K value
k_values = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25]  # Example K values
knn_scores = [0.75, 0.82, 0.88, 0.90, 0.79, 0.81, 0.85, 0.88, 0.91, 0.84, 0.87, 0.80, 0.86]  # Remove one element

# Plotting for KNN
plt.plot(k_values, knn_scores, color='blue', marker='o')

for k, score in zip(k_values, knn_scores):
    plt.text(k, score, f'({k}, {score:.2f})')

plt.xlabel('Number of Neighbors (K)')
plt.ylabel('Scores')
plt.title('K-Nearest Neighbors Scores for Different Numbers of Neighbors (K)')

# Find the index of the maximum KNN score
max_knn_score_index = knn_scores.index(max(knn_scores))
max_knn_value = k_values[max_knn_score_index]

# Print the highest KNN score in percentage and its corresponding K value
plt.text(max_knn_value, max(knn_scores), f'Highest KNN Score: {max(knn_scores) * 100:.2f}% (K={max_knn_value})', ha='right', va='bottom')

plt.show()


# Random Forest

In [None]:
# Random Forest
rf_model = RandomForestClassifier()
rf_model.fit(X_train, Y_train)


In [None]:
# Accuracy on training data for Random Forest
rf_train_prediction = rf_model.predict(X_train)
rf_training_accuracy = accuracy_score(Y_train, rf_train_prediction)



In [None]:
# Accuracy on test data for Random Forest
rf_test_prediction = rf_model.predict(X_test)
rf_test_accuracy = accuracy_score(Y_test, rf_test_prediction)



In [None]:
# Displaying Random Forest accuracies
print("\nRandom Forest Accuracy:")
print("Training Accuracy:", rf_training_accuracy)
print("Test Accuracy:", rf_test_accuracy)



In [None]:

# Assuming rf_scores is defined for each number of estimators
n_estimators = [10, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500]  # Example number of estimators
rf_scores = [0.78, 0.85, 0.88, 0.90, 0.91, 0.92, 0.93, 0.94, 0.95, 0.95, 0.96]  # Example Random Forest scores

# Plotting for Random Forest
plt.plot(n_estimators, rf_scores, color='green', marker='o')

for n, score in zip(n_estimators, rf_scores):
    plt.text(n, score, f'({n}, {score:.2f})')

plt.xlabel('Number of Estimators')
plt.ylabel('Scores')
plt.title('Random Forest Scores for Different Numbers of Estimators')

# Find the index of the maximum Random Forest score
max_rf_score_index = rf_scores.index(max(rf_scores))
max_rf_value = n_estimators[max_rf_score_index]

# Print the highest Random Forest score and its corresponding number of estimators
plt.text(max_rf_value, max(rf_scores), f'Highest RF Score: {max(rf_scores) * 100:.2f}% (Estimators={max_rf_value})', ha='right', va='bottom')

plt.show()


# Decision Tree

In [None]:
# Decision Tree
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, Y_train)



In [None]:
# Accuracy on training data for Decision Tree
dt_train_prediction = dt_model.predict(X_train)
dt_training_accuracy = accuracy_score(Y_train, dt_train_prediction)


In [None]:
# Accuracy on test data for Decision Tree
dt_test_prediction = dt_model.predict(X_test)
dt_test_accuracy = accuracy_score(Y_test, dt_test_prediction)



In [None]:
# Displaying Decision Tree accuracies
print("\nDecision Tree Accuracy:")
print("Training Accuracy:", dt_training_accuracy)
print("Test Accuracy:", dt_test_accuracy)


In [None]:

# Assuming dt_scores is defined for each number of maximum features
max_features = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]  # Example number of maximum features
dt_scores = [0.78, 0.82, 0.85, 0.87, 0.89, 0.90, 0.91, 0.92, 0.93, 0.93, 0.94, 0.95, 0.96, 0.96]  # Example Decision Tree scores

# Plotting for Decision Tree
plt.plot(max_features, dt_scores, color='orange', marker='o')

for f, score in zip(max_features, dt_scores):
    plt.text(f, score, f'({f}, {score:.2f})')

plt.xlabel('Number of Maximum Features')
plt.ylabel('Scores')
plt.title('Decision Tree Scores for Different Numbers of Maximum Features')

# Find the index of the maximum Decision Tree score
max_dt_score_index = dt_scores.index(max(dt_scores))
max_dt_value = max_features[max_dt_score_index]

# Print the highest Decision Tree score and its corresponding number of maximum features
plt.text(max_dt_value, max(dt_scores), f'Highest DT Score: {max(dt_scores) * 100:.2f}% (Max Features={max_dt_value})', ha='right', va='bottom')

plt.show()


# accuracies of all model used 

In [None]:
import matplotlib.pyplot as plt

# Assuming you have the accuracy values for each model
models = ['Logistic Regression', 'K-N Neighbors', 'Random Forest', 'Decision Tree']
training_accuracies = [training_data_accuracy, knn_training_accuracy, rf_training_accuracy, dt_training_accuracy]
test_accuracies = [test_data_accuracy, knn_test_accuracy, rf_test_accuracy, dt_test_accuracy]

# Convert accuracy values to percentages
training_accuracies_percent = [acc * 100 for acc in training_accuracies]
test_accuracies_percent = [acc * 100 for acc in test_accuracies]

# Plotting the bar chart
bar_width = 0.35
index = np.arange(len(models))

plt.bar(index, training_accuracies_percent, bar_width, label='Training Accuracy', color='blue')
plt.bar(index + bar_width, test_accuracies_percent, bar_width, label='Test Accuracy', color='orange')



plt.xlabel('Models')
plt.ylabel('Accuracy (%)')
plt.title('Model Accuracy Comparison between Training and Test Data')

plt.xticks(index + bar_width / 2, models)
plt.legend()


plt.show()


# prediction of result

In [None]:
# Assuming the model variable is your trained Logistic Regression model

# Sample input data for prediction (you can replace this with your own data)
sample_data = np.array([43, 1, 0, 120, 177, 0, 0, 120, 1, 2.5, 1, 0, 3])

# Reshape the input data to match the model's expectations
sample_data = sample_data.reshape(1, -1)

# Make a prediction
prediction = model.predict(sample_data)

# Display the prediction
if prediction[0] == 1:
    print("The model predicts that the individual has heart disease.")
else:
    print("The model predicts that the individual does not have heart disease.")


In [None]:
from tkinter import *
import joblib
from tkinter import messagebox

In [None]:
from tkinter import messagebox

def show_entry_fields():
    p1 = int(e1.get())
    p2 = int(e2.get())
    p3 = int(e3.get())
    p4 = int(e4.get())
    p5 = int(e5.get())
    p6 = int(e6.get())
    p7 = int(e7.get())
    p8 = int(e8.get())
    p9 = int(e9.get())
    p10 = float(e10.get())
    p11 = int(e11.get())
    p12 = int(e12.get())
    p13 = int(e13.get())
    
    model = joblib.load('model_joblib_heart')
    result = model.predict([[p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13]])
    
    result_text = "No Heart Disease" if result == 0 else "Possibility of Heart Disease"
    
    # Display the result in a pop-up message box
    messagebox.showinfo("Prediction Result", result_text)

    
    
master = Tk()
master.title("Heart Disease Prediction System")


label = Label(master, text = "Heart Disease Prediction System"
                          , bg = "black", fg = "white"). \
                               grid(row=0,columnspan=2)


Label(master, text="Enter Your Age").grid(row=1)
Label(master, text="Male Or Female [1/0]").grid(row=2)
Label(master, text="Enter Value of CP").grid(row=3)
Label(master, text="Enter Value of trestbps").grid(row=4)
Label(master, text="Enter Value of chol").grid(row=5)
Label(master, text="Enter Value of fbs").grid(row=6)
Label(master, text="Enter Value of restecg").grid(row=7)
Label(master, text="Enter Value of thalach").grid(row=8)
Label(master, text="Enter Value of exang").grid(row=9)
Label(master, text="Enter Value of oldpeak").grid(row=10)
Label(master, text="Enter Value of slope").grid(row=11)
Label(master, text="Enter Value of ca").grid(row=12)
Label(master, text="Enter Value of thal").grid(row=13)



e1 = Entry(master)
e2 = Entry(master)
e3 = Entry(master)
e4 = Entry(master)
e5 = Entry(master)
e6 = Entry(master)
e7 = Entry(master)
e8 = Entry(master)
e9 = Entry(master)
e10 = Entry(master)
e11 = Entry(master)
e12 = Entry(master)
e13 = Entry(master)

e1.grid(row=1, column=1)
e2.grid(row=2, column=1)
e3.grid(row=3, column=1)
e4.grid(row=4, column=1)
e5.grid(row=5, column=1)
e6.grid(row=6, column=1)
e7.grid(row=7, column=1)
e8.grid(row=8, column=1)
e9.grid(row=9, column=1)
e10.grid(row=10, column=1)
e11.grid(row=11, column=1)
e12.grid(row=12, column=1)
e13.grid(row=13, column=1)



Button(master, text='Predict', command=show_entry_fields).grid()

mainloop()

In [142]:
pip install Flask


Note: you may need to restart the kernel to use updated packages.


In [154]:
from flask import Flask, render_template, request, jsonify
import joblib

app = Flask(__name__)

# Load the trained model
model = joblib.load('model_joblib_heart')

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    if request.method == 'POST':
        # Get user input from the form
        features = [float(x) for x in request.form.values()]

        # Make a prediction using the model
        result = model.predict([features])[0]

        # Display the result in a user-friendly way
        result_text = "No Heart Disease" if result == 0 else "Possibility of Heart Disease"

        return render_template('index.html', result=result_text)

if __name__ == '__main__':
    app.run(debug=True)





 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1