In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the processed Cleveland heart-disease data directly from the UCI repository URL.
# Column names are passed explicitly via `names`, and "?" entries are parsed as NaN
# so that missing values can be handled in a later cell.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
column_names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"]
data = pd.read_csv(url, names=column_names, na_values=["?"])


In [None]:
# Show the freshly loaded DataFrame as this cell's output
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,2
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,3
301,57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,1.0,3.0,1


In [None]:
# Summarize column dtypes and non-null counts (reveals which columns contain missing values)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    float64
 1   sex       303 non-null    float64
 2   cp        303 non-null    float64
 3   trestbps  303 non-null    float64
 4   chol      303 non-null    float64
 5   fbs       303 non-null    float64
 6   restecg   303 non-null    float64
 7   thalach   303 non-null    float64
 8   exang     303 non-null    float64
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    float64
 11  ca        299 non-null    float64
 12  thal      301 non-null    float64
 13  target    303 non-null    int64  
dtypes: float64(13), int64(1)
memory usage: 33.3 KB


In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for every numeric column
data.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,299.0,301.0,303.0
mean,54.438944,0.679868,3.158416,131.689769,246.693069,0.148515,0.990099,149.607261,0.326733,1.039604,1.60066,0.672241,4.734219,0.937294
std,9.038662,0.467299,0.960126,17.599748,51.776918,0.356198,0.994971,22.875003,0.469794,1.161075,0.616226,0.937438,1.939706,1.228536
min,29.0,0.0,1.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,1.0,0.0,3.0,0.0
25%,48.0,0.0,3.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,3.0,0.0
50%,56.0,1.0,3.0,130.0,241.0,0.0,1.0,153.0,0.0,0.8,2.0,0.0,3.0,0.0
75%,61.0,1.0,4.0,140.0,275.0,0.0,2.0,166.0,1.0,1.6,2.0,1.0,7.0,2.0
max,77.0,1.0,4.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,3.0,3.0,7.0,4.0


In [None]:
# Discard every row containing at least one missing value (modifies `data` in place)
data.dropna(inplace=True)

# Binarize the target: 0 stays 0 (no heart disease), any non-zero value becomes 1
# (presence of heart disease)
data["target"] = data["target"].ne(0).astype("int64")


In [None]:
# Show the DataFrame again after dropping missing rows and binarizing the target
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,1
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,57.0,0.0,4.0,140.0,241.0,0.0,0.0,123.0,1.0,0.2,2.0,0.0,7.0,1
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,1
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,1


In [None]:
# Separate the label column (y) from the predictor columns (x)
y = data["target"]
x = data.drop(columns=["target"])

In [None]:
# Display the feature DataFrame (all columns except "target")
x

In [None]:
# Display the target Series
y

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both the training and the test features.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
# Random forest with 500 trees and a fixed seed for repeatable results
rf_classifier = RandomForestClassifier(
    random_state=99,
    n_estimators=500,
)


In [None]:
# Train the classifier on the standardized training features.
# The later user-input cell passes scaler.transform(...) output to this model,
# so the model has to be fitted on features in that same scaled space;
# fitting on the raw x_train would make those predictions meaningless.
rf_classifier.fit(x_train_scaled, y_train)

# Make predictions on the test data, transformed by the same fitted scaler
y_pred = rf_classifier.predict(x_test_scaled)


In [None]:
# Score the random forest predictions against the held-out labels
classification_rep = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report accuracy and the confusion matrix
print(f"Accuracy: {accuracy}")
print("Confusion Matrix:", confusion, sep="\n")


Accuracy: 0.8666666666666667
Confusion Matrix:
[[31  5]
 [ 3 21]]


In [None]:
# Prompt for one numeric value per feature, in the column order of `x`
user_input = [float(input(f"Enter the value for {col}: ")) for col in x.columns]

Enter the value for age: 88
Enter the value for sex: 0
Enter the value for cp: 65
Enter the value for trestbps: 32
Enter the value for chol: 513
Enter the value for fbs: 65
Enter the value for restecg: 15
Enter the value for thalach: 65
Enter the value for exang: 52
Enter the value for oldpeak: 12
Enter the value for slope: 54
Enter the value for ca: 21
Enter the value for thal: 32


In [None]:
# Transform the single row of user-entered values with the already-fitted scaler
user_input_scaled = scaler.transform([user_input])

# Classify that row with the random forest
user_prediction = rf_classifier.predict(user_input_scaled)

# Class 0 means no disease; anything else is reported as disease
if user_prediction[0] != 0:
    print("Based on the input, it is predicted that you have Heart disease.")
else:
    print("Based on the input, it is predicted that you do not have Heart disease.")

Based on the input, it is predicted that you have Heart disease.




In [None]:
# LOGISTIC REGRESSION
# Fit on the standardized features. Fitting on the raw x_train made the solver
# stop at its iteration limit (scikit-learn's warning recommends scaling the data),
# and the later user-input cell passes scaler.transform(...) output to this model,
# so training and inference must share the same feature scale.
logistic_regression = LogisticRegression(random_state=42)
logistic_regression.fit(x_train_scaled, y_train)

# Make predictions on the test set transformed by the same fitted scaler
y_pred_lr = logistic_regression.predict(x_test_scaled)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Score the logistic regression predictions against the held-out labels
classification_rep_lr = classification_report(y_test, y_pred_lr)
confusion_lr = confusion_matrix(y_test, y_pred_lr)
accuracy_lr = accuracy_score(y_test, y_pred_lr)

In [None]:
# Report accuracy and confusion matrix for logistic regression
print("Logistic Regression:")
print(f"Accuracy: {accuracy_lr}")
print("Confusion Matrix:", confusion_lr, sep="\n")


Logistic Regression:
Accuracy: 0.8666666666666667
Confusion Matrix:
[[31  5]
 [ 3 21]]


In [None]:
# Prompt for one numeric value per feature, in the column order of `x`
user_input = [float(input(f"Enter the value for {col}: ")) for col in x.columns]

Enter the value for age: 80
Enter the value for sex: 0
Enter the value for cp: 2
Enter the value for trestbps: 120
Enter the value for chol: 135
Enter the value for fbs: 5
Enter the value for restecg: 3
Enter the value for thalach: 200
Enter the value for exang: 3
Enter the value for oldpeak: 4
Enter the value for slope: 3
Enter the value for ca: 5
Enter the value for thal: 1


In [None]:
# Transform the single row of user-entered values with the already-fitted scaler
user_input_scaled = scaler.transform([user_input])

# Classify that row with the logistic regression model
user_prediction = logistic_regression.predict(user_input_scaled)

# Class 0 means no disease; anything else is reported as disease
if user_prediction[0] != 0:
    print("Based on the input, it is predicted that you have Heart disease.")
else:
    print("Based on the input, it is predicted that you do not have Heart disease.")

Based on the input, it is predicted that you do not have Heart disease.




In [None]:
# KNN
# Initialize the K-Nearest Neighbors (KNN) model with k=5 and fit it on the
# standardized features. KNN classifies by distance between feature vectors, so
# columns with large raw magnitudes would otherwise dominate; in addition, the
# later user-input cell passes scaler.transform(...) output to this model, so it
# must be trained in that same scaled space.
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(x_train_scaled, y_train)

# Make predictions on the test set transformed by the same fitted scaler
y_pred_knn = knn_classifier.predict(x_test_scaled)

In [None]:
# Score the KNN predictions against the held-out labels
classification_rep_knn = classification_report(y_test, y_pred_knn)
confusion_knn = confusion_matrix(y_test, y_pred_knn)
accuracy_knn = accuracy_score(y_test, y_pred_knn)

In [None]:
# Report accuracy and confusion matrix for KNN
print("\nK-Nearest Neighbors (KNN):")
print(f"Accuracy: {accuracy_knn}")
print("Confusion Matrix:", confusion_knn, sep="\n")



K-Nearest Neighbors (KNN):
Accuracy: 0.6833333333333333
Confusion Matrix:
[[24 12]
 [ 7 17]]


In [None]:
# Prompt for one numeric value per feature, in the column order of `x`
user_input = [float(input(f"Enter the value for {col}: ")) for col in x.columns]

In [None]:
# Transform the single row of user-entered values with the already-fitted scaler
user_input_scaled = scaler.transform([user_input])

# Classify that row with the KNN model
user_prediction = knn_classifier.predict(user_input_scaled)

# Class 0 means no disease; anything else is reported as disease
if user_prediction[0] != 0:
    print("Based on the input, it is predicted that you have Heart disease.")
else:
    print("Based on the input, it is predicted that you do not have Heart disease.")

In [None]:
# NAIVE BAYES
# Initialize the Gaussian Naive Bayes model and fit it on the standardized
# features. The later user-input cell passes scaler.transform(...) output to this
# model, so it must learn its per-feature statistics in that same scaled space.
naive_bayes = GaussianNB()
naive_bayes.fit(x_train_scaled, y_train)

# Make predictions on the test set transformed by the same fitted scaler
y_pred_nb = naive_bayes.predict(x_test_scaled)

In [None]:
# Score the Naive Bayes predictions against the held-out labels
classification_rep_nb = classification_report(y_test, y_pred_nb)
confusion_nb = confusion_matrix(y_test, y_pred_nb)
accuracy_nb = accuracy_score(y_test, y_pred_nb)


In [None]:
# Report accuracy and confusion matrix for Gaussian Naive Bayes
print("\nNaive Bayes (Gaussian):")
print(f"Accuracy: {accuracy_nb}")
print("Confusion Matrix:", confusion_nb, sep="\n")



Naive Bayes (Gaussian):
Accuracy: 0.9166666666666666
Confusion Matrix:
[[35  1]
 [ 4 20]]


In [None]:
# Prompt for one numeric value per feature, in the column order of `x`
user_input = [float(input(f"Enter the value for {col}: ")) for col in x.columns]

Enter the value for age: 80
Enter the value for sex: 0
Enter the value for cp: 5
Enter the value for trestbps: 120
Enter the value for chol: 154
Enter the value for fbs: 3
Enter the value for restecg: 4
Enter the value for thalach: 1
Enter the value for exang: 6
Enter the value for oldpeak: 5
Enter the value for slope: 2
Enter the value for ca: 2
Enter the value for thal: 6


In [None]:
# Transform the single row of user-entered values with the already-fitted scaler
user_input_scaled = scaler.transform([user_input])

# Classify that row with the Naive Bayes model
user_prediction = naive_bayes.predict(user_input_scaled)

# Class 0 means no disease; anything else is reported as disease
if user_prediction[0] != 0:
    print("Based on the input, it is predicted that you have Heart disease.")
else:
    print("Based on the input, it is predicted that you do not have Heart disease.")

Based on the input, it is predicted that you have Heart disease.


