# **HEART DISEASE PREDICTION**

### **Importing the Dependencies**

In [26]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score, f1_score
import pickle
import warnings
warnings.filterwarnings('ignore')


### **Data Collection and Processing**

In [27]:
# Load the CSV data into a Pandas DataFrame
heart_data = pd.read_csv('heart.csv')

# Preview the first 5 rows of the dataset
print("First 5 rows of the dataset:")
print(heart_data.head())

# Preview the last 5 rows of the dataset
print("\nLast 5 rows of the dataset:")
print(heart_data.tail())

# Number of rows and columns in the dataset
print("\nDataset shape:", heart_data.shape)

# Column dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
print("\nDataset info:")
heart_data.info()

# Count of missing values per column
print("\nMissing values in each column:")
print(heart_data.isnull().sum())

# Summary statistics of the columns (count, mean, std, quartiles, ...)
print("\nStatistical measures of the dataset:")
print(heart_data.describe())

# Class balance of the 'target' column
print("\nDistribution of target variable:")
print(heart_data['target'].value_counts())


First 5 rows of the dataset:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  

Last 5 rows of the dataset:
     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0  

### **Splitting the Features and Target**

In [28]:
# Separate the label column from the predictor columns.
Y = heart_data['target']
X = heart_data.drop(columns=['target'])

# Show the first rows of each piece; sep="\n" puts the heading on its own line
print("\nFeatures (X):", X.head(), sep="\n")
print("\nTarget (Y):", Y.head(), sep="\n")



Features (X):
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  
0   0     1  
1   0     2  
2   0     2  
3   0     2  
4   0     2  

Target (Y):
0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64


### **Splitting the Data into Training Data & Test Data**

In [29]:
# Hold out 20% of the rows as a test set. stratify=Y is passed so the split is
# made with respect to the class labels, and random_state pins the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

# Report the size of the full feature matrix and of each partition
print("\nShape of datasets:")
print("X:", X.shape, "X_train:", X_train.shape, "X_test:", X_test.shape)



Shape of datasets:
X: (303, 13) X_train: (242, 13) X_test: (61, 13)


### **Model Training and Evaluation**

In [30]:
# Define a function to evaluate models
def evaluate_model(model, X_train, X_test, Y_train, Y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model
        Estimator exposing ``fit`` and ``predict``, plus either
        ``predict_proba`` or ``decision_function`` for the AUC computation.
        The estimator is fitted in place.
    X_train, Y_train
        Features and labels used to fit the model.
    X_test, Y_test
        Features and labels used to compute the metrics.

    Returns
    -------
    tuple
        ``(accuracy, precision, auc_roc, f1)``, all computed on the test split.
    """
    model.fit(X_train, Y_train)
    Y_test_pred = model.predict(X_test)

    # AUC needs a continuous score rather than hard labels. Prefer the
    # probability of class index 1; fall back to the decision function for
    # estimators that do not expose predict_proba.
    if hasattr(model, "predict_proba"):
        Y_test_score = model.predict_proba(X_test)[:, 1]
    else:
        Y_test_score = model.decision_function(X_test)

    accuracy = accuracy_score(Y_test, Y_test_pred)
    precision = precision_score(Y_test, Y_test_pred)
    auc_roc = roc_auc_score(Y_test, Y_test_score)
    f1 = f1_score(Y_test, Y_test_pred)

    return accuracy, precision, auc_roc, f1

In [31]:
# Initialize the candidate models.
# Every estimator that has internal randomness gets a fixed random_state so
# that re-running the notebook reproduces the same scores. The value matches
# the random_state used for the train/test split.
# NOTE(review): the features are not scaled and warnings are silenced at the
# top of the notebook, so a LogisticRegression convergence warning would be
# hidden, and SVM / K-NN are sensitive to feature scale -- consider scaling.
SEED = 2

models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "Gradient Boosting": GradientBoostingClassifier(random_state=SEED),
    # probability=True enables predict_proba; its calibration shuffles the
    # data internally, hence the seed here as well.
    "SVM": SVC(probability=True, random_state=SEED),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

# Container for each model's metrics, keyed by model name
results = {}


In [32]:
# Column labels for the tuple returned by evaluate_model, in the same order
metric_labels = ("Accuracy", "Precision", "AUC-ROC", "F1 Score")

# Fit and score every candidate, storing its metrics under the model's name
for model_name, model in models.items():
    scores = evaluate_model(model, X_train, X_test, Y_train, Y_test)
    results[model_name] = dict(zip(metric_labels, scores))

# Print the results as a table with one row per model, one column per metric
print("\nModel Evaluation Results:")
results_df = pd.DataFrame(results).T
print(results_df)


Model Evaluation Results:
                     Accuracy  Precision   AUC-ROC  F1 Score
Logistic Regression  0.989672   0.983750  0.900693  0.900769
Decision Tree        0.770492   0.787879  0.768939  0.787879
Random Forest        0.786885   0.812500  0.882576  0.800000
Gradient Boosting    0.737705   0.774194  0.822511  0.750000
SVM                  0.622951   0.625000  0.769481  0.684932
K-Nearest Neighbors  0.622951   0.647059  0.637446  0.656716


### **Building a Predictive System**

In [33]:
# Using Logistic Regression for the predictive system
model = LogisticRegression()
model.fit(X_train, Y_train)

# One record to classify; values must be in the same column order as X
input_data = (62,0,0,140,268,0,0,160,0,3.6,0,2,2)

# Wrap the record in a one-row DataFrame that carries the training column
# names. The model was fitted on a DataFrame, so predicting on a bare ndarray
# triggers scikit-learn's feature-name mismatch warning (hidden by the global
# warning filter). Building the frame also raises if the number of values does
# not match the number of feature columns.
input_frame = pd.DataFrame([input_data], columns=X.columns)

prediction = model.predict(input_frame)
print("\nPrediction for input data:")
print("The Person has Heart Disease" if prediction[0] == 1 else "The Person does not have a Heart Disease")



Prediction for input data:
The Person does not have a Heart Disease


In [34]:
# Save the trained model.
# The context manager guarantees the file is flushed and closed even if
# pickling fails part-way through.
filename = 'heart_disease_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load the saved model back from the same path.
# NOTE: pickle.load can execute arbitrary code while deserializing -- only
# load model files that were produced by a trusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# List the feature columns the model expects. These names are read from the
# training frame X, not from loaded_model itself.
print("\nLoaded Model Columns:")
for column in X.columns:
    print(column)



Loaded Model Columns:
age
sex
cp
trestbps
chol
fbs
restecg
thalach
exang
oldpeak
slope
ca
thal


# **Conclusion**
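This notebook covers the entire workflow: data loading, a train/test split, training and comparing six classifiers, making a prediction for a single record, and saving the chosen model to disk. The scores above come from a single train/test split with only 61 test rows, so they are sensitive to how the data happened to be divided. You can further refine the evaluation by using cross-validation (`cross_val_score` is already imported) or other techniques, but this notebook should give you a comprehensive start for your analysis.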