<a href="https://colab.research.google.com/github/RicardoR002/NLP-CAI2300C/blob/main/Predicting_Heart_Disease_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predicting Heart Disease Neural Network

Ricardo Ruano CAI2300C

##  Dataset contains various health metrics and risk factors to predict whether a patient has heart disease.

## Step 1: Explore and Preprocess the Data

In [7]:
import pandas as pd

# Load the dataset
df = pd.read_csv('https://github.com/RicardoR002/Data/raw/refs/heads/main/HeartDiseaseTrain-Test.csv')

# Display the first few rows of the dataset
print(df.head())

# Display the column names
print(df.columns)

   age     sex chest_pain_type  resting_blood_pressure  cholestoral  \
0   52    Male  Typical angina                     125          212   
1   53    Male  Typical angina                     140          203   
2   70    Male  Typical angina                     145          174   
3   61    Male  Typical angina                     148          203   
4   62  Female  Typical angina                     138          294   

      fasting_blood_sugar               rest_ecg  Max_heart_rate  \
0    Lower than 120 mg/ml  ST-T wave abnormality             168   
1  Greater than 120 mg/ml                 Normal             155   
2    Lower than 120 mg/ml  ST-T wave abnormality             125   
3    Lower than 120 mg/ml  ST-T wave abnormality             161   
4  Greater than 120 mg/ml  ST-T wave abnormality             106   

  exercise_induced_angina  oldpeak        slope vessels_colored_by_flourosopy  \
0                      No      1.0  Downsloping                           Two   
1 

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   age                            1025 non-null   int64  
 1   sex                            1025 non-null   object 
 2   chest_pain_type                1025 non-null   object 
 3   resting_blood_pressure         1025 non-null   int64  
 4   cholestoral                    1025 non-null   int64  
 5   fasting_blood_sugar            1025 non-null   object 
 6   rest_ecg                       1025 non-null   object 
 7   Max_heart_rate                 1025 non-null   int64  
 8   exercise_induced_angina        1025 non-null   object 
 9   oldpeak                        1025 non-null   float64
 10  slope                          1025 non-null   object 
 11  vessels_colored_by_flourosopy  1025 non-null   object 
 12  thalassemia                    1025 non-null   o

In [9]:
# Check for missing values
print(df.isnull().sum())

age                              0
sex                              0
chest_pain_type                  0
resting_blood_pressure           0
cholestoral                      0
fasting_blood_sugar              0
rest_ecg                         0
Max_heart_rate                   0
exercise_induced_angina          0
oldpeak                          0
slope                            0
vessels_colored_by_flourosopy    0
thalassemia                      0
target                           0
dtype: int64


In [10]:
from sklearn.preprocessing import LabelEncoder

# Handle missing values if necessary
df = df.dropna()

# Separate input features and target variable
X = df.drop(columns=['target'])  # 'class' is your target column
y = df['target'].values

# Encode the categorical features in X
X = pd.get_dummies(X, drop_first=True)

# Encode the binary target variable y
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)  # use this for binary classification

n_features = X.shape[1]
# y will now be 0 for one class (e.g., 'good') and 1 for the other class (e.g., 'bad')

## Step 2: Split the Data

In [11]:
from sklearn.model_selection import train_test_split

# Split into train and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

## Step 3: Define and Compile the Model

In [12]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Define the model
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(n_features,)))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Step 4: Fit the Model

In [13]:
model.fit(X_train, y_train, epochs=150, batch_size=32, verbose=0)

<keras.src.callbacks.history.History at 0x7e9dcfcab130>

## Step 5: Evaluate the Model

In [14]:
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print('Test Accuracy: %.3f' % acc)

Test Accuracy: 0.808


## Step 6: Make Predictions

# Making Predictions with Actual Data

## Example Input Data (Row)

In [15]:
import numpy as np

# Constructing the input row
row = np.array([
    63,  # 'age'
    145,  # 'resting_blood_pressure'
    233,  # 'cholesterol'
    150,  # 'Max_heart_rate'
    2.3,  # 'oldpeak'
    1,  # 'sex_Male' (1 for Male, 0 for Female)
    0,  # 'chest_pain_type_Atypical angina'
    0,  # 'chest_pain_type_Non-anginal pain'
    1,  # 'chest_pain_type_Typical angina'
    0,  # 'fasting_blood_sugar_Lower than 120 mg/ml'
    0,  # 'rest_ecg_No'
    1,  # 'rest_ecg_ST wave abnormality'
    0,  # 'exercise_induced_angina_Yes'
    0,  # 'slope_Flat'
    1,  # 'slope_Upsloping'
    0,  # 'vessels_colored_by_flourosopy_One'
    0,  # 'vessels_colored_by_flourosopy_Three'
    1,  # 'vessels_colored_by_flourosopy_Two'
    0,  # 'vessels_colored_by_flourosopy_Zero'
    0,  # 'thalassemia_No'
    1,  # 'thalassemia_Normal'
    0   # 'thalassemia_Reversable Defect'
]).reshape(1, -1)

# Make the prediction
yhat = model.predict(row)
print('Predicted: %.3f' % yhat[0][0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Predicted: 0.159


## Making a Prediction


In [16]:
yhat = model.predict([row])
print('Predicted Probability: %.3f' % yhat[0][0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Predicted Probability: 0.159
