<a href="https://colab.research.google.com/github/SKAZEXE/Zaidi/blob/main/Heart_Disease_Pred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

data = pd.read_csv("heart.csv")
data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [25]:
data.shape


(918, 12)

In [26]:
data.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


In [27]:
# Encode categorical variables
encoder = LabelEncoder()
categorical_cols = ['Sex', 'ChestPainType', 'FastingBS', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
for col in categorical_cols:
    data[col] = encoder.fit_transform(data[col])

# Split the data into features (X) and target (y)
X = data.drop('HeartDisease', axis=1)
y = data['HeartDisease']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [28]:
from sklearn.decomposition import PCA

# Create a PCA instance
pca = PCA(n_components=2)  # You can adjust the number of components as needed

# Fit and transform the training data
X_train_pca = pca.fit_transform(X_train)

# Transform the test data using the same PCA
X_test_pca = pca.transform(X_test)


In [29]:
from sklearn.ensemble import RandomForestClassifier

# Create and train the Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train_pca, y_train)


In [30]:
from sklearn.metrics import accuracy_score

# Make predictions on the test set
y_pred = model.predict(X_test_pca)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7717391304347826


In [31]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_pca, y_train)

# Get the best parameters
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Evaluate the best model
y_pred_best = best_model.predict(X_test_pca)
accuracy_best = accuracy_score(y_test, y_pred_best)
print("Best Model Accuracy:", accuracy_best)
print("Best Model Parameters:", best_params)


Best Model Accuracy: 0.7771739130434783
Best Model Parameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 50}


In [32]:
from sklearn.model_selection import cross_val_score

# Perform 5-fold cross-validation with the best model
cv_scores = cross_val_score(best_model, X_train_pca, y_train, cv=5, scoring='accuracy')

# Calculate the mean and standard deviation of cross-validation scores
mean_cv_accuracy = cv_scores.mean()
std_cv_accuracy = cv_scores.std()

print("Cross-Validation Mean Accuracy:", mean_cv_accuracy)
print("Cross-Validation Accuracy Standard Deviation:", std_cv_accuracy)


Cross-Validation Mean Accuracy: 0.8364924051812507
Cross-Validation Accuracy Standard Deviation: 0.029611409490888557


In [46]:
import pandas as pd
import joblib

# Load the saved best model
loaded_model = joblib.load('best_model.pkl')

# Create a dictionary with specific input values for prediction
input_data = {
    'Age': [45],  # Replace with the desired age
    'Sex': ['M'],  # Replace with 'M' or 'F'
    'ChestPainType': ['ATA'],  # Replace with 'ATA', 'NAP', 'ASY', or 'NA'
    'RestingBP': [140],  # Replace with the desired resting blood pressure
    'Cholesterol': [300],  # Replace with the desired cholesterol level
    'FastingBS': [0],  # Replace with 0 or 1
    'RestingECG': ['Normal'],  # Replace with 'Normal', 'ST', or 'NA'
    'MaxHR': [170],  # Replace with the desired maximum heart rate
    'ExerciseAngina': ['N'],  # Replace with 'N', 'Y', or 'NA'
    'Oldpeak': [1.5],  # Replace with the desired ST depression value
    'ST_Slope': ['Down']  # Replace with 'Up', 'Flat', 'Down', or 'NA'
}

# Create a DataFrame from the input data
input_df = pd.DataFrame(input_data)

# Preprocess the input data (similar to preprocessing for the training data)
input_encoded = input_df.copy()

# Encode 'Sex' column
input_encoded['Sex'] = input_encoded['Sex'].apply(lambda x: 1 if x == 'M' else 0)  # Encode 'M' as 1 and 'F' as 0

# Encode 'ChestPainType' column
chest_pain_mapping = {'ATA': 0, 'NAP': 1, 'ASY': 2, 'NA': 3}
input_encoded['ChestPainType'] = input_encoded['ChestPainType'].map(chest_pain_mapping)

# Encode 'RestingECG' column
resting_ecg_mapping = {'Normal': 0, 'ST': 1, 'NA': 2}
input_encoded['RestingECG'] = input_encoded['RestingECG'].map(resting_ecg_mapping)

# Encode 'ExerciseAngina' column
exercise_angina_mapping = {'N': 0, 'Y': 1, 'NA': 2}
input_encoded['ExerciseAngina'] = input_encoded['ExerciseAngina'].map(exercise_angina_mapping)

# Encode 'ST_Slope' column
st_slope_mapping = {'Up': 0, 'Flat': 1, 'Down': 2, 'NA': 3}
input_encoded['ST_Slope'] = input_encoded['ST_Slope'].map(st_slope_mapping)

# Apply PCA transformation to the input data
input_pca = pca.transform(input_encoded)

# Make a prediction on the input data
prediction = loaded_model.predict(input_pca)

# Print the prediction
if prediction[0] == 0:
    print("Prediction: No Heart Disease")
else:
    print("Prediction: Heart Disease")


Prediction: No Heart Disease




In [50]:
import pandas as pd
import joblib

# Load the saved best model
loaded_model = joblib.load('best_model.pkl')

# Input new data during runtime
new_data = {}

# Function for input validation
def get_valid_input(prompt, valid_options=None, is_numeric=False):
    while True:
        user_input = input(prompt).strip()

        # Check if the input is empty
        if not user_input:
            print("Input cannot be empty. Please try again.")
            continue

        # Check if the input is numeric if required
        if is_numeric:
            try:
                user_input = float(user_input)
            except ValueError:
                print("Invalid input. Please enter a valid numeric value.")
                continue

        # Check if the input is in the list of valid options if provided
        if valid_options and user_input not in valid_options:
            print("Invalid input. Please choose from the provided options.")
            continue

        return user_input

# 1. Age
new_data['Age'] = [get_valid_input("What is your age? ", is_numeric=True)]

# 2. Sex
while True:
    sex_input = input("Please specify your gender as 'M' for Male or 'F' for Female: ").strip().upper()
    if sex_input in ['M', 'F']:
        new_data['Sex'] = [sex_input]
        break
    else:
        print("Invalid input. Please enter 'M' for Male or 'F' for Female.")

# 3. Chest Pain Type
while True:
    print("Can you describe the type of chest pain you're experiencing?")
    print("Options: 'ATA' for typical angina, 'NAP' for non-anginal pain, 'ASY' for atypical angina, 'NA' if unsure.")
    chest_pain_input = input("Your choice: ").strip().upper()
    if chest_pain_input in ['ATA', 'NAP', 'ASY', 'NA']:
        new_data['ChestPainType'] = [chest_pain_input]
        break
    else:
        print("Invalid input. Please choose from 'ATA', 'NAP', 'ASY', or 'NA'.")

# 4. Resting Blood Pressure (RestingBP)
new_data['RestingBP'] = [get_valid_input("What is your resting blood pressure (mm Hg)? ", is_numeric=True)]

# 5. Cholesterol
new_data['Cholesterol'] = [get_valid_input("What is your cholesterol level (mg/dL)? ", is_numeric=True)]

# 6. Fasting Blood Sugar (FastingBS)
while True:
    fasting_bs_input = input("Is your fasting blood sugar level higher than 120 mg/dL? Enter '0' for No or '1' for Yes: ").strip()
    if fasting_bs_input in ['0', '1']:
        new_data['FastingBS'] = [int(fasting_bs_input)]
        break
    else:
        print("Invalid input. Please enter '0' for No or '1' for Yes.")

# 7. Resting ECG
while True:
    print("How would you describe your resting ECG result?")
    print("Options: 'Normal', 'ST', or 'NA' if unsure.")
    resting_ecg_input = input("Your choice: ").strip().upper()
    if resting_ecg_input in ['NORMAL', 'ST', 'NA']:
        new_data['RestingECG'] = [resting_ecg_input]
        break
    else:
        print("Invalid input. Please choose from 'Normal', 'ST', or 'NA'.")

# 8. Maximum Heart Rate (MaxHR)
new_data['MaxHR'] = [get_valid_input("What is your maximum heart rate (beats per minute)? ", is_numeric=True)]

# 9. Exercise-Induced Angina (ExerciseAngina)
while True:
    exercise_angina_input = input("Do you experience exercise-induced angina? Enter 'N' for No, 'Y' for Yes, or 'NA' if unsure: ").strip().upper()
    if exercise_angina_input in ['N', 'Y', 'NA']:
        new_data['ExerciseAngina'] = [exercise_angina_input]
        break
    else:
        print("Invalid input. Please enter 'N' for No, 'Y' for Yes, or 'NA' if unsure.")

# 10. Oldpeak
new_data['Oldpeak'] = [get_valid_input("Please enter your ST depression induced by exercise relative to rest (Oldpeak): ", is_numeric=True)]

# 11. ST Slope
while True:
    print("How would you describe the slope of the ST segment during exercise?")
    print("Options: 'Up', 'Flat', 'Down', or 'NA' if unsure.")
    st_slope_input = input("Your choice: ").strip().upper()
    if st_slope_input in ['UP', 'FLAT', 'DOWN', 'NA']:
        new_data['ST_Slope'] = [st_slope_input]
        break
    else:
        print("Invalid input. Please choose from 'Up', 'Flat', 'Down', or 'NA'.")

# Create a DataFrame from the user input
new_data_df = pd.DataFrame(new_data)

# Preprocess the new data (similar to preprocessing for the training data)
new_data_encoded = new_data_df.copy()
new_data_encoded['Sex'] = new_data_encoded['Sex'].apply(lambda x: 1 if x == 'M' else 0)  # Encode 'M' as 1 and 'F' as 0

# Apply PCA transformation to the new data
new_data_pca = pca.transform(new_data_encoded)

# Make predictions on the new data
new_data_predictions = loaded_model.predict(new_data_pca)

# Print the predictions
if new_data_predictions[0] == 0:
    print("Prediction: No Heart Disease")
else:
    print("Prediction: Heart Disease")


What is your age? 456
Please specify your gender as 'M' for Male or 'F' for Female: M
Can you describe the type of chest pain you're experiencing?
Options: 'ATA' for typical angina, 'NAP' for non-anginal pain, 'ASY' for atypical angina, 'NA' if unsure.
Your choice: ATA
What is your resting blood pressure (mm Hg)? 130
What is your cholesterol level (mg/dL)? 300
Is your fasting blood sugar level higher than 120 mg/dL? Enter '0' for No or '1' for Yes: 1
How would you describe your resting ECG result?
Options: 'Normal', 'ST', or 'NA' if unsure.
Your choice: NA
What is your maximum heart rate (beats per minute)? 90
Do you experience exercise-induced angina? Enter 'N' for No, 'Y' for Yes, or 'NA' if unsure: N
Please enter your ST depression induced by exercise relative to rest (Oldpeak): 1
How would you describe the slope of the ST segment during exercise?
Options: 'Up', 'Flat', 'Down', or 'NA' if unsure.
Your choice: NA




ValueError: ignored