In [None]:
#tools used
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Accessing our data to propose a use case

In [None]:
# Load the Kaggle "Personal Key Indicators of Heart Disease" CSV into `heart`.
# `heart` is the frame as read from disk; the model-ready frame built from it
# later in the notebook is the one called `cleaned_df`.
HEART_CSV_PATH = "/kaggle/input/personal-key-indicators-of-heart-disease/heart_2020_cleaned.csv"
heart = pd.read_csv(HEART_CSV_PATH)

In [None]:
# Preview the first ten rows to see which columns exist and what their values look like.
heart.head(10)

In [None]:
# Summary statistics (count, mean, spread, quartiles) for the frame's columns.
heart.describe()

# Cleaning and transforming the data
1. Check for missing values and either fill or drop them
2. Convert the non-numerical (categorical) values into numbers
3. Split the data into the features (X) and the target (y)

In [None]:
# 1. Count the missing values in every column; a column showing 0 needs no filling or dropping.
heart.isna().sum()

In [None]:
# 2. The categorical columns have to be converted into numbers before modelling.
# Inspect the dtype of each column to see which ones are non-numeric.
heart.dtypes

In [None]:
# One-hot encode the categorical predictors.
# The order of this list determines the order of the dummy-column groups, and the
# questionnaire later in the notebook builds its feature vector in the same order.
categorical_columns = [
    "Smoking",
    "AlcoholDrinking",
    "Stroke",
    "DiffWalking",
    "Sex",
    "AgeCategory",
    "Diabetic",
    "Asthma",
    "KidneyDisease",
    "SkinCancer",
    "GenHealth",
]
dummies = pd.get_dummies(heart[categorical_columns])
dummies.head(10)

In [None]:
# The presence of heart disease is the prediction target: store it as a numeric
# "Target" column with 1 for 'Yes' and 0 for 'No'.
# map() is used instead of replace(): recent pandas versions emit a FutureWarning
# for the implicit object -> integer downcast that replace() performs here.
heart["Target"] = heart["HeartDisease"].map({'Yes': 1, 'No': 0}).astype(int)
# Combine the numeric columns with the one-hot encoded categorical columns.
# "Target" comes first, followed by the three numeric features, then the dummies.
numerical = ["Target", "BMI", "PhysicalHealth", "SleepTime"]
cleaned_df = pd.concat([heart[numerical], dummies], axis=1)
cleaned_df.head(10)

In [None]:
# Look at a few rows where heart disease is present (Target == 1).
cleaned_df[cleaned_df["Target"] == 1].head(5)

In [None]:
# 3. Separate the label from the features.
# y is the dependent variable (the target the model predicts);
# X holds the independent variables, i.e. every column except "Target".
y = cleaned_df["Target"]
X = cleaned_df.drop(columns=["Target"])

In [None]:
# Split the data set into a train set and a test set; the test set is 20% of the rows.
# random_state pins the shuffle so the split, and every score computed from it, is
# identical on each Restart & Run All. The np.random.seed call in the model cell runs
# after this cell, so it cannot make this split reproducible.
# (train_test_split is already imported in the notebook's first cell.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fitting the model/algorithm  
1. Choose a supervised machine learning model that fits the problem
2. Use RandomForestClassifier, because the task is to predict a category (classification)
3. Evaluate the model
    

In [None]:
# Global NumPy seed, kept for any later code that draws from np.random.
# (RandomForestClassifier is already imported in the notebook's first cell.)
np.random.seed(42)

# Give the forest its own random_state so its bootstrap samples and feature draws do
# not depend on how much of the global NumPy random stream earlier cells consumed.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
# Check the score: mean accuracy on the held-out test rows.
# The forest learns which combinations of feature values go together with a
# Target of 1. That is a statistical association in this data set, not evidence
# of what causes heart disease.
clf.score(X_test, y_test)

In [None]:
# Check the score: mean accuracy of the fitted model on the held-out test set.
clf.score(X_test, y_test)
# The model searches all of the X columns for combinations of values that go together
# with a "target" of "1", i.e. which patterns in the data are associated with heart disease.

In [None]:
# A single train/test split yields only one estimate of the score, so double-check it.
# Because the test share was 20%, a 5-fold cross-validation is used: the score is
# computed on 5 different train/test partitions of the full data set.
from sklearn.model_selection import cross_val_score
cross_val_score(clf, X, y, cv =5)

In [None]:
# Average the five per-fold scores into one summary number.
fold_scores = cross_val_score(clf, X, y, cv=5)
clf_cross_val_score = fold_scores.mean()
clf_cross_val_score

In [None]:
# Evaluate the fitted model on the held-out test set with several classification metrics.
# (The metric functions are already imported in the notebook's first cell; a stray
# `np.array(y_test)` expression whose result was discarded has been removed.)
y_preds = clf.predict(X_test)
print("Classifier metrics on the test set")
# Accuracy: share of test rows whose predicted label equals the true label.
print(f"Accuracy: {accuracy_score(y_test, y_preds)*100:.2f}%")
# Precision, recall and F1 are reported for the positive class (Target == 1).
print(f"Precision Score: {precision_score(y_test, y_preds)}")
print(f"Recall Score: {recall_score(y_test, y_preds)}")
print(f"f1 Score: {f1_score(y_test, y_preds)}")

In [None]:
# First ten predicted labels for the test set.
y_preds[:10]

In [None]:
# True labels of the first ten test rows, to compare against y_preds[:10].
np.array(y_test[:10])

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix of true vs. predicted test labels, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_preds)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

# Text summary of the per-class metrics for the same predictions.
report = classification_report(y_test, y_preds)
print("Classification Report:")
print(report)

# Analysis

In [None]:
# Order the features by the fitted forest's feature_importances_ (ascending)
# and draw them as a horizontal bar chart.
importances = clf.feature_importances_
sorted_indices = np.argsort(importances)

fig, ax = plt.subplots(figsize=(16, 9))
sns.barplot(x=importances[sorted_indices], y=X_train.columns[sorted_indices], ax=ax)
ax.set_xlabel("Relative Importance")
ax.set_ylabel("Features")
ax.set_title("Feature Importances")
plt.show()

In [None]:
# Show the column labels of the model-ready frame (target included) ...
column_names = cleaned_df.columns
print(column_names)

# ... and how many columns that is.
num_columns = len(column_names)
print("Number of columns:", num_columns)

# Main Code Block for the tkinter

In [None]:
# Columns of the raw data set, for reference:
#   HeartDisease, BMI, Smoking, AlcoholDrinking, Stroke, PhysicalHealth, MentalHealth,
#   DiffWalking, Sex, AgeCategory, Race, Diabetic, PhysicalActivity, GenHealth,
#   SleepTime, Asthma, KidneyDisease, SkinCancer
# Start of the questionnaire: ask for the user's name and greet them.
name = input("What is your Name: ")
print("Hi", name, "!")

# Lower bound of each age bracket, one entry per AgeCategory dummy column.
# NOTE(review): assumes the 13 AgeCategory columns run in ascending age order
# (18-24, 25-29, ..., 70-74, 75-79) with an open-ended "80 or older" bracket
# last; confirm against X.columns.
age_bracket_starts = [18, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80]

# Ask for the age until a usable whole number is given, then one-hot encode the
# bracket it falls into.
age_value = None
while age_value is None:
    age = input("How old are you?")
    try:
        age_input = int(age)
    except ValueError:
        # Non-numeric text must re-prompt rather than crash the cell.
        print("Please enter your age as a number")
        continue
    if age_input < age_bracket_starts[0]:
        # There is no bracket below 18.
        print("Please enter an age of 18 or older")
        continue
    # The bracket is the last one whose lower bound does not exceed the age.
    # Deriving it from the bounds avoids overlapping or unreachable ranges.
    bracket_index = sum(age_input >= start for start in age_bracket_starts) - 1
    age_value = [
        1 if position == bracket_index else 0
        for position in range(len(age_bracket_starts))
    ]

# Ask for the sex until "male" or "female" is entered (any letter case).
# The encoded pair is laid out as [female, male].
sex_value = None
while sex_value is None:
    sex_input = input("What is your sex?")
    sex_value = {"male": [0, 1], "female": [1, 0]}.get(sex_input.lower())
    if sex_value is None:
        print("Please enter male or female")

    
def calculate_bmi(weight, height_ft, height_in):
    """Return the body-mass index from a weight and a height given in feet and inches.

    The height is first converted to total inches (12 per foot); the result is
    ``weight / inches**2`` scaled by 703, the factor used with pounds and inches.
    """
    total_inches = height_ft * 12 + height_in
    return (weight / (total_inches * total_inches)) * 703

# Weight: re-prompt until a positive, finite number is entered.
weight = None
while weight is None:
    try:
        weight = float(input("What is your weight in pounds? "))
    except ValueError:
        print("Please enter your weight as a number")
        continue
    if not 0 < weight < float("inf"):
        # Rejects zero, negatives, "nan" and "inf", all of which float() accepts.
        print("Please enter a weight greater than zero")
        weight = None

# Height: accepts feet'inches (6'1 or 6'1") as well as feet only (6 or 6').
height_ft = None
height_in = None
while height_ft is None:
    height_input = input("What is your height (e.g., 6'1)? ")
    height_parts = height_input.split("'")
    # Missing inches (no apostrophe, or nothing after it) count as 0 inches.
    inches_text = height_parts[1].replace('"', '').strip() if len(height_parts) > 1 else ""
    try:
        feet = int(height_parts[0])
        inches = int(inches_text) if inches_text else 0
    except ValueError:
        print("Please enter your height as feet'inches, e.g. 6'1")
        continue
    if feet < 0 or inches < 0 or feet * 12 + inches == 0:
        # A zero height would divide by zero in the BMI formula.
        print("Please enter a height greater than zero")
        continue
    height_ft, height_in = feet, inches

bmi = calculate_bmi(weight, height_ft, height_in)
print("Your BMI is:", bmi)

# Hours of sleep per night: re-prompt until a whole number from 0 to 24 is entered.
sleep_value = None
while sleep_value is None:
    sleep_input = input("How many hours do you sleep a night?")
    try:
        sleep_hours = int(sleep_input)
    except ValueError:
        # Non-numeric text must re-prompt rather than crash the cell.
        print("Please enter a whole number of hours")
        continue
    if 0 <= sleep_hours <= 24:
        sleep_value = sleep_hours
    else:
        print("Please enter a number between 0 and 24")
    
# Days of poor physical health in the last 30 days: re-prompt until a whole
# number from 0 to 30 is entered.
physicalhealth_value = None
while physicalhealth_value is None:
    physicalhealth_input = input("How many days within the last 30 days have you experienced physical illness and injury?")
    try:
        unwell_days = int(physicalhealth_input)
    except ValueError:
        # Non-numeric text must re-prompt rather than crash the cell.
        print("Please enter a whole number of days")
        continue
    if 0 <= unwell_days <= 30:
        physicalhealth_value = unwell_days
    else:
        print("Please enter a number between 0 and 30")

# Smoking: keep asking until "yes" or "no" is entered (any letter case).
# The encoded pair is laid out as [no, yes].
smoking_value = None
while smoking_value is None:
    smoking_input = input("Do you smoke?")
    smoking_value = {"yes": [0, 1], "no": [1, 0]}.get(smoking_input.lower())
    if smoking_value is None:
        print("Please enter yes or no")

# Alcohol: keep asking until "yes" or "no" is entered (any letter case).
# The encoded pair is laid out as [no, yes].
alcohol_value = None
while alcohol_value is None:
    alcohol_input = input("Do you drink alcohol?")
    # Compare the typed answer (alcohol_input). alcohol_value is still None at
    # this point, so calling .lower() on it would raise AttributeError.
    if alcohol_input.lower() == "yes":
        alcohol_value = [0, 1]
    elif alcohol_input.lower() == "no":
        alcohol_value = [1, 0]
    else:
        print("Please enter yes or no")

# Stroke history: keep asking until "yes" or "no" is entered (any letter case).
# The encoded pair is laid out as [no, yes].
stroke_value = None
while stroke_value is None:
    stroke_input = input("Have you ever had a stroke?")
    stroke_value = {"yes": [0, 1], "no": [1, 0]}.get(stroke_input.lower())
    if stroke_value is None:
        print("Please enter yes or no")
        
# Difficulty walking: keep asking until "yes" or "no" is entered (any letter case).
# The encoded pair is laid out as [no, yes].
diff_value = None
while diff_value is None:
    diff_input = input("Do you have difficulty walking or climbing stairs?")
    diff_value = {"yes": [0, 1], "no": [1, 0]}.get(diff_input.lower())
    if diff_value is None:
        print("Please enter yes or no")

# Menu choice -> one-hot vector over the four Diabetic dummy columns.
# NOTE(review): assumes the columns are ordered by label, i.e.
# [No, No (borderline), Yes, Yes (during pregnancy)]; confirm against
# dummies.columns. Under that ordering choice 1 (diabetic) must set the "Yes" slot
# and choice 2 the "Yes (during pregnancy)" slot.
diabetic_choices = {
    1: [0, 0, 1, 0],  # diabetic
    2: [0, 0, 0, 1],  # diabetic during pregnancy
    3: [0, 1, 0, 0],  # at risk of diabetes (borderline)
    4: [1, 0, 0, 0],  # not diabetic
}

Diabetic_value = None
while Diabetic_value is None:
    Diabetic = input("Please select that best discribe you: 1) I am diabetic 2) I was diabetic durring pregnacy 3) I am at risk of diabetes  4) I am not diabetic")
    try:
        Diabetic_input = int(Diabetic)
    except ValueError:
        # Non-numeric text is treated like any other invalid choice.
        Diabetic_input = None
    Diabetic_value = diabetic_choices.get(Diabetic_input)
    if Diabetic_value is None:
        print("Please enter 1 - 4")

# Asthma: keep asking until "yes" or "no" is entered (any letter case).
# The encoded pair is laid out as [no, yes].
asthma_value = None
while asthma_value is None:
    asthma_input = input("Do you have asthma?")
    asthma_value = {"yes": [0, 1], "no": [1, 0]}.get(asthma_input.lower())
    if asthma_value is None:
        print("Please enter yes or no")
        
# Kidney disease: keep asking until "yes" or "no" is entered (any letter case).
# The encoded pair is laid out as [no, yes].
kidney_value = None
while kidney_value is None:
    kidney_input = input("Do you have or had kidney disease?")
    kidney_value = {"yes": [0, 1], "no": [1, 0]}.get(kidney_input.lower())
    if kidney_value is None:
        print("Please enter yes or no")

# Skin cancer: keep asking until "yes" or "no" is entered (any letter case).
# The encoded pair is laid out as [no, yes].
skin_value = None
while skin_value is None:
    skin_input = input("Do you have or had skin cancer?")
    skin_value = {"yes": [0, 1], "no": [1, 0]}.get(skin_input.lower())
    if skin_value is None:
        print("Please enter yes or no")

# Menu choice -> one-hot vector over the five GenHealth dummy columns.
# NOTE(review): these positions imply the column order
# [Excellent, Fair, Good, Poor, Very good]; confirm against dummies.columns.
health_choices = {
    1: [1, 0, 0, 0, 0],  # Excellent
    2: [0, 0, 0, 0, 1],  # Very Good
    3: [0, 0, 1, 0, 0],  # Good
    4: [0, 1, 0, 0, 0],  # Fair
    5: [0, 0, 0, 1, 0],  # Poor
}

health_value = None
while health_value is None:
    health = input("Please select that best describe your general health: 1) Excellent 2) Very Good 3) Good 4) Fair  5) Poor")
    try:
        health_input = int(health)
    except ValueError:
        # Non-numeric text is treated like any other invalid choice instead of crashing.
        health_input = None
    health_value = health_choices.get(health_input)
    if health_value is None:
        print("Please enter 1 - 5")

# Assemble the feature vector in the same column order the model was trained on:
# the numeric features BMI, PhysicalHealth, SleepTime first, then the one-hot
# groups in the order their columns were passed to get_dummies.
# MentalHealth is not a training feature and is never asked for, so it must not
# appear here; SleepTime takes the third slot.
input_values = [bmi, physicalhealth_value, sleep_value]
for one_hot_group in (
    smoking_value,
    alcohol_value,
    stroke_value,
    diff_value,
    sex_value,
    age_value,
    Diabetic_value,
    asthma_value,
    kidney_value,
    skin_value,
    health_value,
):
    input_values.extend(one_hot_group)

## predictor
# Wrap the answers as a single-row batch and show the raw vector for inspection.
new_input = [(input_values)]
print(new_input)

# Label the row with the training column names. A vector of the wrong length then
# fails here with a clear error, and predict() no longer warns that a model fitted
# on a DataFrame is being given unnamed features.
new_frame = pd.DataFrame(new_input, columns=X_train.columns)
predictions = clf.predict(new_frame)

# One message per predicted row: 0 means not at risk, anything else means at risk.
for prediction in predictions:
    if prediction == 0:
        print("Based on your responses it is likley you are not currently at risk of heart disease.")
    else:
        print("Based on your responses it is likey you are at risk of heart disease." )