In [15]:
!pip install pandas




In [16]:
import pandas as pd

In [39]:
file_paths = {
    'Heart Disease': '/Users/dev/Downloads/Disease Datasets/Heart Disease.csv',
    'Obesity': '/Users/dev/Downloads/Disease Datasets/Obesity.csv',
    'Diabetes': '/Users/dev/Downloads/Disease Datasets/Diabetes.csv'
}

In [40]:
def load_dataset(file_path):
    df = pd.read_csv(file_path)
    print("First few rows of the dataset:")
    print(df.head())
    print()
    return df

In [41]:
datasets = {}
for disease, path in file_paths.items():
    print(f"Loading dataset for {disease}...")
    datasets[disease] = load_dataset(path)
    print()

Loading dataset for Heart Disease...
First few rows of the dataset:
   HeartDiseaseorAttack  HighBP  HighChol  CholCheck   BMI  Smoker  Stroke  \
0                   0.0     1.0       1.0        1.0  40.0     1.0     0.0   
1                   0.0     0.0       0.0        0.0  25.0     1.0     0.0   
2                   0.0     1.0       1.0        1.0  28.0     0.0     0.0   
3                   0.0     1.0       0.0        1.0  27.0     0.0     0.0   
4                   0.0     1.0       1.0        1.0  24.0     0.0     0.0   

   Diabetes  PhysActivity  Fruits  ...  AnyHealthcare  NoDocbcCost  GenHlth  \
0       0.0           0.0     0.0  ...            1.0          0.0      5.0   
1       0.0           1.0     0.0  ...            0.0          1.0      3.0   
2       0.0           0.0     1.0  ...            1.0          1.0      5.0   
3       0.0           1.0     1.0  ...            1.0          0.0      2.0   
4       0.0           1.0     1.0  ...            1.0          0.0  

In [42]:
def handle_missing_values(df):
    # Check for missing values
    missing_values = df.isnull().sum()
    
    if missing_values.sum() == 0:
        print("No missing values found.")
    else:
        print("Missing values found. Handling...")
        # Handle missing values based on your preferred method
        # For example, you can impute missing values with the mean or median for numerical columns,
        # and with the mode for categorical columns
        # Here's a simple approach using fillna() to impute missing values with the mean for numerical columns
        for col in df.columns:
            if df[col].dtype == 'float64' or df[col].dtype == 'int64':
                df[col] = df[col].fillna(df[col].mean())
            elif df[col].dtype == 'object':
                df[col] = df[col].fillna(df[col].mode()[0])  # Fill categorical columns with mode

        print("Missing values handled.")
    
    return df
# Handle missing values for each dataset
for disease, dataset in datasets.items():
    print(f"Handling missing values for {disease} dataset...")
    datasets[disease] = handle_missing_values(dataset)
    print()

# Verify if missing values are handled
for disease, dataset in datasets.items():
    print(f"Missing values after handling for {disease} dataset:")
    print(dataset.isnull().sum())
    print()

Handling missing values for Heart Disease dataset...
No missing values found.

Handling missing values for Obesity dataset...
No missing values found.

Handling missing values for Diabetes dataset...
No missing values found.

Missing values after handling for Heart Disease dataset:
HeartDiseaseorAttack    0
HighBP                  0
HighChol                0
CholCheck               0
BMI                     0
Smoker                  0
Stroke                  0
Diabetes                0
PhysActivity            0
Fruits                  0
Veggies                 0
HvyAlcoholConsump       0
AnyHealthcare           0
NoDocbcCost             0
GenHlth                 0
MentHlth                0
PhysHlth                0
DiffWalk                0
Sex                     0
Age                     0
Education               0
Income                  0
dtype: int64

Missing values after handling for Obesity dataset:
Gender                            0
Age                               0
Height  

In [43]:
!pip install tensorflow



In [44]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Assuming 'datasets' has been defined and preprocessed as previously discussed

# Data preparation
X = datasets['Heart Disease'].drop('HeartDiseaseorAttack', axis=1).values
y = datasets['Heart Disease']['HeartDiseaseorAttack'].values

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Neural Network Architecture
model = Sequential([
    Dense(16, input_dim=X_train.shape[1], activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=10, validation_split=0.2, verbose=1)

# Evaluating the model
_, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Predicting and converting to percentage
predictions = model.predict(X_test)
percent_predictions = predictions * 100

# Print example outputs
print("Example of percentage predictions for the first 5 individuals:")
for i, prediction in enumerate(percent_predictions[:5]):
    print(f"Individual {i+1}: {prediction[0]:.2f}% chance of having Heart Disease")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m16236/16236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1ms/step - accuracy: 0.8964 - loss: 0.2698 - val_accuracy: 0.9067 - val_loss: 0.2417
Epoch 2/100
[1m16236/16236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1ms/step - accuracy: 0.9070 - loss: 0.2374 - val_accuracy: 0.9074 - val_loss: 0.2383
Epoch 3/100
[1m16236/16236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 958us/step - accuracy: 0.9074 - loss: 0.2384 - val_accuracy: 0.9075 - val_loss: 0.2389
Epoch 4/100
[1m16236/16236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 893us/step - accuracy: 0.9078 - loss: 0.2365 - val_accuracy: 0.9077 - val_loss: 0.2385
Epoch 5/100
[1m16236/16236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 914us/step - accuracy: 0.9083 - loss: 0.2351 - val_accuracy: 0.9076 - val_loss: 0.2392
Epoch 6/100
[1m16236/16236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 914us/step - accuracy: 0.9088 - loss: 0.2345 - val_accuracy: 0.9075 - v

In [60]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Example dictionary input
user_input = {
    'HighBP': 1,
    'HighChol': 1,
    'CholCheck': 1,
    'BMI': 26,
    'Smoker': 1,
    'Stroke': 1,
    'Diabetes': 0,
    'PhysActivity': 0,
    'Fruits': 0,
    'Veggies': 0,
    'HvyAlcoholConsump': 1,
    'AnyHealthcare': 1,
    'NoDocbcCost': 0,
    'GenHlth': 4,
    'MentHlth': 7,
    'PhysHlth': 3,
    'DiffWalk': 0,
    'Sex': 1,
    'Age': 7,
    'Education': 1,
    'Income': 6
}

# Assume 'scaler' and 'model' are already defined and trained as discussed previously
# You need to make sure to transform new data in the same way as the training data

# Function to process the input dictionary
def preprocess_input(user_dict, feature_order, scaler):
    # Convert dictionary to DataFrame
    df = pd.DataFrame([user_dict])
    # Order columns as per the training data
    df = df[feature_order]
    # Scale features
    scaled_features = scaler.transform(df)
    return scaled_features

# Feature order used in the training model
feature_order = ['HighBP', 'HighChol', 'CholCheck', 'BMI', 'Smoker', 'Stroke', 'Diabetes',
                 'PhysActivity', 'Fruits', 'Veggies', 'HvyAlcoholConsump', 'AnyHealthcare',
                 'NoDocbcCost', 'GenHlth', 'MentHlth', 'PhysHlth', 'DiffWalk', 'Sex',
                 'Age', 'Education', 'Income']

# Preprocess user input
processed_input = preprocess_input(user_input, feature_order, scaler)

# Make prediction
prediction = model.predict(processed_input)
percentage = prediction[0][0] * 100  # Convert to percentage

print(f"Probability of having Heart Disease is {percentage:.2f}%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Probability of having Heart Disease is 25.76%


