In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical


In [10]:
# Load the dataset
data = pd.read_csv('final_updated_health_dataset.csv')

# Display the first few rows of the dataset
data.head()

Unnamed: 0,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,Cardiac_Attack_Chances
0,18393,1,168,62.0,110,80,0,0,0,0,1,20
1,20228,0,156,85.0,140,90,2,0,0,0,1,100
2,18857,0,165,64.0,130,70,2,0,0,0,0,100
3,17623,1,169,82.0,150,100,0,0,0,0,1,100
4,17474,0,156,56.0,100,60,0,0,0,0,0,20


In [11]:
# # Define a dictionary for mapping the old values to new labels
# value_map = {20: "low", 40: "medium", 60: "high", 80: "very high", 100: "severe"}

# # Use the replace method to change values in the column
# data['Cardiac_Attack_Chances'] = data['Cardiac_Attack_Chances'].replace(value_map)
# data.head()

In [12]:
# Basic info about the dataset
data.info()
data.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70000 entries, 0 to 69999
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   age                     70000 non-null  int64  
 1   gender                  70000 non-null  int64  
 2   height                  70000 non-null  int64  
 3   weight                  70000 non-null  float64
 4   ap_hi                   70000 non-null  int64  
 5   ap_lo                   70000 non-null  int64  
 6   cholesterol             70000 non-null  int64  
 7   gluc                    70000 non-null  int64  
 8   smoke                   70000 non-null  int64  
 9   alco                    70000 non-null  int64  
 10  active                  70000 non-null  int64  
 11  Cardiac_Attack_Chances  70000 non-null  int64  
dtypes: float64(1), int64(11)
memory usage: 6.4 MB


Unnamed: 0,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,Cardiac_Attack_Chances
count,70000.0,70000.0,70000.0,70000.0,70000.0,70000.0,70000.0,70000.0,70000.0,70000.0,70000.0,70000.0
mean,19468.865814,0.349571,164.359229,74.20569,128.817286,96.630414,0.366871,0.226457,0.088129,0.053771,0.803729,64.460857
std,2467.251667,0.476838,8.210126,14.395757,154.011419,188.47253,0.68025,0.57227,0.283484,0.225568,0.397179,37.301866
min,10798.0,0.0,55.0,10.0,-150.0,-70.0,0.0,0.0,0.0,0.0,0.0,20.0
25%,17664.0,0.0,159.0,65.0,120.0,80.0,0.0,0.0,0.0,0.0,1.0,20.0
50%,19703.0,0.0,165.0,72.0,120.0,80.0,0.0,0.0,0.0,0.0,1.0,100.0
75%,21327.0,1.0,170.0,82.0,140.0,90.0,1.0,0.0,0.0,0.0,1.0,100.0
max,23713.0,1.0,250.0,200.0,16020.0,11000.0,2.0,2.0,1.0,1.0,1.0,100.0


In [13]:
# Convert age from days to years for better interpretability
data['age'] = data['age'] / 365

# Handling outliers for blood pressure readings
data = data[(data['ap_hi'] >= 50) & (data['ap_hi'] <= 200)]
data = data[(data['ap_lo'] >= 30) & (data['ap_lo'] <= 120)]

# Exploratory Data Analysis (EDA)
# Visualizations (Histograms, boxplots, etc.)
# ... [Your EDA code here] ...

In [14]:
# Map the 'Cardiac_Attack_Chances' to categorical values
chance_mapping = {20: 0, 40: 1, 60: 2, 80: 3, 100: 4}
data['Cardiac_Attack_Chances'] = data['Cardiac_Attack_Chances'].map(chance_mapping)

# Features and Target Variable
X = data.drop('Cardiac_Attack_Chances', axis=1)
y = to_categorical(data['Cardiac_Attack_Chances'])

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
# Building the neural network model for classification
model = Sequential([
    Dense(64, input_dim=11, activation='relu'),  # Input layer with 11 features and first hidden layer with 64 neurons
    Dense(64, activation='relu'),  # Second hidden layer
    Dense(5, activation='softmax')  # Output layer for 5 classes
])

# Compiling the model for classification
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [16]:
# Training the model
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.8616682291030884, Test Accuracy: 0.6717168092727661


In [21]:
import json
import numpy as np
18393	1	168	62	110	80	0	0	0	0	1

json_data = '''
{
    "age": 60,
    "gender": 1,
    "height": 170,
    "weight": 80,
    "ap_hi": 100,
    "ap_lo": 80,
    "cholesterol": 0,
    "gluc": 1,
    "smoke":1,
    "alco": 0,
    "active": 1
}
'''

# Parse the JSON data into a Python dictionary
data_dict = json.loads(json_data)

# Create a NumPy array from the dictionary values
numpy_array = np.array(list(data_dict.values()))
numpy_array = numpy_array.reshape(1, -1)
# Print the resulting NumPy array
print(numpy_array)


[[ 60   1 170  80 100  80   0   0   0   0   1]]


In [34]:
numpy_array = [[10,0,150,35,80,80,0,0,0,0,1]]

In [35]:
my_prediction = model.predict(numpy_array)



In [38]:
model.save('model.h5')

In [36]:
my_prediction

array([[0.0000000e+00, 2.7902308e-20, 0.0000000e+00, 1.6338943e-16,
        1.0000000e+00]], dtype=float32)

In [37]:

# Define the class labels
class_labels = ["low", "medium", "high", "very high", "severe"]

# Find the index of the maximum probability
predicted_class_index = np.argmax(my_prediction)

# Get the predicted class label
predicted_class = class_labels[predicted_class_index]

print("Predicted Class:", predicted_class)

Predicted Class: severe
