In [1]:
import pandas as pd

# Relative path to the CSV file that holds the dataset
file_path = 'Data/Dataa.csv'

# Parse the CSV into a DataFrame using pandas' default reader options
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top rows (head() defaults to five) to inspect columns and values
data.head(n=5)


Unnamed: 0,age,age_group,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75,Elderly,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55,Middle-aged,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65,Elderly,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50,Middle-aged,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65,Elderly,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [2]:
# Transform 'age' column
def age_group(age):
    """Map a numeric age to a coarse age-band label.

    Bands:
        40 <= age < 60    -> 'Middle-aged'
        60 <= age < 80    -> 'Elderly'
        80 <= age <= 100  -> 'Senior Citizens'

    Any value outside these bands yields 'Unknown'. NaN also yields
    'Unknown', because it fails every comparison below.
    """
    # Bands that include their lower bound and exclude their upper bound.
    half_open_bands = (
        (40, 60, 'Middle-aged'),
        (60, 80, 'Elderly'),
    )
    for lower, upper, label in half_open_bands:
        if lower <= age < upper:
            return label

    # The top band is closed on both ends, so it is checked separately.
    return 'Senior Citizens' if 80 <= age <= 100 else 'Unknown'

# Transform 'serum_creatinine' column
def creatinine_level(creatinine):
    """Bucket a serum creatinine reading into a level label.

    Parameters
    ----------
    creatinine : float
        The reading to classify. The thresholds are applied to the value
        as given; no unit conversion is performed.

    Returns
    -------
    str
        'Low'       if creatinine < 1.0
        'Normal'    if 1.0 <= creatinine <= 1.5
        'High'      if 1.5 < creatinine <= 2.0
        'Very High' if creatinine > 2.0
        'Unknown'   if the value is missing (None / NaN)
    """
    # NaN fails every ordered comparison, so without this guard a missing
    # reading would fall through to the final branch and be labelled
    # 'Very High'. 'Unknown' matches the fallback label used by age_group.
    if pd.isna(creatinine):
        return 'Unknown'

    if creatinine < 1.0:
        return 'Low'
    elif creatinine <= 1.5:
        return 'Normal'
    elif creatinine <= 2.0:
        return 'High'
    else:
        return 'Very High'

# Derive the two categorical features from their numeric source columns
data['age_group'] = data['age'].apply(age_group)
data['creatinine_level'] = data['serum_creatinine'].apply(creatinine_level)

# Keep only the columns the classifier uses. Take an explicit copy so that
# later column assignments on `data` write to an independent frame instead
# of a slice of the loaded one (avoids pandas' SettingWithCopyWarning).
data = data[['age_group', 'anaemia', 'creatinine_level', 'high_blood_pressure', 'DEATH_EVENT']].copy()

# Display the transformed dataset
data.head()


Unnamed: 0,age_group,anaemia,creatinine_level,high_blood_pressure,DEATH_EVENT
0,Elderly,0,High,1,1
1,Middle-aged,0,Normal,0,1
2,Elderly,0,Normal,0,1
3,Middle-aged,1,High,0,1
4,Elderly,1,Very High,0,1


In [3]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score

# Encode the string-valued categorical columns as integer codes, keeping each
# fitted encoder so new inputs can be transformed with the same mapping.
# NOTE: this overwrites the string columns in `data`; re-running this cell
# without first re-running the cell that builds `data` would re-fit the
# encoders on the integer codes instead of the original labels.
label_encoders = {}
for column in ['age_group', 'creatinine_level']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Define features and target variable
X = data[['age_group', 'anaemia', 'creatinine_level', 'high_blood_pressure']]
y = data['DEATH_EVENT']

# Split data into training and test sets (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# CategoricalNB infers the number of categories per feature from the data it
# is fitted on. A category that happens to land only in the test split (or in
# later input) would then fall outside the fitted probability tables and make
# predict() fail. Passing the per-feature category counts taken from the full
# feature matrix guarantees every known category has a (smoothed) entry.
# Assumes every feature is encoded as consecutive integers starting at 0.
n_categories = (X.max() + 1).astype(int).to_numpy()

# Train Naive Bayes classifier
model = CategoricalNB(min_categories=n_categories)
model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

accuracy


0.6666666666666666

In [4]:
# Translate the human-readable category labels into integer codes by reusing
# the encoders stored in `label_encoders`
age_code = label_encoders['age_group'].transform(['Middle-aged'])[0]
creatinine_code = label_encoders['creatinine_level'].transform(['Normal'])[0]

# Assemble the single observation to classify
input_data = {
    'age_group': age_code,
    'anaemia': 0,
    'creatinine_level': creatinine_code,
    'high_blood_pressure': 1,
}

# Wrap the observation in a one-row DataFrame
input_df = pd.DataFrame([input_data])

# predict() returns one entry per input row; take the only one
prediction = model.predict(input_df)[0]

# Output the prediction
prediction


0