CHRONIC DISEASE PREDICTION

In [11]:
# HEART DISEASE PREDICTION

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

# Load the dataset
file_path = 'heart.csv'  # Make sure to upload the file to Google Colab first
heart_data = pd.read_csv(file_path)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Sex', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
for column in categorical_columns:
    le = LabelEncoder()
    heart_data[column] = le.fit_transform(heart_data[column])
    label_encoders[column] = le

# Split the data into features and target
X = heart_data.drop('HeartDisease', axis=1)
y = heart_data['HeartDisease']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train Naive Bayes Classifier
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Function to get input from user
print("Please enter the following data :")
def get_user_input():
    inputs = {}
    inputs['Age'] = float(input("Enter Age: "))
    inputs['Sex'] = input("Enter Sex (M/F): ").upper()
    inputs['RestingBP'] = float(input("Enter RestingBP: "))
    inputs['Cholesterol'] = float(input("Enter Cholesterol: "))
    inputs['FastingBS'] = int(input("Enter FastingBS (0 or 1): "))
    inputs['RestingECG'] = input("Enter RestingECG (Normal/ST/LVH): ").strip()
    if inputs['RestingECG'].lower() == 'normal':
        inputs['RestingECG'] = 'Normal'
    elif inputs['RestingECG'].lower() == 'st':
        inputs['RestingECG'] = 'ST'
    elif inputs['RestingECG'].lower() == 'lvh':
        inputs['RestingECG'] = 'LVH'
    inputs['MaxHR'] = float(input("Enter MaxHR: "))
    inputs['ExerciseAngina'] = input("Enter ExerciseAngina (Y/N): ").upper()
    inputs['Oldpeak'] = float(input("Enter Oldpeak: "))
    inputs['ST_Slope'] = input("Enter ST_Slope (Up/Flat/Down): ").strip()
    if inputs['ST_Slope'].lower() == 'up':
        inputs['ST_Slope'] = 'Up'
    elif inputs['ST_Slope'].lower() == 'flat':
        inputs['ST_Slope'] = 'Flat'
    elif inputs['ST_Slope'].lower() == 'down':
        inputs['ST_Slope'] = 'Down'
    return inputs

# Function to encode user inputs using label encoders
def encode_inputs(inputs):
    encoded_inputs = []
    for column in X.columns:
        if column in label_encoders:
            encoded_input = label_encoders[column].transform([inputs[column]])[0]
        else:
            encoded_input = inputs[column]
        encoded_inputs.append(encoded_input)
    return encoded_inputs

# Function to predict heart disease risk
def predict_heart_disease(encoded_inputs):
    prediction = nb_classifier.predict([encoded_inputs])
    return "Heart Disease Risk: High" if prediction[0] == 1 else "Heart Disease Risk: Low"

# Get user inputs
user_inputs = get_user_input()

# Encode user inputs
encoded_user_inputs = encode_inputs(user_inputs)

# Predict and print the result
prediction_result = predict_heart_disease(encoded_user_inputs)
print(prediction_result)



Please enter the following data :
Enter Age: 22
Enter Sex (M/F): f
Enter RestingBP: 21
Enter Cholesterol: 321
Enter FastingBS (0 or 1): 1
Enter RestingECG (Normal/ST/LVH): normal
Enter MaxHR: 23
Enter ExerciseAngina (Y/N): n
Enter Oldpeak: 12
Enter ST_Slope (Up/Flat/Down): up
Heart Disease Risk: High




In [13]:
# DIABETES PREDICTION

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Function to get valid categorical input
def get_valid_input(prompt, valid_options):
    while True:
        user_input = input(prompt).lower()
        if user_input in valid_options:
            return user_input
        else:
            print("Invalid input. Please try again.")

# Function to get numeric input
def get_numeric_input(prompt):
    while True:
        try:
            return float(input(prompt))
        except ValueError:
            print("Invalid input. Please enter a numeric value.")

# Function to predict diabetes based on user input
def predict_diabetes(knn_model, nb_model, scaler):
    print("Please enter the following details:")

    gender = get_valid_input("Gender (Male/Female): ", ['male', 'female'])
    age = get_numeric_input("Age: ")
    hypertension = get_valid_input("Hypertension (Yes/No): ", ['yes', 'no'])
    heart_disease = get_valid_input("Heart Disease (Yes/No): ", ['yes', 'no'])
    smoking_history = get_valid_input("Smoking History (Never, Former, Ever, Current, Not Current, No Info): ", ['never', 'former', 'ever', 'current', 'not current', 'no info'])

    bmi = get_numeric_input("BMI: ")
    hba1c_level = get_numeric_input("HbA1c Level: ")
    blood_glucose_level = get_numeric_input("Blood Glucose Level: ")

    # Converting inputs to encoded values
    gender_encoded = 1 if gender == 'male' else 0
    hypertension_encoded = 1 if hypertension == 'yes' else 0
    heart_disease_encoded = 1 if heart_disease == 'yes' else 0
    smoking_history_encoded = {'never': 4, 'former': 3, 'ever': 2, 'current': 1, 'not current': 5, 'no info': 0}[smoking_history]

    # Preparing the input for prediction
    user_input = np.array([gender_encoded, age, hypertension_encoded, heart_disease_encoded, smoking_history_encoded, bmi, hba1c_level, blood_glucose_level]).reshape(1, -1)
    scaled_input = scaler.transform(user_input)

    # Making predictions
    knn_prediction = knn_model.predict(scaled_input)
    nb_prediction = nb_model.predict(scaled_input)

    if knn_prediction[0] == 1 or nb_prediction[0] == 1:
        print("\nThere are high chances of diabetes.")
    else:
        print("\nThere are low chances of diabetes.")

# Load and preprocess the dataset
file_path = 'diabetes_prediction_dataset.csv'
data = pd.read_csv(file_path)

data['gender'] = LabelEncoder().fit_transform(data['gender'])
data['smoking_history'] = LabelEncoder().fit_transform(data['smoking_history'])

X = data.drop('diabetes', axis=1)
y = data['diabetes']

# Data splitting and scaling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model training
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
nb = GaussianNB()
nb.fit(X_train, y_train)

# Model evaluation
knn_pred = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_pred)
nb_pred = nb.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)

#print("KNN Accuracy:", knn_accuracy)
#print("Naive Bayes Accuracy:", nb_accuracy)

# Making predictions
predict_diabetes(knn, nb, scaler)

Please enter the following details:
Gender (Male/Female): male
Age: 22
Hypertension (Yes/No): yes
Heart Disease (Yes/No): no
Smoking History (Never, Former, Ever, Current, Not Current, No Info): current
BMI: 23
HbA1c Level: dfdhgh
Invalid input. Please enter a numeric value.
HbA1c Level: 234
Blood Glucose Level: 2

There are high chances of diabetes.




In [7]:
# KIDNEY DISEASE PREDICTION

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the dataset
file_path = '/content/chronic_kidney_disease_full.xlsx'  
ckd_data = pd.read_excel(file_path)

# Data Cleaning: Remove rows and columns with too many missing values
threshold = 0.3  # Threshold for removing columns
ckd_data.replace('?', np.nan, inplace=True)
ckd_data.dropna(axis=0, thresh=int(threshold * ckd_data.shape[1]), inplace=True)
ckd_data.dropna(axis=1, thresh=int(threshold * ckd_data.shape[0]), inplace=True)

# Convert columns to appropriate data types
for col in ckd_data.columns:
    if ckd_data[col].nunique() < 10:
        ckd_data[col] = ckd_data[col].astype('category')
    else:
        ckd_data[col] = pd.to_numeric(ckd_data[col], errors='coerce')


# Separating features (X) and the target variable (y)
X = ckd_data.drop(['class','sg','su','bgr','bu','sc','sod','pot','htn','dm'],axis=1)
y = ckd_data['class']

# Encoding categorical variables using Label Encoding
label_encoders = {}
for column in X.select_dtypes(include=['category']).columns:
    label_encoders[column] = LabelEncoder()
    X[column] = label_encoders[column].fit_transform(X[column].astype(str))

# Add a special category '<unknown>' for handling unseen labels in user input
for column in label_encoders:
    label_encoders[column].classes_ = np.append(label_encoders[column].classes_, '<unknown>')

def handle_unknowns(row):
    for col in label_encoders:
        if row[col] not in label_encoders[col].classes_:
            row[col] = '<unknown>'
    return row

# Impute missing values using the mean for each column
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Feature Scaling using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Training the K-Nearest Neighbors (KNN) model
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Training the Naive Bayes model
nb = GaussianNB()
nb.fit(X_train, y_train)

#Abbrevation used are as follows:

print("ABBREVIATIONS USED :")
print("\n'age'  : 'Age'")
print("'bp'   : 'Blood Pressure (mmHg)'")
print("'al'   : 'Albumin Levels in Urine'")
print("'su'   : 'Sugar Levels in Urine'")
print("'rbc'  : 'Red Blood Cell Count'")
print("'pc'   : 'Pus Cell Count in Urine'")
print("'pcc'  : 'Presence of Pus Cell Clumps'")
print("'ba'   : 'Presence of Bacteria'")
print("'bgr'  : 'Blood Glucose Random'")
print("'bu'   : 'Blood Urea'")
print("'sc'   : 'Serum Creatinine'")
print("'sod'  : 'Sodium'")
print("'pot'  : 'Potassium'")
print("'hemo' : 'Hemoglobin'")
print("'pcv'  : 'Packed Cell Volume'")
print("'wbcc' : 'White Blood Cell Count'")
print("'rbcc' : 'Red Blood Cell Count'")
print("'htn'  : 'Hypertension'")
print("'dm'   : 'Diabetes Mellitus'")
print("'cad'  : 'Coronary Artery Disease'")
print("'appet': 'Appetite'")
print("'pe'   : 'Pedal Edema'")
print("'ane'  : 'Anemia'")
print("'class': 'Chronic Kidney Disease (CKD)'\n")


# Function to take user input and predict kidney disease
def predict_kidney_disease():
    input_data = []

    # Selecting a subset of important columns for user input
   # important_columns = ['age', 'bp', 'al', 'rbc', 'pcc', 'hemo', 'pcv','pe', 'ane']

    for column in X:
        if column in ['rbc', 'pc']:
            print(f"Enter {column} (normal/abnormal):")
            value = input().strip().lower()
            while value not in ['normal', 'abnormal']:
                print(f"Please enter 'normal' or 'abnormal' for {column}:")
                value = input().strip().lower()
            input_data.append(value)
        elif column in ['htn', 'dm', 'cad', 'appet']:
            print(f"Enter {column} (yes/no):")
            value = input().strip().lower()
            while value not in ['yes', 'no']:
                print(f"Please enter 'yes' or 'no' for {column}:")
                value = input().strip().lower()
            input_data.append(value)
        elif column in ['pcc', 'ba']:
            print(f"Enter {column} (present/notpresent):")
            value = input().strip().lower()
            while value not in ['present', 'notpresent']:
                print(f"Please enter 'present' or 'notpresent' for {column}:")
                value = input().strip().lower()
            input_data.append(value)
        elif column in ['appet']:
            print(f"Enter {column} (good/poor):")
            value = input().strip().lower()
            while value not in ['good', 'poor']:
                print(f"Please enter 'good' or 'poor' for {column}:")
                value = input().strip().lower()
            input_data.append(value)
        elif column in ['htn', 'dm', 'cad', 'pe', 'ane']:
            print(f"Enter {column} (yes/no):")
            value = input().strip().lower()
            while value not in ['yes', 'no']:
                print(f"Please enter 'yes' or 'no' for {column}:")
                value = input().strip().lower()
            input_data.append(value)
        else:
            print(f"Enter {column}:")
            value = input()
            while not value.replace(".", "").isdigit():
                print(f"Please enter a numeric value for {column}:")
                value = input()
            input_data.append(value)

    # Preprocess input data
    input_df = pd.DataFrame([input_data], columns=X.columns)
    input_df = input_df.apply(handle_unknowns, axis=1)
    for col in input_df.columns:
        if col in label_encoders:
            input_df[col] = label_encoders[col].transform(input_df[col])
        else:
            input_df[col] = pd.to_numeric(input_df[col], errors='coerce')

    input_df = pd.DataFrame(imputer.transform(input_df), columns=input_df.columns)
    input_scaled = scaler.transform(input_df)

    # Making predictions
    knn_pred = knn.predict(input_scaled)[0]
    nb_pred = nb.predict(input_scaled)[0]

    # Print final prediction
    if knn_pred == nb_pred:
      print("\nHigh chances of chronic kidney disease")
    else:
        print("\nlow chances of chronic kidney disease")
# Call the function to take user input and predict
predict_kidney_disease()

ABBREVIATIONS USED :

'age'  : 'Age'
'bp'   : 'Blood Pressure (mmHg)'
'al'   : 'Albumin Levels in Urine'
'su'   : 'Sugar Levels in Urine'
'rbc'  : 'Red Blood Cell Count'
'pc'   : 'Pus Cell Count in Urine'
'pcc'  : 'Presence of Pus Cell Clumps'
'ba'   : 'Presence of Bacteria'
'bgr'  : 'Blood Glucose Random'
'bu'   : 'Blood Urea'
'sc'   : 'Serum Creatinine'
'sod'  : 'Sodium'
'pot'  : 'Potassium'
'hemo' : 'Hemoglobin'
'pcv'  : 'Packed Cell Volume'
'wbcc' : 'White Blood Cell Count'
'rbcc' : 'Red Blood Cell Count'
'htn'  : 'Hypertension'
'dm'   : 'Diabetes Mellitus'
'cad'  : 'Coronary Artery Disease'
'appet': 'Appetite'
'pe'   : 'Pedal Edema'
'ane'  : 'Anemia'
'class': 'Chronic Kidney Disease (CKD)'

Enter age:
22
Enter bp:
3
Enter al:
2
Enter rbc (normal/abnormal):
normal
Enter pc (normal/abnormal):
abnormal
Enter pcc (present/notpresent):
present
Enter ba (present/notpresent):
present
Enter hemo:
45
Enter pcv:
3
Enter wbcc:
10000
Enter rbcc:
323
Enter cad (yes/no):
yes
Enter appet (yes/no