In [None]:
import joblib
import pandas as pd

# Function to preprocess data for prediction
def preprocess_data_for_prediction(data, disease_type):
    """Align, encode and scale raw input so it matches the training layout.

    Args:
        data: DataFrame holding the raw feature values. It is not modified;
            all work happens on a new frame.
        disease_type: Prefix used to locate the saved artifacts under
            ``models/`` (``<disease_type>_label_encoders.pkl``,
            ``<disease_type>_scaler.pkl``, ``<disease_type>_features.pkl``).

    Returns:
        Whatever ``scaler.transform`` returns for the prepared frame.

    Raises:
        Errors raised by ``joblib.load`` or by the encoder/scaler
        ``transform`` calls propagate unchanged.
    """
    # NOTE: joblib.load unpickles its input, so only artifacts from a trusted
    # location should be loaded and disease_type should be validated by the
    # caller before it is interpolated into these paths.
    encoder = joblib.load(f"models/{disease_type}_label_encoders.pkl")
    scaler = joblib.load(f"models/{disease_type}_scaler.pkl")

    # Load the feature names used during training
    feature_names = joblib.load(f"models/{disease_type}_features.pkl")  # Saved in training

    # reindex builds a *new* frame that has exactly the training columns, in
    # training order, with any column missing from the input filled with 0.
    # This avoids mutating the caller's DataFrame and avoids assigning into a
    # slice of it further down.
    data = data.reindex(columns=list(feature_names), fill_value=0)

    # Encode categorical features. `encoder` is indexed by column name and each
    # entry exposes transform(); presumably these are fitted LabelEncoders.
    for col in data.select_dtypes(include=['object']).columns:
        if col in encoder:
            data[col] = encoder[col].transform(data[col])

    # Standardize the features
    return scaler.transform(data)

# Function to classify disease
def classify_disease():
    """Interactively collect patient data and print a disease prediction.

    Prompts for a disease type (diabetes/heart/kidney), then for that
    disease's features, preprocesses the answers with
    ``preprocess_data_for_prediction`` and prints the outcome of the saved
    model's ``predict``.

    Returns:
        None. An unknown disease type or an unparsable answer prints a
        message and returns early instead of raising.
    """
    def normalize_text(raw):
        # Same normalization as the disease type, so "Normal " and "normal"
        # are treated alike before reaching the encoder.
        return raw.strip().lower()

    # (column name, prompt, converter) per disease, in the order the questions
    # are asked and the DataFrame columns are laid out.
    field_specs = {
        "diabetes": [
            ('Pregnancies', "Enter number of pregnancies: ", int),
            ('Glucose', "Enter glucose level: ", float),
            ('BloodPressure', "Enter blood pressure: ", float),
            ('SkinThickness', "Enter skin thickness: ", float),
            ('Insulin', "Enter insulin level: ", float),
            ('BMI', "Enter BMI: ", float),
            ('DiabetesPedigreeFunction', "Enter diabetes pedigree function: ", float),
            ('Age', "Enter age: ", int),
        ],
        "heart": [
            ('Age', "Enter age: ", int),
            ('Sex', "Enter sex (0 for female, 1 for male): ", int),
            ('Chest Pain', "Enter chest pain type (0-3): ", int),
            ('Cholesterol', "Enter cholesterol level: ", float),
        ],
        "kidney": [
            ('Age', "Enter age: ", int),
            ('BloodPressure', "Enter blood pressure: ", float),
            ('BloodSugar', "Enter blood sugar level: ", float),
            ('RBC', "Enter red blood cell count (normal/abnormal): ", normalize_text),
        ],
    }

    disease_type = input("Enter disease type (diabetes/heart/kidney): ").strip().lower()

    if disease_type not in field_specs:
        print("Invalid disease type!")
        return

    # Collect user input; stop with a readable message on the first answer
    # that cannot be converted rather than surfacing a raw traceback.
    columns = []
    values = []
    for column, prompt, convert in field_specs[disease_type]:
        raw = input(prompt)
        try:
            value = convert(raw)
        except ValueError:
            print(f"Invalid value for {column}: {raw!r}")
            return
        columns.append(column)
        values.append(value)

    data = pd.DataFrame([values], columns=columns)

    # Preprocess the data for prediction
    data_scaled = preprocess_data_for_prediction(data, disease_type)

    # Load the trained model
    # NOTE(review): the encoders, scaler and feature list are read from
    # "models/", but the model is read from the working directory. Confirm
    # where training saves it before changing this path.
    model = joblib.load(f"{disease_type}_xgb_model.pkl")

    # Predict
    prediction = model.predict(data_scaled)

    if prediction[0] == 1:
        print("\nYou might have the disease. Please consult a doctor.")
    else:
        print("\nYou do not have the disease.")

# Run the classification function only when this file is executed directly
# (script or notebook cell), so that importing it does not block on input().
if __name__ == "__main__":
    classify_disease()
