# **Import Libraries**

In [None]:
import numpy as np
import pandas as pd

# **Mount Google Drive**

In [None]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem under /content/drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


# **Load Dataset From Google Drive**

In [None]:
# Location of the merged medical dataset on the mounted Drive
file_path = '/content/drive/MyDrive/dataset/merged_medical_data.csv'

# Read the CSV into the working DataFrame, then show it as the cell output
df = pd.read_csv(filepath_or_buffer=file_path)
df

Unnamed: 0,Gender,Disease,Glucose,Cholesterol,Hemoglobin,Platelets,White Blood Cells,Red Blood Cells,Hematocrit,Mean Corpuscular Volume,...,Triglycerides,HbA1c,LDL Cholesterol,HDL Cholesterol,ALT,AST,Heart Rate,Creatinine,Troponin,C-reactive Protein
0,Female,Healthy,126.133433,135.697821,17.223487,413126.659043,9633.481003,4.308518,40.112301,96.647243,...,102.038729,4.262777,94.413633,50.442121,15.021353,11.581306,74.124497,1.192936,0.030856,1.596364
1,Male,Liver Disease,73.076612,172.129506,14.380013,231268.420207,7935.663167,4.640516,40.330323,96.490459,...,56.520280,4.520946,76.787424,57.526713,73.418336,45.925814,81.824868,0.858017,0.013965,8.376125
2,Male,Liver Disease,88.990913,134.445934,14.007950,187658.874598,4738.452192,4.783822,48.188792,99.837856,...,147.221341,5.662226,96.326715,48.354164,43.857592,74.892397,72.178234,0.662849,0.013128,8.923041
3,Male,Hypercholesterolemia,81.433721,211.139585,15.633140,305052.014174,5007.593726,4.782938,48.181803,99.202799,...,159.887170,4.283218,197.321307,26.534159,17.922907,23.583538,92.310347,1.087095,0.027800,0.619570
4,Male,Liver Disease,78.649430,127.499634,17.116293,256392.808544,9380.790272,4.556660,50.100075,99.339765,...,86.092050,5.111424,107.699464,49.603548,54.851907,51.221055,78.347281,0.940594,0.029538,11.769606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349147,Male,Diabetes & Thalassemia & Anemia & Thrombocytop...,195.429648,283.571617,6.774627,90910.494038,5895.710504,3.301981,31.125314,68.959258,...,153.236683,8.434946,156.350440,32.855673,43.267858,51.551276,92.303219,3.949515,6.246726,9.184126
349148,Female,Diabetes & Thalassemia & Anemia & Thrombocytop...,192.904533,241.271032,7.459403,91886.849596,7423.406474,3.993353,34.686714,63.084707,...,202.124608,7.483220,138.705103,20.355824,46.100786,54.871108,100.319396,4.250698,9.767200,4.024387
349149,Female,Diabetes & Thalassemia & Anemia & Thrombocytop...,153.460912,248.549461,6.685512,105112.286240,8376.210372,3.192933,33.157727,67.634913,...,176.486043,11.893319,176.264933,20.965457,32.989377,39.858546,94.365884,1.976259,1.533946,3.546476
349150,Female,Diabetes & Thalassemia & Anemia & Thrombocytop...,160.192502,213.882117,8.938067,50117.113296,7994.611990,3.565482,25.963072,71.161767,...,172.085867,8.768746,141.181859,30.762153,45.856040,31.767431,104.236236,1.601472,2.456239,4.357553


# **Train Model Using XGBoost**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import LabelEncoder # Import LabelEncoder

from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

# 'Disease' is a nominal label (one of several disease names), so this is a
# multi-class classification problem. A classifier is used because regressing on
# label-encoded ids treats arbitrary class numbers as ordered quantities, and the
# rounded output can land on an unrelated class or outside the encoder's range.

# Model inputs, in training order ('Gender' is the only categorical column).
# Assumes `df` was loaded by the dataset cell above.
features = ['Gender', 'Glucose', 'Cholesterol', 'Hemoglobin', 'Platelets',
            'White Blood Cells', 'Red Blood Cells', 'Hematocrit', 'Mean Corpuscular Volume',
            'Mean Corpuscular Hemoglobin', 'Mean Corpuscular Hemoglobin Concentration',
            'Insulin', 'BMI', 'Systolic Blood Pressure', 'Diastolic Blood Pressure',
            'Triglycerides', 'HbA1c', 'LDL Cholesterol', 'HDL Cholesterol', 'ALT', 'AST',
            'Heart Rate', 'Creatinine', 'Troponin', 'C-reactive Protein']

# Select features and target
X = df[features].copy()  # Avoid modifying the original DataFrame

# Encode disease names as integer class ids; inverse_transform maps them back later
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Disease'])

# One-hot encode Gender; drop_first removes the first category, leaving
# Gender_Male when the values are Female/Male
X = pd.get_dummies(X, columns=['Gender'], drop_first=True)

# Train-test split (80% training, 20% testing). Stratify whenever every class has
# at least two rows, so each disease is represented in the training split.
class_counts = np.bincount(y)
stratify_labels = y if class_counts.min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Impute missing values with the TRAINING means only, so no statistics leak from
# the test split into the model inputs.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Initialize and train the XGBoost classifier
xgb = XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42, tree_method="hist")
xgb.fit(X_train, y_train)

# Evaluate on the held-out split; predictions are encoded class ids
xgb_predictions = xgb.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_predictions)
print(f"XGBoost Classification Accuracy: {xgb_accuracy:.4f}")

# Persist the fitted model for the dashboard cell
joblib.dump(xgb, "disease_model.pkl")

XGBoost Regression R² Score: 0.9724


['disease_model.pkl']

In [None]:
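# The recorded run reported an R² score of 0.9724 on the held-out test split.
# R² is a regression metric computed here on label-encoded class ids, so treat it as a
# rough fit indicator rather than as classification accuracy.
# The saved model can be reloaded later with joblib.load.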

# **Install Libraries**

In [None]:
pip install gradio

Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

# **Save Label Encoder**

In [None]:
# Import joblib; the saved model is loaded with it later on

In [None]:
import joblib
# Persist the fitted label encoder so encoded predictions can be decoded after reloading
joblib.dump(value=label_encoder, filename="label_encoder.pkl")

['label_encoder.pkl']

# **Checking Trained Model**

In [None]:
import numpy as np
import pandas as pd

# Function to take user input and predict disease
def predict_disease(model=None, encoder=None, reference=None, input_fn=input):
    """Prompt for one patient's values, run the model, and print the predicted disease.

    Every argument is optional, so the zero-argument call keeps working.

    Args:
        model: Fitted estimator exposing ``predict``. Defaults to the notebook-level
            ``xgb``.
        encoder: Fitted label encoder exposing ``classes_`` and ``inverse_transform``.
            Defaults to the notebook-level ``label_encoder``.
        reference: Encoded training feature frame. It supplies the column order given
            to the model and the column means substituted for values entered as 0.
            Defaults to the notebook-level ``X``.
        input_fn: Callable that receives a prompt string and returns the answer
            (defaults to the built-in ``input``).

    Returns:
        None. The decoded disease label is printed.
    """
    # Fall back to the objects created by the training cell.
    model = xgb if model is None else model
    encoder = label_encoder if encoder is None else encoder
    reference = X if reference is None else reference

    # Numeric feature names, in the order they are prompted for
    numeric_features = [
        'Glucose', 'Cholesterol', 'Hemoglobin', 'Platelets',
        'White Blood Cells', 'Red Blood Cells', 'Hematocrit', 'Mean Corpuscular Volume',
        'Mean Corpuscular Hemoglobin', 'Mean Corpuscular Hemoglobin Concentration',
        'Insulin', 'BMI', 'Systolic Blood Pressure', 'Diastolic Blood Pressure',
        'Triglycerides', 'HbA1c', 'LDL Cholesterol', 'HDL Cholesterol', 'ALT', 'AST',
        'Heart Rate', 'Creatinine', 'Troponin', 'C-reactive Protein'
    ]

    print("\nEnter patient details (Enter 0 if unknown):")

    # Gender is encoded as a single indicator: 1 for Male, 0 for anything else
    gender = input_fn("Gender: ").strip().lower()
    user_data = {'Gender_Male': 1 if gender == 'male' else 0}

    for feature in numeric_features:
        # Re-prompt on a typo instead of aborting the whole session
        while True:
            raw = input_fn(f"{feature}: ").strip()
            try:
                value = float(raw)
                break
            except ValueError:
                print(f"  '{raw}' is not a number; please try again.")

        # 0 is the "unknown" sentinel: substitute the reference column mean
        if value == 0:
            value = reference[feature].mean()

        user_data[feature] = value

    # Arrange the columns exactly as the reference frame has them
    user_df = pd.DataFrame([user_data])[list(reference.columns)]

    # Predict the encoded class
    raw_prediction = float(model.predict(user_df)[0])

    # Clamp to a valid class index: a continuous output can round to a value outside
    # the encoder's range, which would make inverse_transform raise.
    class_index = int(round(raw_prediction))
    class_index = min(max(class_index, 0), len(encoder.classes_) - 1)

    # Convert prediction back to original label
    predicted_label = encoder.inverse_transform([class_index])[0]

    print(f"\n🔹 Predicted Disease: **{predicted_label}**")

# Run the interactive prompt once; it asks for every feature via input() and prints the prediction
predict_disease()



Enter patient details (Enter 0 if unknown):
Gender: female
Glucose: 82.155
Cholesterol: 158.77
Hemoglobin: 10.72
Platelets: 153955
White Blood Cells: 10967
Red Blood Cells: 3.82
Hematocrit: 25.86
Mean Corpuscular Volume: 73
Mean Corpuscular Hemoglobin: 27
Mean Corpuscular Hemoglobin Concentration: 30
Insulin: 10.66
BMI: 20.15
Systolic Blood Pressure: 115.105
Diastolic Blood Pressure: 65.201
Triglycerides: 146.76
HbA1c: 5.49
LDL Cholesterol: 124.068
HDL Cholesterol: 46.82
ALT: 22.43
AST: 29.15
Heart Rate: 72.23
Creatinine: 0.95
Troponin: 0.022
C-reactive Protein: 1.74

🔹 Predicted Disease: **Anemia & Thrombocytopenia & Hypertension & Coronary Artery Disease & Liver Disease & Kidney Disease & Heart Attack Risk**


In [None]:
# Inputs were entered and the function gave a proper output

# **Make Gradio Dashboard**

In [None]:
# Build a Gradio dashboard to view the prediction output

In [None]:
import gradio as gr
import numpy as np
import pandas as pd
import joblib

# Artifacts written earlier in this notebook: the fitted model, and the encoder
# that maps its integer outputs back to disease names
MODEL_FILE = "disease_model.pkl"
ENCODER_FILE = "label_encoder.pkl"

model = joblib.load(MODEL_FILE)
label_encoder = joblib.load(ENCODER_FILE)

# The mean-value lookup below reads `df`, which must already be in memory;
# re-enable this line to read the dataset again here
# df = pd.read_csv('/content/drive/MyDrive/dataset/merged_medical_data.csv')

# Input columns in the order the dashboard collects them ('Gender' first)
features = [
    'Gender',
    'Glucose',
    'Cholesterol',
    'Hemoglobin',
    'Platelets',
    'White Blood Cells',
    'Red Blood Cells',
    'Hematocrit',
    'Mean Corpuscular Volume',
    'Mean Corpuscular Hemoglobin',
    'Mean Corpuscular Hemoglobin Concentration',
    'Insulin',
    'BMI',
    'Systolic Blood Pressure',
    'Diastolic Blood Pressure',
    'Triglycerides',
    'HbA1c',
    'LDL Cholesterol',
    'HDL Cholesterol',
    'ALT',
    'AST',
    'Heart Rate',
    'Creatinine',
    'Troponin',
    'C-reactive Protein',
]

# Replace missing values (0 or None) with the mean of that column
def handle_missing_values(data, reference=None, columns=None):
    """Fill "unknown" entries of one input row with reference column means.

    An entry counts as unknown when it is ``0`` (the dashboard's sentinel) or
    ``None`` (what an empty numeric field yields). String entries are never touched,
    and an unknown entry is only replaced when its reference column is numeric, so a
    categorical column such as 'Gender' cannot trigger a mean over strings.

    Args:
        data: List of input values, positionally aligned with ``columns``.
            It is modified in place.
        reference: DataFrame supplying the column means. Defaults to the
            notebook-level ``df``.
        columns: Column names matching the positions in ``data``. Defaults to the
            notebook-level ``features``.

    Returns:
        The same ``data`` list, after replacement.
    """
    reference = df if reference is None else reference
    columns = features if columns is None else columns

    for i, value in enumerate(data):
        if isinstance(value, str):
            continue  # categorical answer, nothing to impute
        if value is None or value == 0:
            column = reference[columns[i]]
            if pd.api.types.is_numeric_dtype(column):
                data[i] = column.mean()
    return data

# Prediction function
def predict_disease(*inputs):
    """Predict the disease for one set of dashboard inputs.

    Args:
        *inputs: One value per entry of the notebook-level ``features`` list, in the
            same order (Gender first, then the numeric measurements).

    Returns:
        A string of the form ``"Predicted Disease: <label>"``.

    Raises:
        ValueError: If the number of inputs differs from the number of features.
    """
    if len(inputs) != len(features):
        raise ValueError(
            f"Expected {len(features)} inputs, got {len(inputs)}"
        )

    # Replace unknown values with column means
    processed_data = handle_missing_values(list(inputs))
    record = dict(zip(features, processed_data))

    # Encode Gender explicitly. pd.get_dummies(..., drop_first=True) on a single row
    # drops the only category present, which made every patient Gender_Male = 0.
    gender = str(record.pop('Gender', '')).strip().lower()
    record['Gender_Male'] = 1 if gender == 'male' else 0

    # Numeric columns first, Gender_Male last: the layout get_dummies produced for
    # the training frame. astype(float) turns any leftover None into NaN.
    input_df = pd.DataFrame([record]).astype(float)

    # Predict the encoded class
    prediction_encoded = float(model.predict(input_df)[0])

    # Clamp to a valid class index so inverse_transform cannot fail on a value
    # that rounds outside the encoder's range
    last_index = len(label_encoder.classes_) - 1
    class_index = min(max(int(round(prediction_encoded)), 0), last_index)

    # Decode predicted label to disease name
    predicted_disease = label_encoder.inverse_transform([class_index])[0]
    return f"Predicted Disease: {predicted_disease}"

# Build the dashboard widgets: a two-option dropdown for Gender, followed by one
# numeric field for each remaining feature
gender_field = gr.Dropdown(['Female', 'Male'], label="Gender")
numeric_fields = [gr.Number(label=name) for name in features[1:]]
inputs = [gender_field] + numeric_fields

# Single text box that shows the prediction string
output = gr.Textbox(label="Disease Prediction")

# Wire the prediction function to the widgets and start the app
demo = gr.Interface(
    fn=predict_disease,
    inputs=inputs,
    outputs=output,
    title="Disease Prediction Dashboard",
)
demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b89598945c3410044b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


