In [None]:
import pandas as pd

# Load the three per-disease CSV files into separate DataFrames; the
# unpacking order below matches the order of the paths.
diabetes_df, heart_df, liver_df = map(
    pd.read_csv,
    (
        "/content/Diabetes Disease Analysis.csv",
        "/content/Heart Disease Analysis.csv",
        "/content/Liver Disease Analysis.csv",
    ),
)

In [None]:
# Encode the liver table's Gender labels as integers: "Male" -> 1, "Female" -> 0.
# Series.map turns any value that is not a key of the mapping into NaN, so
# running this cell a second time on already-encoded data would blank the column.
liver_df = liver_df.assign(
    Gender=liver_df["Gender"].map({"Male": 1, "Female": 0})
)

In [None]:
# Give the tables a common vocabulary: the heart table's key columns become
# "Age"/"Gender", and each table's label column is renamed after its disease.
heart_df = heart_df.rename(
    columns={"age": "Age", "sex": "Gender", "target": "Heart_Disease"}
)
diabetes_df = diabetes_df.rename(columns={"Outcome": "Diabetes"})
liver_df = liver_df.rename(columns={"Dataset": "Liver_Disease"})

In [None]:
# Binarise the liver label: a value equal to 1 stays 1, anything else becomes 0.
liver_df["Liver_Disease"] = liver_df["Liver_Disease"].eq(1).astype("int64")

In [None]:
# Every diabetes row receives the placeholder Gender value -1.
# NOTE(review): -1 is not one of the 1/0 codes assigned to liver_df["Gender"],
# so diabetes rows cannot pair with liver rows when the tables are later merged
# on ["Age", "Gender"] -- confirm this is intended.
diabetes_df = diabetes_df.assign(Gender=-1)

# Move the two key columns to the front; the remaining columns keep their order.
diabetes_df = diabetes_df[
    ["Age", "Gender", *diabetes_df.columns.drop(["Age", "Gender"])]
]

In [None]:
# Outer-join the three tables on the shared ("Age", "Gender") keys.
# Rows with equal keys are paired with each other; a row with no partner is
# kept and receives NaN in the columns contributed by the other table.
merged_df = (
    diabetes_df
    .merge(heart_df, on=["Age", "Gender"], how="outer")
    .merge(liver_df, on=["Age", "Gender"], how="outer")
)

In [None]:
# Replace every NaN in the merged table with the sentinel value -1.
# This applies to all columns, including the three disease label columns.
merged_df = merged_df.fillna(-1)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every column except the join keys and the three label columns.
# NOTE(review): the scaler is fitted on the whole merged table, i.e. before the
# train/test split performed later in the notebook -- confirm this is acceptable.
scaler = MinMaxScaler()
columns_to_normalize = [
    name
    for name in merged_df.columns
    if name not in {"Age", "Gender", "Diabetes", "Heart_Disease", "Liver_Disease"}
]
merged_df[columns_to_normalize] = scaler.fit_transform(
    merged_df[columns_to_normalize]
)

In [None]:
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

df = merged_df

target_columns = ["Diabetes", "Heart_Disease", "Liver_Disease"]

# Features are every column that is not one of the disease labels.
X = df.drop(columns=target_columns)
Y = df[target_columns]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# merged_df.fillna(-1) filled the label columns as well, so a label of -1 means
# "this row carries no label for that disease". Those rows must not be learned
# or scored as a third class: doing so rewards the model for merely detecting
# which source table a row came from and inflates the reported accuracy.
MISSING_LABEL = -1

# Fit one independent random forest per disease, using only the training rows
# whose label for that disease is known, and persist each fitted model.
models = {}
for disease in target_columns:
    labelled = Y_train[disease] != MISSING_LABEL
    if not labelled.any():
        raise ValueError(f"No labelled training rows available for {disease}")
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train[labelled], Y_train.loc[labelled, disease])
    models[disease] = model
    joblib.dump(model, f"{disease}_model.pkl")

# Score each model on the held-out rows that actually have a label for it.
# NOTE(review): the outer merges can repeat one source row across several merged
# rows, so copies of the same patient may sit on both sides of the split --
# treat these accuracies as optimistic until that is checked.
for disease, model in models.items():
    labelled = Y_test[disease] != MISSING_LABEL
    if not labelled.any():
        print(f"Accuracy for {disease}: n/a (no labelled test rows)")
        continue
    Y_pred = model.predict(X_test[labelled])
    acc = accuracy_score(Y_test.loc[labelled, disease], Y_pred)
    print(f"Accuracy for {disease}: {acc:.2f}")

Accuracy for Diabetes: 0.97
Accuracy for Heart_Disease: 1.00
Accuracy for Liver_Disease: 0.99


In [None]:
import joblib

# Persist the feature column names, in training order, as a plain Python list.
joblib.dump(list(X.columns), "feature_columns.pkl")


['feature_columns.pkl']

In [None]:
import pandas as pd

def predict_diseases(patient_data):
    """Predict every disease in ``target_columns`` for a single patient.

    Parameters
    ----------
    patient_data : sequence
        Raw (un-normalised) feature values, one per feature column of ``X``
        and in the same order as ``X.columns``.

    Returns
    -------
    dict
        ``{disease: predicted_label}`` with one entry per name in
        ``target_columns``. If the number of values does not match the number
        of feature columns, or if any step raises, ``{"error": message}`` is
        returned instead of propagating the exception.
    """
    try:
        feature_columns = [col for col in X.columns if col not in target_columns]  # Exclude target columns
        if len(patient_data) != len(feature_columns):
            return {"error": f"Expected {len(feature_columns)} features, but got {len(patient_data)}"}

        patient_df = pd.DataFrame([patient_data], columns=feature_columns)

        # The models were fitted on features that had already been passed
        # through `scaler`; apply the same fitted transform here so the raw
        # patient values are on the scale the models were trained on.
        if len(columns_to_normalize) > 0:
            patient_df[columns_to_normalize] = scaler.transform(patient_df[columns_to_normalize])

        predictions = {disease: models[disease].predict(patient_df)[0] for disease in target_columns}
        return predictions
    except Exception as e:
        # Deliberate best-effort contract: report failures as an error dict.
        return {"error": str(e)}

# Example usage (replace the values with actual patient data).
# The list holds 28 values: one per feature column, in the order of X.columns.
new_patient = [
    30, 1, 5, 120, 70, 30, 120,
    25, 0.5, 0.6, 190, 1, 2.5, 1,
    250, 40, 30, 6, 4, 1.5, 0.8,
    90, 3, 0, 130, 7.2, 5, 2.8,
]
predicted_diseases = predict_diseases(new_patient)
print("\nPredicted Diseases for New Patient:", predicted_diseases)



Predicted Diseases for New Patient: {'Diabetes': 1.0, 'Heart_Disease': 0.0, 'Liver_Disease': 1.0}
