In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tkinter as tk
from tkinter import messagebox
from fpdf import FPDF

In [44]:
# Location of the cleaned MedPredict dataset; the raw string keeps the Windows
# backslashes literal.
# NOTE(review): this is an absolute, user-specific path, so the notebook only runs
# unchanged on a machine with this exact folder layout.
dataset_path = r'C:\Users\Jashwanth\OneDrive\Documents\MEDPREDICT\Finalised Data\Medpredict_Dataset_Cleaned.csv'
data = pd.read_csv(dataset_path)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0,Gender,Hemoglobin,MCH,MCHC,MCV,Anemia,Age,BMI,HighChol,CholCheck,...,Stroke,HighBP,Diabetes,id,male,weight,height,case.id,futime,Fatty Liver
0,1,14.9,22.7,29.1,83.7,0.0,40,25.0,0.525703,0.975259,...,0.062171,0.563458,0.5,8784.215967,0.46732,86.35335,169.434949,8840.924478,2410.600547,0.077725
1,1,14.7,22.0,28.2,99.5,0.0,40,25.0,0.525703,0.975259,...,0.062171,0.563458,0.5,8784.215967,0.46732,86.35335,169.434949,8840.924478,2410.600547,0.077725
2,1,12.7,19.5,28.9,82.9,1.0,40,25.0,0.525703,0.975259,...,0.062171,0.563458,0.5,8784.215967,0.46732,86.35335,169.434949,8840.924478,2410.600547,0.077725
3,1,12.7,28.5,28.2,92.3,1.0,40,25.0,0.525703,0.975259,...,0.062171,0.563458,0.5,8784.215967,0.46732,86.35335,169.434949,8840.924478,2410.600547,0.077725
4,1,14.9,25.8,31.3,82.9,0.0,40,25.0,0.525703,0.975259,...,0.062171,0.563458,0.5,8784.215967,0.46732,86.35335,169.434949,8840.924478,2410.600547,0.077725


In [45]:
# Condition columns the model predicts, in the order the outputs are reported.
targets = [
    'HeartDiseaseorAttack',
    'Stroke',
    'Diabetes',
    'HighBP',
    'Fatty Liver',
    'Anemia',
]

# Input columns fed to the model; the GUI form collects its values in this same order.
# NOTE(review): 'Diabetes', 'HighBP' and 'Fatty Liver' appear both here and in `targets`,
# so those three outputs are predicted from their own column - confirm this is intended.
features = [
    'Age',
    'Gender',
    'BMI',
    'Hemoglobin',
    'CholCheck',
    'Smoker',
    'PhysActivity',
    'Diabetes',
    'HighBP',
    'Fatty Liver',
]

In [46]:
# Specialist to recommend for each condition, keyed by the target column name.
doc_mapping = dict([
    ('HeartDiseaseorAttack', 'Cardiologist'),
    ('Stroke', 'Neurologist'),
    ('Diabetes', 'Endocrinologist'),
    ('HighBP', 'Cardiologist / General Physician'),
    ('Fatty Liver', 'Gastroenterologist / Hepatologist'),
    ('Anemia', 'Hematologist / General Physician'),
])

In [47]:
# Feature matrix: the selected input columns, still unscaled at this point.
X = data[features]

# Binarise every target column at 0.5: values >= 0.5 become 1, everything else 0.
# A vectorised comparison replaces the deprecated element-wise DataFrame.applymap call;
# NaN compares False and therefore still maps to 0, exactly as the old lambda did.
y = (data[targets] >= 0.5).astype(int)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  y = data[targets].applymap(lambda x: 1 if x >= 0.5 else 0)


In [48]:
# Fit the scaler on the training split only, then apply that same fitted transform
# to both splits so the test rows never influence the scaling.
# NOTE(review): X_train / X_test are overwritten with their scaled versions, so
# re-running this cell without re-running the split would fit on already-scaled data.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [49]:
# Random forest with 100 estimators and a fixed seed, trained on the scaled training
# features against all target columns at once.
forest_settings = dict(n_estimators=100, random_state=42)
model = RandomForestClassifier(**forest_settings)
model.fit(X_train, y_train)

In [50]:
def _read_patient_inputs():
    """Parse the form fields into a list of numbers ordered like ``features``.

    Age, BMI and hemoglobin are read as floats; every other field is read as an int.

    Raises:
        ValueError: if any field does not contain a valid number of the expected kind.
    """
    return [
        float(age_entry.get()), int(gender_var.get()), float(bmi_entry.get()),
        float(hemoglobin_entry.get()), int(chol_check_var.get()), int(smoker_var.get()),
        int(phys_activity_var.get()), int(diabetes_var.get()), int(highbp_var.get()),
        int(fatty_liver_var.get())
    ]


def predict_disease():
    """Predict every condition for the values currently in the form and report the result.

    Steps: read the form, scale the values with the fitted ``scaler``, run ``model``,
    plot the 0/1 outcome per condition, show the risk levels (plus the specialist for
    each high-risk condition) in a message box, and write the PDF via ``generate_pdf``.

    Nothing is returned and nothing is raised to the caller: bad form input is reported
    as "Invalid Input", and any later failure (model, plot, PDF) is reported separately
    so it is not mislabelled as an input problem.
    """
    # Phase 1: only parsing errors are genuinely "invalid input".
    try:
        values = _read_patient_inputs()
    except ValueError as e:
        messagebox.showerror("Error", f"Invalid Input: {e}")
        return

    # Phase 2: prediction, plot and report.
    try:
        # The scaler was fitted on a DataFrame with the `features` columns; passing a
        # frame with the same column names avoids scikit-learn's feature-name warning
        # that a bare ndarray triggers on every prediction.
        input_frame = pd.DataFrame([values], columns=features)
        input_data = scaler.transform(input_frame)
        prediction = model.predict(input_data)

        risk_levels = []
        specialists = []
        results = []
        # prediction[0] holds one 0/1 flag per entry of `targets`, in the same order.
        for disease, flagged in zip(targets, prediction[0]):
            if flagged:
                risk_levels.append(f"{disease}: HIGH RISK")
                specialists.append(f"{disease}: {doc_mapping[disease]}")
                results.append(1)
            else:
                risk_levels.append(f"{disease}: LOW RISK")
                results.append(0)

        plt.figure(figsize=(8, 5))
        plt.plot(targets, results, marker='o', linestyle='-', color='b')
        plt.title("Disease Prediction Graph")
        plt.xlabel("Diseases")
        plt.ylabel("Prediction (0 = No, 1 = Yes)")
        plt.ylim(-0.2, 1.2)
        plt.grid()
        plt.show()

        messagebox.showinfo("Prediction Result", "\n".join(risk_levels + specialists))
        generate_pdf(risk_levels, specialists)
    except Exception as e:
        # Deliberately broad: this runs as a Tk button callback, so surface the problem
        # to the user instead of letting it vanish into the console.
        messagebox.showerror("Error", f"Could not complete prediction: {e}")

In [51]:
def generate_pdf(risks, doctors, filename="Prediction_Report.pdf"):
    """Write the prediction report to a PDF file and confirm with a message box.

    Args:
        risks: strings to print in the first section, one per line.
        doctors: strings to print under "Recommended Specialists:", one per line.
            May be empty, in which case a single "None" line is written so the
            heading is not left dangling.
        filename: path of the PDF to write. Defaults to the previously hard-coded
            "Prediction_Report.pdf", so existing two-argument calls behave as before.

    Any error raised while writing the file propagates to the caller.
    """
    doctors = list(doctors)

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, txt="MedPredict - Disease Prediction Report", ln=True, align='C')
    pdf.ln(10)
    for risk in risks:
        pdf.cell(200, 10, txt=risk, ln=True)
    pdf.ln(5)
    pdf.cell(200, 10, txt="Recommended Specialists:", ln=True)
    if doctors:
        for doc in doctors:
            pdf.cell(200, 10, txt=doc, ln=True)
    else:
        # No condition was flagged, so there is nobody to refer the patient to.
        pdf.cell(200, 10, txt="None", ln=True)
    pdf.output(filename)
    messagebox.showinfo("PDF Generated", f"Prediction report saved as '{filename}'")

In [52]:
# Light-grey background shared by the window and the widgets created in this cell.
window_bg = "#f0f0f0"

# Main application window.
root = tk.Tk()
root.title("MedPredict - Disease Prediction")
root.geometry("600x700")
root.configure(bg=window_bg)

# Heading shown at the top of the window.
header_label = tk.Label(
    root,
    text="Enter Patient Details",
    font=("Arial", 16, "bold"),
    bg=window_bg,
)
header_label.pack(pady=10)

# Container that the labelled input rows are packed into.
frame = tk.Frame(root, bg=window_bg)
frame.pack(pady=10)

In [53]:
# Captions for the form fields, one per model input and in the same order as `features`.
labels = [
    "Age",
    "Gender (1=Male, 0=Female)",
    "BMI",
    "Hemoglobin",
    "Cholesterol Check (1=Yes, 0=No)",
    "Smoker (1=Yes, 0=No)",
    "Physical Activity (1=Yes, 0=No)",
    "Diabetes (1=Yes, 0=No)",
    "High Blood Pressure (1=Yes, 0=No)",
    "Fatty Liver (1=Yes, 0=No)",
]

In [54]:
def _add_input_row(parent, caption):
    """Pack one labelled text entry into `parent` and return the StringVar behind it.

    The entry starts out holding "0".
    """
    row_frame = tk.Frame(parent, bg="#f0f0f0")
    row_frame.pack(fill="x", pady=5)
    tk.Label(row_frame, text=caption+":", width=30, anchor="w", bg="#f0f0f0").pack(side="left")
    field_var = tk.StringVar(value="0")
    tk.Entry(row_frame, textvariable=field_var, width=10).pack(side="right")
    return field_var


# One StringVar per caption, in the same order as `labels`.
variables = [_add_input_row(frame, caption) for caption in labels]

In [55]:
# Give each field's StringVar a name; the order follows `labels`.
# Note that the `*_entry` names hold StringVars, not Entry widgets.
(
    age_entry,
    gender_var,
    bmi_entry,
    hemoglobin_entry,
    chol_check_var,
    smoker_var,
    phys_activity_var,
    diabetes_var,
    highbp_var,
    fatty_liver_var,
) = variables

# Button that runs the prediction for the values currently in the form.
predict_button = tk.Button(
    root,
    text="Predict Disease",
    command=predict_disease,
    font=("Arial", 12, "bold"),
    bg="#4CAF50",
    fg="white",
    padx=10,
    pady=5,
)
predict_button.pack(pady=15)

# Label bound to `result_text`.
# NOTE(review): nothing in the visible cells ever sets `result_text`, so this label
# appears to stay empty - confirm whether it is still needed.
result_text = tk.StringVar()
result_label = tk.Label(
    root,
    textvariable=result_text,
    font=("Arial", 12),
    bg="#f0f0f0",
    wraplength=550,
    justify="left",
)
result_label.pack(pady=10)

In [57]:
# Hand control to Tk's event loop so the window built in the cells above can respond
# to user input (e.g. the "Predict Disease" button).
root.mainloop()