<a href="https://colab.research.google.com/github/TechJas/Heart-Disease-Mini-Prediction-Model-/blob/main/heart_dataset_expo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# ==========================================
# 🫀 HEART DISEASE PREDICTION MODEL
# Author: Jasmine Banu M
# Works in Google Colab & VS Code
# ==========================================

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib

# ==========================================
# 🗂️ 1. FILE UPLOAD (CSV or JSON)
# ==========================================

data_file = "medical_dataset_saved.pkl"
filename = None


def _pick_dataset_file():
    """Ask the user for a dataset file and return its path.

    Uses the google.colab upload helper when that package can be imported,
    otherwise falls back to a tkinter file dialog (VS Code / local Python).

    Returns:
        The chosen file name/path, or None when nothing was uploaded or the
        dialog was cancelled.
    """
    # Only the import is guarded: a failure inside the upload itself should
    # surface instead of silently switching to the desktop dialog.
    try:
        from google.colab import files
    except ImportError:
        files = None

    if files is not None:
        print("💡 Please upload your dataset (CSV or JSON)...")
        uploaded = files.upload()  # opens upload icon
        chosen = next(iter(uploaded), None)  # None when nothing was uploaded
        if chosen:
            print(f"📁 Uploaded file: {chosen}")
        return chosen

    # For VS Code / local Python
    import tkinter as tk
    from tkinter import filedialog

    root = tk.Tk()
    root.withdraw()  # hide the empty root window, only the dialog is wanted
    try:
        print("💡 Choose your file (CSV or JSON) from dialog window...")
        chosen = filedialog.askopenfilename(
            filetypes=[("CSV files", "*.csv"), ("JSON files", "*.json")]
        )
    finally:
        root.destroy()  # release the Tk instance once the dialog is closed
    print(f"📁 Selected: {chosen}")
    return chosen or None


def _read_dataset(path):
    """Load *path* into a DataFrame, choosing the reader from its extension.

    The extension check is case-insensitive, so ``DATA.CSV`` is accepted.

    Raises:
        ValueError: if the file is neither ``.csv`` nor ``.json``.
    """
    lowered = path.lower()
    if lowered.endswith(".csv"):
        return pd.read_csv(path)
    if lowered.endswith(".json"):
        return pd.read_json(path)
    raise ValueError("❌ Please upload a CSV or JSON file.")


def _acquire_new_dataset(saved_message):
    """Prompt for a file, load it, cache it in ``data_file`` and report success.

    Args:
        saved_message: Text printed after the dataset has been cached.

    Returns:
        A ``(path, dataframe)`` tuple.

    Raises:
        FileNotFoundError: if the user did not provide a file.
        ValueError: if the file type is unsupported.
    """
    path = _pick_dataset_file()
    if not path:
        raise FileNotFoundError("❌ No valid dataset selected or found.")
    frame = _read_dataset(path)
    joblib.dump(frame, data_file)  # save dataset for reuse on the next run
    print(saved_message)
    return path, frame


# Check if saved data exists first
if os.path.exists(data_file):
    print("✅ Existing saved dataset found.")
    print("\n📂 Choose dataset option:")
    print("1️⃣  Upload a new dataset")
    print("2️⃣  Use existing dataset (previously uploaded and saved)")
    choice = input("Enter 1 or 2: ").strip()

    if choice == "1":
        filename, df = _acquire_new_dataset("💾 New dataset saved!")
    elif choice == "2":
        # NOTE(security): joblib.load unpickles the file; only use a cache
        # file that this script wrote itself.
        df = joblib.load(data_file)
    else:
        raise ValueError("❌ Invalid choice. Please enter 1 or 2.")
else:
    # If no saved data exists, always prompt for upload
    filename, df = _acquire_new_dataset("💾 Dataset saved for next time!")


# ==========================================
# 🧩 2. DATA PREPARATION
# ==========================================
print("\n🧩 Preparing dataset...")

# Basic cleaning: normalise every column label in a single pass
# (trimmed, lower-case, spaces and hyphens turned into underscores,
# e.g. "CK-MB" -> "ck_mb"). str() guards against non-string labels.
df.columns = [
    str(col).strip().lower().replace(" ", "_").replace("-", "_")
    for col in df.columns
]

# The target column is mandatory
if "result" not in df.columns:
    raise KeyError("⚠️ 'Result' column missing from dataset.")

# Define features (already written in the normalised naming scheme).
# The order here is the column order of X.
features = ['age', 'gender', 'heart_rate', 'systolic_blood_pressure',
            'diastolic_blood_pressure', 'blood_sugar', 'ck_mb', 'troponin']

# Fail early with a readable message instead of a raw pandas KeyError
missing_features = [name for name in features if name not in df.columns]
if missing_features:
    raise KeyError(
        f"⚠️ Feature column(s) missing from dataset: {missing_features}"
    )

# Encode target. LabelEncoder numbers the distinct labels in sorted order,
# so this yields negative=0 / positive=1 — assuming the column holds the
# labels "negative" and "positive" (TODO confirm against the dataset).
le = LabelEncoder()
df["result"] = le.fit_transform(df["result"])

X = df[features]
y = df["result"]

# ==========================================
# 🧮 3. TRAIN THE MODEL
# ==========================================
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the preprocessing statistics (data leakage).
# Same test_size / random_state as before, so the same rows are held out.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: learn mean/std from the training rows only, then apply
# the same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later code that expects the fully scaled feature matrix.
X_scaled = scaler.transform(X)

model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)
acc = model.score(X_test, y_test)  # mean accuracy on the held-out 20%
print(f"✅ Model trained successfully! Accuracy: {acc*100:.2f}%")

# Save model & scaler
joblib.dump(model, "heart_model.pkl")
joblib.dump(scaler, "scaler.pkl")

# ==========================================
# 🧠 4. PREDICTION SECTION
# ==========================================
print("\n🩺 Enter new patient details for prediction:")

# Prompt text that switches get_input() into gender parsing mode.
_GENDER_PROMPT = "Gender (1=Male, 0=Female)"
# Words accepted for the gender question, mapped to the numeric code.
_GENDER_WORDS = {"female": 0, "f": 0, "male": 1, "m": 1}


def get_input(prompt, dtype=float):
    """Prompt the user once and return the answer converted to *dtype*.

    Surrounding whitespace is ignored. For the gender prompt the answer may
    be a word ("male"/"female"/"m"/"f", any letter case) or the numeric code;
    only the codes 0 and 1 are accepted.

    Args:
        prompt: Text shown to the user (a ": " suffix is appended).
        dtype: Callable used to convert the answer (ignored for gender,
            which always returns an int).

    Returns:
        The converted value.

    Raises:
        ValueError: if the answer cannot be converted, or the gender answer
            is not one of the accepted values.
    """
    user_input = input(f"{prompt}: ").strip()

    if prompt == _GENDER_PROMPT:
        word = user_input.casefold()
        if word in _GENDER_WORDS:
            return _GENDER_WORDS[word]
        try:
            code = int(user_input)
        except ValueError:
            code = None
        if code not in (0, 1):
            raise ValueError(
                f"❌ Gender must be 1/Male or 0/Female, got {user_input!r}."
            )
        return code

    try:
        return dtype(user_input)
    except ValueError as err:
        type_name = getattr(dtype, "__name__", dtype)
        raise ValueError(
            f"❌ Invalid value for {prompt!r}: {user_input!r} is not a valid {type_name}."
        ) from err

# Collect one patient's measurements interactively.
age = get_input("Age (years)", int)
gender = get_input("Gender (1=Male, 0=Female)", int)
heart_rate = get_input("Heart rate (bpm)", float)
sbp = get_input("Systolic blood pressure (mmHg)", float)
dbp = get_input("Diastolic blood pressure (mmHg)", float)
sugar = get_input("Blood sugar (mg/dL)", float)
ckmb = get_input("CK-MB (ng/mL)", float)
troponin = get_input("Troponin (ng/mL)", float)

# One row, in the same column order as the training feature list.
input_data = np.array([[age, gender, heart_rate, sbp, dbp, sugar, ckmb, troponin]])

# Reload the persisted artefacts so the prediction uses exactly what was saved.
# NOTE(security): joblib.load unpickles; only load files this script wrote.
scaler = joblib.load("scaler.pkl")
model = joblib.load("heart_model.pkl")

# When the scaler was fitted on a DataFrame it remembers the column names;
# hand it a matching DataFrame so scikit-learn can validate the columns
# instead of warning about missing feature names.
fitted_names = getattr(scaler, "feature_names_in_", None)
if fitted_names is not None:
    input_frame = pd.DataFrame(input_data, columns=fitted_names)
else:
    input_frame = input_data

input_scaled = scaler.transform(input_frame)
pred = model.predict(input_scaled)[0]

# Confidence = probability of the class that was actually predicted.
# predict_proba columns follow model.classes_, so look the column up there
# rather than always reporting the probability of class 1.
class_probs = model.predict_proba(input_scaled)[0]
prob = class_probs[list(model.classes_).index(pred)]

result_text = "🩸 Positive (Heart Disease Detected)" if pred == 1 else "💚 Negative (No Heart Disease)"
print("\n🔍 Prediction Result:", result_text)
print(f"📊 Confidence: {prob*100:.2f}%")

# ==========================================
# END
# ==========================================

✅ Existing saved dataset found.

📂 Choose dataset option:
1️⃣  Upload a new dataset
2️⃣  Use existing dataset (previously uploaded and saved)
Enter 1 or 2: 2

🧩 Preparing dataset...
✅ Model trained successfully! Accuracy: 98.11%

🩺 Enter new patient details for prediction:
Age (years): 25
Gender (1=Male, 0=Female): 0
Heart rate (bpm): 85
Systolic blood pressure (mmHg): 120
Diastolic blood pressure (mmHg): 80
Blood sugar (mg/dL): 85
CK-MB (ng/mL): 0.2
Troponin (ng/mL): 1.8

🔍 Prediction Result: 🩸 Positive (Heart Disease Detected)
📊 Confidence: 88.50%


