Step 1 : Data collection and loading

In [7]:
import pandas as pd
df = pd.read_csv("C:/Users/User/Downloads/heart.csv")
print(" Dataset loaded successfully!")
print(df.head())


 Dataset loaded successfully!
   Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  MaxHR  \
0   40   M           ATA        140          289          0     Normal    172   
1   49   F           NAP        160          180          0     Normal    156   
2   37   M           ATA        130          283          0         ST     98   
3   48   F           ASY        138          214          0     Normal    108   
4   54   M           NAP        150          195          0     Normal    122   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              N      0.0       Up             0  
1              N      1.0     Flat             1  
2              N      0.0       Up             0  
3              Y      1.5     Flat             1  
4              N      0.0       Up             0  


Step 2 : Data exploration

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
import pickle
import sklearn

print("Using scikit-learn version:", sklearn.__version__)  

df = pd.read_csv("C:/Users/User/Downloads/heart.csv")
categorical_cols = ["Sex", "ChestPainType", "RestingECG", "ExerciseAngina", "ST_Slope"]

le = LabelEncoder()
for col in categorical_cols:
    df[col] = le.fit_transform(df[col])

X = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

with open("heart_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("Model saved with scikit-learn version:", sklearn.__version__)


Using scikit-learn version: 1.5.1
Model saved with scikit-learn version: 1.5.1


Step 3 : Model training

In [27]:
from sklearn.tree import DecisionTreeClassifier
import pickle
model = DecisionTreeClassifier()
model.fit(X_encoded, y)
with open("heart_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("✅ Model trained and saved as heart_model.pkl")


✅ Model trained and saved as heart_model.pkl


Step 4 : Model Training and Serialization with Label Encoding

In [25]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle
import sklearn

print("Saving with version:", sklearn.__version__)  

df = pd.read_csv("C:/Users/User/Downloads/heart.csv")
cat_cols = ["Sex", "ChestPainType", "RestingECG", "ExerciseAngina", "ST_Slope"]
le = LabelEncoder()
for col in cat_cols:
    df[col] = le.fit_transform(df[col])

X = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = DecisionTreeClassifier()
model.fit(X_train, y_train)

with open("C:/Users/User/Documents/heart_disease_app/heart_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("Model saved with scikit-learn version:", sklearn.__version__)


Saving with version: 1.5.1
Model saved with scikit-learn version: 1.5.1


Step 5 : Training and Exporting the Model Using One-Hot Encoding

In [22]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import pickle
df = pd.read_csv("C:/Users/User/Downloads/heart.csv")
X = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]
X_encoded = pd.get_dummies(X)
pd.Series(X_encoded.columns).to_csv("model_columns.csv", index=False, header=False)
print("Saved model feature columns to 'model_columns.csv'")

model = DecisionTreeClassifier()
model.fit(X_encoded, y)
with open("heart_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("Model trained and saved as heart_model.pkl")


Saved model feature columns to 'model_columns.csv'
Model trained and saved as heart_model.pkl


In [18]:
pd.Series(X_encoded.columns).to_csv("model_columns.csv", index=False, header=False)


In [23]:
encoded_columns = pd.Series(X_encoded.columns)
print("Encoded feature columns used for model training:\n")
print(encoded_columns.to_string(index=False))
encoded_columns.to_csv("model_columns.csv", index=False, header=False)
print("\nSaved model feature columns to 'model_columns.csv'")


Encoded feature columns used for model training:

              Age
        RestingBP
      Cholesterol
        FastingBS
            MaxHR
          Oldpeak
            Sex_F
            Sex_M
ChestPainType_ASY
ChestPainType_ATA
ChestPainType_NAP
 ChestPainType_TA
   RestingECG_LVH
RestingECG_Normal
    RestingECG_ST
 ExerciseAngina_N
 ExerciseAngina_Y
    ST_Slope_Down
    ST_Slope_Flat
      ST_Slope_Up

Saved model feature columns to 'model_columns.csv'


Step 6 : Flask app (backend)

In [24]:
from flask import Flask, render_template, request
import pickle
import pandas as pd

app = Flask(__name__)

# Load trained model
with open("heart_model.pkl", "rb") as f:
    model = pickle.load(f)

@app.route("/")
def home():
    return render_template("index.html")

@app.route("/predict", methods=["POST"])
def predict():
    input_data = {
        'Age': int(request.form['Age']),
        'Sex': request.form['Sex'],
        'ChestPainType': request.form['ChestPainType'],
        'RestingBP': int(request.form['RestingBP']),
        'Cholesterol': int(request.form['Cholesterol']),
        'FastingBS': int(request.form['FastingBS']),
        'RestingECG': request.form['RestingECG'],
        'MaxHR': int(request.form['MaxHR']),
        'ExerciseAngina': request.form['ExerciseAngina'],
        'Oldpeak': float(request.form['Oldpeak']),
        'ST_Slope': request.form['ST_Slope']
    }

    input_df = pd.DataFrame([input_data])
    input_encoded = pd.get_dummies(input_df)

    model_columns = pd.read_csv("model_columns.csv", header=None).squeeze().tolist()
    for col in model_columns:
        if col not in input_encoded.columns:
            input_encoded[col] = 0
    input_encoded = input_encoded[model_columns]

    prediction = model.predict(input_encoded)[0]
    result = "Positive for Heart Disease" if prediction == 1 else "Negative for Heart Disease"
    return render_template("index.html", prediction_text=f"Prediction: {result}")

if __name__ == "__main__":
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
