In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib  # model save karne ke liye

In [6]:
df = pd.read_csv("processed_heart.csv")
print(df)
df.head(3)

           age  sex  chest_pain_type  resting_blood_pressure  cholestoral  \
0    -0.268437    1                3               -0.377636    -0.659332   
1    -0.158157    1                3                0.479107    -0.833861   
2     1.716595    1                3                0.764688    -1.396233   
3     0.724079    1                3                0.936037    -0.833861   
4     0.834359    0                3                0.364875     0.930822   
...        ...  ...              ...                     ...          ...   
1020  0.503520    1                1                0.479107    -0.484803   
1021  0.613800    1                3               -0.377636     0.232705   
1022 -0.819834    1                3               -1.234378     0.562371   
1023 -0.488996    0                3               -1.234378     0.155137   
1024 -0.047877    1                3               -0.663216    -1.124743   

      fasting_blood_sugar  rest_ecg  Max_heart_rate  exercise_induced_angin

Unnamed: 0,age,sex,chest_pain_type,resting_blood_pressure,cholestoral,fasting_blood_sugar,rest_ecg,Max_heart_rate,exercise_induced_angina,oldpeak,slope,vessels_colored_by_flourosopy,thalassemia,target
0,-0.268437,1,3,-0.377636,-0.659332,1,2,0.821321,0,-0.060888,0,3,3,0
1,-0.158157,1,3,0.479107,-0.833861,0,1,0.255968,1,1.727137,2,4,3,0
2,1.716595,1,3,0.764688,-1.396233,1,2,-1.048692,1,1.301417,2,4,3,0


# Features & Target



In [7]:
X = df.drop("target", axis=1)  # all columns except target
y = df["target"]               # target column

# Train/Test Split


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


# Model Training


In [9]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions


In [10]:
y_pred = model.predict(X_test)


# Evaluation:

In [11]:
acc = accuracy_score(y_test, y_pred)
print(f" Accuracy: {acc*100:.2f}%\n")
print(" Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\n Classification Report:")
print(classification_report(y_test, y_pred))

 Accuracy: 100.00%

 Confusion Matrix:
[[100   0]
 [  0 105]]

 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       100
           1       1.00      1.00      1.00       105

    accuracy                           1.00       205
   macro avg       1.00      1.00      1.00       205
weighted avg       1.00      1.00      1.00       205



# Save trained model


In [12]:
joblib.dump(model, "heart_disease_model.pkl")
print("\n Model saved as 'heart_disease_model.pkl'")


 Model saved as 'heart_disease_model.pkl'


# MLOps Pipeline for Heart Disease Prediction:
# Baseline Performance Logging

In [13]:
report_dict = classification_report(y_test, y_pred, output_dict=True)

baseline_metrics = {
    "accuracy": [acc],
    "precision_0": [report_dict['0']['precision']],
    "recall_0": [report_dict['0']['recall']],
    "f1_0": [report_dict['0']['f1-score']],
    "precision_1": [report_dict['1']['precision']],
    "recall_1": [report_dict['1']['recall']],
    "f1_1": [report_dict['1']['f1-score']]
}

baseline_df = pd.DataFrame(baseline_metrics)
baseline_df.to_csv("baseline_model_performance.csv", index=False)
print("Baseline performance saved as 'baseline_model_performance.csv'")

Baseline performance saved as 'baseline_model_performance.csv'


In [14]:
import joblib

# Load trained model
model = joblib.load(r"C:\Users\Zain\Research-Paper-project\heart_disease_model.pkl")

# Test prediction example
import pandas as pd
df = pd.read_csv(r"C:\Users\Zain\Research-Paper-project\processed_heart.csv")
sample = df.drop("target", axis=1).iloc[[0]]

prediction = model.predict(sample)
print("Predicted target:", prediction[0])

Predicted target: 0


In [1]:
import pandas as pd
data = pd.read_csv("processed_heart.csv")
print(list(data.columns))

['age', 'sex', 'chest_pain_type', 'resting_blood_pressure', 'cholestoral', 'fasting_blood_sugar', 'rest_ecg', 'Max_heart_rate', 'exercise_induced_angina', 'oldpeak', 'slope', 'vessels_colored_by_flourosopy', 'thalassemia', 'target']
