In [1]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# --- Data ---------------------------------------------------------------
# Read the full table; the "target" column is the label, every other
# column is used as a feature.
df = pd.read_csv("dataset.csv")
y = df["target"]
X = df.drop(columns="target")

# --- Hold-out split -----------------------------------------------------
# 80/20 split with a fixed seed for repeatability; stratifying on y keeps
# the class proportions the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# --- Feature scaling ----------------------------------------------------
# The scaler learns its mean/variance from the training partition only and
# is then applied unchanged to both partitions, so no test-set statistics
# reach the model.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Model --------------------------------------------------------------
# max_iter is raised to 1000 to give the solver more iterations to converge.
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# --- Evaluation ---------------------------------------------------------
# Accuracy on the held-out partition.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)

# --- Persistence --------------------------------------------------------
# The model and its fitted scaler are stored together as a (model, scaler)
# tuple: anything that loads the file must scale inputs with this same
# scaler before calling the model.
joblib.dump((model, scaler), "heart_models.pkl")

print("Model and scaler saved successfully")


Model Accuracy: 0.8032786885245902
Model and scaler saved successfully


In [3]:
import pandas as pd

# Reload the dataset so this cell can be run on its own.
df = pd.read_csv("dataset.csv")

# Number of rows per target value (class balance).
print(df["target"].value_counts())

# Per-class means of a few features. The columns are selected *before*
# aggregating so that only these three are averaged: this avoids computing
# means for every column and avoids a TypeError on pandas >= 2.0 if the CSV
# contains any non-numeric column.
print(df.groupby("target")[["age", "chol", "thalach"]].mean())



target
1    165
0    138
Name: count, dtype: int64
              age        chol     thalach
target                                   
0       56.601449  251.086957  139.101449
1       52.496970  242.230303  158.466667
