In [1]:
# 1. Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 2. Load the dataset
df = pd.read_csv('heart.csv')

# 3. Split features and target
X = df.drop('target', axis=1)
y = df['target']

# 4. Train-Test Split
# The split happens BEFORE scaling so that the held-out rows never influence
# the statistics learned by the scaler (avoids train/test leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. Feature Scaling (important for logistic regression)
# Fit on the training rows only; the test rows are merely transformed with
# the mean/std estimated from the training data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# 6. Initialize and Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

# 7. Make Predictions
y_pred = model.predict(X_test)

# 8. Evaluate the Model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# 9. Print the Results
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)


Accuracy: 0.7951219512195122
Confusion Matrix:
 [[73 29]
 [13 90]]
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.72      0.78       102
           1       0.76      0.87      0.81       103

    accuracy                           0.80       205
   macro avg       0.80      0.79      0.79       205
weighted avg       0.80      0.80      0.79       205



In [2]:
# 1. Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 2. Load the dataset
df = pd.read_csv('heart.csv')

# 3. Split features and target
X = df.drop('target', axis=1)
y = df['target']

# 4. Train-test split
# Done on the raw features, before any fitting, so the held-out rows cannot
# leak into the scaler's statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. Scale the features
# The scaler is fitted on the training rows only and then applied unchanged
# to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# 6. Initialize and train logistic regression model with balanced class weight
# max_iter is raised to 1000 to give the solver more iterations to converge.
model = LogisticRegression(class_weight='balanced', max_iter=1000)
model.fit(X_train, y_train)

# 7. Predict on the test set
y_pred = model.predict(X_test)

# 8. Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# 9. Print results
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)


Accuracy: 0.8
Confusion Matrix:
 [[75 27]
 [14 89]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.74      0.79       102
           1       0.77      0.86      0.81       103

    accuracy                           0.80       205
   macro avg       0.80      0.80      0.80       205
weighted avg       0.80      0.80      0.80       205



In [3]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

# NOTE: this cell relies on LogisticRegression, X_train, y_train, X_test and
# y_test already being defined by an earlier cell in the same kernel session.

# Candidate values for the regularisation parameter C
params = {'C': [0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validated search over C, using the class-balanced estimator
grid = GridSearchCV(
    estimator=LogisticRegression(class_weight='balanced', max_iter=1000),
    param_grid=params,
    cv=5,
)
grid.fit(X_train, y_train)

# Score the winning estimator on the held-out test set
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

# Report the selected hyper-parameter and the test-set metrics
print("Best C value:", grid.best_params_)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print("Classification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))


Best C value: {'C': 10}
Accuracy: 0.8
Confusion Matrix:
 [[75 27]
 [14 89]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.74      0.79       102
           1       0.77      0.86      0.81       103

    accuracy                           0.80       205
   macro avg       0.80      0.80      0.80       205
weighted avg       0.80      0.80      0.80       205



In [5]:
# 1. Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 2. Load dataset
df = pd.read_csv('heart.csv')
# NOTE(review): confirm heart.csv has no duplicated rows, e.g. with
# df.duplicated().sum(); duplicates that end up on both sides of the split
# would inflate the test scores reported below.

# 3. Features and target
X = df.drop('target', axis=1)
y = df['target']

# 4. Train-test split
# Performed on the raw features BEFORE any transformer is fitted, so the
# held-out rows cannot leak into the preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. Scale features
# Fit on the training rows only; the test rows are transformed with the
# statistics learned from the training data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# 6. Create polynomial interaction features
# interaction_only=True keeps the pairwise products but no squared terms;
# include_bias=False omits the constant column.
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_train = poly.fit_transform(X_train_scaled)
X_test = poly.transform(X_test_scaled)

# 7. GridSearchCV to tune regularization (C)
# NOTE(review): the cross-validation folds still share a scaler fitted on the
# whole training set; wrapping the preprocessing and the classifier in a
# Pipeline would remove that as well.
param_grid = {'C': [0.01, 0.1, 1, 10, 100]}
grid = GridSearchCV(LogisticRegression(class_weight='balanced', max_iter=1000), param_grid, cv=5)
grid.fit(X_train, y_train)

# 8. Predict using best model
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

# 9. Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# 10. Output
print("✅ Best C value:", grid.best_params_)
print("✅ Accuracy:", accuracy)
print("✅ Confusion Matrix:\n", conf_matrix)
print("✅ Classification Report:\n", report)


✅ Best C value: {'C': 100}
✅ Accuracy: 0.9853658536585366
✅ Confusion Matrix:
 [[102   0]
 [  3 100]]
✅ Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205



In [6]:
pip install joblib






[notice] A new release of pip is available: 25.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import joblib

# Persist the estimator currently bound to `best_model`.
joblib.dump(value=best_model, filename='heart_disease_logistic_model.pkl')

# Persist the fitted preprocessing objects as well, so that raw inputs can be
# transformed the same way at prediction time.
joblib.dump(value=scaler, filename='scaler.pkl')

# Last expression of the cell: its return value is echoed as the cell output.
joblib.dump(value=poly, filename='poly_features.pkl')


['poly_features.pkl']

In [8]:
# Load the persisted classifier and the two fitted preprocessing objects.
# NOTE: joblib.load unpickles arbitrary objects; only load files you trust.
model = joblib.load('heart_disease_logistic_model.pkl')
scaler = joblib.load('scaler.pkl')
poly = joblib.load('poly_features.pkl')

# Example: predict on new data
# One row of raw (unscaled) input. The values must be listed in the same
# column order as the features the scaler was fitted on.
new_data = [[63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]]  # one row of raw input

# Label the row with the column names recorded by the scaler at fit time.
# This makes a wrong number of values fail loudly and avoids scikit-learn's
# "X does not have valid feature names" warning for unlabeled input.
# Assumes the scaler was fitted on a DataFrame (so feature_names_in_ exists).
new_frame = pd.DataFrame(new_data, columns=scaler.feature_names_in_)

# Apply the same preprocessing chain used for training: scale, then expand
# into interaction features, then classify.
scaled = scaler.transform(new_frame)
poly_features = poly.transform(scaled)
prediction = model.predict(poly_features)
print("Prediction:", prediction)


Prediction: [1]


