In [None]:
# Smoke test: confirm the kernel can import pandas before running the rest of the notebook.
import pandas as pd
print("Ready!")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


plt.style.use('seaborn-v0_8')
%matplotlib inline

In [None]:

# Load the heart-disease table and attach column names.
# The file is read as header-less (header=None), so the labels are supplied here.
# NOTE(review): header=None treats the first line of the file as data — confirm that
# heart.csv really has no header row.
cols = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
]

df = pd.read_csv('../data/heart.csv', names=cols, header=None)

print("Shape:", df.shape)
# Last expression of the cell: rich preview of the first rows.
df.head()

In [None]:
# Print a structural summary of the DataFrame: column names, non-null counts,
# dtypes and memory usage. This runs before the cleaning cell, so any column that
# still contains non-numeric placeholders will presumably show up with dtype `object`.
df.info()

In [None]:
# Clean the raw table in two steps:
#   1. Turn '?' placeholders into NaN and coerce every column to a numeric dtype.
#   2. Fill missing feature values: mode for discrete code columns, median otherwise.
# The original cell stopped after step 1 even though its comment promised the fill;
# any NaN left behind makes LogisticRegression.fit raise in the modelling cell.
df = df.replace('?', np.nan).apply(pd.to_numeric)

print("Missing values:")
print(df.isnull().sum())

# Columns treated as integer category codes rather than continuous measurements.
# NOTE(review): inferred from the column names — confirm against the data dictionary.
discrete_cols = {'sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'}

# NOTE(review): fill statistics are computed on the full table, i.e. before the
# train/test split. For a strict evaluation, fit an imputer on the training fold only.
for col in [c for c in df.columns if c != 'target']:
    observed = df[col].dropna()
    if observed.empty or len(observed) == len(df):
        # Either nothing to estimate a fill value from, or nothing to fill.
        continue
    fill_value = observed.mode().iloc[0] if col in discrete_cols else observed.median()
    df[col] = df[col].fillna(fill_value)

# Re-running this cell is safe: once filled, the loop above finds nothing to do.
remaining = int(df.drop(columns='target').isnull().sum().sum())
print("\nMissing feature values after filling:", remaining)

# NOTE(review): later cells label 'target' as binary (No Disease / Disease). If
# heart.csv is a raw export where 'target' takes more than two values, it must be
# binarised before modelling — confirm against the data source.

In [None]:
# Figure 1: class balance (left panel) and age histogram stacked by class (right panel).
fig, (ax_target, ax_age) = plt.subplots(1, 2, figsize=(12, 5))

sns.countplot(data=df, x='target', palette='Set2', ax=ax_target)
ax_target.set_title('Heart Disease Distribution')
# Replace the raw 0/1 tick labels with readable class names.
ax_target.set_xticks([0, 1])
ax_target.set_xticklabels(['No Disease', 'Disease'])

sns.histplot(
    data=df, x='age', hue='target',
    multiple='stack', bins=20, palette='Set1',
    ax=ax_age,
)
ax_age.set_title('Age Distribution by Disease')

fig.tight_layout()
plt.show()

# Figure 2: annotated heatmap of the pairwise column correlations from df.corr().
corr_fig, corr_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt='.2f', ax=corr_ax)
corr_ax.set_title('Feature Correlation Matrix')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Separate the label column from the predictors.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows for testing; stratify on the label so both splits keep
# the same class proportions, and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f"Train size: {X_train.shape[0]}, Test size: {X_test.shape[0]}")

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Fit a logistic-regression classifier on the training split.
# fit() returns the estimator itself, so construction and training are chained.
logreg = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

# Score the held-out rows and report the fraction classified correctly.
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.3f} ({accuracy*100:.1f}%)")

In [None]:
# Confusion matrix of test-set labels versus predictions, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

cm_fig, cm_ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix')
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')
plt.show()

# Per-class precision / recall / F1 table.
# target_names supplies exactly two labels, so the target is expected to be binary.
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=['No Disease', 'Disease'])
print(report)

In [None]:
import joblib
import os

# Persist the fitted classifier with joblib so it can be reloaded without retraining.
model_dir = '../models'
model_path = '../models/logistic_regression_heart.pkl'

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(model_dir, exist_ok=True)
joblib.dump(logreg, model_path)

# NOTE(review): the message omits the leading '../' of the path actually written;
# presumably it is meant relative to the project root — confirm.
print("Model saved to models/logistic_regression_heart.pkl")

In [None]:

# Demonstrate inference on a single held-out row.
# head(1) keeps the row as a one-row DataFrame, so column names reach the model.
example_patient = X_test.head(1)

predicted_labels = logreg.predict(example_patient)
class_probabilities = logreg.predict_proba(example_patient)

# One row in, so take the first (only) entry of each result.
prediction = predicted_labels[0]
probability = class_probabilities[0]

print("Example patient features:")
print(example_patient)
print(f"\nPrediction: {'Heart Disease' if prediction==1 else 'No Disease'}")
# Index 1 is read as the "disease" class, matching the `prediction==1` check above.
print(f"Probability of Disease: {probability[1]:.2f}")