# Task 3 - Heart Disease Prediction

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, classification_report


# Apply seaborn's white-grid theme to every plot drawn after this point.
# `sns.set` is only a legacy alias of `set_theme` (seaborn >= 0.11) and is
# documented as subject to removal, so call the preferred name directly.
sns.set_theme(style='whitegrid')

In [None]:
# Ask for the dataset through Colab's upload widget. The result is keyed by
# uploaded filename.
from google.colab import files
uploaded = files.upload()


# Fail with a clear message instead of an opaque IndexError when the upload
# dialog is dismissed without selecting a file.
if not uploaded:
    raise RuntimeError('No file was uploaded; re-run this cell and select the heart disease CSV.')

# Read the first uploaded file by name, as before; this relies on the upload
# being available under that filename in the working directory.
heart = pd.read_csv(next(iter(uploaded)))

In [None]:
# Structural overview of the dataset: size, column names, first rows,
# dtypes/non-null counts and summary statistics.
print('\nShape:', heart.shape)
print('\nColumns:', list(heart.columns))
print('\nHead:\n', heart.head())
print('\nInfo:')
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
heart.info()
print('\nDescribe:\n', heart.describe())


# Check missing values
print('\nMissing values per column:')
print(heart.isnull().sum())

In [None]:
# Exploratory plots: pairwise relationships, feature correlations, and the
# per-class distribution of the numeric columns.
numeric_cols = heart.select_dtypes(include=[np.number]).columns.tolist()


# Pairplot
sns.pairplot(heart, hue='target', diag_kind='hist')
plt.show()


# Correlation heatmap
# Correlate the numeric columns only: pandas >= 2.0 raises from
# DataFrame.corr() when a non-numeric column is present. When every column is
# numeric this is the same matrix as heart.corr().
plt.figure(figsize=(12, 8))
sns.heatmap(heart[numeric_cols].corr(), annot=False, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


# Boxplots
# Plot up to nine numeric features (3x3 grid) against the label; the label
# itself is skipped because "target vs target" carries no information.
boxplot_cols = [col for col in numeric_cols if col != 'target'][:9]
plt.figure(figsize=(14, 10))
for i, col in enumerate(boxplot_cols, 1):
    plt.subplot(3, 3, i)
    sns.boxplot(data=heart, y=col, x='target')
    plt.title(f'{col} vs Target')
# Adjust spacing once, after every subplot exists, instead of on each iteration.
plt.tight_layout()
plt.show()

In [None]:
# Separate the predictors from the label column.
X = heart.drop('target', axis=1)
y = heart['target']


# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/std used to transform the training data
# (data leakage), which makes the reported test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix scaled with the training statistics; kept under its
# original name in case another cell refers to it. Do not fit models on it.
X_scaled = scaler.transform(X)

In [None]:
# Logistic regression: fit on the training split, then report accuracy, the
# per-class report, a confusion matrix and the ROC curve on the test split.

log_reg = LogisticRegression(max_iter=200)
log_reg.fit(X_train, y_train)
log_preds = log_reg.predict(X_test)


print('\nLogistic Regression Accuracy:', accuracy_score(y_test, log_preds))
print('\nClassification Report:\n', classification_report(y_test, log_preds))


# Confusion Matrix
# Open a dedicated figure (as the EDA plots do) so the heatmap never lands on
# axes left over from an earlier plot. confusion_matrix puts the true labels
# on rows and the predictions on columns, hence the axis labels.
cm = confusion_matrix(y_test, log_preds)
plt.figure()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix - Logistic Regression')
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()


# ROC Curve
# Column 1 of predict_proba is the score of the second class in
# log_reg.classes_ -- assumes a binary target whose positive class sorts last.
log_probs = log_reg.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, log_probs)
plt.figure()
plt.plot(fpr, tpr)
# Diagonal reference line drawn from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], linestyle='--')
plt.title('ROC Curve - Logistic Regression')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()
print('ROC AUC Score:', roc_auc_score(y_test, log_probs))



# Decision tree model: fit a depth-limited tree on the training split, then
# report accuracy, the per-class report, a confusion matrix and the ROC curve
# on the test split.

tree = DecisionTreeClassifier(max_depth=5, random_state=42)
tree.fit(X_train, y_train)
tree_preds = tree.predict(X_test)


print('\nDecision Tree Accuracy:', accuracy_score(y_test, tree_preds))
print('\nClassification Report:\n', classification_report(y_test, tree_preds))


# Confusion Matrix
# Open a dedicated figure (as the EDA plots do) so the heatmap never lands on
# axes left over from an earlier plot. confusion_matrix puts the true labels
# on rows and the predictions on columns, hence the axis labels.
cm_tree = confusion_matrix(y_test, tree_preds)
plt.figure()
sns.heatmap(cm_tree, annot=True, fmt='d', cmap='Greens')
plt.title('Confusion Matrix - Decision Tree')
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()


# ROC Curve
# Column 1 of predict_proba is the score of the second class in
# tree.classes_ -- assumes a binary target whose positive class sorts last.
tree_probs = tree.predict_proba(X_test)[:, 1]
fpr2, tpr2, _ = roc_curve(y_test, tree_probs)
plt.figure()
plt.plot(fpr2, tpr2)
# Diagonal reference line drawn from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], linestyle='--')
plt.title('ROC Curve - Decision Tree')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()
print('ROC AUC Score:', roc_auc_score(y_test, tree_probs))

In [None]:
# Horizontal bar chart of the decision tree's per-feature importances, one
# bar per non-target column of the dataset.
importance = tree.feature_importances_
feature_names = heart.drop('target', axis=1).columns

plt.figure(figsize=(10, 6))
plt.barh(feature_names, importance)
plt.title('Feature Importance (Decision Tree)')
plt.show()