In [None]:
# Task 3: Predict if a person is at risk of heart disease
# (We use scikit-learn's built-in breast cancer dataset as a stand-in
# binary-classification example.)

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score

# Step 1: Load the built-in dataset into a single DataFrame.
# Label encoding follows data.target_names; for this dataset that is
# 0 = malignant and 1 = benign, so class 1 is the *benign* class.
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    target=data.target
)

# Step 2: Separate features from labels, then hold out 20% of the rows for
# testing. The fixed random_state keeps the split reproducible.
X = df.drop(columns='target')
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Fit a Logistic Regression classifier on the training split.
# The features are used unscaled, so a high iteration cap gives the solver
# room to converge.
model = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# Step 4: Score the held-out split.
y_pred = model.predict(X_test)
# Column 1 of predict_proba is the predicted probability of class label 1.
y_proba = model.predict_proba(X_test)[:, 1]

# Accuracy: fraction of test rows classified correctly.
print("Accuracy:", accuracy_score(y_test, y_pred))

# Confusion matrix: rows are true labels, columns are predicted labels,
# both ordered by label value (0, then 1).
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# ROC AUC: how well the predicted probabilities rank the two classes.
print("ROC AUC Score:", roc_auc_score(y_test, y_proba))
