In [2]:
import pandas as pd
import numpy as np
import seaborn as sbn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
# Load the Heart-UCI CSV directly from GitHub (needs network access).
df = pd.read_csv(
    "https://raw.githubusercontent.com/sharmaroshan/Heart-UCI-Dataset/master/heart.csv"
)
# Show the first few rows as a quick sanity check of the columns.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [None]:
# Build the model inputs.
# Only 'age' is used as a predictor; the double brackets keep `x` a
# DataFrame (2-D) rather than a Series.
x = df[["age"]]
# Label column (per the notebook author: 0 = No Disease, 1 = Disease).
y = df["target"]

# Hold out 20% of the rows for testing and train on the remaining 80%.
# A fixed random_state makes the shuffle reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the feature (zero mean, unit variance).
# The scaler is fitted on the training split only and then reused on the
# test split, so no test-set statistics leak into preprocessing.
scaler = StandardScaler()

x_train_scaled = scaler.fit_transform(x_train)  # fit on train, then transform it
x_test_scaled = scaler.transform(x_test)  # apply the train-fitted scaler to test

# Fit a logistic-regression classifier on the scaled training data.
# max_iter=1100 raises the solver's iteration cap well above scikit-learn's
# default of 100, giving the optimizer ample room to converge.
model = LogisticRegression(max_iter=1100)
model.fit(x_train_scaled, y_train)

# Predict class labels for the held-out test rows.
y_pred = model.predict(x_test_scaled)

# Report per-class precision, recall, F1-score and support.
# sep="" stops print() from inserting a space after the "\n"; that extra
# space would shift the report's header row one column out of alignment
# with the rows beneath it.
print("Classification Report: \n", classification_report(y_test, y_pred), sep="")

Classification Report: 
               precision    recall  f1-score   support

           0       0.55      0.38      0.45        29
           1       0.56      0.72      0.63        32

    accuracy                           0.56        61
   macro avg       0.56      0.55      0.54        61
weighted avg       0.56      0.56      0.54        61



In [None]:
"""
Use cases of False Positives (FP) and False Negatives (FN):

1. Healthcare (e.g., Disease Detection):
   - False Positive (FP):
       * Model predicts "Disease" when actually there is "No Disease".
       * Consequence: Patient undergoes unnecessary stress, further tests, and treatments.
       * Example: Healthy patient diagnosed as having cancer.
   - False Negative (FN):
       * Model predicts "No Disease" when actually there is "Disease".
       * Consequence: Patient is sent home without treatment, disease progresses undetected.
       * Example: A cancer patient classified as healthy → delayed treatment, life-threatening.
   - In healthcare, **both FP and FN are dangerous**:
       * FP → harms through unnecessary procedures and costs.
       * FN → harms through missed diagnosis, sometimes fatal.

2. Email Spam Detection:
   - False Positive (FP):
       * Model predicts "Spam" when the email is actually "Not Spam".
       * Consequence: Important emails (job offers, client mails) go to spam folder.
       * Annoying but recoverable, since users can check spam folder.
   - False Negative (FN):
       * Model predicts "Not Spam" when the email is actually "Spam".
       * Consequence: Malicious/phishing/spam emails land in inbox.
       * Dangerous because users may click harmful links → hacking, fraud, malware.
   - In email filtering, **False Negatives are more dangerous**:
       * FN directly exposes the user to threats.
       * FP is just an inconvenience.

Summary:
- Healthcare → both FP and FN are critical (they affect the patient’s life and safety).
- Email Spam Filtering → FN is the most dangerous (it lets spam bypass the filter).
"""