# Notebook - Heart Disease Logistic Regression (LR) Analysis

**Escuela Colombiana de Ingeniería Julio Garavito**  
**Student:** Santiago Botero García

## Prerequisites

In [None]:
%pip install kagglehub[pandas-datasets] numpy matplotlib qiskit

Collecting matplotlib
  Downloading matplotlib-3.10.8-cp311-cp311-win_amd64.whl.metadata (52 kB)
Collecting qiskit
  Downloading qiskit-2.3.0-cp310-abi3-win_amd64.whl.metadata (13 kB)
Collecting kagglehub[pandas-datasets]
  Using cached kagglehub-0.4.1-py3-none-any.whl.metadata (38 kB)
Collecting kagglesdk<1.0,>=0.1.14 (from kagglehub[pandas-datasets])
  Using cached kagglesdk-0.1.15-py3-none-any.whl.metadata (13 kB)
Collecting pyyaml (from kagglehub[pandas-datasets])
  Using cached pyyaml-6.0.3-cp311-cp311-win_amd64.whl.metadata (2.4 kB)
Collecting requests (from kagglehub[pandas-datasets])
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting tqdm (from kagglehub[pandas-datasets])
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting pandas (from kagglehub[pandas-datasets])
  Downloading pandas-3.0.0-cp311-cp311-win_amd64.whl.metadata (19 kB)
Collecting protobuf (from kagglesdk<1.0,>=0.1.14->kagglehub[pandas-datasets])
  Using cached protobuf-

## 1. Load and Prepare the Dataset

In [None]:
import kagglehub

# Download the latest version of the "neurocipher/heartdisease" dataset through
# kagglehub; `path` holds whatever dataset_download returns for the files.
path = kagglehub.dataset_download("neurocipher/heartdisease")

# Show where the dataset files ended up so they can be located later.
print("Path to dataset files:", path)

## 2. Implement Basic Logistic Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from abc import ABC, abstractmethod


class Regression(ABC):
    """Abstract interface for models that expose a static ``predict``."""

    # ``staticmethod`` must be the outermost decorator so that the abstract
    # flag set by ``abstractmethod`` is still visible to ``ABC``.
    @staticmethod
    @abstractmethod
    def predict(x, w, b):
        """Return the model output for ``x`` given weights ``w`` and bias ``b``.

        Declared static (no ``self``) so the abstract signature matches the
        ``@staticmethod`` overrides provided by concrete subclasses.
        Subclasses must override this method before they can be instantiated.
        """

class LinearRegression(Regression):
    """Regression model whose prediction is ``w * x + b``."""

    @staticmethod
    def predict(x, w, b):
        """Scale ``x`` by ``w`` and shift the result by ``b``."""
        scaled = w * x
        return scaled + b

class PolynomialRegression(Regression):
    """Regression model whose prediction is ``x @ w + b``."""

    @staticmethod
    def predict(x, w, b):
        """Take the matrix product of ``x`` and ``w`` and add ``b``."""
        projected = x @ w
        return projected + b

class CostFunction:
    """Cost of a sigmoid model, averaged over the rows of ``X``."""

    @staticmethod
    def compute_cost(w, b, X, y):
        """Return the mean binary cross-entropy of ``sigmoid(X @ w + b)`` vs ``y``.

        ``X`` must be two-dimensional (its shape is unpacked into two values);
        the first dimension is used as the number of examples.
        """
        num_examples, _ = X.shape

        # Model output per example: sigmoid(w^T x^{(i)} + b)
        probabilities = ActivationFunction.sigmoid(X @ w + b)

        # Keep probabilities strictly inside (0, 1) so neither log sees 0.
        eps = 1e-8
        safe = np.clip(probabilities, eps, 1 - eps)

        log_likelihood = y * np.log(safe) + (1 - y) * np.log(1 - safe)
        return -(1 / num_examples) * np.sum(log_likelihood)

class ActivationFunction:
    """Activation functions used by the logistic-regression code."""

    @staticmethod
    def sigmoid(z):
        """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

        Computed in a numerically stable way: only ``exp(-|z|)`` is ever
        evaluated, so large-magnitude inputs cannot overflow ``exp`` (the
        naive formula overflows for very negative ``z``).

        Args:
            z: Real scalar or array-like of real values.

        Returns:
            A NumPy scalar for scalar input, otherwise an array with the
            shape of ``z``.
        """
        # exp(-|z|) always lies in (0, 1], so both branches below are safe to
        # evaluate for every element (np.where computes both eagerly).
        decay = np.exp(-np.abs(z))
        denom = 1 + decay
        result = np.where(np.greater_equal(z, 0), 1 / denom, decay / denom)
        # Indexing with () unwraps a 0-d array into a NumPy scalar and leaves
        # arrays of higher rank unchanged.
        return result[()]

class Gradient:
    """Gradient computation and batch gradient descent for the sigmoid model."""

    @staticmethod
    def compute_gradient(w, b, X, y):
        """Return ``(dj_dw, dj_db)``, the cost gradients w.r.t. ``w`` and ``b``.

        ``X`` must be two-dimensional; its first dimension is the number of
        examples the gradients are averaged over.
        """
        num_examples, _ = X.shape

        # Residual per example: sigmoid(w^T x^{(i)} + b) - y^{(i)}
        residual = ActivationFunction.sigmoid(X @ w + b) - y

        grad_w = (1 / num_examples) * (X.T @ residual)
        grad_b = (1 / num_examples) * np.sum(residual)
        return grad_w, grad_b

    @staticmethod
    def gradient_descent(X, y, w_init, b_init, alpha, num_iters, print_every=0):
        """Run ``num_iters`` gradient-descent steps with learning rate ``alpha``.

        Starts from a copy of ``w_init`` and from ``b_init``. After every
        update the cost is recorded; when ``print_every`` is positive, the
        cost is printed every ``print_every`` iterations and on the last one.

        Returns:
            ``(w, b, j_history)`` where ``j_history`` lists the cost after
            each update.
        """
        w, b = w_init.copy(), b_init
        j_history = []
        last_iteration = num_iters - 1

        for i in range(num_iters):
            grad_w, grad_b = Gradient.compute_gradient(w, b, X, y)
            w, b = w - alpha * grad_w, b - alpha * grad_b

            # Cost is measured with the freshly updated parameters.
            J = CostFunction.compute_cost(w, b, X, y)
            j_history.append(J)

            should_report = print_every > 0 and (
                i % print_every == 0 or i == last_iteration
            )
            if should_report:
                print(f"Iteration {i:4d}: J(w, b) = {J:.4f}")

        return w, b, j_history


## 3. Visualize Decision Boundaries

## 4. Repeat with Regularization