In [3]:
import pandas as pd

def classify_blood_pressure(bp):
    """
    Classify Blood Pressure:
      < 120   = Normal
      120-129 = Elevated
      130-139 = High
      >= 140  = Very High
    """
    if bp < 120:
        return "Normal"
    elif bp <= 129:
        return "Elevated"
    elif bp <= 139:
        return "High"
    else:
        return "Very High"

def classify_cholesterol(chol):
    """
    Classify Cholesterol:
      < 200   = Normal
      200-239 = Elevated
      >= 240  = High
    """
    if chol < 200:
        return "Normal"
    elif chol <= 239:
        return "Elevated"
    else:
        return "High"

def classify_bmi(bmi):
    """
    Classify BMI:
      < 18.5    = Underweight
      18.5-24.9 = Normal
      25-29.9   = Overweight
      >= 30     = Obesity
    """
    if bmi < 18.5:
        return "Underweight"
    elif bmi <= 24.9:
        return "Normal"
    elif bmi <= 29.9:
        return "Overweight"
    else:
        return "Obesity"

def classify_sleep_hours(age, hours):
    """
    Classify Sleep Hours based on age-specific recommended ranges:
    
      Age   | Recommended Range
      1-2   | 11 to 14
      3-5   | 10 to 13
      6-13  |  9 to 11
      14-17 |  8 to 10
      18-25 |  7 to 9
      26-64 |  7 to 9
      65+   |  7 to 8
    
    Final Classification: Low / Normal / High
    """
    if 1 <= age <= 2:
        min_hr, max_hr = 11, 14
    elif 3 <= age <= 5:
        min_hr, max_hr = 10, 13
    elif 6 <= age <= 13:
        min_hr, max_hr = 9, 11
    elif 14 <= age <= 17:
        min_hr, max_hr = 8, 10
    elif 18 <= age <= 64:
        min_hr, max_hr = 7, 9
    else:  # age >= 65
        min_hr, max_hr = 7, 8

    if hours < min_hr:
        return "Low"
    elif hours > max_hr:
        return "High"
    else:
        return "Normal"

def classify_triglyceride(tg):
    """
    Classify Triglyceride:
      < 150 = Normal
      150 - 199 = Elevated
      200 - 499 = High
    """
    if tg < 150:
        return "Normal"
    elif tg <= 199:
        return "Elevated"
    else:
        return "High"

def classify_fasting_sugar(fs):
    """
    Classify Fasting Blood Sugar:
      < 100   = Normal
      100-125 = Elevated
      >= 126  = High
    """
    if fs < 100:
        return "Normal"
    elif fs <= 125:
        return "Elevated"
    else:
        return "High"

def classify_crp(crp):
    """
    Classify CRP Level:
      < 0.3   = Normal
      0.3-1.0 = Elevated
      1.0-10  = High
      > 10    = Very High
    """
    if crp < 0.3:
        return "Normal"
    elif crp <= 1.0:
        return "Elevated"
    elif crp <= 10:
        return "High"
    else:
        return "Very High"

def classify_homocysteine(h):
    """
    Classify Homocysteine Level:
      < 15   = Normal
      15-30  = Elevated
      30-100 = High
      > 100  = Very High
    """
    if h < 15:
        return "Normal"
    elif h <= 30:
        return "Elevated"
    elif h <= 100:
        return "High"
    else:
        return "Very High"

# 2. Read the dataset
df = pd.read_csv("./data/heart_disease_remove_empty.csv")  # Replace with your actual CSV filename

# 3. Create new columns with the classifications
df['Blood Pressure'] = df['Blood Pressure'].apply(classify_blood_pressure)
df['Cholesterol Level'] = df['Cholesterol Level'].apply(classify_cholesterol)
df['BMI'] = df['BMI'].apply(classify_bmi)
df['Sleep Hours'] = df.apply(lambda x: classify_sleep_hours(x['Age'], x['Sleep Hours']), axis=1)
df['Triglyceride Level'] = df['Triglyceride Level'].apply(classify_triglyceride)
df['Fasting Blood Sugar'] = df['Fasting Blood Sugar'].apply(classify_fasting_sugar)
df['CRP Level'] = df['CRP Level'].apply(classify_crp)
df['Homocysteine Level'] = df['Homocysteine Level'].apply(classify_homocysteine)

# 4. (Optional) Save the resulting DataFrame to a new CSV
df.to_csv("./data/heart_disease_manipulated.csv", index=False)


In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.utils.class_weight import compute_class_weight

# 1. Load Dataset
df = pd.read_csv("./data/heart_disease_manipulated.csv")

# 2. Separate Features (X) and Target (y)
#    Convert "Yes"/"No" target to 1/0
y = df["Heart Disease Status"].map({"Yes": 1, "No": 0})
X = df.drop(columns=["Heart Disease Status"])

# 3. One-Hot Encode Categorical Columns
#    This automatically handles columns like "Blood Pressure" (Normal, High, etc.)
X = pd.get_dummies(X, drop_first=False)  
# drop_first=False means keep all dummy columns. 
# If you prefer to avoid dummy-variable trap, set drop_first=True.

# 4. Optional Feature Selection
#    Select Top 10 Features
selector = SelectKBest(score_func=f_classif, k=10)
X_selected = selector.fit_transform(X, y)

# 5. Normalize Selected Features
#    (StandardScaler on the 10 selected features)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_selected)

# 6. Train / Validation / Test Split
#    70% Train, 15% Val, 15% Test
X_train, X_temp, y_train, y_temp = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)
print(f"Train size: {X_train.shape[0]}")
print(f"Validation size: {X_val.shape[0]}")
print(f"Test size: {X_test.shape[0]}")

# ------------------------------
# 7. Convert to PyTorch Tensors
# ------------------------------
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# ------------------------------
# 8. Compute Class Weights
#    For Imbalanced Data
# ------------------------------
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights = torch.tensor(class_weights, dtype=torch.float32)
# For binary classification with BCELoss, you can pass a scalar weight 
# if your classes are 0/1, but typically you’d need separate weighting 
# for each class in a multi-class scenario. 
# We'll use the weight for the "positive" class:
criterion = nn.BCELoss(weight=class_weights[1])  # weighting the '1' class

# ------------------------------
# 9. (Optional) Add Gaussian Noise to Training Data
#    Data Augmentation for numeric features
# ------------------------------
def add_noise(data, noise_level=0.02):
    noise = np.random.normal(0, noise_level, data.shape)
    return data + noise

X_train_noisy = add_noise(X_train, noise_level=0.02)
X_train_tensor = torch.tensor(X_train_noisy, dtype=torch.float32)

# ------------------------------
# 10. Define the Model
#     Using ReLU instead of Swish
# ------------------------------
class HeartDiseaseModel(nn.Module):
    def __init__(self, input_size):
        super(HeartDiseaseModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.bn1 = nn.BatchNorm1d(128)
        
        self.fc2 = nn.Linear(128, 256)
        self.bn2 = nn.BatchNorm1d(256)
        
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        
        self.fc4 = nn.Linear(128, 1)
        
        self.dropout = nn.Dropout(0.4)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.bn1(self.fc1(x))
        x = torch.relu(x)               # ReLU
        x = self.dropout(x)
        
        x = self.bn2(self.fc2(x))
        x = torch.relu(x)               # ReLU
        x = self.dropout(x)
        
        x = self.bn3(self.fc3(x))
        x = torch.relu(x)               # ReLU
        
        x = self.fc4(x)
        x = self.sigmoid(x)            # Output for binary classification
        return x

# Instantiate the model
input_size = X_train.shape[1]  # number of selected features
model = HeartDiseaseModel(input_size)

# ------------------------------
# 11. Define Optimizer + LR Scheduler
#     Using SGD with Momentum + CyclicLR
# ------------------------------
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CyclicLR(
    optimizer, 
    base_lr=0.001, 
    max_lr=0.01, 
    step_size_up=10, 
    mode='triangular2'
)

# ------------------------------
# 12. Training Loop
# ------------------------------
num_epochs = 700
best_val_loss = float("inf")

for epoch in range(num_epochs):
    model.train()
    
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_tensor)
    
    # Update learning rate (CyclicLR)
    scheduler.step()
    
    # Save best model
    if val_loss.item() < best_val_loss:
        best_val_loss = val_loss.item()
        torch.save(model.state_dict(), "best_model.pth")
    
    # Print every 10 epochs
    if (epoch + 1) % 10 == 0:
        current_lr = optimizer.param_groups[0]['lr']
        print(f"Epoch [{epoch+1}/{num_epochs}], "
              f"Train Loss: {loss.item():.4f}, "
              f"Val Loss: {val_loss.item():.4f}, "
              f"LR: {current_lr:.6f}")

# ------------------------------
# 13. Load Best Model and Evaluate on Test
# ------------------------------
model.load_state_dict(torch.load("best_model.pth"))
model.eval()

with torch.no_grad():
    y_pred = model(X_test_tensor)
    y_pred_class = (y_pred >= 0.5).float()  # threshold at 0.5
    accuracy = (y_pred_class.eq(y_test_tensor).sum().item()) / y_test_tensor.shape[0]
    print(f"Test Accuracy: {accuracy:.4f}")


Train size: 4946
Validation size: 1060
Test size: 1061
Epoch [10/700], Train Loss: 1.3341, Val Loss: 1.4530, LR: 0.010000
Epoch [20/700], Train Loss: 1.3452, Val Loss: 1.3801, LR: 0.001000
Epoch [30/700], Train Loss: 1.2596, Val Loss: 1.4320, LR: 0.005500
Epoch [40/700], Train Loss: 1.2841, Val Loss: 1.4193, LR: 0.001000
Epoch [50/700], Train Loss: 1.2619, Val Loss: 1.3663, LR: 0.003250
Epoch [60/700], Train Loss: 1.2626, Val Loss: 1.3464, LR: 0.001000
Epoch [70/700], Train Loss: 1.2685, Val Loss: 1.3487, LR: 0.002125
Epoch [80/700], Train Loss: 1.2601, Val Loss: 1.3515, LR: 0.001000
Epoch [90/700], Train Loss: 1.2634, Val Loss: 1.3500, LR: 0.001563
Epoch [100/700], Train Loss: 1.2624, Val Loss: 1.3473, LR: 0.001000
Epoch [110/700], Train Loss: 1.2643, Val Loss: 1.3446, LR: 0.001281
Epoch [120/700], Train Loss: 1.2610, Val Loss: 1.3426, LR: 0.001000
Epoch [130/700], Train Loss: 1.2651, Val Loss: 1.3421, LR: 0.001141
Epoch [140/700], Train Loss: 1.2502, Val Loss: 1.3415, LR: 0.001000
Ep

  model.load_state_dict(torch.load("best_model.pth"))
