In [1]:
import pandas as pd

In [2]:
df=pd.read_csv('preprocessed.csv')

In [5]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")

True
NVIDIA GeForce RTX 2050


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, recall_score, classification_report

# ===============================
# üîß Device Setup (GPU / CPU)
# ===============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ===============================
# üìä Example Dataset (replace with your df)
# ===============================
df = pd.read_csv("preprocessed.csv")  # your actual dataset path
target_col = "CVDCRHD4"  # heart attack column (1=Yes, 2=No)

# Drop missing / unknown labels
df = df[df[target_col].isin([1, 2])]
df[target_col] = df[target_col].replace({1: 1, 2: 0})

# Split features and target
X = df.drop(columns=[target_col])
y = df[target_col]

# Identify numeric and categorical columns
numeric_cols = X.select_dtypes(include=["int64", "float64"]).columns
categorical_cols = X.select_dtypes(exclude=["int64", "float64"]).columns

from imblearn.over_sampling import SMOTE

# ===============================
# üßπ Preprocessing
# ===============================
numeric_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='median')),
    ('scale', StandardScaler())
])
categorical_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

preprocessor = ColumnTransformer([
    ('num', numeric_pipeline, numeric_cols),
    ('cat', categorical_pipeline, categorical_cols)
])

# Apply preprocessing
X_processed = preprocessor.fit_transform(X)

# Split before SMOTE (so test set is untouched)
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42, stratify=y
)

# ===============================
# ‚öôÔ∏è Apply SMOTE to training data
# ===============================
smote = SMOTE(sampling_strategy=0.5, random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print("Before SMOTE:", np.bincount(y_train))
print("After SMOTE:", np.bincount(y_train_res))

# Convert to tensors
X_train_tensor = torch.tensor(X_train_res, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train_res.values, dtype=torch.long).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long).to(device)


# Create DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# ===============================
# üß† Model Definition
# ===============================
class AttentionLayer(nn.Module):
    def __init__(self, input_dim):
        super(AttentionLayer, self).__init__()
        self.attn = nn.Sequential(
            nn.Linear(input_dim, input_dim),
            nn.Tanh(),
            nn.Linear(input_dim, 1)
        )

    def forward(self, x):
        attn_weights = torch.softmax(self.attn(x), dim=1)
        context = torch.sum(attn_weights * x, dim=1)
        return context

class MLPWithAttention(nn.Module):
    def __init__(self, input_dim):
        super(MLPWithAttention, self).__init__()
        self.attn = AttentionLayer(input_dim)
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2)
        )

    def forward(self, x):
        context = self.attn(x)
        out = self.net(x)
        return out

model = MLPWithAttention(X_train.shape[1]).to(device)

# ===============================
# ‚öôÔ∏è Training Setup
# ===============================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
epochs = 100

# ===============================
# üöÄ Training Loop
# ===============================
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

# ===============================
# üßæ Evaluation
# ===============================
model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for xb, yb in test_loader:
        preds = model(xb)
        y_true.extend(yb.cpu().numpy())
        y_pred.extend(torch.argmax(preds, dim=1).cpu().numpy())

acc = accuracy_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
print(f"\n‚úÖ Accuracy: {acc*100:.2f}%")
print(f"üìà Recall: {recall*100:.2f}%")
print("\nüìã Classification Report:\n", classification_report(y_true, y_pred))


Using device: cuda


  print("Before SMOTE:", np.bincount(y_train))
  print("After SMOTE:", np.bincount(y_train_res))


Before SMOTE: [145302   7561]
After SMOTE: [145302 145302]
Epoch 1/40, Loss: 0.4550
Epoch 2/40, Loss: 0.4349
Epoch 3/40, Loss: 0.4282
Epoch 4/40, Loss: 0.4236
Epoch 5/40, Loss: 0.4193
Epoch 6/40, Loss: 0.4155
Epoch 7/40, Loss: 0.4116
Epoch 8/40, Loss: 0.4089
Epoch 9/40, Loss: 0.4057
Epoch 10/40, Loss: 0.4033
Epoch 11/40, Loss: 0.3999
Epoch 12/40, Loss: 0.3973
Epoch 13/40, Loss: 0.3938
Epoch 14/40, Loss: 0.3917
Epoch 15/40, Loss: 0.3892
Epoch 16/40, Loss: 0.3873
Epoch 17/40, Loss: 0.3845
Epoch 18/40, Loss: 0.3823
Epoch 19/40, Loss: 0.3803
Epoch 20/40, Loss: 0.3782
Epoch 21/40, Loss: 0.3760
Epoch 22/40, Loss: 0.3747
Epoch 23/40, Loss: 0.3728
Epoch 24/40, Loss: 0.3709
Epoch 25/40, Loss: 0.3688
Epoch 26/40, Loss: 0.3670
Epoch 27/40, Loss: 0.3659
Epoch 28/40, Loss: 0.3642
Epoch 29/40, Loss: 0.3632
Epoch 30/40, Loss: 0.3618
Epoch 31/40, Loss: 0.3603
Epoch 32/40, Loss: 0.3590
Epoch 33/40, Loss: 0.3579
Epoch 34/40, Loss: 0.3562
Epoch 35/40, Loss: 0.3551
Epoch 36/40, Loss: 0.3543
Epoch 37/40, L

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE
import numpy as np



# ---------------- FOCAL LOSS CLASS ----------------
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.ce = nn.CrossEntropyLoss(reduction='none')

    def forward(self, inputs, targets):
        ce_loss = self.ce(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean() if self.reduction == 'mean' else focal_loss
# --------------------------------------------------

from sklearn.preprocessing import StandardScaler

# Split your data (if not done already)
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42,
)

# 1Ô∏è‚É£ Initialize scaler
scaler = StandardScaler()

# 2Ô∏è‚É£ Fit only on training data, then transform both
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3Ô∏è‚É£ If using SMOTE after scaling:
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train_scaled, y_train)


# ===============================
# ‚öôÔ∏è 1Ô∏è‚É£ SMOTE Oversampling
# ===============================
smote = SMOTE(sampling_strategy=0.6, random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)
print("After SMOTE:", np.bincount(y_res))

# ===============================
# ‚öôÔ∏è 2Ô∏è‚É£ Convert to Tensors
# ===============================
X_train_tensor = torch.tensor(X_res, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_res, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.long).to(device)
# ===============================
# üß† 3Ô∏è‚É£ Model Architecture (Enhanced)
# ===============================
class ImprovedMLP(nn.Module):
    def __init__(self, input_dim):
        super(ImprovedMLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.4),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(64, 2)
        )

    def forward(self, x):
        return self.model(x)

# ===============================
# ‚öôÔ∏è 4Ô∏è‚É£ Loss + Optimizer + Scheduler
# ===============================
class_counts = np.bincount(y_res)
weights = 1.0 / torch.tensor(class_counts, dtype=torch.float32)
weights = weights / weights.sum()
weights = weights.to(device)

model = ImprovedMLP(X_train.shape[1]).to(device)

class_counts = np.bincount(y_res)
class_weights = torch.tensor(len(y_res) / (2.0 * class_counts), dtype=torch.float32).to(device)

criterion = FocalLoss(alpha=2.5, gamma=2)


optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=10, gamma=0.9)

# ===============================
# üöÄ 5Ô∏è‚É£ Training Loop with Early Stopping
# ===============================
best_loss = float('inf')
patience = 5
trigger = 0

epochs = 300
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    scheduler.step(loss)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

    # Early stopping
    if loss.item() < best_loss:
        best_loss = loss.item()
        torch.save(model.state_dict(), "best_model.pt")
        trigger = 0
    else:
        trigger += 1
        if trigger >= patience:
            print("Early stopping triggered!")
            break

# ===============================
# üßæ 6Ô∏è‚É£ Evaluation with Threshold Tuning
# ===============================
model.load_state_dict(torch.load("best_model.pt"))
model.eval()
with torch.no_grad():
    probs = torch.softmax(model(X_test_tensor), dim=1)[:, 1].cpu().numpy()

best_f1 = 0
best_t = 0.5
for t in np.arange(0.2, 0.7, 0.05):
    preds = (probs > t).astype(int)
    report = classification_report(y_test, preds, digits=3, output_dict=True)

    # Safely get F1 for class 1 (handle missing key)
    f1 = report.get('1', {}).get('f1-score', 0.0)

    if f1 > best_f1:
        best_f1 = f1
        best_t = t


print(f"\n‚úÖ Best Threshold = {best_t:.2f}")
final_preds = (probs > best_t).astype(int)
print(classification_report(y_test, final_preds, digits=3))
print("ROC-AUC:", roc_auc_score(y_test, probs))


  print("After SMOTE:", np.bincount(y_res))
  class_counts = np.bincount(y_res)
  class_counts = np.bincount(y_res)


After SMOTE: [145289  87173]




Epoch 1/300, Loss: 0.4517
Epoch 2/300, Loss: 0.4015
Epoch 3/300, Loss: 0.3712
Epoch 4/300, Loss: 0.3485
Epoch 5/300, Loss: 0.3324
Epoch 6/300, Loss: 0.3206
Epoch 7/300, Loss: 0.3131
Epoch 8/300, Loss: 0.3081
Epoch 9/300, Loss: 0.3040
Epoch 10/300, Loss: 0.3012
Epoch 11/300, Loss: 0.3001
Epoch 12/300, Loss: 0.2981
Epoch 13/300, Loss: 0.2961
Epoch 14/300, Loss: 0.2952
Epoch 15/300, Loss: 0.2935
Epoch 16/300, Loss: 0.2923
Epoch 17/300, Loss: 0.2913
Epoch 18/300, Loss: 0.2910
Epoch 19/300, Loss: 0.2896
Epoch 20/300, Loss: 0.2894
Epoch 21/300, Loss: 0.2888
Epoch 22/300, Loss: 0.2883
Epoch 23/300, Loss: 0.2884
Epoch 24/300, Loss: 0.2875
Epoch 25/300, Loss: 0.2866
Epoch 26/300, Loss: 0.2863
Epoch 27/300, Loss: 0.2864
Epoch 28/300, Loss: 0.2858
Epoch 29/300, Loss: 0.2851
Epoch 30/300, Loss: 0.2846
Epoch 31/300, Loss: 0.2844
Epoch 32/300, Loss: 0.2842
Epoch 33/300, Loss: 0.2839
Epoch 34/300, Loss: 0.2835
Epoch 35/300, Loss: 0.2829
Epoch 36/300, Loss: 0.2828
Epoch 37/300, Loss: 0.2834
Epoch 38/3