In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
import shap 
import os

# --- Load the raw table and separate features from the label ---
target_col = "readmitted"
df = pd.read_csv("datasets/diabetic_data.csv")
y = df[target_col]

# These three columns must exist (a missing one raises); the second list is
# dropped on a best-effort basis (missing names are ignored).
required_drop_cols = ["encounter_id", "patient_nbr", target_col]
optional_drop_cols = ['diag_1', 'diag_2', 'diag_3', 'medical_specialty', 'citoglipton', 'glimepiride-pioglitazone']
X = df.drop(columns=required_drop_cols)
X = X.drop(columns=optional_drop_cols, errors='ignore')

# Every object-typed column is treated as categorical and cast to str
# before label encoding.
categorical_cols = X.select_dtypes(include=["object"]).columns.tolist()
X = X.astype({name: str for name in categorical_cols})

# One fitted LabelEncoder per categorical column, kept so the integer codes
# can be mapped back to the original strings after resampling.
encoders = {name: LabelEncoder().fit(X[name]) for name in categorical_cols}
X_encoded = X.copy()
for name, encoder in encoders.items():
    X_encoded[name] = encoder.transform(X[name])

print("Original class distribution:")
print(y.value_counts())

# --- Rebalance the classes, then decode and one-hot encode the features ---
# SMOTENC is only needed by this step, so it is imported here.
from imblearn.over_sampling import SMOTENC

# 'NO' is randomly under-sampled down to this size and '<30' is synthetically
# over-sampled up to it; '>30' is left as-is.
target_size_after_undersampling = 27432
under_strategy = {'NO': target_size_after_undersampling}
over_strategy = {"<30": target_size_after_undersampling}
under = RandomUnderSampler(sampling_strategy=under_strategy, random_state=42)

# The categorical columns hold arbitrary LabelEncoder integer codes. Plain
# SMOTE interpolates between those codes as if they were continuous, so a
# synthetic row can end up (after the integer cast below) with a category
# that none of its neighbours has. SMOTENC instead assigns each categorical
# feature the most frequent category among the nearest neighbours and only
# interpolates the numeric columns.
categorical_idx = [X_encoded.columns.get_loc(col) for col in categorical_cols]
if categorical_idx and len(categorical_idx) < X_encoded.shape[1]:
    over = SMOTENC(
        categorical_features=categorical_idx,
        sampling_strategy=over_strategy,
        random_state=42,
        k_neighbors=5,
    )
else:
    # SMOTENC needs a mix of categorical and numeric features; otherwise
    # fall back to plain SMOTE.
    over = SMOTE(sampling_strategy=over_strategy, random_state=42, k_neighbors=5)

# NOTE(review): resampling runs on the full dataset, before the train/test
# split further down, so synthetic '<30' rows can land in the test set.
# Confirm this is intended for the experiment design.
pipeline = Pipeline([("under", under), ("over", over)])
X_resampled_num, y_resampled = pipeline.fit_resample(X_encoded, y)

# Map the integer codes back to their original string categories so that
# get_dummies produces readable, category-named columns.
X_resampled_decoded = X_resampled_num.copy()
for col, le in encoders.items():
    X_resampled_decoded[col] = le.inverse_transform(X_resampled_num[col].astype(int))

X_resampled_ohe = pd.get_dummies(X_resampled_decoded, drop_first=True)
target_mapping = {'<30': 0, '>30': 1, 'NO': 2}
y_resampled_encoded = y_resampled.map(target_mapping)
# .map() yields NaN for any label missing from target_mapping; fail here
# rather than when the labels are later converted to integer tensors.
if y_resampled_encoded.isna().any():
    unmapped = sorted(set(y_resampled.unique()) - set(target_mapping))
    raise ValueError(f"Unmapped target labels in '{target_col}': {unmapped}")
balanced_df = X_resampled_ohe.copy()
balanced_df[target_col] = y_resampled_encoded
feature_names = X_resampled_ohe.columns.tolist()

print("\nNew class distribution (encoded):")
print(y_resampled_encoded.value_counts())

# --- Stratified 80/20 split of the resampled, one-hot encoded data ---
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled_ohe,
    y_resampled_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_resampled_encoded,
)

# Standardisation statistics come from the training split only and are then
# applied unchanged to the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Features become float32 tensors, labels become int64 (long) tensors.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.long)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.long)

# Only the training loader shuffles; the test loader keeps row order.
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=128)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = len(y_resampled_encoded.unique())
input_dim = X_train_tensor.shape[1]

class MulticlassNN(nn.Module):
    """Fully connected classifier with two hidden layers.

    Layout: Linear(input_dim, 64) -> ReLU -> Dropout(0.5)
            -> Linear(64, 32) -> ReLU -> Dropout(0.5)
            -> Linear(32, num_classes).

    The forward pass returns the raw output of the last linear layer; no
    softmax is applied inside the module.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        layers = []
        in_features = input_dim
        # Each hidden stage is Linear -> ReLU -> Dropout; the flat layer
        # order fixes the parameter names (net.0, net.3, net.6).
        for width in (64, 32):
            layers += [nn.Linear(in_features, width), nn.ReLU(), nn.Dropout(0.5)]
            in_features = width
        layers.append(nn.Linear(in_features, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

print("\n--- Training Target Model ---")
# Seed torch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable, in line with the random_state=42 used for every
# sklearn/imblearn step in this script.
torch.manual_seed(42)

baseline_model = MulticlassNN(input_dim, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(baseline_model.parameters(), lr=1e-3)
epochs = 50

for epoch in range(1, epochs + 1):
    baseline_model.train()  # enable dropout for the training pass
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        outputs = baseline_model(xb)
        loss = criterion(outputs, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean of the per-batch losses every 10th epoch.
    if epoch % 10 == 0:
        print(f"Epoch {epoch}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

print("\n--- Evaluating Target Model Utility ---")
baseline_model.eval()  # disable dropout for inference
all_preds, all_true = [], []
with torch.no_grad():
    for features, labels in test_loader:
        logits = baseline_model(features.to(device))
        # Predicted class = index of the largest output per row.
        batch_preds = logits.argmax(dim=1)
        all_preds.extend(batch_preds.cpu().numpy())
        all_true.extend(labels.cpu().numpy())

# --- 2. Explicit Metrics Reporting ---
class_names = ['<30', '>30', 'NO']
accuracy = accuracy_score(all_true, all_preds)
cm = confusion_matrix(all_true, all_preds)
report = classification_report(all_true, all_preds, target_names=class_names)

banner = "---" * 15
print("\n" + banner)
print("###Model Evaluation Metrics (Baseline) ###")
print(f"\n**Accuracy:** {accuracy:.4f}")

print("\n**Confusion Matrix:**")
print(cm)

print("\n**Classification Report (Precision, Recall, F1-Score):**")
print(report)

# The baseline is reported with no epsilon/delta values.
print("\n**Privacy Metrics:**")
print("  - Epsilon: Not Applicable (Baseline Model)")
print("  - Delta:   Not Applicable (Baseline Model)")
print(banner + "\n")

# Persist only the weights (state dict), not the whole module object.
MODEL_PATH = "baseline_target_model_copy.pth"
torch.save(baseline_model.state_dict(), MODEL_PATH)
print(f"\nTrained model saved to {MODEL_PATH}")

print("\nStep 5: Generating SHAP explanations for Baseline model.")

# Explain a separate copy of the trained network rather than baseline_model
# itself (presumably so the explainer's instrumentation never touches the
# model that is evaluated and saved -- confirm).
clean_model = MulticlassNN(input_dim, num_classes).to(device)
clean_model.load_state_dict(baseline_model.state_dict())
clean_model.eval()

# Draw the background sample from a seeded generator so the SHAP values are
# repeatable between runs, matching the random_state=42 used elsewhere.
background_size = min(100, len(X_train_tensor))
background_rng = np.random.default_rng(42)
background_idx = background_rng.choice(len(X_train_tensor), size=background_size, replace=False)
background_data = X_train_tensor[background_idx].to(device)
X_explain = X_test_tensor[:20].to(device)  # explain the first 20 test rows

explainer = shap.DeepExplainer(clean_model, background_data)
shap_values_list = explainer.shap_values(X_explain)

# Depending on the SHAP version, a multi-output model yields either a list
# of per-class (samples, features) arrays or a single
# (samples, features, classes) array. Normalise to the latter so the
# reduction below always leaves one value per feature.
if isinstance(shap_values_list, (list, tuple)):
    shap_array = np.stack([np.asarray(values) for values in shap_values_list], axis=-1)
else:
    shap_array = np.asarray(shap_values_list)

feature_names = X_train.columns.tolist()
if shap_array.ndim != 3 or shap_array.shape[1] != len(feature_names):
    raise ValueError(
        f"Unexpected SHAP output shape {shap_array.shape}; "
        f"expected (samples, {len(feature_names)}, classes)."
    )

# Mean |SHAP| per feature, averaged over explained samples and classes.
mean_abs_shap = np.mean(np.abs(shap_array), axis=(0, 2))
feature_importance = dict(zip(feature_names, mean_abs_shap))

print("\n### SHAP Feature Importance (Baseline) ###")
print("\nTop 10 features by mean absolute SHAP value:")
sorted_importance = sorted(feature_importance.items(), key=lambda item: item[1], reverse=True)
for feature, value in sorted_importance[:10]:
    print(f"{feature}: {value:.4f}")

print("\nSaving SHAP results.")

shap_df = pd.DataFrame({
    'feature': feature_names,
    'mean_abs_shap': mean_abs_shap
}).sort_values('mean_abs_shap', ascending=False)

shap_df.to_csv('shap_feature_importance_baseline_copy.csv', index=False)
print("Saved SHAP feature importance to 'shap_feature_importance_baseline_copy.csv'.")

Original class distribution:
readmitted
NO     54864
>30    35545
<30    11357
Name: count, dtype: int64

New class distribution (encoded):
readmitted
1    35545
0    27432
2    27432
Name: count, dtype: int64

--- Training Target Model ---
Epoch 10/50, Loss: 0.9225
Epoch 20/50, Loss: 0.9118
Epoch 30/50, Loss: 0.9087
Epoch 40/50, Loss: 0.9060
Epoch 50/50, Loss: 0.9040

--- Evaluating Target Model Utility ---

---------------------------------------------
###Model Evaluation Metrics (Baseline) ###

**Accuracy:** 0.5547

**Confusion Matrix:**
[[2467 2369  651]
 [ 277 5549 1283]
 [ 222 3250 2014]]

**Classification Report (Precision, Recall, F1-Score):**
              precision    recall  f1-score   support

         <30       0.83      0.45      0.58      5487
         >30       0.50      0.78      0.61      7109
          NO       0.51      0.37      0.43      5486

    accuracy                           0.55     18082
   macro avg       0.61      0.53      0.54     18082
weighted avg  

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from torch.utils.data import TensorDataset, DataLoader

# Settings for this cell. X_train_tensor / y_train_tensor are not defined
# here and must already exist in the session.
MODEL_PATH = "baseline_target_model_copy.pth"
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
input_dim = X_train_tensor.size(1)
num_classes = torch.unique(y_train_tensor).numel()

class MulticlassNN(nn.Module):
    """Target classifier: two hidden layers of 64 and 32 units.

    Each hidden layer is followed by ReLU and Dropout(0.5); the final linear
    layer emits one raw score per class. The layer order determines the
    parameter names (net.0, net.3, net.6) that a saved state dict is matched
    against when it is loaded into an instance of this class.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        first_hidden, second_hidden, drop_p = 64, 32, 0.5
        stack = [
            nn.Linear(input_dim, first_hidden),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(first_hidden, second_hidden),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(second_hidden, num_classes),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        logits = self.net(x)
        return logits

# Rebuild the architecture, then restore the trained weights from disk.
baseline_model = MulticlassNN(input_dim, num_classes).to(device)
try:
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host
    # (and vice versa). weights_only=True restricts unpickling to tensors
    # and plain containers, which is all a state dict needs
    # (requires torch >= 1.13).
    state_dict = torch.load(MODEL_PATH, map_location=device, weights_only=True)
except FileNotFoundError as err:
    # Stop here: carrying on would run the attack against a randomly
    # initialised network and produce meaningless results.
    raise FileNotFoundError(
        f"ERROR: Model file not found at {MODEL_PATH}. Run the baseline training script first."
    ) from err
baseline_model.load_state_dict(state_dict)
baseline_model.eval()

class AttackNN_AllLogits(nn.Module):
    """Small MLP that maps an input vector to a single raw score.

    Layout: Linear(input_dim, 32) -> ReLU -> Linear(32, 16) -> ReLU
            -> Linear(16, 1). No sigmoid is applied inside the module.
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = [input_dim, 32, 16]
        layers = []
        # Hidden stages: Linear followed by ReLU for each consecutive pair.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.extend((nn.Linear(n_in, n_out), nn.ReLU()))
        layers.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

def create_attack_dataset_all_logits(model, train_tensor, test_tensor):
    """Build the membership-attack dataset from the model's raw outputs.

    The model is put in eval mode and run (without gradients) on both
    tensors. Outputs for ``train_tensor`` are labelled 1.0 and outputs for
    ``test_tensor`` are labelled 0.0.

    Returns:
        (attack_X, attack_y): the train outputs followed by the test
        outputs, concatenated along dim 0, and the matching label vector.
        Both are returned as CPU tensors.
    """
    model.eval()

    def _outputs_on_cpu(batch):
        # Run on the configured device, then bring the result back to CPU.
        return model(batch.to(device)).cpu()

    with torch.no_grad():
        member_out = _outputs_on_cpu(train_tensor)
        non_member_out = _outputs_on_cpu(test_tensor)

    attack_X = torch.cat((member_out, non_member_out), dim=0)
    attack_y = torch.cat(
        (torch.ones(len(member_out)), torch.zeros(len(non_member_out))),
        dim=0,
    )
    return attack_X, attack_y

def run_mia_trial(attack_X, attack_y, random_seed, epochs=50, batch_size=64, lr=1e-3):
    """Run one membership-inference trial and return its advantage.

    The attack data is split 70/30 (stratified), an ``AttackNN_AllLogits``
    model is trained on the 70% part with a class-weighted BCE loss, and the
    advantage ``TPR - FPR`` is measured on the held-out 30%.

    Args:
        attack_X: 2-D tensor of attack features, one row per sample.
        attack_y: 1-D tensor of 1.0 (member) / 0.0 (non-member) labels.
        random_seed: Seeds the split, the attack-model initialisation and
            the batch shuffling, so a given seed reproduces the trial.
        epochs: Number of training epochs for the attack model.
        batch_size: Mini-batch size for attack training.
        lr: Adam learning rate.

    Returns:
        float: ``TPR - FPR`` on the held-out attack set. A rate whose
        denominator is zero (no members / no non-members) is taken as 0.

    Raises:
        ValueError: If the attack training split lacks one of the classes.
    """
    # The seed previously controlled only the split; seeding torch makes the
    # weight init and DataLoader shuffle repeatable as well. Note this sets
    # torch's global RNG state.
    torch.manual_seed(random_seed)

    attack_X_train, attack_X_test, attack_y_train, attack_y_test = train_test_split(
        attack_X, attack_y, test_size=0.3, random_state=random_seed, stratify=attack_y
    )
    attack_train_loader = DataLoader(
        TensorDataset(attack_X_train, attack_y_train),
        batch_size=batch_size,
        shuffle=True,
    )

    # Weight the positive (member) class by n_non_members / n_members so the
    # loss is not dominated by whichever class is larger.
    n_non_members = int((attack_y_train == 0).sum())
    n_members = int((attack_y_train == 1).sum())
    if n_members == 0 or n_non_members == 0:
        raise ValueError(
            "Attack training split must contain both members and non-members "
            f"(got {n_members} members, {n_non_members} non-members)."
        )
    pos_weight = torch.tensor(n_non_members / n_members, dtype=torch.float32, device=device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    attack_model = AttackNN_AllLogits(attack_X.shape[1]).to(device)
    optimizer = optim.Adam(attack_model.parameters(), lr=lr)

    for _ in range(epochs):
        attack_model.train()
        for xb, yb in attack_train_loader:
            # The model outputs shape (batch, 1), so the targets must match.
            xb, yb = xb.to(device), yb.to(device).unsqueeze(1)
            optimizer.zero_grad()
            loss = criterion(attack_model(xb), yb)
            loss.backward()
            optimizer.step()

    attack_model.eval()
    with torch.no_grad():
        # squeeze(1) removes only the output dimension, so a single-sample
        # test set still yields a 1-D tensor.
        test_logits = attack_model(attack_X_test.to(device)).squeeze(1)
    # A logit above 0 corresponds to a sigmoid probability above 0.5.
    predicted_member = (test_logits > 0.0).cpu()
    is_member = attack_y_test.cpu() == 1

    # Compute the rates from raw counts instead of looking them up in a
    # classification report by stringified label.
    n_pos = int(is_member.sum())
    n_neg = int((~is_member).sum())
    true_pos = int((predicted_member & is_member).sum())
    false_pos = int((predicted_member & ~is_member).sum())
    TPR = true_pos / n_pos if n_pos else 0.0
    FPR = false_pos / n_neg if n_neg else 0.0
    advantage = TPR - FPR
    return advantage

print("Step 1: Creating attack dataset for the Baseline Model.")
attack_X_baseline, attack_y_baseline = create_attack_dataset_all_logits(baseline_model, X_train_tensor, X_test_tensor)

num_trials = 10
all_advantages = []

# Each trial uses a different seed (42, 43, ...).
print(f"\nStep 2: Running {num_trials} MIA trials on the Baseline Model...")
for i in range(num_trials):
    seed = 42 + i
    advantage = run_mia_trial(attack_X_baseline, attack_y_baseline, seed)
    print(f"  Trial {i+1}/{num_trials} (Seed: {seed}) -> MIA Advantage: {advantage:.4f}")
    all_advantages.append(advantage)

mean_advantage = np.mean(all_advantages)
# NOTE(review): np.std defaults to the population standard deviation
# (ddof=0); confirm whether the sample estimate (ddof=1) is wanted here.
std_advantage = np.std(all_advantages)

# Build the separator once. Repeating a string that contains the newline
# ("\n---" * 10) printed ten separate '---' lines instead of one banner.
separator = "---" * 10
print("\n" + separator)
print("Final Robust MIA Results for Baseline Model")
print(f"  Mean MIA Advantage: {mean_advantage:.4f}")
print(f"  Std Dev of MIA Advantage: {std_advantage:.4f}")
print(separator + "\n")

# Saved as a 2-element array: [mean advantage, std of advantage].
np.save("baseline_mia_copy.npy", np.array([mean_advantage, std_advantage]))
print("Successfully executed weighted MIA. Mean and Std Dev saved.")

  baseline_model.load_state_dict(torch.load(MODEL_PATH))


Step 1: Creating attack dataset for the Baseline Model.

Step 2: Running 10 MIA trials on the Baseline Model...
  Trial 1/10 (Seed: 42) -> MIA Advantage: 0.0033
  Trial 2/10 (Seed: 43) -> MIA Advantage: -0.0018
  Trial 3/10 (Seed: 44) -> MIA Advantage: 0.0089
  Trial 4/10 (Seed: 45) -> MIA Advantage: -0.0009
  Trial 5/10 (Seed: 46) -> MIA Advantage: -0.0155
  Trial 6/10 (Seed: 47) -> MIA Advantage: -0.0073
  Trial 7/10 (Seed: 48) -> MIA Advantage: 0.0099
  Trial 8/10 (Seed: 49) -> MIA Advantage: -0.0048
  Trial 9/10 (Seed: 50) -> MIA Advantage: -0.0057
  Trial 10/10 (Seed: 51) -> MIA Advantage: -0.0033

---
---
---
---
---
---
---
---
---
---
Final Robust MIA Results for Baseline Model
  Mean MIA Advantage: -0.0017
  Std Dev of MIA Advantage: 0.0072
---
---
---
---
---
---
---
---
---
---

Successfully executed weighted MIA. Mean and Std Dev saved.
