# MIA attacks on Length-of-Stay predictor, Logistic Regression
## Installation of Packages in Conda

To install the required packages in your conda environment, you can use the following commands:

```bash
conda install h5py
conda install pytables


In [5]:
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), "../../.."))
sys.path.append(project_root)


## Train the classifier
For the LR, the data should be flatten. So set the value to True for the LR model anb False for the GRU-D

### Load the dataset
The dataset is generated by the notebook file ....

In [6]:
import yaml
import pickle

# Load the config.yaml file
with open('train_config.yaml', 'r') as file:
    train_config = yaml.safe_load(file)

use_LR = train_config['train']['training_method'] == 'LR'
data_path = train_config['data']['data_dir']

if use_LR:
    path = data_path + "LR_data/"
else:
    path = data_path + "GRUD_data/"

dataset_path = os.path.join(path, "dataset.pkl")

if os.path.exists(dataset_path):
        print("Loading dataset...")
        with open(dataset_path, "rb") as f:
            population_dataset = pickle.load(f)  # Load the dataset
        print(f"Loaded dataset from {dataset_path}")
else:
    print("\nDataset not found.\n→ Run 'mimic_dataset_prep.ipynb' to generate the required dataset.\n")

data, targets = population_dataset.x, population_dataset.y

Loading dataset...
Loaded dataset from ./data/LR_data/dataset.pkl


In [10]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import numpy as np
from mimic_handler import MIMICInputHandler

train_fraction = train_config["data"]["f_train"]
test_fraction = train_config["data"]["f_test"]
batch_size = train_config["data"]["batch_size"]

dataset_size = len(population_dataset)
train_size = int(train_fraction * dataset_size)
test_size = int(test_fraction * dataset_size)

selected_index = np.random.choice(np.arange(dataset_size), train_size + test_size, replace=False)
train_indices, test_indices = train_test_split(selected_index, test_size=test_size)

train_subset = MIMICInputHandler.UserDataset(data[train_indices], targets[train_indices])
test_subset = MIMICInputHandler.UserDataset(data[test_indices], targets[test_indices], **train_subset.return_params())


NameError: name 'data' is not defined

In [None]:
from utils.data_processing import get_mimic_dataloaders, get_mimic_dataset


# Generate the dataset and dataloaders
path = os.path.join(os.getcwd(), "data/")
use_LR = True # If True, use a logistic regression model. If False, use a GRUD model.
dataset, train_indices, validation_indices, test_indices, early_stop_indices = get_mimic_dataset(path, train_frac = 0.3,
                                                                            test_frac = 0.2,
                                                                            validation_frac = 0,
                                                                            early_stop_frac = 0,
                                                                            use_LR = use_LR)

In [None]:
train_loader, validation_loader, test_loader, early_stop_loader = get_mimic_dataloaders(dataset,
                          train_indices,
                          validation_indices,
                          test_indices,
                          early_stop_indices,
                          batch_size=128)

In [None]:
from utils.model_LR import LR, create_trained_model_and_metadata
n_features = dataset.x.shape[1]
print(f"Number of features: {n_features}")

model = LR(n_features)
train_acc, train_loss, test_acc, test_loss = create_trained_model_and_metadata(model,
                                                                               train_loader,
                                                                               test_loader,
                                                                               lr = 0.0001,
                                                                               weight_decay = 5.392,
                                                                               epochs=20)

In [None]:
import matplotlib.pyplot as plt

# Plot training and test accuracy
plt.figure(figsize=(5, 4))

plt.subplot(1, 2, 1)
plt.plot(train_acc, label="Train Accuracy")
plt.plot(test_acc, label="Test Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy over Epochs")
plt.legend()

# Plot training and test loss
plt.subplot(1, 2, 2)
plt.plot(train_loss, label="Train Loss")
plt.plot(test_loss, label="Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss over Epochs")
plt.legend()

plt.tight_layout()
plt.show()

## Attack the LR model
Modify ```audit.yaml ``` file to attack LR model: 
  
  ```
  module_path: "utils/model_LR.py" 
  model_class: "LR"
  target_folder: "./target_LR"
  data_path: "./data/LR_data/dataset.pkl"
  ```


In [None]:
from mimic_LR_handler import MimicInputHandler

from leakpro import LeakPro

# Read the config file
config_path = "audit.yaml"

# Prepare leakpro object
leakpro = LeakPro(MimicInputHandler, config_path)

# Run the audit
mia_results = leakpro.run_audit(return_results=True)

## Generate report

In [None]:
# Import and initialize ReportHandler
from leakpro.reporting.report_handler import ReportHandler

# report_handler = ReportHandler()
report_handler = ReportHandler(report_dir="./leakpro_output/results")

# Save MIA resuls using report handler
for res in mia_results:
    report_handler.save_results(attack_name=res.attack_name, result_data=res, config=res.configs)

# # Create the report by compiling the latex text
report_handler.create_report()