#### Import Libraries

In [5]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.optimizers import SGD
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData, AttackResultsCollection, AttackType, PrivacyMetric, PrivacyReportMetadata, SlicingSpec
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import privacy_report

#### Data Preprocessing

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv('../data/patient_processed.csv')

# Select features and labels
features = data[['tanimoto', 'feature_jsim', 'feature_dsim', 'feature_osim']]
labels = data['DDI']

# Drop rows with NaN values in the feature columns
features_clean = features.dropna()

# Select features and labels after cleaning
labels_clean = data['DDI'].loc[features_clean.index]
features_clean = features_clean[['tanimoto', 'feature_jsim', 'feature_dsim', 'feature_osim']]

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels_clean)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(features_clean, labels_encoded, test_size=0.2, random_state=42)

#### Define Hyperparameters

In [11]:
l2_norm_clip = 1.0
noise_multiplier = 1.1
num_microbatches = 1
learning_rate = 0.01
batch_size = 32
epochs = 10

#### Build and Compile the Models

##### Differential Privacy Model

In [12]:
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Define the model
model_dp = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

optimizer_dp = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate
)

loss_dp = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction=tf.losses.Reduction.NONE)

model_dp.compile(optimizer=optimizer_dp, loss=loss_dp, metrics=['accuracy'])

# Train the DP model
model_dp.fit(X_train, y_train, epochs=10, validation_data=(X_train, y_train), batch_size=32)


Epoch 1/10



  output, from_logits = _get_logits(


Epoch 2/10
Epoch 3/10

  output, from_logits = _get_logits(


Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1cea86f2b50>

##### Non-Differential Privacy Model

In [13]:
from keras.optimizers import SGD

# Define the model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

optimizer_dp = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate
)

loss_dp = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction=tf.losses.Reduction.NONE)

model_dp.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), loss=loss_dp, metrics=['accuracy'])

# Train the DP model
model_dp.fit(X_train, y_train, epochs=10, validation_data=(X_train, y_train), batch_size=32)

Epoch 1/10


  output, from_logits = _get_logits(


Epoch 2/10


  output, from_logits = _get_logits(


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1cea8c7dad0>

#### Define Callback for Privacy Metrics

In [14]:
class PrivacyMetrics(tf.keras.callbacks.Callback):
    def __init__(self, epochs_per_report, model_name):
        self.epochs_per_report = epochs_per_report
        self.model_name = model_name
        self.attack_results = []

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.epochs_per_report != 0:
            return

        logits_train = self.model.predict(X_train, batch_size=batch_size)
        logits_test = self.model.predict(X_test, batch_size=batch_size)

        prob_train = tf.nn.softmax(logits_train, axis=1)
        prob_test = tf.nn.softmax(logits_test, axis=1)

        privacy_report_metadata = PrivacyReportMetadata(
            accuracy_train=logs['accuracy'],
            accuracy_test=logs['val_accuracy'],
            epoch_num=epoch + 1,
            model_variant_label=self.model_name
        )

        attack_results = mia.run_attacks(
            AttackInputData(
                labels_train=y_train,
                labels_test=y_test,
                probs_train=prob_train,
                probs_test=prob_test
            ),
            SlicingSpec(entire_dataset=True, by_class=True),
            attack_types=(AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION),
            privacy_report_metadata=privacy_report_metadata
        )

        self.attack_results.append(attack_results)

#### Train Model and Collect Privacy Metrics

In [15]:
callback = PrivacyMetrics(epochs_per_report=2, model_name="DP Model")
history = model_dp.fit(
    X_train, y_train,
    epochs=epochs,
    validation_data=(X_test, y_test),
    batch_size=batch_size,
    callbacks=[callback]
)

Epoch 1/10
Epoch 2/10


ValueError: probs_train should be a numpy array.

#### Visualise Privacy Metrics

In [None]:
all_reports = callback.attack_results
results = AttackResultsCollection(all_reports)

##### Epoch Plots

In [None]:
privacy_metrics = (PrivacyMetric.AUC, PrivacyMetric.ATTACKER_ADVANTAGE)
epoch_plot = privacy_report.plot_by_epochs(results, privacy_metrics=privacy_metrics)

##### Privacy VS Utility Plots

In [None]:
utility_privacy_plot = privacy_report.plot_privacy_vs_accuracy(results, privacy_metrics=privacy_metrics)
for axis in utility_privacy_plot.axes:
    axis.set_xlabel('Validation accuracy')