<a href="https://colab.research.google.com/github/Mrinal18/AI_for_Security/blob/main/Differential_Privacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow-privacy

In [None]:
"""
1. You should train differentially private models on the CIFAR-100 dataset.
2. You will use TensorFlow Privacy, which provides DP implementations of standard optimizers; you only need to pick one.
3. The goal is to train a well-performing model while keeping a small value for epsilon during training.
4. You will use a fixed delta of 4 × 10^-5.
"""

import logging
import math
import os
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

In [None]:
# Load CIFAR-100 (32x32 RGB images, 100 classes, integer labels).
(train_images, train_labels), (test_images, test_labels) = cifar100.load_data()

# Scale pixel values from [0, 255] to [0, 1].
train_images = train_images / 255.0
test_images = test_images / 255.0

# Hold out the first 5000 training examples for validation; the remainder
# is the actual training set.
VALIDATION_SIZE = 5000
validation_images = train_images[:VALIDATION_SIZE]
validation_labels = train_labels[:VALIDATION_SIZE]
train_images = train_images[VALIDATION_SIZE:]
train_labels = train_labels[VALIDATION_SIZE:]

# Small CNN: three conv layers followed by a dense classifier head.
# The last layer applies softmax, so losses must use from_logits=False.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(100, activation='softmax'),
])

# Baseline compile with Adam.
# NOTE: Adam gives no differential-privacy guarantee. The model has to be
# compiled with a DP optimizer before training for any reported epsilon to
# be meaningful.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Single source of truth for the batch size used by every iterator.
BATCH_SIZE = 64

# Augmentation (random shifts / horizontal flips) is applied to the
# training stream only.
data_generator = ImageDataGenerator(width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    horizontal_flip=True)

# Validation and test data must NOT be augmented or shuffled: random
# perturbations would make early stopping and the final test accuracy
# noisy and non-reproducible.
eval_generator = ImageDataGenerator()

# Training iterator (augmented, shuffled).
train_iterator = data_generator.flow(
    train_images, train_labels, batch_size=BATCH_SIZE)

# Test iterator (clean images, fixed order).
test_iterator = eval_generator.flow(
    test_images, test_labels, batch_size=BATCH_SIZE, shuffle=False)

# Validation iterator (clean images, fixed order).
validation_iterator = eval_generator.flow(
    validation_images, validation_labels, batch_size=BATCH_SIZE, shuffle=False)



In [None]:

# Create the DP-SGD optimizer and compile the model with it.
# Note: these hyperparameters still need to be experimented with for the
# report.
#
# - DPKerasSGDOptimizer is the Keras-compatible DP-SGD implementation, so it
#   can be handed directly to model.compile / model.fit.
# - num_microbatches=None makes every example its own microbatch, which works
#   for any batch size (including a smaller final batch). A fixed value would
#   have to divide every batch exactly.
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=1.0,
    noise_multiplier=1.1,
    num_microbatches=None,
    learning_rate=0.15)

# Without this compile the DP optimizer is never used and training is not
# private. The loss must be left unreduced (one value per example) so the
# optimizer can clip each example's gradient before adding noise.
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE),
    metrics=['accuracy'])


In [None]:
# Privacy-accounting parameters.
NOISE_MULTIPLIER = 1.1  # must match the DP optimizer's noise_multiplier
DELTA = 4e-5            # fixed delta required by the task description

# Train the model. Early stopping decides how many epochs actually run, so
# the privacy budget is computed afterwards from the real epoch count.
history = model.fit(
    train_iterator,
    epochs=100,
    validation_data=validation_iterator,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=3,
                                                restore_best_weights=True)])

# Every epoch that ran consumed privacy budget, including the "patience"
# epochs whose weights were discarded by restore_best_weights.
epochs_run = len(history.history['loss'])

# Compute epsilon from the values that were really used for training:
# the post-split training-set size and the iterator's batch size.
# The second return value is the optimal RDP order, not delta.
# NOTE: epsilon is only meaningful if the model was compiled with a DP
# optimizer using this noise multiplier.
epsilon, _optimal_rdp_order = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
    n=len(train_images),
    batch_size=train_iterator.batch_size,
    noise_multiplier=NOISE_MULTIPLIER,
    epochs=epochs_run,
    delta=DELTA)

print("Epsilon: ", epsilon)

# evaluate model
test_loss, test_acc = model.evaluate(test_iterator, verbose=2)

# print results
print('\nTest accuracy:', test_acc)

# save model
model.save('cifar100_model.h5')
# # define loss function
# loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# # define metrics
# metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

# # define privacy budget
# epsilon = math.exp(math.log(1.25) / 10)

# # define number of epochs
# epochs = 10

# # train model
# model.fit(train_iterator,
#           epochs=epochs,
#           validation_data=validation_iterator,
#           validation_steps=1,
#           verbose=1)

# # evaluate model
# model.evaluate(test_iterator, verbose=2)

# # save model
# model.save('cifar100_model.h5')