In [18]:
#!pip install --quiet --upgrade nest-asyncio

import nest_asyncio
nest_asyncio.apply()

import io
import datetime
import collections
import functools
from typing import List, Optional, Tuple
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_ranking as tfr
import tensorflow_federated as tff
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import (
    Concatenate,
    Dense,
    Embedding,
    Flatten,
    Input,
    Multiply,
)

In [19]:
np.random.seed(42)


In [20]:
ratings_df = pd.read_csv('data/Ratings.csv',delimiter=';')
books_df = pd.read_csv('data/Books.csv',delimiter=';')
ratings_df.head(1)

Unnamed: 0,User-ID,ISBN,Rating
0,276725,034545104X,0


In [21]:
ratings_df.shape

(1149780, 3)

In [22]:
counts = ratings_df['User-ID'].value_counts()
ratings_df = ratings_df[ratings_df['User-ID'].isin(counts[counts>60].index)]
#ratings_df = ratings_df[ratings_df.Rating > 2]

tot_rating_df = ratings_df.groupby(by='ISBN')['Rating'].sum().reset_index().rename(columns={'Rating':'TotRating'})
ratings_df = ratings_df.merge(tot_rating_df)
ratings_df = ratings_df[ratings_df['TotRating']>20] 

In [23]:
ratings_df.drop(columns=['TotRating'])

Unnamed: 0,User-ID,ISBN,Rating
8,276925,002542730X,10
9,277427,002542730X,10
10,3363,002542730X,0
11,8019,002542730X,9
12,10030,002542730X,7
...,...,...,...
715085,250925,0836213092,10
715086,271284,0836213092,8
719995,243294,0006510183,10
719996,263344,0006510183,8


In [24]:
book_mapping = {
      old_book: new_book for new_book, old_book in enumerate(
          ratings_df.ISBN.unique())
  }
user_mapping = {
    old_user: new_user for new_user, old_user in enumerate(
        ratings_df['User-ID'].unique())
}

ratings_df.ISBN = ratings_df.ISBN.map(book_mapping)
ratings_df['User-ID'] = ratings_df['User-ID'].map(user_mapping)
books_df.ISBN = books_df.ISBN.map(book_mapping)


In [25]:
ratings_df.head(1)

Unnamed: 0,User-ID,ISBN,Rating,TotRating
8,0,0,10,297


In [26]:
def create_tf_datasets(ratings_df: pd.DataFrame,
                       batch_size: int = 1,
                       max_examples_per_user: Optional[int] = None,
                       max_clients: Optional[int] = None) -> List[tf.data.Dataset]:
  """Creates TF Datasets containing the movies and ratings for all users."""
  num_users = len(set(ratings_df['User-ID']))
  # Optionally limit to `max_clients` to speed up data loading.
  if max_clients is not None:
    num_users = min(num_users, max_clients)

  def rating_batch_map_fn(rating_batch):
    """Maps a rating batch to an OrderedDict with tensor values."""
    # Each example looks like: {x: movie_id, y: rating}.
    # We won't need the UserID since each client will only look at their own
    # data.
    return collections.OrderedDict([
        ("x", tf.cast(rating_batch[:, 1:2], tf.int64)),
        ("y", tf.cast(rating_batch[:, 2:3], tf.float32))
    ])

  tf_datasets = []
  for user_id in range(num_users):
    # Get subset of ratings_df belonging to a particular user.
    user_ratings_df = ratings_df[ratings_df['User-ID'] == user_id]

    tf_dataset = tf.data.Dataset.from_tensor_slices(user_ratings_df)

    # Define preprocessing operations.
    tf_dataset = tf_dataset.take(max_examples_per_user).shuffle(
        buffer_size=max_examples_per_user, seed=42).batch(batch_size).map(
        rating_batch_map_fn,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    tf_datasets.append(tf_dataset)

  return tf_datasets


def split_tf_datasets(
    tf_datasets: List[tf.data.Dataset],
    train_fraction: float = 0.8,
    val_fraction: float = 0.1,
) -> Tuple[List[tf.data.Dataset], List[tf.data.Dataset], List[tf.data.Dataset]]:
  """Splits a list of user TF datasets into train/val/test by user.
  """
  np.random.seed(42)
  np.random.shuffle(tf_datasets)

  train_idx = int(len(tf_datasets) * train_fraction)
  val_idx = int(len(tf_datasets) * (train_fraction + val_fraction))

  # Note that the val and test data contains completely different users, not
  # just unseen ratings from train users.
  return (tf_datasets[:train_idx], tf_datasets[train_idx:val_idx],
          tf_datasets[val_idx:])


In [27]:
#list(federated_train_data[0].as_numpy_iterator())

In [28]:
# We limit the number of clients to speed up dataset creation. Feel free to pass
# max_clients=None to load all clients' data.
tf_datasets = create_tf_datasets(
    ratings_df=ratings_df,
    batch_size=5,
    max_examples_per_user=300,
    max_clients=2000)

# Split the ratings into training/val/test by client.
tf_train_datasets, tf_val_datasets, tf_test_datasets = split_tf_datasets(
    tf_datasets,
    train_fraction=0.8,
    val_fraction=0.1)


In [29]:
n_users, n_books = len(ratings_df['User-ID'].unique()), len(ratings_df['ISBN'].unique())
n_users, n_books

(2868, 13784)

In [30]:
class UserEmbedding(tf.keras.layers.Layer):
  """Keras layer representing an embedding for a single user, used below."""

  def __init__(self, num_latent_factors, **kwargs):
    super().__init__(**kwargs)
    self.num_latent_factors = num_latent_factors

  def build(self, input_shape):
    self.embedding = self.add_weight(
        shape=(1, self.num_latent_factors),
        initializer='uniform',
        dtype=tf.float32,
        name='UserEmbeddingKernel')
    super().build(input_shape)

  def call(self, inputs):
    return self.embedding

  def compute_output_shape(self):
    return (1, self.num_latent_factors)


def ncf_model(
    num_items: int,
    num_latent_factors: int,
    dense_layers: List[int] = [8, 4],
    reg_layers: List[int] = [0.01, 0.01],
    activation_dense: str = "relu") -> tff.learning.models.ReconstructionModel:
  """Defines a Keras matrix factorization model."""
  # Layers with variables will be partitioned into global and local layers.
  # We'll pass this to `tff.learning.models.ReconstructionModel.from_keras_model_and_layers`.
  global_layers_mf = []
  local_layers_mf = []
  global_layers_mlp = []
  local_layers_mlp = []
# Extract the item embedding.
  item_input = tf.keras.layers.Input(shape=[1], name='Item')


  mf_item_embedding_layer = tf.keras.layers.Embedding(
      num_items,
      num_latent_factors,
      name='mf-ItemEmbedding')
      

  mlp_item_embedding_layer = tf.keras.layers.Embedding(
      num_items,
      num_latent_factors,
      name='mlp-ItemEmbedding')

  global_layers_mf.append(mf_item_embedding_layer)
  global_layers_mlp.append(mlp_item_embedding_layer)

  flat_item_vec_mf = tf.keras.layers.Flatten(name='FlattenItems-mf')(
      mf_item_embedding_layer(item_input))
  flat_item_vec_mlp = tf.keras.layers.Flatten(name='FlattenItems-mlp')(
      mlp_item_embedding_layer(item_input))
  # Extract the user embedding.
  mf_user_embedding_layer = UserEmbedding(
      num_latent_factors,
      name='mf-UserEmbedding')
  
  mlp_user_embedding_layer = UserEmbedding(
      num_latent_factors,
      name='mlp-UserEmbedding')
  
  local_layers_mf.append(mf_user_embedding_layer)
  local_layers_mlp.append(mlp_user_embedding_layer)

  # The item_input never gets used by the user embedding layer,
  # but this allows the model to directly use the user embedding.
  flat_user_vec_mf = tf.keras.layers.Flatten()(mf_user_embedding_layer(item_input))
  flat_user_vec_mlp = tf.keras.layers.Flatten()(mlp_item_embedding_layer(item_input))
  


  mf_vector =  Multiply()([flat_user_vec_mf, flat_item_vec_mf])
  mlp_vector = Concatenate()([flat_user_vec_mlp, flat_item_vec_mlp])


  layer_1 = Dense(
          8,
          activation=activation_dense,
          name = 'layer_1')


  mlp_vector = layer_1(mlp_vector)

  layer_2 = Dense(
          4,
          activation=activation_dense,
          name = 'layer_2')


  mlp_vector = layer_2(mlp_vector)

  predict_layer = Concatenate()([mf_vector, mlp_vector])

  result = Dense(
      1, activation="sigmoid", kernel_initializer="lecun_uniform", name="Rating"
  )
 

  output = result(predict_layer)
  model = tf.keras.Model(inputs=item_input, outputs=output)
  # Compute the dot product between the user embedding, and the item one.
  
  print(model.trainable_weights)
  input_spec = collections.OrderedDict(
      x=tf.TensorSpec(shape=[None, 1], dtype=tf.int64),
      y=tf.TensorSpec(shape=[None, 1], dtype=tf.float32))

  return tff.learning.models.ReconstructionModel.from_keras_model_and_variables(
      keras_model=model,
      global_trainable_variables=[model.trainable_weights[0],model.trainable_weights[2]],
      local_trainable_variables=[model.trainable_weights[1]],
      global_non_trainable_variables = [],
      local_non_trainable_variables = model.trainable_weights[3:],
      input_spec=input_spec)




In [31]:
# This will be used to produce our training process.
# User and item embeddings will be 50-dimensional.
import functools 

model_fn = functools.partial(
    ncf_model,
    num_items=n_books,
    num_latent_factors=50)


In [32]:
class RatingAccuracy(tf.keras.metrics.Mean):


  def __init__(self,
               name: str = 'rating_accuracy',
               **kwargs):
    super().__init__(name=name, **kwargs)

  def update_state(self,
                   y_true: tf.Tensor,
                   y_pred: tf.Tensor,
                   sample_weight: Optional[tf.Tensor] = None):
    absolute_diffs = tf.abs(y_true - y_pred)
    # A [batch_size, 1] tf.bool tensor indicating correctness within the
    # threshold for each example in a batch. A 0.5 threshold corresponds
    # to correctness when predictions are rounded to the nearest whole
    # number.
    example_accuracies = tf.less_equal(absolute_diffs, 0.5)
    super().update_state(example_accuracies, sample_weight=sample_weight)


loss_fn = lambda: tf.keras.losses.MeanSquaredError()
#metrics_fn = lambda: [RatingAccuracy()]

metrics_fn = lambda: [RatingAccuracy(),tfr.keras.metrics.NDCGMetric(name='ndcg_10',topn=10),tfr.keras.metrics.HitsMetric(name='hr_10', topn=10) ]


In [52]:
import tensorflow_privacy

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy

from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

In [57]:
tf.compat.v1.disable_v2_behavior()

In [35]:
from absl import logging
import collections


In [40]:
"""
def make_optimizer_class(cls):
  """Constructs a DP optimizer class from an existing one."""
  parent_code = tf.compat.v1.train.Optimizer.compute_gradients.__code__
  child_code = cls.compute_gradients.__code__
  GATE_OP = tf.compat.v1.train.Optimizer.GATE_OP  # pylint: disable=invalid-name
  if child_code is not parent_code:
    logging.warning(
        'WARNING: Calling make_optimizer_class() on class %s that overrides '
        'method compute_gradients(). Check to ensure that '
        'make_optimizer_class() does not interfere with overridden version.',
        cls.__name__)

  class DPOptimizerClass(cls):
    """Differentially private subclass of given class cls."""

    _GlobalState = collections.namedtuple(
      '_GlobalState', ['l2_norm_clip', 'stddev'])
    
    def __init__(
        self,
        dp_sum_query,
        num_microbatches=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg, g-doc-args
        **kwargs):
      """Initialize the DPOptimizerClass.

      Args:
        dp_sum_query: DPQuery object, specifying differential privacy
          mechanism to use.
        num_microbatches: How many microbatches into which the minibatch is
          split. If None, will default to the size of the minibatch, and
          per-example gradients will be computed.
        unroll_microbatches: If true, processes microbatches within a Python
          loop instead of a tf.while_loop. Can be used if using a tf.while_loop
          raises an exception.
      """
      super(DPOptimizerClass, self).__init__(*args, **kwargs)
      self._dp_sum_query = dp_sum_query
      self._num_microbatches = num_microbatches
      self._global_state = self._dp_sum_query.initial_global_state()
      # TODO(b/122613513): Set unroll_microbatches=True to avoid this bug.
      # Beware: When num_microbatches is large (>100), enabling this parameter
      # may cause an OOM error.
      self._unroll_microbatches = unroll_microbatches

    def compute_gradients(self,
                          loss,
                          var_list,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None,
                          gradient_tape=None,
                          curr_noise_mult=0,
                          curr_norm_clip=1):

      self._dp_sum_query = gaussian_query.GaussianSumQuery(curr_norm_clip, 
                                                           curr_norm_clip*curr_noise_mult)
      self._global_state = self._dp_sum_query.make_global_state(curr_norm_clip, 
                                                                curr_norm_clip*curr_noise_mult)
      

      # TF is running in Eager mode, check we received a vanilla tape.
      if not gradient_tape:
        raise ValueError('When in Eager mode, a tape needs to be passed.')

      vector_loss = loss()
      if self._num_microbatches is None:
        self._num_microbatches = tf.shape(input=vector_loss)[0]
      sample_state = self._dp_sum_query.initial_sample_state(var_list)
      microbatches_losses = tf.reshape(vector_loss, [self._num_microbatches, -1])
      sample_params = (self._dp_sum_query.derive_sample_params(self._global_state))

      def process_microbatch(i, sample_state):
        """Process one microbatch (record) with privacy helper."""
        microbatch_loss = tf.reduce_mean(input_tensor=tf.gather(microbatches_losses, [i]))
        grads = gradient_tape.gradient(microbatch_loss, var_list)
        sample_state = self._dp_sum_query.accumulate_record(sample_params, sample_state, grads)
        return sample_state
    
      for idx in range(self._num_microbatches):
        sample_state = process_microbatch(idx, sample_state)

      if curr_noise_mult > 0:
        grad_sums, self._global_state = (self._dp_sum_query.get_noised_result(sample_state, self._global_state))
      else:
        grad_sums = sample_state

      def normalize(v):
        return v / tf.cast(self._num_microbatches, tf.float32)

      final_grads = tf.nest.map_structure(normalize, grad_sums)
      grads_and_vars = final_grads#list(zip(final_grads, var_list))
    
      return grads_and_vars

  return DPOptimizerClass


def make_gaussian_optimizer_class(cls):
  """Constructs a DP optimizer with Gaussian averaging of updates."""

  class DPGaussianOptimizerClass(make_optimizer_class(cls)):
    """DP subclass of given class cls using Gaussian averaging."""

    def __init__(
        self,
        l2_norm_clip=1.5,
        noise_multiplier=1.3,
        num_microbatches=250,
        ledger=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
      dp_sum_query = gaussian_query.GaussianSumQuery(
          l2_norm_clip, l2_norm_clip * noise_multiplier)

      super(DPGaussianOptimizerClass, self).__init__(
          dp_sum_query,
          num_microbatches,
          unroll_microbatches,
          *args,
          **kwargs)

    @property
    def ledger(self):
      return self._dp_sum_query

  return DPGaussianOptimizerClass

GradientDescentOptimizer = tf.compat.v1.train.GradientDescentOptimizer
DPGradientDescentGaussianOptimizer_NEW = make_gaussian_optimizer_class(GradientDescentOptimizer)"""

In [44]:
"""optimizer_tf_dp = tensorflow_privacy.DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

"""

In [68]:
BUFFER_SIZE = 60000 # Total size of training data
BATCH_SIZE = 600
NR_MICROBATCHES = 600 # Each batch of data is split in smaller units called microbatches.


NORM_CLIP = 1.1 # Does NOT affect EPSILON, but increases NOISE on gradients
NOISE_MULT = 1.15


DP_DELTA = 1e-5 # Needs to be smaller than 1/BUFFER_SIZE
EPOCHS = 249


N_DISC = 1 # Number of times we train DISC before training GEN once


# Learning Rate for DISCRIMINATOR
LR_DISC = tf.compat.v1.train.polynomial_decay(learning_rate=0.150,
                                              global_step=tf.compat.v1.train.get_or_create_global_step(),
                                              decay_steps=10000,
                                              end_learning_rate=0.052,
                                              power=1)

if BATCH_SIZE % NR_MICROBATCHES != 0:
    raise ValueError('Batch size should be an integer multiple of the number of microbatches')

RuntimeError: resource: Attempting to capture an EagerTensor without building a function.

In [67]:
BATCH_SIZE / NR_MICROBATCHES

1.0

In [69]:
optimizer_dp = DPGradientDescentGaussianOptimizer_NEW

loss_fn_dp = lambda: tf.keras.losses.MeanSquaredError(
     reduction=tf.losses.Reduction.NONE)



compute_dp_sgd_privacy.compute_dp_sgd_privacy_statement(number_of_examples= 1,batch_size= BATCH_SIZE,
    num_epochs= EPOCHS,
    noise_multiplier= NOISE_MULT,
    delta = DP_DELTA
    )

ValueError: math domain error

In [45]:

training_process_dp = tff.learning.algorithms.build_fed_recon(
    model_fn=model_fn,
    loss_fn=loss_fn_dp,
    metrics_fn=metrics_fn,
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(0.5),
    client_optimizer_fn=lambda: optimizer_dp,
    reconstruction_optimizer_fn=lambda: tf.keras.optimizers.SGD(0.001))



[<tf.Variable 'mlp-ItemEmbedding/embeddings:0' shape=(13784, 50) dtype=float32>, <tf.Variable 'mf-UserEmbedding/UserEmbeddingKernel:0' shape=(1, 50) dtype=float32>, <tf.Variable 'mf-ItemEmbedding/embeddings:0' shape=(13784, 50) dtype=float32>, <tf.Variable 'layer_1/kernel:0' shape=(100, 8) dtype=float32>, <tf.Variable 'layer_1/bias:0' shape=(8,) dtype=float32>, <tf.Variable 'layer_2/kernel:0' shape=(8, 4) dtype=float32>, <tf.Variable 'layer_2/bias:0' shape=(4,) dtype=float32>, <tf.Variable 'Rating/kernel:0' shape=(54, 1) dtype=float32>, <tf.Variable 'Rating/bias:0' shape=(1,) dtype=float32>]
[<tf.Variable 'mlp-ItemEmbedding/embeddings:0' shape=(13784, 50) dtype=float32>, <tf.Variable 'mf-UserEmbedding/UserEmbeddingKernel:0' shape=(1, 50) dtype=float32>, <tf.Variable 'mf-ItemEmbedding/embeddings:0' shape=(13784, 50) dtype=float32>, <tf.Variable 'layer_1/kernel:0' shape=(100, 8) dtype=float32>, <tf.Variable 'layer_1/bias:0' shape=(8,) dtype=float32>, <tf.Variable 'layer_2/kernel:0' shape

AssertionError: in user code:

    File "/home/chris/.local/lib/python3.10/site-packages/tensorflow_federated/python/learning/algorithms/fed_recon.py", line 208, in client_update  *
        client_optimizer = keras_optimizer.build_or_verify_tff_optimizer(
    File "/home/chris/.local/lib/python3.10/site-packages/tensorflow_federated/python/learning/optimizers/keras_optimizer.py", line 144, in build_or_verify_tff_optimizer  *
        optimizer_fn, trainable_weights, disjoint_init_and_next
    File "/home/chris/.local/lib/python3.10/site-packages/tensorflow_federated/python/learning/optimizers/keras_optimizer.py", line 72, in mock_apply_gradients  *
        opt.apply_gradients(
    File "/home/chris/.local/lib/python3.10/site-packages/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py", line 394, in apply_gradients  *
        assert self._was_dp_gradients_called, (

    AssertionError: Neither _compute_gradients() or get_gradients() on the differentially private optimizer was called. This means the training is not differentially private. It may be the case that you need to upgrade to TF 2.4 or higher to use this particular optimizer.
