In [26]:
import pandas as pd
import numpy as np

import tensorflow as tf
import keras.backend as K
from keras.layers import Input, Layer
from keras.models import Model
from keras.models import load_model
from tensorflow.keras.optimizers import Adam

import operator
from typing import Union, Tuple, List, Any, Optional

In [9]:
from src.counterfactuals.base import CounterfactualMethod

#from src.counterfactuals.constraints import Freeze, OneHot

In [31]:
class Cadex(CounterfactualMethod):
    '''
    Creates a counterfactual explanation based on a pre-trained model using CADEX method
    The model has to be a Keras classifier model, where in the final classification layer, each class label must
    have a separate unit.
    '''
    def __init__(self, pretrained_model, constraints: Optional[List[Any]] = None) -> None:
        self.model = pretrained_model
        for layer in self.model.layers[2:]:
             layer.trainable = False
                
        self._constraints = constraints if constraints is not None else []
        
#         modified_input_layer = ModifiedInputLayer()
#         model_input = self.original_model.layers[0].input
#         model_output = modified_input_layer(self.original_model.layers[0].output)
#         for layer in self.original_model.layers[1:]:
#             model_output = layer(model_output)
        
#         self.model = Model(inputs=model_input, outputs=model_output)

#         for layer in self.model.layers[2:]:
#             layer.trainable = False
#         print(self.model.summary())
            
#         self.input_modified = modified_input_layer
#         desired = Input(shape=[model_output.shape[1]])
#         desired_loss = -K.sum(desired * K.log(self.output), axis=1)
#         grad = K.gradients(desired_loss, [self.input])
#         self.adv_grad = K.function([self.input, desired], [grad[0]])
# Rebuild the model with an input modification layer
#         input_mod_layer = ModifiedInputLayer()
#         self.input = self.original_model.layers[0].input
#         model = input_mod_layer(self.original_model.layers[0].output)
#         for layer in self.original_model.layers[1:]:
#             model = layer(model)
#         self.output = model

#         self.model = Model(inputs=self.input, outputs=self.output)
#         # Only allow the input modification layer to be trained
#         for layer in self.model.layers[2:]:
#             layer.trainable = False
#         print(self.model.summary())
#         self.input_modifier = input_mod_layer
        
        
    def generate(self, x: Union[pd.Series, np.ndarray], max_epoch=100, threshold=0.5) -> Union[pd.DataFrame, np.ndarray]:
        original_x = x.copy()
        original_y = self.model.predict(original_x)
        expected_y = 0 if original_y = 1 else 1
        
        mask = _get_mask(x, self._constraints) # TODO
        
        result = None
        epoch = 0
        for _ in range(max_epoch):
            gradient = self._get_gradient(x, mask) # TODO
            x = x - gradient
            x = correct_categorical(x, threshold)
            x = apply_constraints(x)
            if self.model.predict(x) == expected_y:
                return x
            
        return None
            
            
    def _get_gradient(x, y, mask):
        with tf.GradientTape(persistent=True) as tape:
            y_pred = self.model.predict(x)
            loss = tf.reduce_mean(y_pred - y)
            
    return tape.gradient(loss, model.trainable_variables) * mask

            
            
        

class ModifiedInputLayer(Layer):
    '''
    Input modification layer.
    Has a vector of trainable weights which are added to the incoming input.
    Subclasses Keras' Layer class and implements 3 abstract methods
    '''
    def build(self, input_shape):
        self._weights = self.add_weight(name='weights', shape=(1, input_shape[1]), initializer='zeros', trainable=True)
        super().build(input_shape)
        
    def call(self, x):
        return x + self._weights
    
    def compute_output_shape(self, input_shape):
        return input_shape
    
    def transform(self, input):
        res = input + self.get_weights()[0]
        if isinstance(res, pd.DataFrame):
            return res.values

    
    
    
    
class CadexAdam(Adam):
    '''
    Adam optimizer, with gradient mask.
    The mask is given in the constructor, and used to multiply by the gradients.
    get_gradients from the base Optimizer class is overridden to implement this behaviour.
    '''
    def __init__(self, mask, **kwargs):
        self._mask = mask
        super().__init__(**kwargs)
        
    def get_gradients(self, loss, params):
        return [i * self._mask for i in super().get_gradients(loss, params)]


In [41]:
model = load_model('data/model_german.h5')

In [42]:
for layer in model.layers[2:]:
    layer.trainable = False
model.trainable_variables

[<tf.Variable 'dense_2/kernel:0' shape=(61, 15) dtype=float32, numpy=
 array([[-7.14035565e-03, -2.23118439e-02, -1.73734408e-03,
         -6.62157254e-06,  1.24929339e-01, -4.41957265e-03,
          1.13691650e-02, -2.15552427e-04, -1.28878698e-01,
          3.30290961e-04,  1.08505548e-04,  5.46226162e-04,
         -7.38163799e-05,  1.02641527e-02,  2.47910211e-05],
        [-1.83521714e-02, -5.75711131e-02,  2.50781828e-04,
         -1.19478325e-04, -5.79149113e-04, -6.55479953e-02,
          9.55273584e-02,  1.01216661e-04, -6.63950283e-04,
          6.76028547e-04, -1.53302404e-04,  2.37043074e-04,
          3.07430346e-05,  8.81974818e-04, -4.32097659e-06],
        [-2.80384859e-03, -1.16936997e-01, -2.01816894e-02,
          1.25808408e-04,  2.95345322e-03, -3.54610942e-03,
          5.36744818e-02,  4.37307099e-05, -1.65244483e-03,
          1.57287531e-03,  2.75312428e-04,  1.36759272e-03,
          7.06414285e-05,  3.58456895e-02,  7.06467981e-05],
        [ 1.66258687e-04, -

In [32]:
cadex = Cadex(model)

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 61)]              0         
                                                                 
 dense_2 (Dense)             (None, 15)                930       
                                                                 
 modified_input_layer_4 (Mod  (None, 15)               15        
 ifiedInputLayer)                                                
                                                                 
 dense_1 (Dense)             (None, 2)                 32        
                                                                 
Total params: 977
Trainable params: 930
Non-trainable params: 47
_________________________________________________________________
None


In [21]:
cadex = Cadex(model)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 61)]              0         
                                                                 
 dense_2 (Dense)             (None, 15)                930       
                                                                 
 modified_input_layer (Modif  (None, 15)               15        
 iedInputLayer)                                                  
                                                                 
 dense_1 (Dense)             (None, 2)                 32        
                                                                 
Total params: 977
Trainable params: 930
Non-trainable params: 47
_________________________________________________________________
None


In [22]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 15)                930       
                                                                 
 dense_1 (Dense)             (None, 2)                 32        
                                                                 
Total params: 962
Trainable params: 930
Non-trainable params: 32
_________________________________________________________________


In [38]:
model = load_model('data/model_german.h5')
x = tf.ones((1, 61), dtype=tf.float32)
yTrue =  tf.ones((1, 61), dtype=tf.float32)
## Why does not it work?
with tf.GradientTape() as tape:
  yPred = model.predict(x)
  #print(yPred)
  loss = tf.reduce_mean(yPred-yTrue)

gradients = tape.gradient(loss, model.trainable_variables)
print(gradients)

LookupError: No gradient defined for operation'IteratorGetNext' (op type: IteratorGetNext). In general every operation must have an associated `@tf.RegisterGradient` for correct autodiff, which this op is lacking. If you want to pretend this operation is a constant in your program, you may insert `tf.stop_gradient`. This can be useful to silence the error in cases where you know gradients are not needed, e.g. the forward pass of tf.custom_gradient. Please see more details in https://www.tensorflow.org/api_docs/python/tf/custom_gradient.