In [1]:
from bglog import BGLog, get_embedding_layer
import numpy as np
import tensorflow as tf
tf.random.set_seed(123)
from pretraining import LogLineEncoder, LogSeqEncoder, LogClassifier
from boundary_loss import euclidean_metric, BoundaryLoss

In [2]:
bglog = BGLog(save_padded_num_sequences=False, load_from_pkl=True)

In [3]:
train_test = bglog.get_tensor_train_test(ablation=1000)
train_data, test_data = train_test

padded_num_seq_df loaded from data\bgl_padded_num_seq_df.pkl
trained tokenizer, tk, loaded from data\bgltk.pkl
train_0:, 800
test_0:, 200
train_1:, 800
test_1:, 200
train_2:, 800
test_2:, 200
train_3:, 800
test_3:, 102
4 class does not have 800 records, it has only 628 records
test_4:, 0
5 class does not have 800 records, it has only 165 records
5 class does not have 200 records, it has only 165 records
6 class does not have 800 records, it has only 75 records
6 class does not have 200 records, it has only 75 records
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]]
<BatchDataset shapes: ((32, 32, 64), (32, 4)), types: (tf.int32, tf.float32)>
<BatchDataset shapes: ((32, 32, 64), (32, 4)), types: (tf.int32, tf.float32)>


In [4]:
line_encoder =   LogLineEncoder(bglog, chars_in_line=64)
# the model doesn't have a state unless it is called at least once
# in order to initialize the model we need a sample data 
sample_train_data = next(iter(train_data))
sample_x_train = sample_train_data[0]
print('sample_x_train.shape:', sample_x_train.shape)
# now we will initialize the model with the sample data
loglineEmbedding = line_encoder(sample_x_train)
print('loglineEmbedding.shape:', loglineEmbedding.shape)
# Now the model have a state and can be inspected        
# line_encoder.summary()

vocab_size: 50
sample_x_train.shape: (32, 32, 64)
loglineEmbedding.shape: (32, 32, 64)


In [5]:
logSeqencer =   LogSeqEncoder(line_in_seq=32)
# the model doesn't have a state unless it is called at least once
logSeqEmbedding = logSeqencer(loglineEmbedding)
print('logSeqEmbedding.shape:', logSeqEmbedding.shape)
# Now the model have a state and can be inspected        
# logSeqencer.summary()

logSeqEmbedding.shape: (32, 16)


In [6]:
log_classifier = LogClassifier(line_encoder=line_encoder, seq_encoder=logSeqencer, num_classes=4)
# log_classifier(sample_x_train)  

In [7]:
# log_classifier.summary()

In [8]:
log_classifier.compile(optimizer='adam', 
                  loss='categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
hist = log_classifier.fit(train_data, validation_data=test_data, epochs=1) 

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [9]:
# log_classifier(sample_x_train)   

In [35]:
class OpenSet:
    ''' 
    self.num_labels = number of classes
    self.embedding_size = number of neurons in the logits layers of the pretrained model'''
    def __init__(self, num_labels, pretrained_model, embedding_size,
                lr_boundary):
#         super().__init__():
        self.model = pretrained_model        
        self.centroids = None
        self.num_labels = num_labels
        self.embedding_size = embedding_size
        self.delta = None
        self.lr_boundary = lr_boundary
        self.delta_points = []
        
    
    def train(self, data_train, epochs=1):
        criterion_boundary = BoundaryLoss(num_labels=self.num_labels)
        # delta is getting calculated inside the  BoundaryLoss class as well
        # however that calculated delta is used for calculating the loss 
        # that delta is not updating the criterion_boundary.delta which is 
        # a randomly initialized parameter. 
        # Hence the following softplus is on randomly initialized trainable parameters
        # and not softplus on softplus
        self.delta = tf.nn.softplus(criterion_boundary.delta)
        self.centroids = self.centroids_cal(data_train)        
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr_boundary) # does it take criterion_boundary.parameters() ??
        wait = 0
        best_delta, best_centroids = None, None
        
        for epoch in range(epochs):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for batch in data_train:
                logseq_batch, label_batch = batch
                ## (32, 32, 64), (32, 4)
                batch_loss, t_loss = self.train_step(criterion_boundary, 
                                                     logseq_batch, label_batch, optimizer)
                tr_loss += t_loss
                nb_tr_steps += 1
                
            self.delta_points.append(self.delta)
            loss = tr_loss / nb_tr_steps
            print('train_loss:', loss)  
                    
#     @tf.function                
    def train_step(self, criterion_boundary, logseq_batch, label_batch, optimizer):
#         print('within train_step')
        tr_loss = 0
        with tf.GradientTape() as tape:                
            features_batch = self.model(logseq_batch, extract_feature=True)
            loss, self.delta = criterion_boundary(features_batch, 
                                                  self.centroids, 
                                                  label_batch)
        tr_loss += loss
        gradients = tape.gradient(loss, [self.delta])

        optimizer.apply_gradients(zip(gradients, [self.delta]))
        return loss, tr_loss                    
        
        
        
    def centroids_cal(self, data):
        centroids = tf.zeros((self.num_labels, self.embedding_size))
        total_labels = tf.zeros(self.num_labels)
        for batch in data:
            logseq_batch, label_batch = batch
            ## (32, 32, 64), (32, 4)
            features = self.model(logseq_batch, extract_feature=True)
            ## (32, 16) features - 32 sequence of line each haaving 64 characrers
            ## produces a feaure vector of dimension 16. 
            for i in range(len(label_batch)): # (32, 4) --> here length is 32
                label = label_batch[i] # label looks like [0 0 0 1]
                numeric_label = np.argmax(label) # index position of the label = 3 , so it is actually class =3
                ##total_labels = [0 0 0 0] each col representing a class 
                ## count the number for each class
                total_labels_lst = tf.unstack(total_labels)
                total_labels_lst[numeric_label] += 1 
                total_labels = tf.stack(total_labels_lst)
                centroids_lst = tf.unstack(centroids)
                centroids_lst[numeric_label] += features[i]
                centroids = tf.stack(centroids_lst)
                # each row index in the centroid array is a class
                # we add first identify the feature belonging to which class by the numeric_label
                # Then add all the features belonging to the class in the corresponding row of the centroid arr
        ### shape of centroids is (4, 16) whereas shape of total_labels is (1, 4)
        ### reshape the total_labels as 4,1 ==> [[0], [0], [0], [0]]==> 4 rows 
        ## so that we can divide the centroids array by the total_labels
        total_label_reshaped = tf.reshape(total_labels, (self.num_labels, 1))
        centroids /= total_label_reshaped
        return centroids  
        
        

In [11]:
# loss, self.delta = criterion_boundary(features_batch,
# logits =  euclidean_metric(features, centroids)
# NotImplementedError: Cannot convert a symbolic Tensor (log_classifier/log_seq_encoder/dense/Relu:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported 
# it looks like the numpy arrays to be converted to tensor

In [36]:
oset = OpenSet(4, log_classifier, 16, 0.05)

In [14]:
# optimizer.apply_gradients(zip(gradients, self.delta))
# TypeError: 'IndexedSlices' object is not iterable

In [15]:
t_batch = next(iter(train_data))
t_batch_x, t_batch_y = t_batch
t_batch_x.shape
centroids = oset.centroids_cal(train_data)
features_batch = log_classifier(t_batch_x, extract_feature=True)


In [16]:
criterion_boundary = BoundaryLoss(num_labels=4)
loss, delta = criterion_boundary(features_batch, centroids, t_batch_y)

In [17]:
loss

<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([1.6548097, 1.6548097, 1.6548097, 1.6548097, 1.6931441, 1.6931441,
       1.6548097, 1.6362237, 1.6362237, 1.6548097, 1.6548097, 1.6931441,
       1.6754997, 1.6754997, 1.6362237, 1.6931441, 1.6362237, 1.6362237,
       1.6931441, 1.6548097, 1.6754997, 1.6754997, 1.6931441, 1.6754997,
       1.6548097, 1.6548097, 1.6548097, 1.6548097, 1.6754997, 1.6548097,
       1.6754997, 1.6362237], dtype=float32)>

In [18]:
delta

<tf.Variable 'boundary_loss/Variable:0' shape=(4, 1) dtype=float32, numpy=
array([[0.6644433],
       [0.6851332],
       [0.7037192],
       [0.6467987]], dtype=float32)>

In [19]:
tr_loss=0
optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
with tf.GradientTape() as tape:                
    features_batch = log_classifier(t_batch_x, extract_feature=True)
    loss, delta = criterion_boundary(features_batch, centroids, t_batch_y)
tr_loss += loss
gradients = tape.gradient(loss, [delta])


In [20]:
delta = tf.Variable(delta)
delta

<tf.Variable 'Variable:0' shape=(4, 1) dtype=float32, numpy=
array([[0.6644433],
       [0.6851332],
       [0.7037192],
       [0.6467987]], dtype=float32)>

In [21]:
gradients

[<tensorflow.python.framework.indexed_slices.IndexedSlices at 0x2a6b58a7910>]

In [22]:
# optimizer.minimize(loss, delta)

In [23]:
# optimizer.apply_gradients(zip(gradients, delta))
#### AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute '_in_graph_mode'

In [26]:
# @tf.function
def check_gradient(t_batch_x, t_batch_y, centroids):
    tr_loss=0
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
    with tf.GradientTape() as tape:                
        features_batch = log_classifier(t_batch_x, extract_feature=True)
        loss, delta = criterion_boundary(features_batch, centroids, t_batch_y)
    tr_loss += loss
    gradients = tape.gradient(loss, [delta])    
    optimizer.apply_gradients(zip(gradients, [delta]))
#     optimizer.minimize(gradients, var_list=[delta])
    return loss, tr_loss

In [27]:
check_gradient(t_batch_x, t_batch_y, centroids)
###AttributeError: 'Tensor' object has no attribute '_in_graph_mode'

(<tf.Tensor: shape=(32,), dtype=float32, numpy=
 array([1.6548097, 1.6548097, 1.6548097, 1.6548097, 1.6931441, 1.6931441,
        1.6548097, 1.6362237, 1.6362237, 1.6548097, 1.6548097, 1.6931441,
        1.6754997, 1.6754997, 1.6362237, 1.6931441, 1.6362237, 1.6362237,
        1.6931441, 1.6548097, 1.6754997, 1.6754997, 1.6931441, 1.6754997,
        1.6548097, 1.6548097, 1.6548097, 1.6548097, 1.6754997, 1.6548097,
        1.6754997, 1.6362237], dtype=float32)>,
 <tf.Tensor: shape=(32,), dtype=float32, numpy=
 array([1.6548097, 1.6548097, 1.6548097, 1.6548097, 1.6931441, 1.6931441,
        1.6548097, 1.6362237, 1.6362237, 1.6548097, 1.6548097, 1.6931441,
        1.6754997, 1.6754997, 1.6362237, 1.6931441, 1.6362237, 1.6362237,
        1.6931441, 1.6548097, 1.6754997, 1.6754997, 1.6931441, 1.6754997,
        1.6548097, 1.6548097, 1.6548097, 1.6548097, 1.6754997, 1.6548097,
        1.6754997, 1.6362237], dtype=float32)>)

In [37]:
oset.train(train_data)

train_loss tf.Tensor(
[1.7957284 1.7940664 1.7944596 1.7933617 1.7945043 1.792864  1.7947391
 1.7926098 1.7953235 1.7940972 1.7948108 1.7937409 1.7959877 1.7926953
 1.7934608 1.7940593 1.795232  1.7926925 1.7925882 1.7935345 1.7931397
 1.7928509 1.7927395 1.790691  1.7934983 1.7929994 1.7948874 1.7970688
 1.793662  1.7941579 1.7943541 1.7943187], shape=(32,), dtype=float32)


In [38]:
oset.delta_points

[<tf.Variable 'boundary_loss_3/Variable:0' shape=(4, 1) dtype=float32, numpy=
 array([[0.7289697 ],
        [0.74553466],
        [0.76566625],
        [0.73381543]], dtype=float32)>]

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
C:\Users\BHUJAY~1\AppData\Local\Temp/ipykernel_24404/2131960519.py in <module>
----> 1 oset.train(train_data)

C:\Users\BHUJAY~1\AppData\Local\Temp/ipykernel_24404/3835535165.py in train(self, data_train, epochs)
     35                 logseq_batch, label_batch = batch
     36                 ## (32, 32, 64), (32, 4)
---> 37                 batch_loss, t_loss = self.train_step(criterion_boundary, 
     38                                                      logseq_batch, label_batch)
     39                 tr_loss += t_loss

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
    778       else:
    779         compiler = "nonXla"
--> 780         result = self._call(*args, **kwds)
    781 
    782       new_tracing_count = self._get_tracing_count()

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
    821       # This is the first call of __call__, so we have to initialize.
    822       initializers = []
--> 823       self._initialize(args, kwds, add_initializers_to=initializers)
    824     finally:
    825       # At this point we know that the initialization is complete (or less

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
    694     self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
    695     self._concrete_stateful_fn = (
--> 696         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
    697             *args, **kwds))
    698 

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2853       args, kwargs = None, None
   2854     with self._lock:
-> 2855       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2856     return graph_function
   2857 

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
   3211 
   3212       self._function_cache.missed.add(call_context_key)
-> 3213       graph_function = self._create_graph_function(args, kwargs)
   3214       self._function_cache.primary[cache_key] = graph_function
   3215       return graph_function, args, kwargs

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   3063     arg_names = base_arg_names + missing_arg_names
   3064     graph_function = ConcreteFunction(
-> 3065         func_graph_module.func_graph_from_py_func(
   3066             self._name,
   3067             self._python_function,

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    984         _, original_func = tf_decorator.unwrap(python_func)
    985 
--> 986       func_outputs = python_func(*func_args, **func_kwargs)
    987 
    988       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
    598         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    599         # the function a weak reference to itself to avoid a reference cycle.
--> 600         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    601     weak_wrapped_fn = weakref.ref(wrapped_fn)
    602 

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\eager\function.py in bound_method_wrapper(*args, **kwargs)
   3733     # However, the replacer is still responsible for attaching self properly.
   3734     # TODO(mdan): Is it possible to do it here instead?
-> 3735     return wrapped_fn(*args, **kwargs)
   3736   weak_bound_method_wrapper = weakref.ref(bound_method_wrapper)
   3737 

~\anaconda3\envs\env3\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
    971           except Exception as e:  # pylint:disable=broad-except
    972             if hasattr(e, "ag_error_metadata"):
--> 973               raise e.ag_error_metadata.to_exception(e)
    974             else:
    975               raise

NotImplementedError: in user code:

    C:\Users\BHUJAY~1\AppData\Local\Temp/ipykernel_24404/3835535165.py:50 train_step  *
        loss, self.delta = criterion_boundary(features_batch,
    C:\Users\Bhujay_ROG\anaconda3\envs\env3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:985 __call__  **
        outputs = call_fn(inputs, *args, **kwargs)
    C:\Users\Bhujay_ROG\MyDev\OCLog\oclog\boundary_loss.py:34 call  **
        logits =  euclidean_metric(features, centroids)
    C:\Users\Bhujay_ROG\MyDev\OCLog\oclog\boundary_loss.py:12 euclidean_metric
        a = np.expand_dims(a, 1)
    <__array_function__ internals>:5 expand_dims
        
    C:\Users\Bhujay_ROG\anaconda3\envs\env3\lib\site-packages\numpy\lib\shape_base.py:591 expand_dims
        a = asanyarray(a)
    C:\Users\Bhujay_ROG\anaconda3\envs\env3\lib\site-packages\numpy\core\_asarray.py:136 asanyarray
        return array(a, dtype, copy=False, order=order, subok=True)
    C:\Users\Bhujay_ROG\anaconda3\envs\env3\lib\site-packages\tensorflow\python\framework\ops.py:845 __array__
        raise NotImplementedError(

    NotImplementedError: Cannot convert a symbolic Tensor (log_classifier/log_seq_encoder/dense/Relu:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=0.1)
var1 = tf.Variable(10.0)
loss = lambda: (var1 ** 2)/2.0       # d(loss)/d(var1) == var1
step_count = opt.minimize(loss, [var1]).numpy()
# The first step is `-learning_rate*sign(grad)`
var1.numpy()
