In [1]:
from bglog import BGLog, get_embedding_layer
import numpy as np
import tensorflow as tf
tf.random.set_seed(123)

In [2]:
bglog = BGLog(save_padded_num_sequences=False, load_from_pkl=True)

In [3]:
train_test = bglog.get_tensor_train_test(ablation=1000)
train_data, test_data = train_test

padded_num_seq_df loaded from data\bgl_padded_num_seq_df.pkl
trained tokenizer, tk, loaded from data\bgltk.pkl
train_0:, 800
test_0:, 200
train_1:, 800
test_1:, 200
train_2:, 800
test_2:, 200
train_3:, 800
test_3:, 102
4 class does not have 800 records, it has only 628 records
test_4:, 0
5 class does not have 800 records, it has only 165 records
5 class does not have 200 records, it has only 165 records
6 class does not have 800 records, it has only 75 records
6 class does not have 200 records, it has only 75 records
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]]
<BatchDataset shapes: ((32, 32, 64), (32, 4)), types: (tf.int32, tf.float32)>
<BatchDataset shapes: ((32, 32, 64), (32, 4)), types: (tf.int32, tf.float32)>


In [4]:
def model(conv1d_set1 = 3, conv1d_set2 = 3, dense_neurons=2048, filters=64,
            kernel_size=3,maxpool_1=True,epochs=25, dense_activation='relu'):
    embedding_weights, vocab_size, char_onehot = get_embedding_layer(bglog)
    B = train_data.element_spec[0].shape[0]
#     inputs = tf.keras.layers.Input(batch_shape=(B, train_data.element_spec[0].shape[1], train_data.element_spec[0].shape[2]), dtype='float64' )
    inputs = tf.keras.layers.Input(shape=(train_data.element_spec[0].shape[1], train_data.element_spec[0].shape[2]), dtype='float64' )
    x = tf.keras.layers.Embedding(input_dim=vocab_size+1,
                                    output_dim=vocab_size,
                                    input_length=train_data.element_spec[0].shape[2],
                                    weights = [embedding_weights],
                                    )(inputs)
    for _ in range(conv1d_set1):
        x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
    if maxpool_1:
        x = tf.keras.layers.MaxPooling2D(pool_size=(1, train_data.element_spec[0].shape[2]))(x)
        x = tf.reshape(x, (B, train_data.element_spec[0].shape[1], filters))        
        for _ in range(conv1d_set2):
            x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
        x = tf.keras.layers.MaxPooling1D(pool_size=(train_data.element_spec[0].shape[1]) )(x)
        x = tf.reshape(x, (B, filters))
    if not maxpool_1:
        x = tf.keras.layers.Flatten()(x)
    if dense_activation is None:
        x = tf.keras.layers.Dense(dense_neurons)(x)
    else:
        x = tf.keras.layers.Dense(dense_neurons, activation=dense_activation)(x)
    outputs = tf.keras.layers.Dense(train_data.element_spec[1].shape[1], activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    print(model.summary())
    model.compile(optimizer='adam', 
                  loss='categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
    hist = model.fit(train_data, validation_data=test_data, epochs=epochs) 
    return model, hist

In [5]:
# we  feed  xi  to  a dense layer h to get the log-sequence representation zi∈RD:
#     zi= h(xi) =σ(Whxi+bh) ............................(2)
# in our case zi can be obtained from the dense layer before the softmax
# Lets see how to ger it from the train mode

In [6]:
# we pre-train the model with labeled known intent samples. 
# In order to better reflect the effectiveness of the learned decision boundary, 
# we learn the feature representation zi with the simple softmax loss Ls to perform classification:

trained_model, hist = model(epochs=6,)

vocab_size: 50
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 64)]          0         
_________________________________________________________________
embedding (Embedding)        (None, 32, 64, 50)        2550      
_________________________________________________________________
conv1d (Conv1D)              (None, 32, 64, 64)        9664      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 32, 64, 64)        12352     
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 32, 64, 64)        12352     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 1, 64)         0         
_________________________________________________________________
tf_op_layer_Reshape (TensorF [(32, 32, 

In [7]:
# Learn the decision boundary of each class constraining the known labels within a ball area
# how to get zi and how to know that zi belongs to which yi ?
# from there we will have to calculate the Ck , centroid for the class k

In [8]:
trained_model.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x1e1b9d7a9a0>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x1e1b8238b20>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1e1b56258e0>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1e1b56253d0>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1e1b696d520>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x1e1b6973160>,
 <tensorflow.python.keras.engine.base_layer.TensorFlowOpLayer at 0x1e1b86b6e20>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1e1b86a6790>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1e1b86b6d30>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1e1bb679ee0>,
 <tensorflow.python.keras.layers.pooling.MaxPooling1D at 0x1e1bb752ca0>,
 <tensorflow.python.keras.engine.base_layer.TensorFlowOpLayer at 0x1e1bb7668b0>,
 <tensorflow.python.keras.layers.core.Dense at 0x1e1bb756220>,
 <tensorflow.python.keras.layers.core.Dense

In [11]:
dense_6 = trained_model.get_layer(index=(len(trained_model.layers)-1))
print(dense_6)

<tensorflow.python.keras.layers.core.Dense object at 0x000001E1BB76A4C0>


In [12]:
#This is the log sequence embedding from the last layer
# we can treat this as the features from the logs
dense_6.output

<tf.Tensor 'dense_1/Softmax:0' shape=(32, 4) dtype=float32>

In [13]:
# Then, we use the pre-trained model to extract intent features for 
# learning the decision boundary

In [14]:
class LogLineEncoder(tf.keras.Model):
    def __init__(self, num_of_conv1d=3,  
                 filters=64,
                 kernel_size=3, ):
        super().__init__()            
        self.num_of_conv1d = num_of_conv1d       
        self.filters = filters
        self.kernel_size = kernel_size           
        self.embedding_weights, self.vocab_size, self.char_onehot = get_embedding_layer(bglog)       
        
        self.embedding = tf.keras.layers.Embedding(input_dim=self.vocab_size+1,
                                    output_dim=self.vocab_size,
                                    input_length=train_data.element_spec[0].shape[2],
                                    weights = [self.embedding_weights],
                                    )
        self.conv1d_layers = [tf.keras.layers.Conv1D(filters=filters, 
                                                kernel_size=kernel_size, 
                                                padding='same')  
                       for _ in range(self.num_of_conv1d)]
        self.maxpool2d = tf.keras.layers.MaxPooling2D(
            pool_size=(1, train_data.element_spec[0].shape[2]))
                  
        
    def call(self, inputs):
        x = self.embedding(inputs)
        for conv1d_layer in self.conv1d_layers:
            x = conv1d_layer(x)
        x = self.maxpool2d(x)
        x = tf.reshape(x, (inputs.shape[0], inputs.shape[1], self.filters))
        return x
    
    

# 
line_encoder =   LogLineEncoder()
# the model doesn't have a state unless it is called at least once
# in order to initialize the model we need a sample data 
sample_train_data = next(iter(train_data))
sample_x_train = sample_train_data[0]
print('sample_x_train.shape:', sample_x_train.shape)
# now we will initialize the model with the sample data
loglineEmbedding = line_encoder(sample_x_train)
print('loglineEmbedding.shape:', loglineEmbedding.shape)
# Now the model have a state and can be inspected        
line_encoder.summary()

vocab_size: 50
sample_x_train.shape: (32, 32, 64)
loglineEmbedding.shape: (32, 32, 64)
Model: "log_line_encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  2550      
_________________________________________________________________
conv1d_6 (Conv1D)            multiple                  9664      
_________________________________________________________________
conv1d_7 (Conv1D)            multiple                  12352     
_________________________________________________________________
conv1d_8 (Conv1D)            multiple                  12352     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 multiple                  0         
Total params: 36,918
Trainable params: 36,918
Non-trainable params: 0
_________________________________________________________________


LOG SEQUENCE EMBEDDING TAKES LOGLINE EMBEDDING AS INPUT

In [15]:
class LogSeqEncoder(tf.keras.Model):
    
    def __init__(self, num_of_conv1d=3,  filters=64,
                 kernel_size=3, maxpool_1=True,
                 dense_neurons=2048, dense_activation='relu',):
        super().__init__()
        self.num_of_conv1d = num_of_conv1d
        self.dense_neurons = dense_neurons
        self.filters = filters
        self.kernel_size = kernel_size
        self.maxpool_1 = maxpool_1
        self.dense_activation = dense_activation
        self.conv1d_layers = [tf.keras.layers.Conv1D(filters=filters, 
                                                kernel_size=kernel_size, 
                                                padding='same')  
                       for _ in range(self.num_of_conv1d)]
        self.maxpool1d = tf.keras.layers.MaxPooling1D(pool_size=(train_data.element_spec[0].shape[1]) )
        
        self.Dense = tf.keras.layers.Dense(self.dense_neurons, 
                                           activation=self.dense_activation)
       
        
    def call(self, inputs):       
        for conv1d_layer in self.conv1d_layers:
            x = conv1d_layer(inputs)
        x = self.maxpool1d(x)        
        x = tf.reshape(x, (inputs.shape[0], self.filters))
        x = self.Dense(x)
        return x
    
    

logSeqencer =   LogSeqEncoder()
# the model doesn't have a state unless it is called at least once
logSeqEmbedding = logSeqencer(loglineEmbedding)
print('logSeqEmbedding.shape:', logSeqEmbedding.shape)
# Now the model have a state and can be inspected        
logSeqencer.summary()

logSeqEmbedding.shape: (32, 2048)
Model: "log_seq_encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_9 (Conv1D)            multiple                  12352     
_________________________________________________________________
conv1d_10 (Conv1D)           multiple                  12352     
_________________________________________________________________
conv1d_11 (Conv1D)           multiple                  12352     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 multiple                  0         
_________________________________________________________________
dense_2 (Dense)              multiple                  133120    
Total params: 170,176
Trainable params: 170,176
Non-trainable params: 0
_________________________________________________________________


In [16]:
# sample_x_train

In [17]:
class LogClassifier(tf.keras.Model):
    
    def __init__(self,  **kwargs):
        super().__init__(**kwargs)
        self.log_line_encoder = LogLineEncoder()
        self.log_seq_encoder = LogSeqEncoder()
        self.classifier = tf.keras.layers.Dense(
            train_data.element_spec[1].shape[1], activation='softmax')
#         self.extract_feature = extract_feature
    
    def call(self, inputs, extract_feature=False,):
#         x_data, y_data = inputs
        x = self.log_line_encoder(inputs)
        seq_embedding = self.log_seq_encoder(x)
        
        if  extract_feature:
            output = seq_embedding
        else:
            output = self.classifier(seq_embedding)
        return output
    
log_classifier = LogClassifier()
log_classifier(sample_x_train)        

vocab_size: 50


<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[0.26113856, 0.23957637, 0.23371567, 0.26556948],
       [0.2631906 , 0.24449475, 0.23290029, 0.2594144 ],
       [0.25915954, 0.24080718, 0.23420203, 0.26583126],
       [0.2619556 , 0.23947345, 0.23363088, 0.26494014],
       [0.26257977, 0.23949468, 0.23272446, 0.2652011 ],
       [0.2553202 , 0.24411115, 0.23737098, 0.26319772],
       [0.2569148 , 0.24322458, 0.23629706, 0.2635636 ],
       [0.2560517 , 0.24487218, 0.23602891, 0.26304716],
       [0.2562619 , 0.2428931 , 0.23775093, 0.26309413],
       [0.2605076 , 0.2394465 , 0.23187067, 0.26817518],
       [0.25717095, 0.24478468, 0.23633243, 0.26171196],
       [0.25814587, 0.23968872, 0.23689345, 0.265272  ],
       [0.2619062 , 0.23991217, 0.23196454, 0.26621705],
       [0.26105738, 0.23990466, 0.23403397, 0.265004  ],
       [0.25848752, 0.24282423, 0.23646986, 0.26221842],
       [0.2582312 , 0.24684826, 0.23387077, 0.26104978],
       [0.25581473, 0.24250774, 0.23691

In [18]:
# the classifier assigned low probability to all the classes since it is untrained
# TODO: the mode should accept a single sequence. At present it is accepting only a batch

In [19]:
log_classifier.summary()

Model: "log_classifier"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
log_line_encoder_1 (LogLineE multiple                  36918     
_________________________________________________________________
log_seq_encoder_1 (LogSeqEnc multiple                  170176    
_________________________________________________________________
dense_4 (Dense)              multiple                  8196      
Total params: 215,290
Trainable params: 215,290
Non-trainable params: 0
_________________________________________________________________


In [20]:
# This is to check that the model's built in  complile and fit is working well
log_classifier.compile(optimizer='adam', 
                  loss='categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
hist = log_classifier.fit(train_data, validation_data=test_data, epochs=1) 

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [21]:
# now after the training the predeicitoin will show higher probability to the 
# a class and lesser probability to other classes
log_classifier(sample_x_train)      

<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[5.53421309e-09, 8.48451862e-07, 9.70203022e-04, 9.99028921e-01],
       [9.99435246e-01, 3.57963378e-04, 4.20965916e-05, 1.64618628e-04],
       [6.08524076e-08, 5.88605508e-06, 7.43910015e-01, 2.56084055e-01],
       [6.88876645e-09, 9.39416225e-08, 9.99778092e-01, 2.21787588e-04],
       [2.12752682e-09, 3.25149074e-07, 9.09067807e-04, 9.99090552e-01],
       [9.99621630e-01, 3.66138003e-04, 8.92610842e-07, 1.13616907e-05],
       [9.99601185e-01, 3.84375773e-04, 1.03211005e-06, 1.33183703e-05],
       [9.99722898e-01, 2.67809752e-04, 8.42018721e-07, 8.45816248e-06],
       [9.09860391e-05, 9.99366343e-01, 1.97362624e-06, 5.40674664e-04],
       [4.88574203e-09, 7.60670616e-07, 7.89354148e-04, 9.99209881e-01],
       [1.40054663e-05, 9.99514461e-01, 1.35646701e-06, 4.70206782e-04],
       [1.12684091e-07, 8.25212453e-07, 9.96584654e-01, 3.41432937e-03],
       [7.65383703e-08, 1.86544185e-05, 4.90979925e-02, 9.50883329e-01],
  

In [22]:
features = log_classifier(sample_x_train, extract_feature=True ) 
features

<tf.Tensor: shape=(32, 2048), dtype=float32, numpy=
array([[0.        , 0.        , 0.382386  , ..., 0.        , 0.        ,
        0.65225273],
       [0.55785555, 0.54774433, 0.        , ..., 0.0465716 , 0.        ,
        0.1510333 ],
       [0.        , 0.        , 0.5416764 , ..., 0.        , 0.        ,
        0.42481586],
       ...,
       [0.        , 0.        , 0.45767915, ..., 0.        , 0.        ,
        0.13653365],
       [0.5917342 , 0.55652755, 0.05647261, ..., 0.04156694, 0.        ,
        0.07663806],
       [0.6023471 , 0.5572019 , 0.04888015, ..., 0.05260737, 0.        ,
        0.06535499]], dtype=float32)>

In [23]:
np.zeros((2, 2))

array([[0., 0.],
       [0., 0.]])

In [27]:
for batch in train_data:
    x_train, y_train = batch
    print(x_train)
    print(y_train)
    break

tf.Tensor(
[[[12  3  6 ... 12  6 18]
  [12  3  6 ... 12  6 18]
  [12  3  6 ... 12  6 18]
  ...
  [23 22 17 ...  8  4 14]
  [12  3  6 ... 12  6 18]
  [ 4 23 22 ...  9  0  0]]

 [[18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  ...
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]]

 [[12  3  6 ... 12  6 18]
  [ 4 23 25 ...  9  0  0]
  [23 25 17 ...  8  4 14]
  ...
  [23 25 17 ...  8  4 14]
  [12  3  6 ... 12  6 18]
  [ 4 23 25 ...  9  0  0]]

 ...

 [[ 7 17  8 ... 11 11 11]
  [13  6 20 ... 11 11 11]
  [13  6 20 ... 11 11 11]
  ...
  [13  6 20 ... 11 11 11]
  [ 7 17  8 ... 11 11 11]
  [13  6 20 ... 11 11 11]]

 [[10  5 14 ...  8  2 21]
  [10  5 14 ... 22 13 21]
  [10  5 14 ... 21  2 13]
  ...
  [10  5 14 ... 21 10  8]
  [10  5 14 ... 11 13 21]
  [10  5 14 ... 26 14 10]]

 [[18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  ...
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]]]

In [25]:
centroids = np.zeros((train_data.element_spec[1].shape[1],   2048))
print('centriods initialized:', centroids)
total_labels = np.zeros(4)
# total_labels[2] += 1
# total_labels[2] += 1
print('total_labels initialized:', total_labels)
for batch in train_data:
    logseq_batch, label_batch = batch
    features = log_classifier(logseq_batch, extract_feature=True )
    for i in range(len(label_batch)):
        label = label_batch[i]
        numeric_label = np.argmax(label)
        total_labels[numeric_label] += 1
        centroids[numeric_label] += features[i]
total_label_reshaped = np.reshape(total_labels, (4, 1))
centroids /= total_label_reshaped
print('centroids:',centroids)
print('total_labels:',total_label_reshaped)


centriods initialized: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
total_labels initialized: [0. 0. 0. 0.]
centroids: [[6.44073868e-01 6.38870468e-01 3.86534524e-02 ... 4.16926289e-02
  0.00000000e+00 1.07653599e-01]
 [1.55825443e-01 2.91257706e-01 3.78368225e-01 ... 4.51057293e-04
  0.00000000e+00 5.16504517e-01]
 [1.96967032e-04 2.03963406e-04 5.83768806e-01 ... 0.00000000e+00
  0.00000000e+00 2.77634716e-01]
 [3.52268159e-03 2.94239372e-03 4.22926636e-01 ... 0.00000000e+00
  0.00000000e+00 6.28918800e-01]]
total_labels: [[800.]
 [800.]
 [800.]
 [800.]]


In [28]:
# to understand the np divide operation
n1 = np.arange(12)
n2 = n1.reshape((3, 4))
print('n2', n2)
n3 = np.array([2, 2, 2])
print('n3', n3)
n4 = np.reshape(n3, (3, 1))
print('n4', n4)
n2/n4


n2 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
n3 [2 2 2]
n4 [[2]
 [2]
 [2]]


array([[0. , 0.5, 1. , 1.5],
       [2. , 2.5, 3. , 3.5],
       [4. , 4.5, 5. , 5.5]])

In [36]:
#take zi and a ck 
# from sample_x_train the first sample belongs to class 3
print('sample_x_train[0]', sample_x_train[0])
sample_y_train = sample_train_data[1]
print('sample_y_train[0]', sample_y_train[0])
print('feature for the same:', features[0])
print('centroid for the class 3 :', centroids[3])

sample_x_train[0] tf.Tensor(
[[ 4 30 25 ...  9  0  0]
 [30 25 17 ...  8  4 14]
 [12  3  6 ... 12  6 18]
 ...
 [30 25 17 ...  8  4 14]
 [12  3  6 ... 12  6 18]
 [ 4 30 25 ...  9  0  0]], shape=(32, 64), dtype=int32)
sample_y_train[0] tf.Tensor([0. 0. 0. 1.], shape=(4,), dtype=float32)
feature for the same: tf.Tensor([0.60991514 0.55805254 0.06325469 ... 0.05174574 0.         0.06664393], shape=(2048,), dtype=float32)
centroid for the class 3 : [0.00352268 0.00294239 0.42292664 ... 0.         0.         0.6289188 ]


In [42]:
# eucladian distance
z_0_3= features[0] # [2048]
C_3 = centroids[3] # [2048]
ED = np.sum(np.square(z_0_3 - C_3 ))
print('eucladian distance:', ED)
# InvalidArgumentError: Incompatible shapes: [32,64] vs. [2048] [Op:Sub]


eucladian distance: 212.51135


In [None]:
# understand this , a= features(batch_size, 2048) , b = centroids (4, 2048)
def euclidean_metric_torch(a, b):
    n = a.shape[0]
    m = b.shape[0]
    a = a.unsqueeze(1).expand(n, m, -1)
    b = b.unsqueeze(0).expand(n, m, -1)
    logits = -((a - b)**2).sum(dim=2)
    return logits

In [81]:
import numpy as np
import tensorflow as tf
a = np.arange(6)
a = a.reshape((2, -1))
print('a:', a)
print('a.shape', a.shape)
b = np.arange(8, 16)
print('b',b)
b = np.reshape(b, (4, -1))
print('b',b)
print('b.shape:',b.shape)
tfa = tf.constant(a)
tfb = tf.constant(b)
print('tfa',tfa)
print('tfb',tfb)
# n = tfa.shape[0]
# m = b.shape[0]


a: [[0 1 2]
 [3 4 5]]
a.shape (2, 3)
b [ 8  9 10 11 12 13 14 15]
b [[ 8  9]
 [10 11]
 [12 13]
 [14 15]]
b.shape: (4, 2)
tfa tf.Tensor(
[[0 1 2]
 [3 4 5]], shape=(2, 3), dtype=int32)
tfb tf.Tensor(
[[ 8  9]
 [10 11]
 [12 13]
 [14 15]], shape=(4, 2), dtype=int32)


In [78]:
print('tf.expand_dims(tfa, 0) :',tf.expand_dims(tfa, 0))
print()
print('tf.expand_dims(tfa, 1) :',tf.expand_dims(tfa, 1))
print()
print('tf.expand_dims(tfa, 1) :',tf.expand_dims(tfa, -1))

tf.expand_dims(tfa, 0) : tf.Tensor(
[[[0 1 2]
  [3 4 5]]], shape=(1, 2, 3), dtype=int32)

tf.expand_dims(tfa, 1) : tf.Tensor(
[[[0 1 2]]

 [[3 4 5]]], shape=(2, 1, 3), dtype=int32)

tf.expand_dims(tfa, 1) : tf.Tensor(
[[[0]
  [1]
  [2]]

 [[3]
  [4]
  [5]]], shape=(2, 3, 1), dtype=int32)


In [82]:
tfa = tf.expand_dims(tfa, 1)
print(f'tf.shape(tfa): {tf.shape(tfa)}')
tfb = tf.expand_dims(tfb, 0)
print(f'tf.shape(tfb): {tf.shape(tfb)}')

tf.shape(tfa): [2 1 3]
tf.shape(tfb): [1 4 2]


In [86]:
n = a.shape[0]
m = b.shape[0]
tfa_broadcast = tf.broadcast_to(tfa, [2, 4, 3])
tf.shape(tfa_broadcast)
print('tfa_broadcast',tfa_broadcast)

tfa_broadcast tf.Tensor(
[[[0 1 2]
  [0 1 2]
  [0 1 2]
  [0 1 2]]

 [[3 4 5]
  [3 4 5]
  [3 4 5]
  [3 4 5]]], shape=(2, 4, 3), dtype=int32)


In [None]:
# 6

# The equivalent function for pytorch expand is tensorflow tf.broadcast_to

# Docs: https://www.tensorflow.org/api_docs/python/tf/broadcast_to

# Share
# Follow
# edited Oct 23, 2021 at 18:22

# M.Innat
# 12.2k66 gold badges3434 silver badges6767 bronze badges
# answered Jan 4, 2019 at 9:12

# funkyyyyyy
# 6111 silver badge22 bronze badges
# Add a comment

# 0

# Tensorflow automatically broadcasts, so in general you don't need to do any of this. Suppose you have a y' of shape 6x2x3 and your x is of shape 2x3, then you can already do y'*x or y'+x will already behave as if you had expanded it. But if for some other reason you really need to do it, then the command in tensorflow is tile:

# y = tf.tile(tf.reshape(x, (1,2,3)), multiples=(6,1,1))
# Docs: https://www.tensorflow.org/api_docs/python/tf/tile

In [None]:
def euclidean_metric(a, b):
    n = a.shape[0]
    m = b.shape[0]
    a = tf.expand_dims(a, 1)
    b = tf.expand_dims(b, 0)
    logits = -((a - b)**2).sum(dim=2)
    return logits  

In [87]:
class OpenSet:
    def __init__(self, data, pretrained_model):
        
        self.model = pretrained_model
        self.best_eval_score = 0
        self.delta = None
        self.delta_points = []
        self.centroids = None
        self.test_results = None
        self.predictions = None
        self.true_labels = None
        
    def centroids_cal(self):
        centriods = np.zeros(train_data.element_spec[1].shape[1], embedding_size)
        total_labels = np.empty(0, dtype=longdouble)
        


In [None]:
# Customizing the training step to get centroid for each class
class OpenSet:
    def __init__(self, data, pretrained_model=log_classifier):
#         super().__init__():
        self.model = pretrained_model        
        self.centroids = None
        self.num_labels = train_data.element_spec[1].shape[1]
        
    def centroids_cal(self):
        centriods = np.zeros(self.num_labels, embedding_size)
        total_labels = np.empty(0, dtype=longdouble)
        for batch in data:
            logseq_batch, label_batch = batch
            features = self.model(logseq_batch, extract_feature=True ) 
            

In [None]:
# In context of deep learning the logits layer means the layer that feeds in to softmax (or other such normalization). The output of the softmax are the probabilities for the classification task and its input is logits layer. The logits layer typically produces values from -infinity to +infinity and the softmax layer transforms it to values from 0 to 1.

# Historical Context

# Where does this term comes from? In 1930s and 40s, several people were trying to adapt linear regression to the problem of predicting probabilities. However linear regression produces output from -infinity to +infinity while for probabilities our desired output is 0 to 1. One way to do this is by somehow mapping the probabilities 0 to 1 to -infinity to +infinity and then use linear regression as usual. One such mapping is cumulative normal distribution that was used by Chester Ittner Bliss in 1934 and he called this "probit" model, short for "probability unit". However this function is computationally expensive while lacking some of the desirable properties for multi-class classification. In 1944 Joseph Berkson used the function log(p/(1-p)) to do this mapping and called it logit, short for "logistic unit". The term logistic regression derived from this as well.

# The Confusion

# Unfortunately the term logits is abused in deep learning. From pure mathematical perspective logit is a function that performs above mapping. In deep learning people started calling the layer "logits layer" that feeds in to logit function. Then people started calling the output values of this layer "logit" creating the confusion with logit the function.