In [1]:
from bglog import BGLog, get_embedding_layer
import numpy as np
import tensorflow as tf
tf.random.set_seed(123)

In [2]:
bglog = BGLog(save_padded_num_sequences=False, load_from_pkl=True)

In [3]:
train_test = bglog.get_tensor_train_test(ablation=1000)
train_data, test_data = train_test

padded_num_seq_df loaded from data\bgl_padded_num_seq_df.pkl
trained tokenizer, tk, loaded from data\bgltk.pkl
train_0:, 800
test_0:, 200
train_1:, 800
test_1:, 200
train_2:, 800
test_2:, 200
train_3:, 800
test_3:, 102
4 class does not have 800 records, it has only 628 records
test_4:, 0
5 class does not have 800 records, it has only 165 records
5 class does not have 200 records, it has only 165 records
6 class does not have 800 records, it has only 75 records
6 class does not have 200 records, it has only 75 records
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]]
<BatchDataset shapes: ((32, 32, 64), (32, 4)), types: (tf.int32, tf.float32)>
<BatchDataset shapes: ((32, 32, 64), (32, 4)), types: (tf.int32, tf.float32)>


In [21]:
def model(conv1d_set1 = 3, conv1d_set2 = 3, dense_neurons=2048, filters=64,
            kernel_size=3,maxpool_1=True,epochs=25, dense_activation='relu'):
    embedding_weights, vocab_size, char_onehot = get_embedding_layer(bglog)
    B = train_data.element_spec[0].shape[0]
#     inputs = tf.keras.layers.Input(batch_shape=(B, train_data.element_spec[0].shape[1], train_data.element_spec[0].shape[2]), dtype='float64' )
    inputs = tf.keras.layers.Input(shape=(train_data.element_spec[0].shape[1], train_data.element_spec[0].shape[2]), dtype='float64' )
    x = tf.keras.layers.Embedding(input_dim=vocab_size+1,
                                    output_dim=vocab_size,
                                    input_length=train_data.element_spec[0].shape[2],
                                    weights = [embedding_weights],
                                    )(inputs)
    for _ in range(conv1d_set1):
        x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
    if maxpool_1:
        x = tf.keras.layers.MaxPooling2D(pool_size=(1, train_data.element_spec[0].shape[2]))(x)
        x = tf.reshape(x, (B, train_data.element_spec[0].shape[1], filters))        
        for _ in range(conv1d_set2):
            x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
        x = tf.keras.layers.MaxPooling1D(pool_size=(train_data.element_spec[0].shape[1]) )(x)
        x = tf.reshape(x, (B, filters))
    if not maxpool_1:
        x = tf.keras.layers.Flatten()(x)
    if dense_activation is None:
        x = tf.keras.layers.Dense(dense_neurons)(x)
    else:
        x = tf.keras.layers.Dense(dense_neurons, activation=dense_activation)(x)
    outputs = tf.keras.layers.Dense(train_data.element_spec[1].shape[1], activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    print(model.summary())
    model.compile(optimizer='adam', 
                  loss='categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
    hist = model.fit(train_data, validation_data=test_data, epochs=epochs) 
    return model, hist

In [None]:
# we  feed  xi  to  a dense layer h to get the log-sequence representation zi∈RD:
#     zi= h(xi) =σ(Whxi+bh) ............................(2)
# in our case zi can be obtained from the dense layer before the softmax
# Lets see how to ger it from the train mode

In [22]:
# we pre-train the model with labeled known intent samples. 
# In order to better reflect the effectiveness of the learned decision boundary, 
# we learn the feature representation zi with the simple softmax loss Ls to perform classification:

trained_model, hist = model(epochs=6,)

vocab_size: 50
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 32, 64)]          0         
_________________________________________________________________
embedding_7 (Embedding)      (None, 32, 64, 50)        2550      
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 32, 64, 64)        9664      
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 32, 64, 64)        12352     
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 32, 64, 64)        12352     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 32, 1, 64)         0         
_________________________________________________________________
tf_op_layer_Reshape (TensorF [(32, 32, 

In [None]:
# Learn the decision boundary of each class constraining the known labels within a ball area
# how to get zi and how to know that zi belongs to which yi ?
# from there we will have to calculate the Ck , centroid for the class k

In [13]:
trained_model.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x1b2cb5147c0>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x1b2d74eb850>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1b2cb50bb20>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1b2d74ebfa0>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1b2d74dacd0>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x1b2d74b9460>,
 <tensorflow.python.keras.engine.base_layer.TensorFlowOpLayer at 0x1b2b10bc790>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1b2d74ee160>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1b2b10bc400>,
 <tensorflow.python.keras.layers.convolutional.Conv1D at 0x1b2b10c2e80>,
 <tensorflow.python.keras.layers.pooling.MaxPooling1D at 0x1b2b10c7580>,
 <tensorflow.python.keras.engine.base_layer.TensorFlowOpLayer at 0x1b2b10dcd00>,
 <tensorflow.python.keras.layers.core.Dense at 0x1b2b10d7c10>,
 <tensorflow.python.keras.layers.core.Dense

In [19]:
dense_6 = trained_model.get_layer('dense_6')
print(dense_6)

<tensorflow.python.keras.layers.core.Dense object at 0x000001B2B10D7C10>


In [20]:
#This is the log sequence embedding from the last layer
# we can treat this as the features from the logs
dense_6.output

<tf.Tensor 'dense_6/Relu:0' shape=(32, 2048) dtype=float32>

In [None]:
# Then, we use the pre-trained model to extract intent features for 
# learning the decision boundary

In [108]:
class LogLineEncoder(tf.keras.Model):
    def __init__(self, num_of_conv1d=3,  
                 filters=64,
                 kernel_size=3, ):
        super().__init__()            
        self.num_of_conv1d = num_of_conv1d       
        self.filters = filters
        self.kernel_size = kernel_size           
        self.embedding_weights, self.vocab_size, self.char_onehot = get_embedding_layer(bglog)       
        
        self.embedding = tf.keras.layers.Embedding(input_dim=self.vocab_size+1,
                                    output_dim=self.vocab_size,
                                    input_length=train_data.element_spec[0].shape[2],
                                    weights = [self.embedding_weights],
                                    )
        self.conv1d_layers = [tf.keras.layers.Conv1D(filters=filters, 
                                                kernel_size=kernel_size, 
                                                padding='same')  
                       for _ in range(self.num_of_conv1d)]
        self.maxpool2d = tf.keras.layers.MaxPooling2D(
            pool_size=(1, train_data.element_spec[0].shape[2]))
                  
        
    def call(self, inputs):
        x = self.embedding(inputs)
        for conv1d_layer in self.conv1d_layers:
            x = conv1d_layer(x)
        x = self.maxpool2d(x)
        x = tf.reshape(x, (inputs.shape[0], inputs.shape[1], self.filters))
        return x
    
    

# 
line_encoder =   LogLineEncoder()
# the model doesn't have a state unless it is called at least once
# in order to initialize the model we need a sample data 
sample_train_data = next(iter(train_data))
sample_x_train = sample_train_data[0]
print('sample_x_train.shape:', sample_x_train.shape)
# now we will initialize the model with the sample data
loglineEmbedding = line_encoder(sample_x_train)
print('loglineEmbedding.shape:', loglineEmbedding.shape)
# Now the model have a state and can be inspected        
line_encoder.summary()

sample_x_train.shape: (32, 32, 64)
vocab_size: 50
loglineEmbedding.shape: (32, 32, 64)
Model: "log_line_encoder_47"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_46 (Embedding)     multiple                  2550      
_________________________________________________________________
conv1d_183 (Conv1D)          multiple                  9664      
_________________________________________________________________
conv1d_184 (Conv1D)          multiple                  12352     
_________________________________________________________________
conv1d_185 (Conv1D)          multiple                  12352     
_________________________________________________________________
max_pooling2d_46 (MaxPooling multiple                  0         
Total params: 36,918
Trainable params: 36,918
Non-trainable params: 0
_________________________________________________________________


LOG SEQUENCE EMBEDDING TAKES LOGLINE EMBEDDING AS INPUT

In [106]:
class LogSeqEncoder(tf.keras.Model):
    
    def __init__(self, num_of_conv1d=3,  filters=64,
                 kernel_size=3, maxpool_1=True,
                 dense_neurons=2048, dense_activation='relu',):
        super().__init__()
        self.num_of_conv1d = num_of_conv1d
        self.dense_neurons = dense_neurons
        self.filters = filters
        self.kernel_size = kernel_size
        self.maxpool_1 = maxpool_1
        self.dense_activation = dense_activation
        self.conv1d_layers = [tf.keras.layers.Conv1D(filters=filters, 
                                                kernel_size=kernel_size, 
                                                padding='same')  
                       for _ in range(self.num_of_conv1d)]
        self.maxpool1d = tf.keras.layers.MaxPooling1D(pool_size=(train_data.element_spec[0].shape[1]) )
        
        self.Dense = tf.keras.layers.Dense(self.dense_neurons, 
                                           activation=self.dense_activation)
       
        
    def call(self, inputs):       
        for conv1d_layer in self.conv1d_layers:
            x = conv1d_layer(inputs)
        x = self.maxpool1d(x)        
        x = tf.reshape(x, (inputs.shape[0], self.filters))
        x = self.Dense(x)
        return x
    
    

logSeqencer =   LogSeqEncoder()
# the model doesn't have a state unless it is called at least once
logSeqEmbedding = logSeqencer(loglineEmbedding)
print('logSeqEmbedding.shape:', logSeqEmbedding.shape)
# Now the model have a state and can be inspected        
logSeqencer.summary()

logSeqEmbedding.shape: (32, 2048)
Model: "log_seq_encoder_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_180 (Conv1D)          multiple                  12352     
_________________________________________________________________
conv1d_181 (Conv1D)          multiple                  12352     
_________________________________________________________________
conv1d_182 (Conv1D)          multiple                  12352     
_________________________________________________________________
max_pooling1d_14 (MaxPooling multiple                  0         
_________________________________________________________________
dense_46 (Dense)             multiple                  133120    
Total params: 170,176
Trainable params: 170,176
Non-trainable params: 0
_________________________________________________________________


In [116]:
# sample_x_train

In [168]:
class LogClassifier(tf.keras.Model):
    
    def __init__(self,  **kwargs):
        super().__init__(**kwargs)
        self.log_line_encoder = LogLineEncoder()
        self.log_seq_encoder = LogSeqEncoder()
        self.classifier = tf.keras.layers.Dense(
            train_data.element_spec[1].shape[1], activation='softmax')
#         self.extract_feature = extract_feature
    
    def call(self, inputs, extract_feature=False,):
#         x_data, y_data = inputs
        x = self.log_line_encoder(inputs)
        seq_embedding = self.log_seq_encoder(x)
        
        if  extract_feature:
            output = seq_embedding
        else:
            output = self.classifier(seq_embedding)
        return output
    
log_classifier = LogClassifier()
log_classifier(sample_x_train)        

vocab_size: 50


<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[0.26594797, 0.23993689, 0.24374838, 0.25036687],
       [0.2712713 , 0.23759408, 0.2424112 , 0.24872339],
       [0.27049428, 0.23759227, 0.24442764, 0.24748583],
       [0.26594797, 0.23993689, 0.24374838, 0.25036687],
       [0.27129176, 0.23026243, 0.24704917, 0.25139663],
       [0.27271685, 0.23522092, 0.24514192, 0.24692021],
       [0.27256274, 0.23706135, 0.24357268, 0.24680322],
       [0.2712713 , 0.23759408, 0.2424112 , 0.24872339],
       [0.26936093, 0.23150212, 0.24697001, 0.25216702],
       [0.27184606, 0.23702982, 0.24413759, 0.24698654],
       [0.26934317, 0.2346282 , 0.24737462, 0.248654  ],
       [0.2686097 , 0.23688017, 0.24340446, 0.25110564],
       [0.26978517, 0.2378512 , 0.24382828, 0.2485354 ],
       [0.26897094, 0.23215264, 0.24991204, 0.24896437],
       [0.2728863 , 0.2351903 , 0.24460365, 0.24731976],
       [0.27251709, 0.23248255, 0.24827993, 0.24672048],
       [0.26945013, 0.23541771, 0.24713

In [None]:
# the classifier assigned low probability to all the classes since it is untrained
# TODO: the mode should accept a single sequence. At present it is accepting only a batch

In [169]:
log_classifier.summary()

Model: "log_classifier_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
log_line_encoder_58 (LogLine multiple                  36918     
_________________________________________________________________
log_seq_encoder_19 (LogSeqEn multiple                  170176    
_________________________________________________________________
dense_68 (Dense)             multiple                  8196      
Total params: 215,290
Trainable params: 215,290
Non-trainable params: 0
_________________________________________________________________


In [170]:
# This is to check that the model's built in  complile and fit is working well
log_classifier.compile(optimizer='adam', 
                  loss='categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
hist = log_classifier.fit(train_data, validation_data=test_data, epochs=1) 



In [171]:
# now after the training the predeicitoin will show higher probability to the 
# a class and lesser probability to other classes
log_classifier(sample_x_train)      

<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[5.9559608e-07, 9.9999511e-01, 1.0115820e-06, 3.3967465e-06],
       [3.3315580e-06, 5.3150663e-05, 9.9895394e-01, 9.8956109e-04],
       [9.9981517e-01, 1.8341266e-04, 1.7208157e-07, 1.2867113e-06],
       [5.9559608e-07, 9.9999511e-01, 1.0115820e-06, 3.3967465e-06],
       [1.5486393e-08, 1.3062559e-06, 2.7559273e-02, 9.7243935e-01],
       [9.9982077e-01, 1.7789275e-04, 1.7640629e-07, 1.2314774e-06],
       [9.9979359e-01, 2.0472099e-04, 1.9793011e-07, 1.4540375e-06],
       [3.3315580e-06, 5.3150663e-05, 9.9895394e-01, 9.8956109e-04],
       [6.0969441e-10, 4.6309253e-08, 7.2057842e-04, 9.9927944e-01],
       [1.0308991e-03, 9.9745423e-01, 5.1951088e-04, 9.9537824e-04],
       [3.2217307e-09, 7.8519768e-08, 1.3213953e-03, 9.9867851e-01],
       [7.8803737e-04, 9.9436545e-01, 8.2244060e-04, 4.0241582e-03],
       [8.7072927e-08, 1.5266496e-05, 9.9929976e-01, 6.8481930e-04],
       [1.0226398e-09, 7.3874865e-08, 1.5879484e-03, 9

In [173]:
features = log_classifier(sample_x_train, extract_feature=True ) 
features

<tf.Tensor: shape=(32, 2048), dtype=float32, numpy=
array([[0.        , 0.        , 0.4174264 , ..., 0.        , 0.        ,
        0.2542565 ],
       [0.01834698, 0.09165653, 0.        , ..., 0.        , 0.03006388,
        0.        ],
       [0.        , 0.13645807, 0.07725984, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.13265407, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.27734283, ..., 0.        , 0.        ,
        0.02185998],
       [0.        , 0.        , 0.28436154, ..., 0.        , 0.        ,
        0.02117014]], dtype=float32)>

In [179]:
np.zeros((2, 2))

array([[0., 0.],
       [0., 0.]])

In [226]:
centroids = np.zeros((train_data.element_spec[1].shape[1],   2048))
print('centriods initialized:', centriods)
total_labels = np.zeros(4)
# total_labels[2] += 1
# total_labels[2] += 1
print('total_labels initialized:', total_labels)
for batch in train_data:
    logseq_batch, label_batch = batch
    features = log_classifier(logseq_batch, extract_feature=True )
    for i in range(len(label_batch)):
        label = label_batch[i]
        numeric_label = np.argmax(label)
        total_labels[numeric_label] += 1
        centroids[numeric_label] += features[i]
total_label_reshaped = np.reshape(total_labels, (4, 1))
centroids /= total_label_reshaped
print('centroids:',centroids)
print('total_labels:',total_label_reshaped)


centriods initialized: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
total_labels initialized: [0. 0. 0. 0.]
centroids: [[0.00000000e+00 1.91634579e-01 4.55714893e-02 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.12166718e-03 2.84190178e-01 ... 0.00000000e+00
  0.00000000e+00 7.99438429e-02]
 [1.16506970e-02 8.62588120e-02 2.15531606e-04 ... 2.72557475e-04
  5.09823143e-03 0.00000000e+00]


In [224]:
# to understand the np divide operation
n1 = np.arange(12)
n2 = n1.reshape((3, 4))
print('n2', n2)
n3 = np.array([2, 2, 2])
print('n3', n3)
n4 = np.reshape(n3, (3, 1))
print('n4', n4)
n2/n4


n2 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
n3 [2 2 2]
n4 [[2]
 [2]
 [2]]


array([[0. , 0.5, 1. , 1.5],
       [2. , 2.5, 3. , 3.5],
       [4. , 4.5, 5. , 5.5]])

In [None]:
# Customizing the training step to get centroid for each class
class OpenSet:
    def __init__(self, data, pretrained_model=log_classifier):
#         super().__init__():
        self.model = pretrained_model        
        self.centroids = None
        self.num_labels = train_data.element_spec[1].shape[1]
        
    def centroids_cal(self):
        centriods = np.zeros(self.num_labels, embedding_size)
        total_labels = np.empty(0, dtype=longdouble)
        for batch in data:
            logseq_batch, label_batch = batch
            features = self.model(logseq_batch, extract_feature=True ) 
            

In [126]:
for batch in train_data:
    x_train, y_train = batch
    print(x_train)
    print(y_train)
    break

tf.Tensor(
[[[18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  ...
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]]

 [[ 7 17  8 ... 11 11 11]
  [13  6 20 ... 11 11 11]
  [13  6 20 ... 11 11 11]
  ...
  [ 7 17  8 ... 11 11 11]
  [ 7 17  8 ... 11 11 11]
  [ 7 17  8 ... 11 11 11]]

 [[18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  ...
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]]

 ...

 [[18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  ...
  [11 25 17 ...  8  4 14]
  [12  3  6 ... 12  6 18]
  [ 4 11 25 ...  9  0  0]]

 [[18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  ...
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]
  [18  2  4 ...  0  0  0]]

 [[ 2 19 11 ...  8 10  2]
  [ 2 19 11 ...  8 10  2]
  [ 2 19 11 ...  8 10  2]
  ...
  [ 2 19 11 ...  8 10  2]
  [ 2 19 11 ...  8 10  2]
  [ 2 19 11 ...  8 10  2]]]

In [None]:
def euclidean_metric_torch(a, b):
    n = a.shape[0]
    m = b.shape[0]
    a = a.unsqueeze(1).expand(n, m, -1)
    b = b.unsqueeze(0).expand(n, m, -1)
    logits = -((a - b)**2).sum(dim=2)
    return logits

In [81]:
import numpy as np
import tensorflow as tf
a = np.arange(6)
a = a.reshape((2, -1))
print('a:', a)
print('a.shape', a.shape)
b = np.arange(8, 16)
print('b',b)
b = np.reshape(b, (4, -1))
print('b',b)
print('b.shape:',b.shape)
tfa = tf.constant(a)
tfb = tf.constant(b)
print('tfa',tfa)
print('tfb',tfb)
# n = tfa.shape[0]
# m = b.shape[0]


a: [[0 1 2]
 [3 4 5]]
a.shape (2, 3)
b [ 8  9 10 11 12 13 14 15]
b [[ 8  9]
 [10 11]
 [12 13]
 [14 15]]
b.shape: (4, 2)
tfa tf.Tensor(
[[0 1 2]
 [3 4 5]], shape=(2, 3), dtype=int32)
tfb tf.Tensor(
[[ 8  9]
 [10 11]
 [12 13]
 [14 15]], shape=(4, 2), dtype=int32)


In [78]:
print('tf.expand_dims(tfa, 0) :',tf.expand_dims(tfa, 0))
print()
print('tf.expand_dims(tfa, 1) :',tf.expand_dims(tfa, 1))
print()
print('tf.expand_dims(tfa, 1) :',tf.expand_dims(tfa, -1))

tf.expand_dims(tfa, 0) : tf.Tensor(
[[[0 1 2]
  [3 4 5]]], shape=(1, 2, 3), dtype=int32)

tf.expand_dims(tfa, 1) : tf.Tensor(
[[[0 1 2]]

 [[3 4 5]]], shape=(2, 1, 3), dtype=int32)

tf.expand_dims(tfa, 1) : tf.Tensor(
[[[0]
  [1]
  [2]]

 [[3]
  [4]
  [5]]], shape=(2, 3, 1), dtype=int32)


In [82]:
tfa = tf.expand_dims(tfa, 1)
print(f'tf.shape(tfa): {tf.shape(tfa)}')
tfb = tf.expand_dims(tfb, 0)
print(f'tf.shape(tfb): {tf.shape(tfb)}')

tf.shape(tfa): [2 1 3]
tf.shape(tfb): [1 4 2]


In [86]:
n = a.shape[0]
m = b.shape[0]
tfa_broadcast = tf.broadcast_to(tfa, [2, 4, 3])
tf.shape(tfa_broadcast)
print('tfa_broadcast',tfa_broadcast)

tfa_broadcast tf.Tensor(
[[[0 1 2]
  [0 1 2]
  [0 1 2]
  [0 1 2]]

 [[3 4 5]
  [3 4 5]
  [3 4 5]
  [3 4 5]]], shape=(2, 4, 3), dtype=int32)


In [None]:
# 6

# The equivalent function for pytorch expand is tensorflow tf.broadcast_to

# Docs: https://www.tensorflow.org/api_docs/python/tf/broadcast_to

# Share
# Follow
# edited Oct 23, 2021 at 18:22

# M.Innat
# 12.2k66 gold badges3434 silver badges6767 bronze badges
# answered Jan 4, 2019 at 9:12

# funkyyyyyy
# 6111 silver badge22 bronze badges
# Add a comment

# 0

# Tensorflow automatically broadcasts, so in general you don't need to do any of this. Suppose you have a y' of shape 6x2x3 and your x is of shape 2x3, then you can already do y'*x or y'+x will already behave as if you had expanded it. But if for some other reason you really need to do it, then the command in tensorflow is tile:

# y = tf.tile(tf.reshape(x, (1,2,3)), multiples=(6,1,1))
# Docs: https://www.tensorflow.org/api_docs/python/tf/tile

In [None]:
def euclidean_metric(a, b):
    n = a.shape[0]
    m = b.shape[0]
    a = tf.expand_dims(a, 1)
    b = tf.expand_dims(b, 0)
    logits = -((a - b)**2).sum(dim=2)
    return logits  

In [87]:
class OpenSet:
    def __init__(self, data, pretrained_model):
        
        self.model = pretrained_model
        self.best_eval_score = 0
        self.delta = None
        self.delta_points = []
        self.centroids = None
        self.test_results = None
        self.predictions = None
        self.true_labels = None
        
    def centroids_cal(self):
        centriods = np.zeros(train_data.element_spec[1].shape[1], embedding_size)
        total_labels = np.empty(0, dtype=longdouble)
        


In [None]:
# In context of deep learning the logits layer means the layer that feeds in to softmax (or other such normalization). The output of the softmax are the probabilities for the classification task and its input is logits layer. The logits layer typically produces values from -infinity to +infinity and the softmax layer transforms it to values from 0 to 1.

# Historical Context

# Where does this term comes from? In 1930s and 40s, several people were trying to adapt linear regression to the problem of predicting probabilities. However linear regression produces output from -infinity to +infinity while for probabilities our desired output is 0 to 1. One way to do this is by somehow mapping the probabilities 0 to 1 to -infinity to +infinity and then use linear regression as usual. One such mapping is cumulative normal distribution that was used by Chester Ittner Bliss in 1934 and he called this "probit" model, short for "probability unit". However this function is computationally expensive while lacking some of the desirable properties for multi-class classification. In 1944 Joseph Berkson used the function log(p/(1-p)) to do this mapping and called it logit, short for "logistic unit". The term logistic regression derived from this as well.

# The Confusion

# Unfortunately the term logits is abused in deep learning. From pure mathematical perspective logit is a function that performs above mapping. In deep learning people started calling the layer "logits layer" that feeds in to logit function. Then people started calling the output values of this layer "logit" creating the confusion with logit the function.