In [1]:
import tensorflow as tf
import sys


def regression_loss(logits, labels, num_steps, steps, seq_lens, loss_type,
                    normalize_indices, variance_lambda, huber_delta):
  """Cycle-back regression loss: regress to the index where the cycle started.

  The softmax of `logits` (beta) is treated as a distribution over the
  entries of `steps`. Its expectation is the predicted index, which is
  compared against the ground-truth index selected by `labels`. Three
  variants are supported:
  i) regression_mse: MSE of the predicted indices and ground truth indices.
  ii) regression_mse_var: squared error divided by the variance of the
  predicted index distribution, plus `variance_lambda` times the log of that
  variance. The variance scaling allows dynamic weighting of the MSE loss
  based on the similarities.
  iii) regression_huber: Huber loss between the predicted indices and ground
  truth indices.

  Args:
    logits: Tensor, Pre-softmax similarity scores after cycling back to the
      starting sequence. One row per cycled frame.
    labels: Tensor, One hot labels containing the ground truth, same shape as
      `logits`. The index where the cycle started is 1.
    num_steps: Integer, Number of columns of `steps`; only used to tile
      `seq_lens` when `normalize_indices` is True.
    steps: Tensor, step indices/frame indices with one row per row of
      `logits` (the caller in this file tiles the per-sequence steps to
      achieve this).
    seq_lens: Tensor, Length of the source sequence for each row of `logits`.
      Only read when `normalize_indices` is True.
    loss_type: String, one of 'regression_mse', 'regression_mse_var',
      'regression_huber'.
    normalize_indices: Boolean, If True, divides indices by sequence lengths.
      Useful for avoiding numerical issues as sequence indices can be large
      numbers.
    variance_lambda: Float, Weight of the log-variance term in
      'regression_mse_var'.
    huber_delta: float, Huber delta described in tf.keras.losses.huber_loss.

  Returns:
    loss: Tensor, A scalar loss calculated using a variant of regression.

  Raises:
    ValueError: If `loss_type` is not one of the supported regression losses.
  """
  # Just to be safe, we stop gradients from labels as we are generating labels.
  labels = tf.stop_gradient(labels)
  steps = tf.stop_gradient(steps)

  if normalize_indices:
    float_seq_lens = tf.cast(seq_lens, tf.float32)
    # One length per row, repeated across the row so it divides element-wise.
    tile_seq_lens = tf.tile(
        tf.expand_dims(float_seq_lens, axis=1), [1, num_steps])
    steps = tf.cast(steps, tf.float32) / tile_seq_lens
  else:
    steps = tf.cast(steps, tf.float32)

  beta = tf.nn.softmax(logits)
  # Ground-truth index (picked out by the one-hot row) and expected index
  # under the cycle-back distribution.
  true_time = tf.reduce_sum(steps * labels, axis=1)
  pred_time = tf.reduce_sum(steps * beta, axis=1)

  if loss_type == 'regression_mse_var':
    # Variance aware regression.
    pred_time_tiled = tf.tile(tf.expand_dims(pred_time, axis=1),
                              [1, num_steps])
    pred_time_variance = tf.reduce_sum(
        tf.square(steps - pred_time_tiled) * beta, axis=1)

    # Using log of variance as it is numerically stabler.
    # NOTE(review): a zero variance (all step values in a row identical, or
    # a softmax saturated to exact 0/1) yields log(0) = -inf here; confirm
    # that inputs cannot hit that case.
    pred_time_log_var = tf.math.log(pred_time_variance)
    squared_error = tf.square(true_time - pred_time)

    distance = (tf.math.exp(-pred_time_log_var) * squared_error
                + variance_lambda * pred_time_log_var)
    return tf.reduce_mean(distance)

  if loss_type == 'regression_mse':
    return tf.reduce_mean(
        tf.keras.losses.mean_squared_error(y_true=true_time,
                                           y_pred=pred_time))

  if loss_type == 'regression_huber':
    return tf.reduce_mean(tf.keras.losses.huber_loss(
        y_true=true_time, y_pred=pred_time,
        delta=huber_delta))

  raise ValueError('Unsupported regression loss %s. Supported losses are: '
                   'regression_mse, regression_mse_var and regression_huber.'
                   % loss_type)
    
    
def pairwise_l2_distance(embs1, embs2):
  """Squared L2 distance between every row of embs1 and every row of embs2.

  Uses the expansion ||a||^2 + ||b||^2 - 2 * a.b, so no square root is taken.
  Entry (i, j) of the result corresponds to row i of embs1 and row j of embs2.
  """
  sq_norms_1 = tf.reshape(tf.reduce_sum(tf.square(embs1), 1), [-1, 1])
  sq_norms_2 = tf.reshape(tf.reduce_sum(tf.square(embs2), 1), [1, -1])
  cross_term = tf.matmul(embs1, embs2, transpose_a=False, transpose_b=True)

  # Floating point round-off can push the expansion slightly below zero, so
  # the result is clamped at 0.
  return tf.maximum(sq_norms_1 + sq_norms_2 - 2.0 * cross_term, 0.0)

def get_scaled_similarity(embs1, embs2, similarity_type, temperature):
  """Similarity of every row of embs1 to every row of embs2, scaled.

  The raw similarity is divided by the embedding size and then by the
  temperature.

  Args:
    embs1: Tensor, Embeddings of the shape [M, D] where M is the number of
      embeddings and D is the embedding size.
    embs2: Tensor, Embeddings of the shape [N, D] where N is the number of
      embeddings and D is the embedding size.
    similarity_type: String, Either one of 'l2' or 'cosine'. 'l2' uses the
      negated output of pairwise_l2_distance. 'cosine' uses the plain dot
      product embs1 . embs2^T without normalising the rows (presumably the
      embeddings are expected to be L2-normalised already; TODO confirm).
    temperature: Float, Temperature used in scaling logits before softmax.

  Returns:
    similarity: Tensor, [M, N] tensor denoting similarity between embs1 and
      embs2.

  Raises:
    ValueError: If similarity_type is neither 'l2' nor 'cosine'.
  """
  emb_size = tf.cast(tf.shape(embs1)[1], tf.float32)

  if similarity_type == 'cosine':
    raw_similarity = tf.matmul(embs1, embs2, transpose_b=True)
  elif similarity_type == 'l2':
    raw_similarity = -1.0 * pairwise_l2_distance(embs1, embs2)
  else:
    raise ValueError('similarity_type can either be l2 or cosine.')

  # Dividing by the embedding size helps optimisation; dividing by the
  # temperature controls how soft or hard the alignment is.
  return (raw_similarity / emb_size) / temperature

def align_pair_of_sequences(embs1,
                            embs2,
                            similarity_type,
                            temperature):
  """Cycle every frame of embs1 through embs2 and back to embs1.

  Each row of embs1 is replaced by its soft nearest neighbour in embs2 (a
  softmax-weighted average of the rows of embs2), and that neighbour is then
  scored against all rows of embs1.

  Args:
    embs1: Tensor, Embeddings of the shape [M, D] where M is the number of
      embeddings and D is the embedding size.
    embs2: Tensor, Embeddings of the shape [N, D] where N is the number of
      embeddings and D is the embedding size.
    similarity_type: String, Either one of 'l2' or 'cosine'.
    temperature: Float, Temperature used in scaling logits before softmax.

  Returns:
    logits: Tensor, [M, M] pre-softmax similarity scores after cycling back
      to the starting sequence.
    labels: Tensor, [M, M] one hot labels containing the ground truth. Row k
      has its 1 at index k, the index where the cycle started.
  """
  num_start_frames = tf.shape(embs1)[0]

  # Forward half of the cycle: soft assignment of each embs1 frame to the
  # frames of embs2 (alpha in the TCC paper).
  alpha = tf.nn.softmax(
      get_scaled_similarity(embs1, embs2, similarity_type, temperature),
      axis=1)
  soft_neighbors = tf.matmul(alpha, embs2)

  # Return half of the cycle: score each soft neighbour against embs1.
  cycle_back_logits = get_scaled_similarity(
      soft_neighbors, embs1, similarity_type, temperature)

  identity_labels = tf.one_hot(tf.range(num_start_frames), num_start_frames)
  return cycle_back_logits, identity_labels


def compute_deterministic_alignment_loss(embs,
                                         steps,
                                         seq_lens,
                                         num_steps,
                                         batch_size,
                                         loss_type,
                                         similarity_type,
                                         temperature,
                                         label_smoothing,
                                         variance_lambda,
                                         huber_delta,
                                         normalize_indices):
  """Compute cycle-consistency loss for all steps in each sequence.

  This aligns each pair of videos in the batch except with itself.
  When aligning it also matters which video is the starting video. So for N
  videos in the batch, we have N * (N-1) alignments happening.
  For example, a batch of size 3 has 6 pairs of sequence alignments.
  The per-pair logits, labels, steps and sequence lengths are concatenated
  along axis 0 and handed to `regression_loss`.

  Args:
    embs: Tensor, sequential embeddings of the shape [N, T, D] where N is the
      batch size, T is the number of timesteps in the sequence, D is the size
      of the embeddings.
    steps: Tensor, step indices/frame indices of the embeddings of the shape
      [N, T] where N is the batch size, T is the number of the timesteps.
    seq_lens: Tensor, Lengths of the sequences from which the sampling was
      done. This can provide additional information to the alignment loss.
    num_steps: Integer/Tensor, Number of timesteps in the embeddings.
    batch_size: Integer, Size of the batch. Used as a Python loop bound.
    loss_type: String, This specifies the kind of loss function to use.
      Only the regression variants are implemented here: 'regression_mse',
      'regression_mse_var', 'regression_huber'.
    similarity_type: String, Currently supported similarity metrics: 'l2' ,
      'cosine' .
    temperature: Float, temperature scaling used to scale the similarity
      distributions calculated using the softmax function.
    label_smoothing: Float, accepted for signature compatibility but not
      used by this function (no classification loss is implemented here).
    variance_lambda: Float, Weight of the variance of the similarity
      predictions while cycling back. If this is high then the low variance
      similarities are preferred by the loss while making this term low
      results in high variance of the similarities (more uniform/random
      matching).
    huber_delta: float, Huber delta described in tf.keras.losses.huber_loss.
    normalize_indices: Boolean, If True, normalizes indices by sequence
      lengths. Useful for ensuring numerical instabilities don't arise as
      sequence indices can be large numbers.

  Returns:
    A tuple (loss, logits, labels, steps, seq_lens):
      loss: Tensor, Scalar loss tensor from `regression_loss`.
      logits: Tensor, cycle-back logits of all ordered pairs concatenated
        along axis 0.
      labels: Tensor, one hot labels matching `logits` row for row.
      steps: Tensor, for each row of `logits`, the step indices of the
        sequence the cycle started from.
      seq_lens: Tensor, for each row of `logits`, the length of the sequence
        the cycle started from.

  Raises:
    ValueError: Propagated from `regression_loss` when `loss_type` is not a
      supported regression loss.
  """
  labels_list = []
  logits_list = []
  steps_list = []
  seq_lens_list = []

  for i in range(batch_size):
    for j in range(batch_size):
      # We do not align the sequence with itself.
      if i != j:
        logits, labels = align_pair_of_sequences(embs[i],
                                                 embs[j],
                                                 similarity_type,
                                                 temperature)
        logits_list.append(logits)
        labels_list.append(labels)
        # Repeat sequence i's steps / length once per cycled frame so they
        # line up row-for-row with the logits of this pair.
        steps_list.append(tf.tile(steps[i:i+1], [num_steps, 1]))
        seq_lens_list.append(tf.tile(seq_lens[i:i+1], [(num_steps)]))

  logits = tf.concat(logits_list, axis=0)
  labels = tf.concat(labels_list, axis=0)
  steps = tf.concat(steps_list, axis=0)
  seq_lens = tf.concat(seq_lens_list, axis=0)

  # regression_loss validates loss_type and raises ValueError for anything
  # it does not support, so no separate dispatch is needed here.
  loss = regression_loss(logits, labels, num_steps, steps, seq_lens,
                         loss_type, normalize_indices, variance_lambda,
                         huber_delta)

  return loss,logits,labels,steps,seq_lens

2023-10-04 21:20:05.826132: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [12]:
import torch
import sys

class TCC():
    """Variance-aware cycle-back regression loss written with PyTorch.

    Reads the softmax temperature, the variance weight and the label
    smoothing value from ``cfg.TCC``. The label smoothing value is stored but
    not applied anywhere in this class.
    """

    def __init__(self, cfg):
        tcc_cfg = cfg.TCC
        self.label_smoothing = tcc_cfg.LABEL_SMOOTHING
        self.temperature = tcc_cfg.SOFTMAX_TEMPERATURE
        self.lambda_ = tcc_cfg.VARIANCE_LAMBDA

    def _scaled_neg_sq_dist(self, rows_a, rows_b, emb_dim):
        """Negated squared L2 distance of rows_a to rows_b, scaled by emb_dim and temperature."""
        neg_sq_dist = -1 * torch.cdist(rows_a, rows_b, p=2).pow(2)
        return neg_sq_dist / emb_dim / self.temperature

    def compute_cycle_loss(self, emb_x, emb_y):
        """Cycle the frames of emb_x through emb_y and back.

        1. Score every frame of emb_x against every frame of emb_y (scaled
           negative squared L2 distance).
        2. Softmax those scores and use them to average the frames of emb_y,
           giving one soft nearest neighbour per frame of emb_x.
        3. Score the soft neighbours against emb_x; these are the logits.
        4. Build one-hot labels (identity matrix): frame k should cycle back
           to index k.

        Returns:
            (logits, labels), both of shape [num_steps, num_steps] where
            num_steps is the number of rows of emb_x.
        """
        num_steps, emb_dim = emb_x.shape

        alpha = torch.softmax(
            self._scaled_neg_sq_dist(emb_x, emb_y, emb_dim), dim=-1)
        soft_neighbors = torch.matmul(alpha, emb_y)
        logits = self._scaled_neg_sq_dist(soft_neighbors, emb_x, emb_dim)

        labels = torch.eye(num_steps).type_as(logits)
        return logits, labels

    def regression_loss(self, logits, labels, steps, seq_lens):
        """Variance-aware cycle-back regression loss.

        1. Softmax the logits to obtain beta.
        2. Ground-truth index: sum of steps weighted by the one-hot labels.
        3. Predicted index: sum of steps weighted by beta.
        4. Variance: sum of beta * (steps - predicted index)^2.
        5. Loss: mean over rows of
           (true - predicted)^2 / variance + lambda * log(variance).

        ``seq_lens`` is accepted but not used; steps are not normalised by
        the sequence lengths here.
        """
        beta = torch.softmax(logits, dim=-1)
        true_idx = torch.sum(steps * labels, dim=-1)
        pred_idx = torch.sum(steps * beta, dim=-1)

        deviation = steps - pred_idx.unsqueeze(1)
        variance = torch.sum(torch.square(deviation) * beta, dim=-1)

        per_row = (torch.square(true_idx - pred_idx) / variance
                   + self.lambda_ * torch.log(variance))
        return torch.mean(per_row)

    def compute_loss(self, embs, steps, seq_lens, batch_size=2):
        """Cycle-back regression loss over all ordered pairs of sequences.

        Only the first ``batch_size`` sequences of ``embs`` take part; every
        ordered pair (i, j) with i != j among them is aligned.

        Args:
            embs: Tensor of shape [B, T, D].
            steps: Tensor indexed per sequence; row i holds the T step
                indices of sequence i.
            seq_lens: Tensor with one length per sequence.
            batch_size: Number of leading sequences to pair up.

        Returns:
            (loss, logits, labels, steps, seq_lens) where everything after
            the scalar loss is the per-pair data concatenated along dim 0.
        """
        steps = steps.to(embs.device)
        seq_lens = seq_lens.to(embs.device)

        _, T, _ = embs.shape
        ordered_pairs = [(src, dst)
                         for src in range(batch_size)
                         for dst in range(batch_size)
                         if src != dst]

        logits_parts, labels_parts = [], []
        steps_parts, seq_lens_parts = [], []
        for src, dst in ordered_pairs:
            pair_logits, pair_labels = self.compute_cycle_loss(embs[src], embs[dst])
            logits_parts.append(pair_logits)
            labels_parts.append(pair_labels)
            # Repeat the source sequence's steps / length once per cycled
            # frame so they line up with the rows of the logits.
            steps_parts.append(steps[src].unsqueeze(0).expand(T, T))
            seq_lens_parts.append(seq_lens[src].view(1,).expand(T))

        all_logits = torch.cat(logits_parts, dim=0)
        all_labels = torch.cat(labels_parts, dim=0)
        all_steps = torch.cat(steps_parts, dim=0)
        all_seq_lens = torch.cat(seq_lens_parts, dim=0)

        loss = self.regression_loss(all_logits, all_labels, all_steps, all_seq_lens)
        return loss, all_logits, all_labels, all_steps, all_seq_lens

In [3]:
# coding=utf-8
import torch

class TCC_CARL(object):
    """Temporal cycle-consistency alignment losses written with PyTorch.

    All hyper-parameters are read from ``cfg.TCC`` at construction time:
    LOSS_TYPE, SIMILARITY_TYPE, CYCLE_LENGTH, SOFTMAX_TEMPERATURE,
    LABEL_SMOOTHING, VARIANCE_LAMBDA, HUBER_DELTA and NORMALIZE_INDICES.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        self.loss_type=cfg.TCC.LOSS_TYPE
        self.similarity_type=cfg.TCC.SIMILARITY_TYPE
        self.cycle_length=cfg.TCC.CYCLE_LENGTH
        self.temperature=cfg.TCC.SOFTMAX_TEMPERATURE
        self.label_smoothing=cfg.TCC.LABEL_SMOOTHING
        self.variance_lambda=cfg.TCC.VARIANCE_LAMBDA
        self.huber_delta=cfg.TCC.HUBER_DELTA
        self.normalize_indices=cfg.TCC.NORMALIZE_INDICES

    def compute_loss(self, embs,  chosen_steps,seq_lens):
        """Compute the alignment loss for a batch of pre-computed embeddings.

        Thin wrapper around ``compute_deterministic_alignment_loss``; note
        that the argument order differs between the two methods.

        Args:
            embs: Tensor of shape [batch_size, num_frames, channels].
            chosen_steps: Tensor, frame indices of the embeddings, one row
                per sequence.
            seq_lens: Tensor, one sequence length per sequence.

        Returns:
            The tuple (loss, logits, labels, steps, seq_lens) produced by
            ``compute_deterministic_alignment_loss``.
        """
        loss = self.compute_deterministic_alignment_loss(embs, seq_lens, chosen_steps)
        return loss

    def compute_deterministic_alignment_loss(self, embs, seq_lens, steps):
        """Align every ordered pair of sequences in the batch and compute the loss.

        Args:
            embs: Tensor of shape [batch_size, num_frames, channels].
            seq_lens: Tensor, one sequence length per sequence.
            steps: Tensor, frame indices of the embeddings, one row of
                num_frames entries per sequence.

        Returns:
            (loss, logits, labels, steps, seq_lens). Everything after ``loss``
            is the per-pair data concatenated along dim 0. ``loss`` is a
            scalar tensor for 'regression_mse_var' and a ``{"loss": tensor}``
            dict for 'classification', 'regression_mse' and
            'regression_huber'.

        Raises:
            ValueError: If ``self.loss_type`` is not a supported loss.
        """
        labels_list = []
        logits_list = []
        steps_list = []
        seq_lens_list = []

        batch_size, num_frames, channels = embs.shape

        for i in range(batch_size):
            for j in range(batch_size):
                # We do not align the sequence with itself.
                if i == j:
                    continue
                logits, labels = self.align_pair_of_sequences(embs[i], embs[j])
                logits_list.append(logits)
                labels_list.append(labels)
                # Repeat sequence i's steps / length once per cycled frame so
                # they line up row-for-row with the logits of this pair.
                steps_list.append(steps[i].unsqueeze(0).expand(num_frames, num_frames))
                seq_lens_list.append(seq_lens[i].view(1,).expand(num_frames))

        logits = torch.cat(logits_list,0)
        labels = torch.cat(labels_list, 0)
        steps = torch.cat(steps_list, 0)
        seq_lens = torch.cat(seq_lens_list, 0)

        if self.loss_type == 'classification':
            # KLDivLoss expects log-probabilities as its input, so the raw
            # similarity logits are normalised with log_softmax first.
            log_probs = torch.log_softmax(logits, dim=-1)
            loss = {"loss": torch.nn.KLDivLoss(reduction='mean')(log_probs, labels)}
        elif 'regression' in self.loss_type:
            loss = self.regression_loss(logits, labels, steps, seq_lens)
        else:
            # Without this branch `loss` would be unbound at the return below.
            raise ValueError('Unsupported loss type %s. Supported losses are: '
                             'classification, regression_mse, '
                             'regression_mse_var and regression_huber.'
                             % self.loss_type)

        return loss,logits,labels,steps,seq_lens

    def align_pair_of_sequences(self, embs1, embs2):
        """Align a given pair embedding sequences.

        Each frame of embs1 is replaced by its soft nearest neighbour in
        embs2, and that neighbour is then scored against all frames of embs1.

        Args:
            embs1: Tensor, Embeddings of the shape [M, D] where M is the number of
            embeddings and D is the embedding size.
            embs2: Tensor, Embeddings of the shape [N, D] where N is the number of
            embeddings and D is the embedding size.
        Returns:
            logits: Tensor, Pre-softmax similarity scores after cycling back to the
            starting sequence.
            labels: Tensor, One hot labels containing the ground truth. The index where
            the cycle started is 1. When ``self.label_smoothing`` is truthy the
            one-hot rows are smoothed so that each row still sums to 1.
        """
        num_steps, channels = embs1.shape

        sim_12 = self.get_scaled_similarity(embs1, embs2)
        # Softmax the distance.
        softmaxed_sim_12 = torch.softmax(sim_12, dim=-1)

        # Calculate soft-nearest neighbors.
        nn_embs = torch.matmul(softmaxed_sim_12, embs2)

        # Find distances between nn_embs and embs1.
        sim_21 = self.get_scaled_similarity(nn_embs, embs1)

        logits = sim_21
        labels = torch.diag(torch.ones(num_steps)).type_as(logits)
        if self.label_smoothing:
            # Diagonal becomes 1 - label_smoothing, every off-diagonal entry
            # label_smoothing / (num_steps - 1).
            labels = (1-num_steps*self.label_smoothing/(num_steps-1))*labels + \
                        self.label_smoothing/(num_steps-1)*torch.ones_like(labels)

        return logits, labels

    def get_scaled_similarity(self, embs1, embs2):
        """Similarity of every row of embs1 to every row of embs2, scaled.

        'cosine' is the plain dot product (rows are not normalised here);
        'l2' is the negated squared L2 distance. The result is divided by the
        embedding size of embs1 and by the softmax temperature.

        Raises:
            ValueError: If ``self.similarity_type`` is neither 'cosine' nor 'l2'.
        """
        num_steps, channels = embs1.shape
        # Find distances between embs1 and embs2.
        if self.similarity_type == 'cosine':
            sim_12 = torch.matmul(embs1, embs2.transpose(0,1))
        elif self.similarity_type == 'l2':
            norm1 = torch.square(embs1).sum(1).view(-1,1)
            norm2 = torch.square(embs2).sum(1).view(1,-1)
            sim_12 = - (norm1 + norm2 - 2*torch.matmul(embs1, embs2.transpose(0,1)))
        else:
            raise ValueError('Unsupported similarity type %s.' % self.similarity_type)
        return sim_12 / channels / self.temperature

    def regression_loss(self, logits, labels, steps, seq_lens):
        """Loss function based on regressing to the correct indices.

        In the paper, this is called Cycle-back Regression. There are 3 variants
        of this loss, selected by ``self.loss_type``:
        i) regression_mse: MSE of the predicted indices and ground truth indices.
        ii) regression_mse_var: MSE of the predicted indices that takes into account
        the variance of the similarities. This is important when the rate at which
        sequences go through different phases changes a lot. The variance scaling
        allows dynamic weighting of the MSE loss based on the similarities.
        iii) regression_huber: Huber loss between the predicted indices and ground
        truth indices, using ``self.huber_delta`` as the Huber delta.

        The remaining settings are read from the instance:
        ``self.normalize_indices`` (divide steps by sequence lengths) and
        ``self.variance_lambda`` (weight of the log-variance term).

        Args:
            logits: Tensor, Pre-softmax similarity scores after cycling back to the
            starting sequence.
            labels: Tensor, One hot labels containing the ground truth. The index where
            the cycle started is 1.
            steps: Tensor, step indices/frame indices with one row per row of
            ``logits``.
            seq_lens: Tensor, one sequence length per row of ``logits``. Only
            read when ``self.normalize_indices`` is True.

        Returns:
            A scalar tensor for 'regression_mse_var'; a ``{"loss": tensor}``
            dict for 'regression_mse' and 'regression_huber'.

        Raises:
            ValueError: If ``self.loss_type`` is not a supported regression loss.
        """
        steps = steps.type_as(logits)
        if self.normalize_indices:
            seq_lens = seq_lens.type_as(logits)
            steps = steps / seq_lens.unsqueeze(1)

        beta = torch.softmax(logits, dim=-1)
        true_time = torch.sum(steps * labels, dim=-1)
        pred_time = torch.sum( beta * steps, dim=-1)

        if self.loss_type in ['regression_mse', 'regression_mse_var']:
            if 'var' in self.loss_type:
                # Variance aware regression.
                pred_time_variance = torch.sum(torch.square(steps - pred_time.unsqueeze(-1)) * beta, dim=-1)
                # A zero variance would make the log below -inf.
                assert torch.min(pred_time_variance) > 0
                # Using log of variance as it is numerically stabler.
                pred_time_log_var = torch.log(pred_time_variance)
                squared_error = torch.square(true_time - pred_time)
                loss = torch.mean(torch.exp(-pred_time_log_var) * squared_error
                                        + self.variance_lambda * pred_time_log_var)
                return loss
            else:
                return {"loss": torch.nn.MSELoss()(pred_time, true_time)}
        elif self.loss_type == 'regression_huber':
            # HuberLoss honours the configured delta; SmoothL1Loss() with its
            # default beta only matches Huber for delta == 1.
            huber = torch.nn.HuberLoss(delta=self.huber_delta)
            return {"loss": huber(pred_time, true_time)}
        else:
            raise ValueError('Unsupported regression loss %s. Supported losses are: '
                            'regression_mse, regression_mse_var and regression_huber.'
                            % self.loss_type)

In [4]:
import tensorflow as tf

# Seed TensorFlow's global generator before drawing the sample inputs below.
tf.random.set_seed(7)

# Create a sample input tensor
# N sequences, T sampled frames per sequence, D-dimensional embeddings.
N, T, D = 2, 50, 128
embs = tf.random.normal((N, T, D))
# Integer frame indices below T, sorted along each row.
steps = tf.sort(tf.random.uniform((N, T), maxval=T, dtype=tf.int32))
# NOTE(review): lengths are drawn independently of `steps`, so a length may be
# smaller than the largest step of its sequence; this only matters if index
# normalisation is switched on.
seq_lens = tf.random.uniform((N,), maxval=T, dtype=tf.int32)

# NumPy copies act as the bridge from TensorFlow tensors to PyTorch tensors.
# NOTE(review): `num_steps` is an array here; the next cell rebinds the same
# name to the integer T.
num_embs = embs.numpy()
num_steps = steps.numpy()
num_seq_lens = seq_lens.numpy()

# NOTE(review): `torch` is not imported in this cell; it relies on an earlier
# cell having imported it.
torch_embs = torch.from_numpy(num_embs)
torch_steps = torch.from_numpy(num_steps)
torch_seq_lens = torch.from_numpy(num_seq_lens)

2023-10-04 21:20:12.829104: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-04 21:20:16.925523: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4711 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:3f:00.0, compute capability: 8.6
2023-10-04 21:20:16.927696: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22304 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:40:00.0, compute capability: 8.6
2023-10-04 21:20:16.929004: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhos

In [5]:
# Hyper-parameters for the TensorFlow reference implementation.
num_steps = T
batch_size = N
loss_type = 'regression_mse_var'
similarity_type = 'l2'
temperature = 1.0
# Passed through for signature compatibility; the function does not use it.
label_smoothing = None
variance_lambda = 0.1
# Only read by the 'regression_huber' loss type.
huber_delta = 1.0
normalize_indices = False

# Compute the loss for the sample input tensor
# The call returns the scalar loss plus the concatenated per-pair logits,
# labels, steps and sequence lengths, kept for comparison with the PyTorch
# implementations.
tf_loss,tf_logits_list,tf_labels_list,tf_steps_list,tf_seq_lens_list = compute_deterministic_alignment_loss(embs, steps, seq_lens,
                                             num_steps, batch_size,
                                             loss_type,
                                             similarity_type,
                                             temperature,
                                             label_smoothing,
                                             variance_lambda,
                                             huber_delta,normalize_indices)

# Display the loss as the cell output
tf_loss

2023-10-04 21:20:18.139392: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


<tf.Tensor: shape=(), dtype=float32, numpy=1.5241928>

In [13]:
# Create a TCC object with the same temperature and variance weight as the
# TensorFlow run. `type('', (), {})()` builds an empty anonymous object that
# is used as a plain attribute container for the config.
cfg = type('', (), {})()
cfg.TCC = type('', (), {})()
cfg.TCC.LABEL_SMOOTHING = None
cfg.TCC.SOFTMAX_TEMPERATURE = 1.0
cfg.TCC.VARIANCE_LAMBDA = 0.1
tcc = TCC(cfg)

# Compute the loss for the sample input tensor
# `batch_size` is left at its default of 2, which equals N for these inputs.
torch_loss,torch_logits_list,torch_labels_list,torch_steps_list,torch_seq_lens_list = tcc.compute_loss(torch_embs, torch_steps, torch_seq_lens)

# Display the loss for comparison with `tf_loss`.
torch_loss

tensor(1.5242)

In [15]:
from easydict import EasyDict as edict
# Create a TCC_CARL object configured like the TensorFlow run (same loss type,
# similarity, temperature and variance weight). TCC_CARL only reads the 'TCC'
# section; 'TRAIN' and 'SSL' are not read by the class.
cfg = {'TCC': {'LOSS_TYPE': 'regression_mse_var', 'SIMILARITY_TYPE': 'l2', 'CYCLE_LENGTH': 10, 'SOFTMAX_TEMPERATURE': 1, 'LABEL_SMOOTHING': None, 'VARIANCE_LAMBDA': 0.1, 'HUBER_DELTA': 0.1, 'NORMALIZE_INDICES': False}, 'TRAIN': {'NUM_FRAMES': 16}, 'SSL': False}
cfg = edict(cfg)
carl_tcc = TCC_CARL(cfg)

# Note the argument order of this method: embeddings, sequence lengths, steps.
carl_loss,carl_logits_list,carl_labels_list,carl_steps_list,carl_seq_lens_list = carl_tcc.compute_deterministic_alignment_loss(torch_embs, torch_seq_lens, torch_steps)
# Display the loss for comparison with `tf_loss` and `torch_loss`.
carl_loss

tensor(1.5242)

In [8]:
# Show the concatenated cycle-back logits returned by TCC.compute_loss.
torch_logits_list

tensor([[-0.9479, -1.0044, -0.8491,  ..., -1.0135, -1.0736, -1.0281],
        [-0.9675, -0.9829, -0.8366,  ..., -1.0082, -1.0672, -1.0317],
        [-0.9816, -1.0059, -0.8138,  ..., -1.0172, -1.0688, -1.0307],
        ...,
        [-0.9337, -0.9292, -1.0141,  ..., -0.8647, -0.9082, -1.1305],
        [-0.9356, -0.9366, -1.0120,  ..., -0.8850, -0.8784, -1.1281],
        [-0.9348, -0.9297, -1.0137,  ..., -0.8819, -0.9031, -1.0956]])

In [9]:
# Show the concatenated cycle-back logits returned by TCC_CARL for visual
# comparison with `torch_logits_list`.
carl_logits_list

tensor([[-0.9479, -1.0044, -0.8491,  ..., -1.0135, -1.0736, -1.0281],
        [-0.9675, -0.9829, -0.8366,  ..., -1.0082, -1.0672, -1.0317],
        [-0.9816, -1.0059, -0.8138,  ..., -1.0172, -1.0688, -1.0307],
        ...,
        [-0.9337, -0.9292, -1.0141,  ..., -0.8647, -0.9082, -1.1305],
        [-0.9356, -0.9366, -1.0120,  ..., -0.8850, -0.8784, -1.1281],
        [-0.9348, -0.9297, -1.0137,  ..., -0.8819, -0.9031, -1.0956]])