IMPLEMENT ACTIVATION FUNCTIONS WITH PYTHON AND NUMPY

In [None]:
import numpy as np
# Random 3x3x3 test tensor with entries drawn uniformly from [-10, 10).
vec = np.random.uniform(-10, 10, 27).reshape(3,3,3)
print(vec)

[[[-8.29415404  2.58126219 -1.63519635]
  [ 1.18195215  3.88352743 -7.44073997]
  [ 8.22183407  2.96481674 -5.59173037]]

 [[ 9.78099613 -3.52056322 -8.49343998]
  [-2.14733803  1.71114413 -9.66659434]
  [-6.19494784 -7.16000057  8.12186391]]

 [[-9.15044385  2.38678294  3.10308447]
  [ 5.64454996  9.26244545 -7.00962563]
  [-9.10744832  6.78037136 -5.75521856]]]


In [None]:
import numpy as np
class activation:
  """Common activation functions evaluated on a stored NumPy array.

  Every method returns a new array; the stored input is never modified, so
  the same instance can be queried repeatedly and the caller's data is safe.

  Args:
    vec: Input values (array-like). Stored as given.
    axis: Axis along which ``softmax`` normalises.
  """

  def __init__(self, vec, axis=0):
    self.vec = vec
    self.axis = axis

  def relu(self):
    """Return ``max(x, 0)`` element-wise."""
    return np.maximum(self.vec, 0)

  def sigmoid(self, hard=False):
    """Return the logistic sigmoid of the input.

    Args:
      hard: If true, threshold the sigmoid instead: 1 where
        ``sigmoid(x) >= 0.5`` and 0 elsewhere (integer array).

    Returns:
      Array with the same shape as the input.
    """
    x = np.asarray(self.vec, dtype=float)
    # exp(-|x|) cannot overflow; each branch is algebraically 1 / (1 + exp(-x)).
    decay = np.exp(-np.abs(x))
    probs = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    if hard:
      return np.where(probs >= 0.5, 1, 0)
    return probs

  def softmax(self, stable=True):
    """Return the softmax of the input along ``self.axis``.

    Args:
      stable: If true, subtract the per-slice maximum before exponentiating.
        The result is mathematically identical but avoids overflow.

    Returns:
      Array with the same shape as the input whose slices along
      ``self.axis`` sum to 1.
    """
    x = np.asarray(self.vec, dtype=float)
    if stable:
      # Work on a shifted copy; an in-place ``-=`` would corrupt self.vec.
      x = x - np.max(x, axis=self.axis, keepdims=True)
    weights = np.exp(x)
    return weights / np.sum(weights, axis=self.axis, keepdims=True)

  def tanh(self, stable=True):
    """Return the hyperbolic tangent of the input element-wise.

    Args:
      stable: If true, use ``np.tanh``. If false, evaluate the textbook
        ``(e^x - e^-x) / (e^x + e^-x)``, which can overflow for large ``|x|``.

    Returns:
      Array with the same shape as the input, values in [-1, 1].
    """
    x = np.asarray(self.vec, dtype=float)
    if stable:
      # tanh is not shift-invariant, so the softmax max-shift trick must not
      # be applied here.
      return np.tanh(x)
    grow, decay = np.exp(x), np.exp(-x)
    return (grow - decay) / (grow + decay)

  def __str__(self):
    return str(self.vec)

In [None]:
# import numpy as np
# arr = np.array([[-1, 2, -3], [4, -5, 6]])
# vec = np.array([[0.1,0.3,0.6],[0.3,0.2,0.5],[0.8,0.1,0.1]])
# Demo: apply one activation to `vec`; uncomment a different line to try another.
act = activation(vec)
#result = act.relu()
#result = act.sigmoid(True)
result = act.softmax()
# result = act.softmax(False)
#result = act.tanh(False)
# result = act.tanh()
#print(act)
print(result)
# Sanity check: softmax normalises along act.axis, so the sums along that
# same axis (not a hard-coded axis 1) are the ones that must equal 1.
print(np.sum(result, axis=act.axis))

NameError: name 'vec' is not defined

IMPLEMENT BASIC LOSS FUNCTIONS WITH PYTHON AND NUMPY

In [None]:
class loss:
  """Basic regression and classification losses for a target/prediction pair.

  No method modifies ``y_true`` or ``y_pred``: the categorical losses compute
  softmax probabilities on a private copy, so every loss can be evaluated
  repeatedly and in any order on the same instance.

  Args:
    y_true: Targets. One-hot (same shape as ``y_pred``) for
      ``categorical_cross_entropy``; integer class ids for
      ``categorical_cross_entropy_with_class_labels``.
    y_pred: Predictions. Treated as raw logits by the categorical losses and
      as probabilities by ``binary_cross_entropy``.
    axis: Axis of ``y_pred`` along which the categorical losses apply softmax.
  """

  def __init__(self, y_true, y_pred, axis=0):
    self.y_true = y_true
    self.y_pred = y_pred
    self.axis = axis
    # Kept for callers that read this attribute. The losses below do not use
    # it because its softmax may shift the array it wraps in place.
    self.activation = activation(y_pred, axis)

  def _probabilities(self, epsilon):
    """Return softmax(y_pred) along ``self.axis``, clipped to [eps, 1 - eps]."""
    logits = np.asarray(self.y_pred, dtype=float)
    # Subtracting the max creates a new array and keeps exp() from overflowing.
    shifted = logits - np.max(logits, axis=self.axis, keepdims=True)
    weights = np.exp(shifted)
    probs = weights / np.sum(weights, axis=self.axis, keepdims=True)
    return np.clip(probs, epsilon, 1 - epsilon)

  def mse(self):
    """Return the mean squared error over all elements."""
    diff = np.asarray(self.y_true) - np.asarray(self.y_pred)
    return np.mean(np.power(diff, 2))

  def mae(self):
    """Return the mean absolute error over all elements."""
    diff = np.asarray(self.y_true) - np.asarray(self.y_pred)
    return np.mean(np.abs(diff))

  def binary_cross_entropy(self, epsilon=1e-15):
    """Return the binary cross-entropy summed over all elements.

    Args:
      epsilon: Predictions are clipped to ``[epsilon, 1 - epsilon]`` so that
        exact 0 or 1 does not produce ``log(0)``.
    """
    y_true = np.asarray(self.y_true)
    p = np.clip(np.asarray(self.y_pred, dtype=float), epsilon, 1 - epsilon)
    return -np.sum(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))

  def categorical_cross_entropy(self, axis = 0, epsilon=1e-15):
    """Return the softmax cross-entropy against one-hot targets.

    Args:
      axis: Axis (or ``None`` for all elements) over which the per-element
        terms ``-y_true * log(p)`` are summed. This is independent of the
        constructor's ``axis``, which selects the softmax axis.
      epsilon: Clipping bound applied to the probabilities before the log.
    """
    probs = self._probabilities(epsilon)
    return -np.sum(np.asarray(self.y_true) * np.log(probs), axis=axis)

  def categorical_cross_entropy_with_class_labels(self, epsilon=1e-15):
    """Return the softmax cross-entropy against integer labels, summed.

    Expects 2-D predictions and a 1-D integer ``y_true`` with one class index
    per row; row ``i`` contributes ``-log p[i, y_true[i]]``. The softmax axis
    is still the constructor's ``axis``.

    Args:
      epsilon: Clipping bound applied to the probabilities before the log.
    """
    probs = self._probabilities(epsilon)
    labels = np.asarray(self.y_true)
    rows = np.arange(labels.shape[0])
    return -np.sum(np.log(probs[rows, labels]))

  def __str__(self):
    return str(self.y_true) + " " + str(self.y_pred)


In [None]:
# Loss1 = loss(np.array([1,2,3]), np.array([4,5,6]))
# print(Loss1.mse())
# print(Loss1.mae())
# Loss2 = loss(np.array([1,0,1,0]), np.array([0.7,0.2,0.1,0.1]))
# print(Loss2.binary_cross_entropy())
# One-hot targets, one row per sample; argmax along axis 1 recovers the class ids.
target = np.array([[1,0,0],[0,1,0],[0,0,1]])
target_labels = np.argmax(target, axis=1)
#print(target_labels)
logits = np.array([[0.1,0.3,0.6],[0.3,0.2,0.5],[0.8,0.1,0.1]])
# axis=1 here is the softmax axis (across each row of logits).
Loss3 = loss(target, logits, axis=1)
print(Loss3.categorical_cross_entropy(None)) # axis=None: summed over every row and column (scalar)
print(Loss3.categorical_cross_entropy()) # default axis=0: summed over rows, one value per column
# Loss4 = loss(target_labels, logits)
# print(Loss4.categorical_cross_entropy_with_class_labels()) # sum over batch

3.9828442571439258
[1.35328656 1.23983106 1.38972664]


IMPLEMENT SIMCLR LOSS FUNCTION WITH CROSS ENTROPY

In [None]:
class NT_Xent:
  """Normalised temperature-scaled cross-entropy (SimCLR) loss.

  Args:
    z: Embeddings of shape ``(B, 2, D)``: for each of ``B`` samples, the two
      augmented views. Views of the same sample are each other's positive.
    temperature: Divisor applied to the cosine similarities.
  """

  def __init__(self, z, temperature=0.5):
    self.feat = z
    self.temp = temperature

  def forward(self):
    """Return the mean NT-Xent loss over all ``2 * B`` views.

    ``self.feat`` is left untouched, so ``forward`` may be called repeatedly.
    The masked, temperature-scaled similarity matrix of the latest call is
    stored in ``self.sim``.

    Raises:
      ValueError: If ``self.feat`` is not 3-dimensional (shape unpack fails).
      AssertionError: If the view axis does not have length 2.
    """
    feat = np.asarray(self.feat, dtype=float)
    B, V, D = feat.shape
    assert V == 2, "NT_Xent expects exactly two views per sample"

    # Flatten to (2B, D): rows 2k and 2k + 1 are the two views of sample k.
    flat = feat.reshape(B * V, D)
    flat = flat / np.linalg.norm(flat, axis=1, keepdims=True)

    sim = np.dot(flat, flat.T) / self.temp
    # A view must not count itself as a candidate: exp(-inf) == 0.
    np.fill_diagonal(sim, -np.inf)
    self.sim = sim

    rows = np.arange(2 * B)
    # Flipping the lowest bit maps 2k <-> 2k + 1, i.e. each row to its partner.
    target = rows ^ 1

    # Row-wise log-softmax, shifted by the row maximum for stability.
    logit = sim - np.max(sim, axis=1, keepdims=True)
    log_prob = logit - np.log(np.sum(np.exp(logit), axis=1, keepdims=True))

    return -np.mean(log_prob[rows, target])

# Random batch: 5 samples x 2 views x 5-dimensional embeddings.
# NOTE: this rebinds the global `vec`, which the SimCLR cell further down reuses.
vec = np.random.uniform(-10, 10, 50).reshape(5,2,5)
ContaLoss = NT_Xent(vec)
print(ContaLoss.forward())

2.585308080309623


IMPLEMENT SIMCLR LOSS FUNCTION WITHOUT CROSS ENTROPY

In [None]:
import numpy as np

def simclr_contrastive_loss(z, temperature=0.5):
    """
    Computes the SimCLR contrastive loss (NT-Xent loss) for a batch of embeddings.

    Rows ``0..N-1`` are the first view and rows ``N..2N-1`` the second view of
    the same N samples, so row ``i`` and row ``i + N`` form a positive pair.
    Both directions, (i, i+N) and (i+N, i), contribute one term each and the
    result is the mean over all 2N terms.

    Args:
        z (numpy.ndarray): Embeddings of shape (2N, D). They are L2-normalised
                           here, so the caller does not need to normalise them.
        temperature (float): Temperature parameter.

    Returns:
        float: Scalar loss value.

    Raises:
        ValueError: If ``z`` is not 2-D or does not have a positive, even
                    number of rows.
    """
    z = np.asarray(z, dtype=float)
    if z.ndim != 2 or z.shape[0] == 0 or z.shape[0] % 2 != 0:
        raise ValueError(
            "z must have shape (2N, D) with N >= 1, got %r" % (z.shape,)
        )

    # Cosine similarity between every pair of rows, scaled by the temperature.
    z = z / np.linalg.norm(z, axis=1, keepdims=True)
    sim_matrix = np.dot(z, z.T) / temperature

    # A row must not be its own candidate: exp(-inf) == 0 in the denominator.
    np.fill_diagonal(sim_matrix, -np.inf)

    total = z.shape[0]
    N = total // 2
    rows = np.arange(total)
    # Partner index: i -> i + N for the first half, i + N -> i for the second.
    partners = (rows + N) % total

    # Row-wise log-softmax, shifted by the row maximum for numerical stability.
    logits = sim_matrix - np.max(sim_matrix, axis=1, keepdims=True)
    log_prob = logits - np.log(np.sum(np.exp(logits), axis=1, keepdims=True))

    return -np.mean(log_prob[rows, partners])
#vec = np.random.uniform(-10, 10, 50).reshape(5,2,5)
# Stack view 0 of every sample, then view 1, giving the (2N, D) layout the
# function assumes (first N rows = one view, last N rows = the other).
new_vec = np.concatenate((vec[:,0,:], vec[:,1,:]), axis=0)
ContaLoss = simclr_contrastive_loss(new_vec)
print(ContaLoss)

1.2159902266494427


IMPLEMENT TRIPLET LOSS WITH PYTHON AND NUMPY

In [None]:
class tripletLoss:
  """Margin-based triplet loss on L2-normalised embeddings.

  Args:
    anc: Anchor embeddings, one per row.
    pos: Positive embeddings, row-aligned with ``anc``.
    neg: Negative embeddings, row-aligned with ``anc``.
    margin: Required gap between the negative and positive squared distances.
  """

  def __init__(self, anc, pos, neg, margin=1.0):
    self.anc, self.pos, self.neg = anc, pos, neg
    self.margin = margin

  def forward(self):
    """Return the batch mean of ``max(d(a, p) - d(a, n) + margin, 0)``.

    ``d`` is the squared Euclidean distance between rows after each row has
    been scaled to unit L2 norm.
    """
    # Scale every row of each input to unit length (anchor, positive, negative).
    unit_anchor, unit_positive, unit_negative = (
        emb / np.linalg.norm(emb, axis=1, keepdims=True)
        for emb in (self.anc, self.pos, self.neg)
    )

    gap_to_positive = np.square(unit_anchor - unit_positive).sum(axis=1)
    gap_to_negative = np.square(unit_anchor - unit_negative).sum(axis=1)

    # Hinge: only triplets violating the margin contribute.
    hinge = np.maximum(gap_to_positive - gap_to_negative + self.margin, 0)
    return np.mean(hinge)

# Random 10x10 anchor / positive / negative batches; each row is one embedding.
anc = np.random.uniform(-10, 10, 100).reshape(10,10)
pos  = np.random.uniform(-10, 10, 100).reshape(10,10)
neg  = np.random.uniform(-10, 10, 100).reshape(10,10)
TripLoss = tripletLoss(anc, pos, neg)
print(TripLoss.forward())

1.0662874818049741


DINO LOSS FOR SELF-DISTILLATION

In [None]:
import numpy as np

class DINOLossNumpy:
    """DINO self-distillation loss (teacher/student cross-entropy) in NumPy.

    The teacher output is centred with a running mean, sharpened with a
    scheduled temperature and turned into probabilities; the student output
    is turned into log-probabilities. The loss is the cross-entropy between
    each teacher chunk and every student crop with a different index.

    Args:
        out_dim: Size of the last axis of the student and teacher outputs.
        ncrops: Number of crops the student output is split into.
        warmup_teacher_temp: Teacher temperature at epoch 0.
        teacher_temp: Teacher temperature once warm-up has finished.
        warmup_teacher_temp_epochs: Number of epochs of linear warm-up.
        nepochs: Total number of epochs covered by the schedule.
        student_temp: Temperature dividing the student output.
        center_momentum: EMA coefficient for the teacher centre.
    """

    def __init__(self, out_dim, ncrops, warmup_teacher_temp, teacher_temp,
                 warmup_teacher_temp_epochs, nepochs, student_temp=0.1,
                 center_momentum=0.9):
        self.student_temp = student_temp
        self.center_momentum = center_momentum
        self.ncrops = ncrops
        self.center = np.zeros((1, out_dim))  # shape (1, out_dim)

        # Linear warm-up followed by a constant plateau; one entry per epoch.
        self.teacher_temp_schedule = np.concatenate((
            np.linspace(warmup_teacher_temp, teacher_temp, warmup_teacher_temp_epochs),
            np.ones(nepochs - warmup_teacher_temp_epochs) * teacher_temp
        ))

    def softmax(self, x, axis=-1):
        """Return softmax(x) along ``axis``, shifted by the max for stability."""
        x = x - np.max(x, axis=axis, keepdims=True)
        exp_x = np.exp(x)
        return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

    def log_softmax(self, x, axis=-1):
        """Return log(softmax(x)) along ``axis`` without forming the softmax."""
        x = x - np.max(x, axis=axis, keepdims=True)
        logsumexp = np.log(np.sum(np.exp(x), axis=axis, keepdims=True))
        return x - logsumexp

    def forward(self, student_output, teacher_output, epoch):
        """Return the DINO loss for one batch and update the teacher centre.

        Args:
            student_output: shape [B * ncrops, out_dim]; split into ``ncrops``
                chunks along axis 0.
            teacher_output: shape [2 * B, out_dim]; split into 2 chunks along
                axis 0 (assumes the teacher sees two global crops).
            epoch: Index into the teacher temperature schedule.

        Returns:
            Scalar: mean cross-entropy over all (teacher chunk, student crop)
            pairs whose indices differ.
        """
        # Student log-probabilities depend only on the crop, so compute them
        # once per crop instead of once per (teacher chunk, crop) pair.
        student_logits = np.asarray(student_output) / self.student_temp
        student_log_probs = [
            self.log_softmax(crop, axis=-1)
            for crop in np.array_split(student_logits, self.ncrops, axis=0)
        ]

        # Teacher centring and sharpening.
        temp = self.teacher_temp_schedule[epoch]
        teacher_probs = self.softmax((teacher_output - self.center) / temp, axis=-1)
        teacher_chunks = np.array_split(teacher_probs, 2, axis=0)

        total_loss = 0.0
        n_loss_terms = 0
        for iq, q in enumerate(teacher_chunks):
            for v, log_probs in enumerate(student_log_probs):
                if v == iq:
                    # Same index on both sides (presumably the same view): skip.
                    continue
                # Cross-entropy between teacher probabilities and student log-probs.
                total_loss += np.mean(-np.sum(q * log_probs, axis=-1))
                n_loss_terms += 1

        total_loss /= n_loss_terms

        # The centre tracks the raw (pre-softmax) teacher output.
        self.update_center(teacher_output)

        return total_loss

    def update_center(self, teacher_output):
        """
        Exponential moving average update of the center.
        teacher_output: shape [2B, out_dim]
        """
        batch_center = np.mean(teacher_output, axis=0, keepdims=True)
        self.center = self.center * self.center_momentum + \
                      batch_center * (1 - self.center_momentum)



In [None]:
np.random.seed(0)
B, out_dim = 4, 10
ncrops = 6

# Student output has one chunk of B rows per crop; teacher output has two.
student_output = np.random.randn(B * ncrops, out_dim)
teacher_output = np.random.randn(B * 2, out_dim)
epoch = 5

dino_loss = DINOLossNumpy(
    out_dim=out_dim,
    ncrops=ncrops,
    warmup_teacher_temp=0.04,
    teacher_temp=0.07,
    warmup_teacher_temp_epochs=10,
    nepochs=100
)

# Not named `loss`: that would rebind the global `loss` class defined in an
# earlier cell and break re-running the cells that instantiate it.
dino_loss_value = dino_loss.forward(student_output, teacher_output, epoch)
print("Loss:", dino_loss_value)


Student Out: 6 [[ 17.64052346   4.00157208   9.78737984  22.40893199  18.6755799
   -9.7727788    9.50088418  -1.51357208  -1.03218852   4.10598502]
 [  1.44043571  14.54273507   7.61037725   1.21675016   4.43863233
    3.33674327  14.94079073  -2.05158264   3.13067702  -8.54095739]
 [-25.52989816   6.53618595   8.64436199  -7.4216502   22.69754624
  -14.54365675   0.45758517  -1.8718385   15.32779214  14.6935877 ]
 [  1.54947426   3.7816252   -8.87785748 -19.80796468  -3.47912149
    1.56348969  12.30290681  12.02379849  -3.87326817  -3.02302751]]
batch_center:  (1, 10) [[-0.38563021 -0.1221811   0.21694348 -0.151761   -0.22178317  0.02449219
  -0.10994892 -0.02136662 -0.47691968  0.33885894]]
Loss: 15.40925833750359


In [None]:
import numpy as np

class DinoLoss:
  """DINO self-distillation loss (teacher/student cross-entropy) in NumPy.

  Args:
    ncrops: Number of crops the student output is split into.
    embed_dim: Size of the last axis of the student and teacher outputs.
    teach_temp: Teacher temperature once warm-up has finished.
    teach_temp_warm: Teacher temperature at epoch 0.
    teach_temp_epochs: Number of epochs of linear temperature warm-up.
    nepochs: Total number of epochs covered by the schedule.
    momentum: EMA coefficient for the teacher centre.
    stud_temp: Temperature dividing the student output.
  """

  def __init__(self, ncrops, embed_dim, teach_temp, teach_temp_warm, teach_temp_epochs, nepochs, momentum=0.9,
               stud_temp=0.1):
    self.ncrops = ncrops  # number of image crops
    self.embed_dim = embed_dim  # feature dimension
    self.tea_temp = teach_temp  # final teacher temperature
    self.tea_temp_warm = teach_temp_warm  # initial teacher temperature
    self.tea_temp_epochs = teach_temp_epochs  # epochs of temperature warm-up
    self.nepo = nepochs  # total number of training epochs
    self.mom = momentum  # EMA momentum for the teacher centre
    self.stud_temp = stud_temp  # student temperature

    # Running mean of the raw teacher output; always shape (1, embed_dim).
    self.center = np.zeros((1, embed_dim))
    # Linear warm-up followed by a constant plateau; one entry per epoch.
    self.tea_temp_schedule = np.concatenate((
        np.linspace(teach_temp_warm, teach_temp, teach_temp_epochs),
        np.ones(nepochs - teach_temp_epochs) * teach_temp))

  def softmax(self, x, axis=-1):
    """Return softmax(x) along ``axis``, shifted by the max for stability."""
    x = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(x)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

  def log_softmax(self, x, axis=-1):
    """Return log(softmax(x)) along ``axis`` without forming the softmax."""
    x = x - np.max(x, axis=axis, keepdims=True)
    logsumexp = np.log(np.sum(np.exp(x), axis=axis, keepdims=True))
    return x - logsumexp

  def forward(self, stud_out, tech_out, epoch):
    """Return the DINO loss for one batch and update the teacher centre.

    Args:
      stud_out: shape [B * ncrops, embed_dim]; split into ``ncrops`` chunks
        along axis 0.
      tech_out: shape [2 * B, embed_dim]; split into 2 chunks along axis 0
        (assumes the teacher sees two global crops).
      epoch: Index into the teacher temperature schedule.

    Returns:
      Scalar: mean cross-entropy over all (teacher chunk, student crop) pairs
      whose indices differ.
    """
    # Student log-probabilities, computed once per crop.
    student_logits = np.asarray(stud_out) / self.stud_temp
    student_log_probs = [
        self.log_softmax(crop, axis=-1)
        for crop in np.array_split(student_logits, self.ncrops, axis=0)
    ]

    # Teacher: centre, sharpen, softmax. The raw ``tech_out`` is kept under
    # its own name because the centre update below needs it unchanged.
    temp = self.tea_temp_schedule[epoch]
    teacher_probs = self.softmax((tech_out - self.center) / temp, axis=-1)
    teacher_chunks = np.array_split(teacher_probs, 2, axis=0)

    total_loss = 0.0
    num_loss = 0
    for iq, q in enumerate(teacher_chunks):
      for v, log_prob in enumerate(student_log_probs):
        if v == iq:
          # Same index on both sides (presumably the same view): skip.
          continue
        # Cross-entropy: -sum(softmax(teacher) * log_softmax(student)).
        total_loss += np.mean(-np.sum(q * log_prob, axis=-1))
        num_loss += 1

    total_loss /= num_loss

    # Update from the raw teacher output, not the softmaxed chunks: the
    # chunk list would average to shape (1, B, embed_dim) and break the next
    # call's ``tech_out - self.center``.
    self.update_center(tech_out)

    return total_loss

  def update_center(self, tech_out):
    """Update the centre as an EMA of the batch mean of ``tech_out``.

    Args:
      tech_out: Raw teacher output of shape [2B, embed_dim].
    """
    batch_center = np.mean(tech_out, axis=0, keepdims=True)
    self.center = self.center * self.mom + batch_center * (1 - self.mom)


In [None]:
np.random.seed(0)
B, out_dim = 4, 10
ncrops = 6

# Student output has one chunk of B rows per crop; teacher output has two.
student_output = np.random.randn(B * ncrops, out_dim)
teacher_output = np.random.randn(B * 2, out_dim)
epoch = 5

dino_loss = DinoLoss(
    embed_dim=out_dim,
    ncrops=ncrops,
    teach_temp_warm=0.04,
    teach_temp=0.07,
    teach_temp_epochs=10,
    nepochs=100
)

# Not named `loss`: that would rebind the global `loss` class defined in an
# earlier cell and break re-running the cells that instantiate it.
dino_loss_value = dino_loss.forward(student_output, teacher_output, epoch)
print("Loss:", dino_loss_value)


Loss: 15.40925833750359


In [None]:
# `target` is one-hot per row, so this picks the `logits` entry of the true class in each row.
print(np.sum(target*logits, axis=-1))

[-0.5 -0.3 -0.7]
