In [1]:
import sys
sys.path.insert(0, './')
sys.path.insert(0, '../')
import numpy as np
import torch.utils.data as data
from PIL import Image
import torch
import os
import torchvision.transforms as transforms
# from dataloaders.helper import CutoutPIL
from randaugment import RandAugment

from PIL import ImageDraw
import numpy as np
import random

# __all__ = ['CutoutPIL']


class CutoutPIL(object):
    """Cutout augmentation for PIL images.

    Paints one randomly placed, randomly coloured rectangle onto the image.
    The rectangle's nominal side lengths are ``cutout_factor`` times the image
    height and width; it is clipped at the image border, so the painted area
    can be smaller when the centre falls near an edge.

    Args:
        cutout_factor: fraction of each image dimension covered by the
            rectangle before clipping.
    """

    def __init__(self, cutout_factor=0.5):
        self.cutout_factor = cutout_factor

    def __call__(self, x):
        """Draw the cutout rectangle on ``x`` in place and return ``x``.

        The fill colour is a 3-tuple, so ``x`` is expected to be an RGB image.
        """
        img_draw = ImageDraw.Draw(x)
        # PIL reports size as (width, height). The previous code unpacked it as
        # (h, w), which put the rectangle on swapped axes for non-square images.
        w, h = x.size[0], x.size[1]
        h_cutout = int(self.cutout_factor * h + 0.5)
        w_cutout = int(self.cutout_factor * w + 0.5)
        # Centre of the rectangle (row drawn first, then column).
        y_c = np.random.randint(h)
        x_c = np.random.randint(w)

        # Clip the rectangle to the image bounds.
        y1 = int(np.clip(y_c - h_cutout // 2, 0, h))
        y2 = int(np.clip(y_c + h_cutout // 2, 0, h))
        x1 = int(np.clip(x_c - w_cutout // 2, 0, w))
        x2 = int(np.clip(x_c + w_cutout // 2, 0, w))
        fill_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        img_draw.rectangle([x1, y1, x2, y2], fill=fill_color)

        return x

class foodseg103(data.Dataset):
    """Multi-label classification view of the FoodSeg103 dataset.

    Each item is ``(image_tensor, target)``. ``target`` is the stored label
    vector with its first entry dropped and a leading axis added, so it has
    one entry per name in ``classnames``.

    Args:
        root: base directory; only used to locate the ``Annotations`` folder
            where partial-label masks are saved / loaded.
        data_split: ``'trainval'`` (augmented transform, train labels) or
            ``'test'`` (plain transform, test labels).
        img_size: images are resized to ``(img_size, img_size)``.
        p: fraction of the trainval image list to keep (a prefix of the list).
        annFile: must be ``""``; anything else raises ``NotImplementedError``.
        label_mask: file name of a saved mask inside ``root/Annotations``; when
            ``None`` and ``partial < 1`` a new random mask is drawn and saved.
        partial: probability that a label is kept as known; values ``>= 1``
            disable masking.

    Raises:
        ValueError: if ``data_split`` is not a supported split.
        NotImplementedError: if ``annFile`` is not empty.
    """

    # Location of the FoodSeg103 copy this notebook was written against.
    DATASET_DIR = '/home/samyakr2/food/FoodSeg103'
    # split name -> (label file, image sub-directory under Images/img_dir)
    _SPLITS = {
        'trainval': ('train_labels.npy', 'train'),
        'test': ('test_labels.npy', 'test'),
    }

    def __init__(self, root, data_split, img_size=224, p=1, annFile="", label_mask=None, partial=1+1e-6):
        # Fail fast with a clear message instead of a late file / attribute error.
        if data_split not in self._SPLITS:
            raise ValueError('data split = %s is not supported in FoodSeg103' % data_split)

        self.root = root
        self.classnames = ["candy", "egg tart", "french fries", "chocolate", "biscuit", "popcorn", "pudding", "ice cream", "cheese butter", "cake", "wine", "milkshake", "coffee", "juice", "milk", "tea", "almond", "red beans", "cashew", "dried cranberries", "soy", "walnut", "peanut", "egg", "apple", "date", "apricot", "avocado", "banana", "strawberry", "cherry", "blueberry", "raspberry", "mango", "olives", "peach", "lemon", "pear", "fig", "pineapple", "grape", "kiwi", "melon", "orange", "watermelon", "steak", "pork", "chicken duck", "sausage", "fried meat", "lamb", "sauce", "crab", "fish", "shellfish", "shrimp", "soup", "bread", "corn", "hamburg", "pizza", "hanamaki baozi", "wonton dumplings", "pasta", "noodles", "rice", "pie", "tofu", "eggplant", "potato", "garlic", "cauliflower", "tomato", "kelp", "seaweed", "spring onion", "rape", "ginger", "okra", "lettuce", "pumpkin", "cucumber", "white radish", "carrot", "asparagus", "bamboo shoots", "broccoli", "celery stick", "cilantro mint", "snow peas", "cabbage", "bean sprouts", "onion", "pepper", "green beans", "French beans", "king oyster mushroom", "shiitake", "enoki mushroom", "oyster mushroom", "white button mushroom", "salad", "other ingredients"]

        self.data_split = data_split
        label_file = self._SPLITS[data_split][0]
        # NOTE: allow_pickle=True unpickles arbitrary objects; only load label
        # files that come from a trusted source.
        self.labels_lab = np.load(os.path.join(self.DATASET_DIR, label_file), allow_pickle=True).item()

        if annFile == "":
            self.annFile = os.path.join(self.root, 'Annotations')
        else:
            raise NotImplementedError

        image_list_file = os.path.join(self.DATASET_DIR, 'ImageSets', '%s.txt' % data_split)
        with open(image_list_file) as f:
            self.image_list = [line.strip() for line in f]

        # Keep only the first fraction `p` of the training images. The old
        # check compared against 'Train', which is not a valid split name, so
        # `p` was silently ignored.
        if data_split == 'trainval':
            pick_example = int(len(self.image_list) * p)
            self.image_list = self.image_list[:pick_example]

        normalize = transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
        if data_split == 'trainval':
            self.transform = transforms.Compose([
                # transforms.RandomResizedCrop(img_size)
                transforms.Resize((img_size, img_size)),
                CutoutPIL(cutout_factor=0.25),
                RandAugment(),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            self.transform = transforms.Compose([
                # transforms.CenterCrop(img_size),
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                normalize,
            ])

        # Optional partial-label mask: 1 = label known, 0 = label hidden.
        self.mask = None
        self.partial = partial
        if data_split == 'trainval' and partial < 1.:
            if label_mask is None:
                rand_tensor = torch.rand(len(self.image_list), len(self.classnames))
                mask = (rand_tensor < partial).long()
                # Add a singleton axis so mask[index] lines up with `target`.
                mask = torch.stack([mask], dim=1)
                os.makedirs(self.annFile, exist_ok=True)
                torch.save(mask, os.path.join(self.annFile, 'partial_label_%.2f.pt' % partial))
            else:
                mask = torch.load(os.path.join(self.annFile, label_mask))
            self.mask = mask.long()

    def __len__(self):
        """Number of images in the (possibly subsampled) split."""
        return len(self.image_list)

    def __getitem__(self, index):
        """Return ``(transformed_image, target)`` for the ``index``-th image.

        Hidden labels (mask == 0) are encoded as ``-1`` in ``target``.
        """
        image_name = self.image_list[index]
        image_subdir = self._SPLITS[self.data_split][1]

        img_path = os.path.join(self.DATASET_DIR, 'Images', 'img_dir', image_subdir, image_name)
        img = Image.open(img_path).convert('RGB')

        # Labels are keyed by the file name without its extension.
        label_vector = self.labels_lab[os.path.splitext(image_name)[0]]
        # Drop the first entry so the vector has one entry per class name.
        targets = label_vector[1:].long()
        target = targets[None, ]
        if self.mask is not None:
            masked = - torch.ones((1, len(self.classnames)), dtype=torch.long)
            target = self.mask[index] * target + (1 - self.mask[index]) * masked

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def name(self):
        """Short identifier of this dataset."""
        return 'foodseg103'


In [2]:
# Both splits are built with the same options; only the split name differs.
_shared_dataset_args = dict(root='', img_size=224, p=1, annFile='', label_mask=None, partial=1)

train_data = foodseg103(data_split='trainval', **_shared_dataset_args)
test_data = foodseg103(data_split='test', **_shared_dataset_args)

In [3]:
# File names of the trainval split, one per line.
image_list_file = os.path.join('/home/samyakr2/food/FoodSeg103', 'ImageSets', 'trainval.txt')
# image_list_file = os.path.join('/home/samyakr2/food/FoodSeg103', 'ImageSets', '%s.txt' % data_split)

with open(image_list_file) as f:
    image_list = [entry.strip() for entry in f.readlines()]

# Each .npy file stores one pickled object; .item() unwraps it.
labels_lab = np.load('/home/samyakr2/food/FoodSeg103/train_labels.npy', allow_pickle=True).item()
labels_lab_val = np.load('/home/samyakr2/food/FoodSeg103/test_labels.npy', allow_pickle=True).item()

In [4]:
# Peek at the first label vector only: its full shape, then its shape once
# the leading entry is dropped.
val = next(iter(labels_lab.values()), None)
if val is not None:
    print(val.shape)
    print(val[1:].shape)

torch.Size([104])
torch.Size([103])


In [5]:
# # # for val in labels_lab.values():

# # # Initialize co-occurrence matrix
# num_labels = 103
# co_occurrence_matrix = np.zeros((num_labels, num_labels))

# # Loop over each multi-label vector
# for vector in labels_lab_val.values():
#     vector_no_background = vector[1:]
#     # Iterate over pairs of labels
#     for i in range(num_labels):
#         for j in range(i, num_labels):
#             # If both labels co-occur in this vector, update the co-occurrence matrix
#             if vector_no_background[i] == 1 and vector_no_background[j] == 1:
#                 co_occurrence_matrix[i, j] += 1
#                 co_occurrence_matrix[j, i] += 1  # Since it's symmetric

# # Print or use the co-occurrence matrix as needed
# print(co_occurrence_matrix)


In [6]:
# co_occurrence_matrix.shape

In [7]:
# rows, cols = co_occurrence_matrix.shape

# # Iterate over the diagonal elements and divide them by 2
# for i in range(min(rows, cols)):
#     co_occurrence_matrix[i, i] /= 2

In [117]:
# Show the top-left 10x10 corner of the matrix.
# NOTE(review): `co_occurrence_matrix_test` is not assigned in any cell visible here, so
# this cell presumably relies on leftover kernel state — confirm it survives Restart & Run All.
co_occurrence_matrix_test[:10, :10]

array([[ 11.,   0.,   0.,   1.,   2.,   1.,   0.,   1.,   0.,   2.],
       [  0.,   1.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,  78.,   0.,   0.,   0.,   0.,  14.,  11.,   0.],
       [  1.,   0.,   0.,  18.,   3.,   0.,   0.,   3.,   0.,   8.],
       [  2.,   0.,   0.,   3.,  77.,   0.,   0.,  17.,   6.,   1.],
       [  1.,   0.,   0.,   0.,   0.,   4.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   1.,   0.,   0.,   0.],
       [  1.,   0.,  14.,   3.,  17.,   0.,   0., 277.,  10.,  26.],
       [  0.,   0.,  11.,   0.,   6.,   0.,   0.,  10., 120.,   2.],
       [  2.,   0.,   0.,   8.,   1.,   0.,   0.,  26.,   2., 126.]])

In [118]:
# Load a previously saved co-occurrence matrix (presumably computed on the training
# split, going by the variable name — confirm) and show its top-left 10x10 corner.
co_occurrence_matrix_train = np.load('/home/samyakr2/SHOP/co_occurrence_matrix.npy')
co_occurrence_matrix_train[:10, :10]

array([[ 21.,   0.,   0.,   1.,   9.,   3.,   0.,   0.,   2.,   3.],
       [  0.,   3.,   0.,   0.,   1.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0., 166.,   0.,   5.,   1.,   0.,  21.,  16.,   0.],
       [  1.,   0.,   0.,  49.,   9.,   1.,   0.,  15.,   0.,  18.],
       [  9.,   1.,   5.,   9., 218.,   3.,   0.,  47.,  14.,   3.],
       [  3.,   0.,   1.,   1.,   3.,  10.,   0.,   1.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   4.,   0.,   0.,   0.],
       [  0.,   0.,  21.,  15.,  47.,   1.,   0., 636.,  18.,  82.],
       [  2.,   0.,  16.,   0.,  14.,   0.,   0.,  18., 276.,  13.],
       [  3.,   0.,   0.,  18.,   3.,   0.,   0.,  82.,  13., 333.]])

In [116]:
# Persist the matrix under the "_val" file name.
# NOTE(review): the only code that builds `co_occurrence_matrix` is commented out in the
# cells shown here, so this depends on kernel state from an earlier run — confirm which
# split the saved matrix actually came from.
np.save('/home/samyakr2/SHOP/co_occurrence_matrix_val.npy', co_occurrence_matrix)

In [8]:
# import numpy as np

# # Load the co-occurrence matrix
# co_occurrence_matrix = np.load('/home/samyakr2/SHOP/co_occurrence_matrix.npy')

# # Get the diagonal values of the matrix
# diagonal_values = np.diag(co_occurrence_matrix)

# # Divide each row by its corresponding diagonal value
# normalized_co_occurrence_matrix = co_occurrence_matrix / diagonal_values[:, None]

# # Print or use the normalized co-occurrence matrix as needed
# print(normalized_co_occurrence_matrix)

In [9]:
# np.save('/home/samyakr2/SHOP/normalized_foodseg103_co_occurrence_matrix.npy', normalized_co_occurrence_matrix)

In [8]:
# Count, for every label index 0..103, how many listed images have that label set.
index_counts = dict.fromkeys(range(104), 0)

for file in image_list:
    key = file[:-4]  # file name without its 4-character extension
    if key not in labels_lab:  # images without a label entry are skipped
        continue
    label_tensor = labels_lab[key]
    # Positions of the positive labels as a flat 1-D tensor (also when there is exactly one).
    indices_gt_zero = torch.nonzero(label_tensor > 0).reshape(-1)
    for index in indices_gt_zero:
        index_counts[index.item()] += 1
index_counts

{0: 0,
 1: 21,
 2: 3,
 3: 166,
 4: 49,
 5: 218,
 6: 10,
 7: 4,
 8: 636,
 9: 276,
 10: 333,
 11: 96,
 12: 80,
 13: 127,
 14: 129,
 15: 45,
 16: 24,
 17: 70,
 18: 20,
 19: 13,
 20: 18,
 21: 32,
 22: 41,
 23: 7,
 24: 223,
 25: 106,
 26: 7,
 27: 22,
 28: 57,
 29: 100,
 30: 388,
 31: 159,
 32: 152,
 33: 47,
 34: 43,
 35: 43,
 36: 73,
 37: 423,
 38: 30,
 39: 21,
 40: 94,
 41: 123,
 42: 38,
 43: 20,
 44: 183,
 45: 29,
 46: 728,
 47: 474,
 48: 848,
 49: 195,
 50: 154,
 51: 71,
 52: 818,
 53: 14,
 54: 258,
 55: 30,
 56: 99,
 57: 67,
 58: 991,
 59: 343,
 60: 5,
 61: 54,
 62: 19,
 63: 9,
 64: 114,
 65: 187,
 66: 464,
 67: 397,
 68: 48,
 69: 19,
 70: 785,
 71: 73,
 72: 157,
 73: 790,
 74: 1,
 75: 7,
 76: 133,
 77: 54,
 78: 23,
 79: 15,
 80: 403,
 81: 72,
 82: 369,
 83: 36,
 84: 881,
 85: 187,
 86: 3,
 87: 704,
 88: 164,
 89: 636,
 90: 48,
 91: 94,
 92: 24,
 93: 374,
 94: 305,
 95: 152,
 96: 253,
 97: 10,
 98: 82,
 99: 6,
 100: 7,
 101: 99,
 102: 12,
 103: 155}

In [9]:
# Worker / memory settings shared by both loaders.
_loader_opts = dict(num_workers=3, pin_memory=True)

# Training batches are shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, **_loader_opts)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=100, shuffle=False, **_loader_opts)

## MODEL PREP

In [10]:
import os
import clip
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import pandas as pd

import itertools
import numpy as np
import copy
import shutil

In [11]:
# Prefer the GPU when one is visible.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(clip.available_models())
# Load the RN101 CLIP checkpoint together with its preprocessing transform.
clip_model, preprocess = clip.load('RN101', device)

['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14', 'ViT-L/14@336px']


In [6]:
# labels_food = ["candy", "egg tart", "french fries", "chocolate", "biscuit", "popcorn", "pudding", "ice cream", "cheese butter", "cake", "wine", "milkshake", "coffee", "juice", "milk", "tea", "almond", "red beans", "cashew", "dried cranberries", "soy", "walnut", "peanut", "egg", "apple", "date", "apricot", "avocado", "banana", "strawberry", "cherry", "blueberry", "raspberry", "mango", "olives", "peach", "lemon", "pear", "fig", "pineapple", "grape", "kiwi", "melon", "orange", "watermelon", "steak", "pork", "chicken duck", "sausage", "fried meat", "lamb", "sauce", "crab", "fish", "shellfish", "shrimp", "soup", "bread", "corn", "hamburg", "pizza", "hanamaki baozi", "wonton dumplings", "pasta", "noodles", "rice", "pie", "tofu", "eggplant", "potato", "garlic", "cauliflower", "tomato", "kelp", "seaweed", "spring onion", "rape", "ginger", "okra", "lettuce", "pumpkin", "cucumber", "white radish", "carrot", "asparagus", "bamboo shoots", "broccoli", "celery stick", "cilantro mint", "snow peas", "cabbage", "bean sprouts", "onion", "pepper", "green beans", "French beans", "king oyster mushroom", "shiitake", "enoki mushroom", "oyster mushroom", "white button mushroom", "salad", "other ingredients"]

# items_food = ["A photo of a " + item + ', a type of food' for item in labels_food]

# text = clip.tokenize(items_food).to(device)
# text_features = clip_model.encode_text(text)
# text_features /= text_features.norm(dim=-1, keepdim=True)

# text_features_path = '/home/samyakr2/SHOP/foodseg103_labels.pt'
# torch.save(text_features, text_features_path)

import torch
import numpy as np
# Cached text embeddings of the label prompts, cast to float32.
text_features_path = '/home/samyakr2/SHOP/foodseg103_labels.pt'
text_features = torch.load(text_features_path).float()

# Pairwise dot products between label embeddings (label x label).
similarity_text = torch.matmul(text_features, text_features.T)
print(similarity_text.shape)

# Store the relation matrix both as a tensor and as a NumPy array.
torch.save(similarity_text, '/home/samyakr2/SHOP/foodseg103_relation.pt')
np.save('/home/samyakr2/SHOP/foodseg103_relation.npy', similarity_text.detach().cpu().numpy())


torch.Size([103, 103])


In [13]:
# Build a sparse, row-normalised relation matrix from the saved 80x80 relation file.
a = np.load('/home/samyakr2/SHOP/relation+coco.npy')
relation = torch.tensor(a, dtype=torch.float32)

# Keep only the 50 largest entries of every row; everything else is zeroed.
max_idx = torch.topk(relation, 50).indices
mask = torch.ones_like(relation).type(torch.bool)
mask.scatter_(1, max_idx, False)
relation.masked_fill_(mask, 0)
sparse_mask = mask

# Remove self-relations, rescale each row so its off-diagonal mass is 0.2,
# then put the remaining 0.8 on the diagonal.
dialog = torch.eye(80, dtype=torch.bool)
relation[dialog] = 0
relation = relation / relation.sum(dim=1, keepdim=True) * 0.2
relation[dialog] = 1-0.2

gcn_relation = relation.clone()
gcn_relation.shape

torch.Size([80, 80])

In [13]:
def get_features(dataloader):
    """Encode every batch of ``dataloader`` with the global ``clip_model``.

    Args:
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(features, labels)``: two lists with one entry per batch. The
        features are detached tensors on ``device``; the labels are returned
        exactly as the loader produced them.
    """
    all_features_batches = []
    all_labels_batches = []
    # Pure inference: without no_grad() an autograd graph is built for every
    # batch and only thrown away afterwards by .detach(), wasting memory.
    with torch.no_grad():
        for images, labels in dataloader:
            features = clip_model.encode_image(images.to(device))
            all_features_batches.append(features.detach())
            all_labels_batches.append(labels)
    return all_features_batches, all_labels_batches

# train_features, train_labels = get_features(train_loader)
# val_features, val_labels = get_features(test_loader)

In [14]:
# train_features_path = '/home/samyakr2/SHOP/foodseg103_train_img.pt'
# torch.save(train_features, train_features_path)

# train_labels_path = '/home/samyakr2/SHOP/foodseg103_train_label.pt'
# torch.save(train_labels, train_labels_path)

# val_features_path = '/home/samyakr2/SHOP/foodseg103_test_img.pt'
# torch.save(val_features, val_features_path)

# val_labels_path = '/home/samyakr2/SHOP/foodseg103_test_label.pt'
# torch.save(val_labels, val_labels_path)

In [15]:
# Locations of the cached per-batch CLIP features and labels.
train_features_path = '/home/samyakr2/SHOP/foodseg103_train_img.pt'
train_labels_path = '/home/samyakr2/SHOP/foodseg103_train_label.pt'
val_features_path = '/home/samyakr2/SHOP/foodseg103_test_img.pt'
val_labels_path = '/home/samyakr2/SHOP/foodseg103_test_label.pt'

# Load them in the same order as before: train features, train labels,
# then the test-split features and labels.
train_features, train_labels, val_features, val_labels = (
    torch.load(path)
    for path in (train_features_path, train_labels_path, val_features_path, val_labels_path)
)

In [16]:
# train_labels[0][0]

In [17]:
class clip_2fc(nn.Module):
    """Bias-free linear probe from input features straight to class logits.

    Only ``fc1`` takes part in the forward pass. ``relu``, ``fc2``,
    ``softmax``, ``dropout`` and ``sigmoid`` are still created, so the
    module's attributes and ``state_dict`` keys stay as they were.

    Args:
        input_dim: size of each input feature vector.
        hidden_dim: input size of the unused ``fc2`` layer.
        output_dim: number of logits produced.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        # Layers are created in the original order so that seeded
        # initialisation consumes random numbers identically.
        projection = nn.Linear(input_dim, output_dim, bias=False)
        self.fc1 = nn.Sequential(projection)

        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.3)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return raw logits ``fc1(x)``; no activation is applied."""
        return self.fc1(x)

In [18]:
class AsymmetricLoss(nn.Module):
    """Asymmetric focal-style loss for multi-label classification.

    Positives and negatives get different focusing exponents, and the
    negative probability is shifted by ``clip`` before the log so that very
    easy negatives contribute no loss.

    Args:
        gamma_neg: focusing exponent applied to negative targets.
        gamma_pos: focusing exponent applied to positive targets.
        clip: margin added to the negative probability (then clamped to 1);
            ``None`` or ``0`` disables it.
        eps: lower clamp applied before taking logarithms.
        disable_torch_grad_focal_loss: when true, the focusing weight is
            computed without autograd, i.e. treated as a constant.
    """

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-6, disable_torch_grad_focal_loss=True):
        super(AsymmetricLoss, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps
        # Unused by forward(); kept so existing attribute access keeps working.
        self.softmax = nn.Softmax(dim=1)

    def _focusing_weight(self, xs_pos, xs_neg, y):
        """Return ``(1 - p_t) ** gamma_t`` for every element."""
        pt = xs_pos * y + xs_neg * (1 - y)  # pt = p if t > 0 else 1-p
        one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y)
        return torch.pow(1 - pt, one_sided_gamma)

    def forward(self, x, y):
        """Compute the summed asymmetric loss.

        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)

        Returns
        -------
        Scalar tensor: the negated sum of the weighted log-likelihood terms.
        """
        # Calculating Probabilities
        x_sigmoid = torch.sigmoid(x)
        xs_pos = x_sigmoid
        xs_neg = 1 - x_sigmoid

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            xs_neg = (xs_neg + self.clip).clamp(max=1)

        # Basic CE calculation
        los_pos = y * torch.log(xs_pos.clamp(min=self.eps))
        los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps))
        loss = los_pos + los_neg

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            if self.disable_torch_grad_focal_loss:
                # A scoped no_grad() restores whatever grad mode the caller had.
                # The previous set_grad_enabled(False)/(True) pair always left
                # autograd ON, silently undoing an enclosing torch.no_grad().
                with torch.no_grad():
                    one_sided_w = self._focusing_weight(xs_pos, xs_neg, y)
            else:
                one_sided_w = self._focusing_weight(xs_pos, xs_neg, y)
            loss = loss * one_sided_w

        return -loss.sum()

In [47]:
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

# --- Hyper-parameters -------------------------------------------------------
lr = 0.002
max_epochs = 50
warmup_epochs = 1
warmup_constant_lr = 1e-5

# --- Model: feature width is read from the first cached batch ----------------
input_size = train_features[0].shape[1]
hidden_size = 200
num_classes = 103 # len(labels_food)
model = clip_2fc(input_size, hidden_size, num_classes).to(device)

# --- Optimisation -----------------------------------------------------------
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = CosineAnnealingWarmRestarts(
    optimizer,
    T_0=warmup_epochs,
    T_mult=1,
    eta_min=warmup_constant_lr,
)

# Asymmetric loss with gamma_neg = 2 and gamma_pos = 1.
criterion = AsymmetricLoss(gamma_neg=2, gamma_pos=1)

In [18]:
# Train `model` on the cached CLIP features while maintaining a per-label weight
# vector `gamma` that rescales the text-similarity matrix used to mix the logits.
# This cell reads `model`, `optimizer`, `scheduler`, `criterion`, `text_features`,
# `train_features`, `train_labels` and `device`, which are created in other cells.
num_epochs = 250
len_labels = 103


# Start from uniform label weights that sum to 1.
gamma = torch.ones(len_labels) / len_labels
gamma = gamma.to(device)

# NOTE(review): `alpha` is allocated here but not read or written again in this cell.
alpha = np.zeros(num_epochs)

for epoch in range(num_epochs):
    
    epoch_loss = 0.0
    for features_batch, labels_batch in zip(train_features, train_labels):
        # Flatten features batch
        
    
        features_batch = features_batch.view(features_batch.size(0), -1).to(torch.float32)
        # Cut gamma off from the previous iteration's graph so backward() stays within this batch.
        gamma = gamma.detach()
        
        # Convert labels to tensor
        labels_tensor = labels_batch.type(torch.float32).to(device)#torch.tensor(labels_batch, dtype=torch.float32)#
        # Drop the singleton axis at dim 1 — presumably leaving (batch, labels); TODO confirm.
        labels_tensor = labels_tensor.squeeze(dim=1)
        # Forward pass
        outputs = model(features_batch.to(device))
        # The similarity matrix only depends on `text_features`, which this loop never changes;
        # it is nevertheless recomputed for every batch.
        similarity_text = (text_features @ text_features.T)
        normalized_similarity_text = F.normalize(similarity_text, p=2, dim=1)  # Normalize along the second dimension (rows)
        
        normalized_similarity_text = torch.clamp(normalized_similarity_text, min=0, max=1)  # Clamp values to be between 0 and 1
        # Broadcasting scales column j of the similarity matrix by gamma[j].
        normalized_similarity_with_gamma = normalized_similarity_text * gamma

        outputs_reshaped = outputs.unsqueeze(1)
        
        # pred[b, i] = sum_j outputs[b, j] * similarity[i, j] * gamma[j]
        result = torch.sum(outputs_reshaped * normalized_similarity_with_gamma.unsqueeze(0), dim =2)
        pred = result
        
        
        
#         labels_tensor_gamma = labels_tensor * 2 - 1
        # Scalar: gamma-weighted sum of sigmoid(pred) over all positive labels in the batch.
        r = torch.sum(gamma * labels_tensor * torch.sigmoid(pred))#/len(label_tensor_gamma)
#         r = torch.sum(gamma * labels_tensor_gamma * torch.tanh(pred)) / len(label_tensor_gamma)
        
#         print(r)
#         print('-'*25)
        
        # Looks like an AdaBoost-style step size — TODO confirm the intended formula.
        # NOTE(review): the log argument is non-positive or undefined once r >= 1.
        a = 0.5 * torch.log((1 + r) / (1 - r))

        # Multiplying by the label tensor turns gamma into one row per sample.
        gamma = gamma * torch.exp(-a * labels_tensor * torch.sigmoid(pred))
#         gamma = gamma * torch.exp(-a * labels_tensor_gamma * torch.tanh(pred))#torch.sigmoid(pred))
        sum_val = torch.sum(gamma)
        gamma = gamma / sum_val
        # Reduce over dim 0 with an element-wise minimum to get a single vector back.
        # After this reduction gamma is no longer guaranteed to sum to 1.
        gamma = torch.min(gamma, dim=0).values
        
#         print(gamma)
#         print('*'*25)
        
        
        # The loss uses `pred`, which was computed with the gamma from before this batch's update.
        loss = criterion(pred, labels_tensor)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
#         print("=="*50)
    
    # The scheduler advances once per epoch, not once per batch.
    scheduler.step()
    
    # The reported loss is the sum over batches, not a mean.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}")
#     if epoch_loss < best_loss:
#         best_loss = epoch_loss
#         best_model_state_dict = model.state_dict()
        
#     if (epoch + 1) % 100 == 0:
#         torch.save(best_model_state_dict, f"/home/samyakr2/food_seg/weights/best_epoch_{epoch+1+5000}.pth")


Epoch [1/250], Loss: 66155.56857299805
Epoch [2/250], Loss: 66150.37854003906
Epoch [3/250], Loss: 66135.43002319336
Epoch [4/250], Loss: 66123.06134033203
Epoch [5/250], Loss: 66112.13363647461
Epoch [6/250], Loss: 66102.05670166016
Epoch [7/250], Loss: 66092.50286865234
Epoch [8/250], Loss: 66083.2783203125
Epoch [9/250], Loss: 66074.26190185547
Epoch [10/250], Loss: 66065.3734741211
Epoch [11/250], Loss: 66056.55975341797
Epoch [12/250], Loss: 66047.78259277344
Epoch [13/250], Loss: 66039.01550292969
Epoch [14/250], Loss: 66030.23910522461
Epoch [15/250], Loss: 66021.43859863281
Epoch [16/250], Loss: 66012.60360717773
Epoch [17/250], Loss: 66003.72546386719
Epoch [18/250], Loss: 65994.7984008789
Epoch [19/250], Loss: 65985.81677246094
Epoch [20/250], Loss: 65976.77853393555
Epoch [21/250], Loss: 65967.67904663086
Epoch [22/250], Loss: 65958.51742553711
Epoch [23/250], Loss: 65949.29098510742
Epoch [24/250], Loss: 65939.9994506836
Epoch [25/250], Loss: 65930.64126586914
Epoch [26/250

Epoch [204/250], Loss: 63439.58142089844
Epoch [205/250], Loss: 63422.423400878906
Epoch [206/250], Loss: 63405.23944091797
Epoch [207/250], Loss: 63388.02951049805
Epoch [208/250], Loss: 63370.79364013672
Epoch [209/250], Loss: 63353.53286743164
Epoch [210/250], Loss: 63336.24639892578
Epoch [211/250], Loss: 63318.93444824219
Epoch [212/250], Loss: 63301.597229003906
Epoch [213/250], Loss: 63284.23501586914
Epoch [214/250], Loss: 63266.84811401367
Epoch [215/250], Loss: 63249.43566894531
Epoch [216/250], Loss: 63231.99899291992
Epoch [217/250], Loss: 63214.5373840332
Epoch [218/250], Loss: 63197.051330566406
Epoch [219/250], Loss: 63179.541595458984
Epoch [220/250], Loss: 63162.0075378418
Epoch [221/250], Loss: 63144.44869995117
Epoch [222/250], Loss: 63126.86633300781
Epoch [223/250], Loss: 63109.25988769531
Epoch [224/250], Loss: 63091.62857055664
Epoch [225/250], Loss: 63073.97393798828
Epoch [226/250], Loss: 63056.296295166016
Epoch [227/250], Loss: 63038.59469604492
Epoch [228/25

In [19]:
import numpy as np
from sklearn.metrics import average_precision_score

# Define a function for testing the model

def average_precision(output, target):
    """Average precision of one class.

    Args:
        output: 1-D array of scores (higher = more confident).
        target: 1-D array of labels; entries equal to 1 count as positives.

    Returns:
        Mean of precision@k over the ranks k that hold a positive, or 0.0 when
        there are no positives (the epsilon keeps the division finite).
    """
    epsilon = 1e-8

    # Rank examples from highest to lowest score.
    order = output.argsort()[::-1]
    ranks = np.arange(1, len(output) + 1, dtype=float)

    is_positive = target[order] == 1
    running_hits = np.cumsum(is_positive)
    n_positive = running_hits[-1]

    # precision@k only counts at ranks that actually hold a positive.
    hits_at_positive = np.where(is_positive, running_hits, 0)
    precision_sum = np.sum(hits_at_positive / ranks)

    return precision_sum / (n_positive + epsilon)


def mAP(targs, preds):
    """Mean of the per-class average precision.

    Args:
        targs: (N, K) array of binary targets.
        preds: (N, K) array of scores.

    Returns:
        The average over the K classes of ``average_precision``; ``0`` when
        ``preds`` is empty.
    """
    if np.size(preds) == 0:
        return 0

    # One average-precision value per class column.
    per_class = np.array(
        [average_precision(preds[:, k], targs[:, k]) for k in range(preds.shape[1])],
        dtype=float,
    )
    return per_class.mean()

def test_model(model,text_features,criterion, features_batches, labels_batches, device):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    all_labels = []
    all_outputs = []
    with torch.no_grad():  # Disable gradient computation
        for features_batch, labels_batch in zip(features_batches, labels_batches):
            # Move batch to device
            features_batch = features_batch.to(device).to(torch.float32)
            labels_tensor =  labels_batch.type(torch.float32).to(device)#torch.tensor(labels_batch, dtype=torch.float32).to(device)
            labels_tensor = labels_tensor.squeeze(dim=1)
            # Flatten features batch
            features_batch = features_batch.view(features_batch.size(0), -1)

            # Forward pass
            outputs = model(features_batch)
            similarity_text = (text_features @ text_features.T)
            
            normalized_similarity_text = F.normalize(similarity_text, p=2, dim=1)
            normalized_similarity_text = torch.clamp(normalized_similarity_text, min=0, max=1)  # Clamp values to be between 0 and 1
            normalized_similarity_with_gamma = normalized_similarity_text * gamma
#           
            outputs_reshaped = outputs.unsqueeze(1)
            result = torch.sum(outputs_reshaped * normalized_similarity_with_gamma.unsqueeze(0), dim =2)
            pred = result
        
            loss = criterion(pred, labels_tensor)
            test_loss += loss.item()

            # Convert outputs and labels to numpy arrays
            outputs_np = torch.sigmoid(pred).cpu().detach().numpy()
            labels_np = labels_tensor.cpu().detach().numpy()

            all_outputs.append(outputs_np)
            all_labels.append(labels_np)

    # Concatenate outputs and labels
    all_outputs = np.concatenate(all_outputs)
    all_labels = np.concatenate(all_labels)
#     print(all_labels)
    # Compute average precision score
    avg_precision = average_precision_score(all_labels, all_outputs, average='micro')
    answer_to_ = mAP(all_labels, all_outputs)
    # Average test loss
    avg_test_loss = test_loss / len(features_batches)
#     print(f"Test Loss: {avg_test_loss}")
    print(f"Average Precision Score: {avg_precision}")
    print(answer_to_)

# criterion = nn.BCELoss()  
# Evaluate the current `model` on the cached features/labels loaded from the
# *_test_img.pt / *_test_label.pt files and print the resulting metrics.
test_model(model, text_features, criterion, val_features, val_labels, device)

# for i in range (4900, 4901):
#     print(i)
#     best_model_state_dict = torch.load("/home/samyakr2/food_seg/weights/best_epoch_{}.pth".format(i))
#     model.load_state_dict(best_model_state_dict)

#     test_model(model, text_features, criterion, val_features, val_labels, device)


Average Precision Score: 0.0390861065527771
0.04106429523626541


In [20]:
# Display the per-label weight vector `gamma` as left behind by the training cell.
gamma

tensor([1.9715e-04, 5.2011e-03, 2.1730e-09, 1.5006e-05, 6.4267e-11, 1.0832e-03,
        3.4567e-03, 3.4142e-13, 8.6322e-12, 6.5685e-12, 3.9179e-07, 5.4561e-07,
        6.2882e-09, 7.4810e-09, 4.6902e-06, 2.2173e-04, 2.8009e-06, 2.7194e-04,
        6.8238e-04, 6.0110e-04, 1.4515e-04, 1.4466e-05, 2.2478e-03, 1.1130e-10,
        7.6886e-08, 2.0788e-03, 3.1588e-04, 2.9790e-06, 4.0096e-08, 8.7602e-13,
        2.9217e-09, 5.8194e-09, 1.5598e-05, 1.2778e-05, 5.4028e-05, 2.7427e-06,
        1.2055e-12, 6.1414e-05, 2.1950e-04, 6.0362e-08, 6.4709e-09, 2.5571e-05,
        1.8665e-04, 7.0709e-10, 1.9597e-04, 2.5266e-13, 3.1833e-13, 3.1732e-13,
        3.0857e-10, 1.0982e-09, 3.7097e-07, 2.8054e-13, 9.0279e-04, 1.8956e-11,
        1.3256e-04, 8.9791e-08, 3.5011e-06, 2.5095e-13, 3.4066e-12, 2.7109e-03,
        2.1469e-06, 3.9705e-04, 1.3815e-03, 1.4227e-08, 1.2180e-10, 8.3105e-13,
        2.3588e-12, 6.9830e-06, 4.3593e-04, 2.5264e-13, 3.4759e-07, 5.7132e-09,
        2.5071e-13, 6.5412e-03, 2.1085e-

### Training an MLP head on cached CLIP features (FoodSeg103)

In [19]:
class clip_2fc(nn.Module):
    """Two-layer MLP head: Linear (no bias) -> ReLU -> Dropout(0.4) -> Linear.

    NOTE: this reuses the name of the ``clip_2fc`` class defined earlier in
    the notebook and therefore replaces it once this cell has run.

    Args:
        input_dim: size of each input feature vector.
        hidden_dim: width of the hidden layer.
        output_dim: number of logits produced.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        # Layers are created in the original order so that seeded
        # initialisation consumes random numbers identically.
        hidden_projection = nn.Linear(input_dim, hidden_dim, bias=False)
        self.fc1 = nn.Sequential(hidden_projection)

        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.Softmax(dim=1)  # not used in forward()
        self.dropout = nn.Dropout(0.4)
        self.sigmoid = nn.Sigmoid()  # not used in forward()

    def forward(self, x):
        """Return raw logits; no sigmoid/softmax is applied."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.fc2(hidden)

In [20]:
import copy

from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import numpy as np
from sklearn.metrics import average_precision_score

# Train the clip_2fc head on the pre-batched `train_features` / `train_labels`
# and keep the weights of the epoch with the lowest summed training loss.
# `train_features`, `train_labels`, `device`, `clip_2fc` and `AsymmetricLoss`
# must already be defined in the kernel.

input_size = train_features[0].size(1)
hidden_size = 256  # width of the hidden layer
num_classes = 103  # number of output classes (hard-coded)

model = clip_2fc(input_size, hidden_size, num_classes).to(device)

lr = 0.002
warmup_epochs = 1
warmup_constant_lr = 1e-5

optimizer = torch.optim.SGD(model.parameters(), lr = lr)
# NOTE(review): with T_0 = 1 and one scheduler.step() per epoch the cosine
# cycle appears to restart every epoch, so the learning rate is reset to `lr`
# each time and never decays — confirm this is intended.
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=warmup_epochs, T_mult=1, eta_min=warmup_constant_lr)

# Presumably gamma_neg = 3, gamma_pos = 1 — confirm against the
# AsymmetricLoss definition (an older comment here said "Y_neg = 2").
criterion = AsymmetricLoss(3, 1)

# Training loop
best_model_state_dict = None
best_loss = float('inf')
num_epochs = 500

for epoch in range(num_epochs):
    epoch_loss = 0.0
    all_labels = []
    all_outputs = []

    for features_batch, labels_batch in zip(train_features, train_labels):
        # Flatten each sample to a vector and make sure it is float32.
        features_batch = features_batch.view(features_batch.size(0), -1).to(torch.float32)

        # Labels carry an extra singleton dimension at dim=1; drop it.
        labels_tensor = labels_batch.type(torch.float32)
        labels_tensor = labels_tensor.squeeze(dim=1)

        # Forward pass
        outputs = model(features_batch.to(device))

        # Compute loss
        loss = criterion(outputs, labels_tensor.to(device))

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Keep raw scores and labels to compute the epoch's training AP.
        outputs_np = outputs.cpu().detach().numpy()
        labels_np = labels_tensor.cpu().detach().numpy()
        all_outputs.append(outputs_np)
        all_labels.append(labels_np)

    scheduler.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}")

    all_outputs = np.concatenate(all_outputs)
    all_labels = np.concatenate(all_labels)

    # Per-class average precision on the training data (average=None).
    avg_precision = average_precision_score(all_labels, all_outputs, average=None)

    if epoch_loss < best_loss:
        best_loss = epoch_loss
        # state_dict() returns references to the live parameter tensors, so a
        # deep copy is required; without it the "best" snapshot would keep
        # changing with every later optimizer step.
        best_model_state_dict = copy.deepcopy(model.state_dict())
        avg_p = avg_precision
        # The per-class training AP of the best epoch is stored on disk.
        np.save('/home/samyakr2/SHOP/train_gamma.npy', avg_p )

    # Every 10 epochs, write the best snapshot seen so far.
    if (epoch + 1) % 10 == 0:
        torch.save(best_model_state_dict, f'/home/samyakr2/SHOP/weights/best_clip_{epoch + 1}_model.pth')

In [21]:
import numpy as np
from sklearn.metrics import average_precision_score


def average_precision(output, target):
    """Average precision of a single class from scores and binary targets.

    Examples are ranked by descending ``output``.  Precision@i is accumulated
    at every rank i whose target equals 1, and the sum is divided by the number
    of positives plus a small epsilon (so a class without positives gives 0.0).

    Args:
        output: numpy array of scores (expected to be 1-D).
        target: numpy array of labels with the same length; entries equal to 1
            count as positives.

    Returns:
        The average precision as a float.
    """
    eps = 1e-8

    # Ranking, highest score first.
    order = output.argsort()[::-1]
    is_positive = target[order] == 1

    # ranks[i] == i + 1: number of examples retrieved up to position i.
    ranks = np.arange(1, len(output) + 1, dtype=np.float64)

    running_hits = np.cumsum(is_positive)
    num_positives = running_hits[-1]

    # Precision only contributes at positions that hold a positive.
    hits_at_positives = np.where(is_positive, running_hits, 0)
    precision_sum = np.sum(hits_at_positives / ranks)

    return precision_sum / (num_positives + eps)


def mAP(targs, preds):
    """Mean of the per-class average precision.

    Args:
        targs: 2-D array of targets, indexed as ``targs[:, k]`` for class k.
        preds: 2-D array of scores with the same layout.

    Returns:
        The mean over classes of ``average_precision(preds[:, k], targs[:, k])``,
        or 0 when ``preds`` is empty.
    """
    if np.size(preds) == 0:
        return 0

    # One average-precision value per class column.
    per_class_ap = np.array([
        average_precision(preds[:, cls], targs[:, cls])
        for cls in range(preds.shape[1])
    ])
    return per_class_ap.mean()


# Define a function for testing the model
def test_model(model, criterion, features_batches, labels_batches, device):
    """Evaluate ``model`` on pre-batched features and print AP statistics.

    The model is switched to eval mode (and left in it).  Every batch is run
    under ``torch.no_grad()``; raw model outputs and labels are gathered over
    all batches and scored with scikit-learn's per-class
    ``average_precision_score`` and with the ``mAP`` helper.

    Args:
        model: module to evaluate.
        criterion: loss called as ``criterion(outputs, labels)``.
        features_batches: iterable (with ``len``) of feature tensors.
        labels_batches: iterable of label tensors with a singleton dimension
            at dim=1, which is squeezed away.
        device: device the batches are moved to.

    Returns:
        The per-class average precision array (``average=None``).
    """
    model.eval()
    running_loss = 0.0
    collected_scores, collected_targets = [], []

    with torch.no_grad():
        for batch_features, batch_labels in zip(features_batches, labels_batches):
            targets = batch_labels.type(torch.float32).to(device).squeeze(dim=1)

            # Flatten every sample to a float32 vector on the target device.
            inputs = batch_features.to(device)
            inputs = inputs.view(inputs.size(0), -1).to(torch.float32)

            scores = model(inputs)
            running_loss += criterion(scores, targets).item()

            collected_scores.append(scores.cpu().detach().numpy())
            collected_targets.append(targets.cpu().detach().numpy())

    scores_np = np.concatenate(collected_scores)
    targets_np = np.concatenate(collected_targets)

    avg_precision = average_precision_score(targets_np, scores_np, average=None)
    print(avg_precision.shape)
    answer_to_ = mAP(targets_np, scores_np)
    # Mean loss per batch; computed but not reported.
    avg_test_loss = running_loss / len(features_batches)
    print(f"Average Precision Score: {np.sort(avg_precision)}")
    print('mAP from ASL', answer_to_)
    return avg_precision

# Re-evaluate saved checkpoints on the validation batches.  With this range
# only the epoch-500 checkpoint is loaded.
for idx in range(500, 501, 10):
    weights_path = "/home/samyakr2/SHOP/weights/best_clip_{}_model.pth".format(idx)
    best_model_state_dict = torch.load(weights_path)
    model.load_state_dict(best_model_state_dict)
    test_model(model, criterion, val_features, val_labels, device)


(103,)
Average Precision Score: [0.00136986 0.00578993 0.00678111 0.00847336 0.01282051 0.02095238
 0.02464239 0.025      0.03395092 0.05473934 0.0645236  0.06596791
 0.07178542 0.07486631 0.07588336 0.08539285 0.12510152 0.15149363
 0.15447088 0.16210035 0.16671499 0.16696642 0.16949002 0.17234633
 0.18980296 0.20312516 0.2088276  0.22452355 0.22595115 0.22887079
 0.23622028 0.24575078 0.26797902 0.26938762 0.27631945 0.28448561
 0.28762665 0.2958524  0.29929771 0.30824569 0.31632006 0.32623679
 0.35233023 0.36568654 0.37032126 0.37661455 0.38211936 0.39241472
 0.40270365 0.42421301 0.42475103 0.44125499 0.44618137 0.45729493
 0.46189741 0.46870087 0.47222222 0.47538093 0.50655728 0.51424864
 0.53765019 0.5525414  0.55637779 0.56659405 0.57249389 0.59489728
 0.6051172  0.60819592 0.60857383 0.61128841 0.61846947 0.61943373
 0.62197336 0.64617177 0.64880952 0.66282791 0.6652537  0.66915115
 0.6716096  0.68157515 0.68191854 0.68498847 0.68723558 0.69222216
 0.72418845 0.72479237 0.72658

In [67]:
# Build `normalized_matrix`: a sparsified, diagonal-normalized version of the
# class co-occurrence matrix.  Rows of classes with low training AP keep their
# strongest co-occurrence entries; all other rows keep only the diagonal.
co_occurrence_matrix = torch.tensor(np.load('/home/samyakr2/SHOP/co_occurrence_matrix.npy')).to(device)
initial_ap = torch.tensor(np.load('/home/samyakr2/SHOP/train_gamma.npy')).to(device)

# Classes whose saved training AP is below 0.3.  flatten() (rather than a bare
# squeeze()) keeps the result 1-D even when exactly one class matches, so the
# loop below can always iterate over it.
indices_less_than_0_3 = torch.nonzero(torch.lt(initial_ap, 0.3)).flatten()
print(indices_less_than_0_3)
new_matrix = torch.zeros_like(co_occurrence_matrix)

# Number of largest entries kept per low-AP row.
top_neighbours = 1

for idx in indices_less_than_0_3:
    row = co_occurrence_matrix[idx]
    top_indices = torch.argsort(row, descending=True)[:top_neighbours]
    new_matrix[idx, top_indices] = row[top_indices]

# Every class keeps its own diagonal entry.  This also covers low-AP rows whose
# diagonal was not among the top entries; otherwise the normalization below
# would divide such a row by zero.
class_ids = torch.arange(co_occurrence_matrix.shape[0], device=co_occurrence_matrix.device)
new_matrix[class_ids, class_ids] = co_occurrence_matrix[class_ids, class_ids]

# Scale each row so that its diagonal entry becomes 1.
# NOTE(review): a class whose co-occurrence diagonal is itself 0 would still
# produce inf/nan here — confirm the input matrix has a non-zero diagonal.
diagonal_values = torch.diag(new_matrix)
normalized_matrix = new_matrix / diagonal_values[:, None]
print(normalized_matrix)

tensor([ 18,  19,  33,  37,  59,  62,  68,  75,  89, 101, 102],
       device='cuda:0')
tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]], device='cuda:0', dtype=torch.float64)


In [68]:
# best_model_state_dict = torch.load("/home/samyakr2/SHOP/weights/best_clip_500_model.pth")
# model.load_state_dict(best_model_state_dict)

# similarity_text = (text_features @ text_features.T)
# print(similarity_text)
# normalized_similarity_text = torch.clamp(similarity_text, min=0, max=1)

# indices_less_than_0_3 = torch.nonzero(torch.lt(initial_ap, 0.2)).squeeze()
# print(indices_less_than_0_3)
# new_matrix = torch.zeros_like(normalized_similarity_text)
# # new_matrix[indices_less_than_0_3] = co_occurrence_matrix[indices_less_than_0_3]

# top_neighbours = 50

# for idx in indices_less_than_0_3:
#     row = normalized_similarity_text[idx]
#     top_indices = torch.argsort(row, descending=True)[:top_neighbours]
#     new_matrix[idx, top_indices] = row[top_indices]

# for i in range(103):
#     if i not in indices_less_than_0_3:
#         new_matrix[i, i] = normalized_similarity_text[i, i]

# diagonal_values = torch.diag(new_matrix)
# normalized_matrix = new_matrix / diagonal_values[:, None]
# print(normalized_matrix)

In [81]:
# Evaluate the epoch-500 checkpoint on the validation batches after mixing the
# model's raw scores through `normalized_matrix` and scaling them per class by
# `initial_ap`.
# Hidden-state dependencies: `model`, `device`, `val_features`, `val_labels`,
# `normalized_matrix` and `initial_ap` are not defined in this cell and must
# already exist in the kernel.
best_model_state_dict = torch.load("/home/samyakr2/SHOP/weights/best_clip_500_model.pth")
model.load_state_dict(best_model_state_dict)

# --- Disabled experiment: text-feature similarity as the mixing matrix ---
# similarity_text = (text_features @ text_features.T)
# normalized_similarity_text = F.normalize(similarity_text, p=2, dim=1)
# normalized_similarity_text = torch.clamp(similarity_text, min=0, max=1)
# normalized_similarity_text = normalized_similarity_text / 10

# Set diagonals to 1
# y = normalized_similarity_text.fill_diagonal_(1)

# --- Disabled experiment: pre-normalized co-occurrence matrix from disk ---
# normalized_co_occurrence_matrix = np.load('/home/samyakr2/SHOP/normalized_foodseg103_co_occurrence_matrix.npy')
# normalized_co_occurrence_matrix = torch.tensor(normalized_co_occurrence_matrix).to(device)

# y = torch.eye(len(normalized_co_occurrence_matrix)).to(device)

# normalized_matrix

model.eval()  # Set the model to evaluation mode
# NOTE: test_loss is initialised but never updated in this cell.
test_loss = 0.0
# NOTE: the matrix is reloaded here, but the active code below only reads
# `normalized_matrix`; `co_occurrence_matrix` is referenced solely by the
# commented-out lines.
co_occurrence_matrix = torch.tensor(np.load('/home/samyakr2/SHOP/co_occurrence_matrix.npy')).to(device)
all_labels = []
all_outputs = []

with torch.no_grad(): # Disable gradient computation

        
# --- Disabled: rebuild normalized_matrix in place (threshold 0.1) ---
#         indices_less_than_0_3 = torch.nonzero(torch.lt(initial_ap, 0.1)).squeeze()
#         new_matrix = torch.zeros_like(co_occurrence_matrix)
#         new_matrix[indices_less_than_0_3] = co_occurrence_matrix[indices_less_than_0_3]

#         for i in range(103):
#             if i not in indices_less_than_0_3:
#                 new_matrix[i, i] = co_occurrence_matrix[i, i]

#         diagonal_values = torch.diag(new_matrix)
#         normalized_matrix = new_matrix / diagonal_values[:, None]

    for features_batch, labels_batch in zip(val_features, val_labels):
        # Move batch to device
        features_batch = features_batch.to(device)
        labels_tensor = labels_batch.type(torch.float32).to(device)
        # Labels carry a singleton dimension at dim=1; drop it.
        labels_tensor = labels_tensor.squeeze(dim=1)
        # Flatten every sample to a float32 vector.
        features_batch = features_batch.view(features_batch.size(0), -1).to(torch.float32)

            # Forward pass
        outputs = model(features_batch)
#         outputs = torch.sigmoid(outputs)
        # Insert a broadcast axis so the scores can be multiplied against
        # every row of the mixing matrix.
        outputs_reshaped = outputs.unsqueeze(1)
#         print(outputs_reshaped.shape)
#         print(normalized_similarity_text.unsqueeze(0).shape)
#         print((outputs_reshaped * normalized_similarity_text.unsqueeze(0)).shape)
#         result = torch.sum(outputs_reshaped * normalized_similarity_text.unsqueeze(0), dim =2)
#         result = torch.sum(outputs_reshaped * normalized_co_occurrence_matrix.unsqueeze(0), dim =2)
#         print('-'*5,torch.max(normalized_matrix), torch.min(normalized_matrix))
#         print(normalized_matrix.shape)
        # result[b, i] = sum_j outputs[b, j] * normalized_matrix[i, j]
        result = torch.sum(outputs_reshaped * normalized_matrix.unsqueeze(0), dim =2)
#         print(torch.max(result), torch.min(result))
#         normalized_matrix
        # Scale each class score by that class's entry in initial_ap.
        pred = initial_ap*result
        # Per-batch debug output: value range of the scaled and unscaled scores.
        print(torch.max(pred), torch.min(pred))
        print(torch.max(result), torch.min(result))
        print('-'*45)

        outputs_np = pred.cpu().detach().numpy()
        labels_np = labels_tensor.cpu().detach().numpy()

        all_outputs.append(outputs_np)
        all_labels.append(labels_np)


#         initial_ap*normalized_similarity_text*outputs
    all_outputs = np.concatenate(all_outputs)
    all_labels = np.concatenate(all_labels)

    # Per-class AP (printed sorted ascending), then the macro average.
    avg_precision = average_precision_score(all_labels, all_outputs, average=None) ## per class
    print(np.sort(avg_precision)) 
    avg_precision = average_precision_score(all_labels, all_outputs, average='macro') ## macro
    print(avg_precision)
    print('-'*25)
#         initial_ap = torch.tensor(avg_precision)


tensor(3.7761, device='cuda:0', dtype=torch.float64) tensor(-5.0739, device='cuda:0', dtype=torch.float64)
tensor(4.0475, device='cuda:0', dtype=torch.float64) tensor(-5.4273, device='cuda:0', dtype=torch.float64)
---------------------------------------------
tensor(3.1462, device='cuda:0', dtype=torch.float64) tensor(-4.5082, device='cuda:0', dtype=torch.float64)
tensor(3.4567, device='cuda:0', dtype=torch.float64) tensor(-4.8754, device='cuda:0', dtype=torch.float64)
---------------------------------------------
tensor(4.4273, device='cuda:0', dtype=torch.float64) tensor(-5.3983, device='cuda:0', dtype=torch.float64)
tensor(4.7456, device='cuda:0', dtype=torch.float64) tensor(-5.7744, device='cuda:0', dtype=torch.float64)
---------------------------------------------
tensor(2.9875, device='cuda:0', dtype=torch.float64) tensor(-4.6397, device='cuda:0', dtype=torch.float64)
tensor(3.2823, device='cuda:0', dtype=torch.float64) tensor(-4.9732, device='cuda:0', dtype=torch.float64)
------

In [61]:
# Load the saved per-class training AP and rank the classes by it, lowest
# first; `indices` holds the class ids in that order.
initial_ap = torch.tensor(np.load('/home/samyakr2/SHOP/train_gamma.npy')).to(device)
values, indices = torch.sort(initial_ap)
indices

tensor([ 19,  59, 102, 101,  37,  18,  89,  75,  62,  68,  33,  42,  15,  96,
          8,  97,  82,  49,  70,  99,  78,  90,  34,  22,   1,  50,  76,  21,
         85,  26, 100,  91,  77,  32,  46,  52,  44,  41,  14,  92,  93,   0,
          3,  71,  25,  39,  16,  27,  48,  43,  38,  56,  80,  74,  24,  51,
         67,  17,  60,  61,  10,  66,  87,  35,  30,  53,   4,  13,   6,  88,
         20,  95,  81,   7,  63,  47,  79,  28,  23,  45,   5,  40,  11,  84,
         98,   2,  54,   9,  69,  36,  94,  57,  72,  31,  55,  65,  64,  12,
         58,  29,  86,  83,  73], device='cuda:0')

In [56]:
co_occurrence_matrix[19, :]

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.2222, 0.0556, 0.0000, 0.1111, 0.0000,
        0.1111, 0.0000, 0.0000, 0.0556, 0.0556, 0.0000, 0.0000, 0.1111, 0.0000,
        0.0556, 1.0000, 0.0000, 0.0556, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0556, 0.0556, 0.0000, 0.0000, 0.1111, 0.0000, 0.0000,
        0.0556, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0556, 0.0000,
        0.0000, 0.0000, 0.0556, 0.0556, 0.0000, 0.0556, 0.1111, 0.0000, 0.0000,
        0.0000, 0.0556, 0.0000, 0.0556, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.1667, 0.0556, 0.0000, 0.0000, 0.0000, 0.0000, 0.0556,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.1111, 0.0000, 0.0000, 0.1111, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0556, 0.0000, 0.0000, 0.0556, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000], device='cuda:0', dtype=torch.float64)

In [57]:
co_occurrence_matrix[:, 19]

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0183, 0.1000, 0.0000, 0.0031, 0.0000,
        0.0060, 0.0000, 0.0000, 0.0079, 0.0078, 0.0000, 0.0000, 0.0286, 0.0000,
        0.0769, 1.0000, 0.0000, 0.0244, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0026, 0.0063, 0.0000, 0.0000, 0.0465, 0.0000, 0.0000,
        0.0024, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0055, 0.0000,
        0.0000, 0.0000, 0.0012, 0.0051, 0.0000, 0.0141, 0.0024, 0.0000, 0.0000,
        0.0000, 0.0101, 0.0000, 0.0010, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0065, 0.0025, 0.0000, 0.0000, 0.0000, 0.0000, 0.0064,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0023, 0.0000, 0.0000, 0.0028, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0027, 0.0000, 0.0000, 0.0040, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000], device='cuda:0', dtype=torch.float64)

In [29]:
co_occurrence_matrix[:,0][:10]

tensor([1.0000, 0.0000, 0.0000, 0.0204, 0.0413, 0.3000, 0.0000, 0.0000, 0.0072,
        0.0090], device='cuda:0', dtype=torch.float64)

In [63]:
indices

tensor([ 19,  59, 102, 101,  37,  18,  89,  75,  62,  68,  33,  42,  15,  96,
          8,  97,  82,  49,  70,  99,  78,  90,  34,  22,   1,  50,  76,  21,
         85,  26, 100,  91,  77,  32,  46,  52,  44,  41,  14,  92,  93,   0,
          3,  71,  25,  39,  16,  27,  48,  43,  38,  56,  80,  74,  24,  51,
         67,  17,  60,  61,  10,  66,  87,  35,  30,  53,   4,  13,   6,  88,
         20,  95,  81,   7,  63,  47,  79,  28,  23,  45,   5,  40,  11,  84,
         98,   2,  54,   9,  69,  36,  94,  57,  72,  31,  55,  65,  64,  12,
         58,  29,  86,  83,  73], device='cuda:0')

In [69]:
co_occurrence_matrix[4,19]

tensor(0.0183, device='cuda:0', dtype=torch.float64)

In [72]:
# Reload the normalized co-occurrence matrix and, for every class in the order
# given by `indices`, print the 5 largest entries of its column followed by the
# 5 largest entries of its row.  `y` / `y_as` keep the results of the last class.
co_occurrence_matrix = torch.tensor(np.load('/home/samyakr2/SHOP/normalized_foodseg103_co_occurrence_matrix.npy')).to(device)
for idx in indices:
    y_as = torch.topk(co_occurrence_matrix[:, idx], 5)
    y = torch.topk(co_occurrence_matrix[idx, :], 5)

    print(y_as)
    print(y)
    print('-'*45)

torch.return_types.topk(
values=tensor([1.0000, 0.1000, 0.0769, 0.0465, 0.0286], device='cuda:0',
       dtype=torch.float64),
indices=tensor([19,  5, 18, 33, 16], device='cuda:0'))
torch.return_types.topk(
values=tensor([1.0000, 0.2222, 0.1667, 0.1111, 0.1111], device='cuda:0',
       dtype=torch.float64),
indices=tensor([19,  4, 65,  9,  7], device='cuda:0'))
---------------------------------------------
torch.return_types.topk(
values=tensor([1.0000, 0.0120, 0.0081, 0.0064, 0.0054], device='cuda:0',
       dtype=torch.float64),
indices=tensor([59,  2, 40, 71, 81], device='cuda:0'))
torch.return_types.topk(
values=tensor([1.0000, 0.4000, 0.4000, 0.4000, 0.2000], device='cuda:0',
       dtype=torch.float64),
indices=tensor([59, 81,  2, 51, 40], device='cuda:0'))
---------------------------------------------
torch.return_types.topk(
values=tensor([1.0000, 0.1333, 0.1000, 0.0833, 0.0822], device='cuda:0',
       dtype=torch.float64),
indices=tensor([102,  78,   5,  67,  70], device='cud

In [74]:
# For each paired element of `y` and `y_as`, print its 5 largest entries and
# record the index of the second-largest one (position 1 of the top-k result).
# `y` and `y_as` must already exist in the kernel.
# NOTE(review): the cell that last assigns `y` / `y_as` in this notebook leaves
# single topk results in them, while this loop looks like it expects one vector
# per class — confirm which inputs are intended.
first_neighbours = []
# Must be initialised here: the loop appends to it, and without this line the
# cell raises NameError on a fresh kernel.
first_neighbours_as = []
for ele, ele_as in zip(y,y_as):
    # Compute each top-k once and reuse it for printing and for the index.
    top_ele = ele.topk(5)
    top_ele_as = ele_as.topk(5)
    print(top_ele)
    print(top_ele_as)
    print('-'*20)
    first_neighbours.append(top_ele.indices[1])
    first_neighbours_as.append(top_ele_as.indices[1])
    

In [40]:
FN =  torch.stack(first_neighbours)
FN

tensor([ 4, 81, 57, 83, 51, 65, 83, 83, 51, 93], device='cuda:0')

In [73]:
# For the first 10 classes listed in `FN`, print the 5 largest entries of the
# class's row and of the class's column in the co-occurrence matrix.
z = co_occurrence_matrix[FN][:10]
# Transpose before slicing so that z_as[k] is the column of class FN[k].
# Without the transpose, [:10] would pick the rows of classes 0-9 restricted to
# the FN columns, which does not pair up with z[k].
z_as = co_occurrence_matrix[:,FN].T[:10]
for ele, ele_as in zip(z,z_as):
    print(ele.topk(5))
    print(ele_as.topk(5))
    print('-'*25)