In [None]:
# Mount Google Drive so the dataset, embeddings and output tensors under
# /content/gdrive/MyDrive are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# For CLIP model and cosine function
!pip install sentence-transformers --quiet
import sys
sys.path.append('/content/gdrive/MyDrive')

import sentence_transformers
from sentence_transformers import SentenceTransformer, util
import glob
import pickle
import zipfile
from collections import defaultdict
import tqdm.notebook as tq
from io import BytesIO

import torch
from torch import cuda
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'
# Bare expression: echoes the selected device as the cell output.
device

[K     |████████████████████████████████| 81kB 3.5MB/s 
[K     |████████████████████████████████| 2.1MB 9.3MB/s 
[K     |████████████████████████████████| 1.2MB 61.9MB/s 
[K     |████████████████████████████████| 3.3MB 61.9MB/s 
[K     |████████████████████████████████| 901kB 60.8MB/s 
[?25h  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone


'cpu'

In [None]:
from refer import REFER
import numpy as np
import skimage.io as sio
import matplotlib.pyplot as plt
import os
from PIL import Image

# Build the REFER index for the 'refcoco' dataset with the 'unc' split, reading from Drive.
# Author's timing note: 12 seconds on high ram, gpu
data_root = '/content/gdrive/MyDrive/coco' 
dataset = 'refcoco' 
splitBy = 'unc'
refer = REFER(data_root, dataset, splitBy)

# Creating sets of refs for train, eval, and testing.
# Note that EVAL_IDS is taken from the split named 'val'.
TRAIN_IDS = refer.getRefIds(split='train')
EVAL_IDS = refer.getRefIds(split='val')
TEST_IDS = refer.getRefIds(split='test')

def compute_cosine(eOne, eTwo):
    """Return the cosine-similarity scores between ``eOne`` and ``eTwo``.

    Thin wrapper: both arguments are forwarded to ``util.cos_sim`` and its
    result is handed back unchanged.
    """
    return util.cos_sim(eOne, eTwo)

loading dataset refcoco into memory...
creating index...
index created.
DONE (t=15.70s)


In [None]:
def check_cuda():
  """Return how many bytes on CUDA device 0 are reserved but not allocated.

  The value is ``torch.cuda.memory_reserved(0) - torch.cuda.memory_allocated(0)``,
  i.e. the free space inside the memory PyTorch has already reserved.

  Returns:
    int: the number of free bytes inside the reserved pool, or 0 when no CUDA
    device is available (nothing can be reserved on a CPU-only runtime).
  """
  # Guard first: querying device 0 on a CPU-only runtime raises instead of
  # reporting "nothing reserved".
  if not torch.cuda.is_available():
    return 0

  reserved = torch.cuda.memory_reserved(0)
  allocated = torch.cuda.memory_allocated(0)
  return reserved - allocated  # free inside reserved

# Create Positive Samples

In [None]:
def create_positive_samples(ref_split, main_img_features, ann_img_features, sentence_features, tensor_list):
  """Append one positive sample per (reference, sentence) pair to ``tensor_list``.

  Each reference id is looked up in the notebook-level ``refer.Refs`` index.
  Every sentence of the reference is paired with the reference's own
  annotation, and the sample is the concatenation (along dim 0) of:
  image embedding, annotation embedding, sentence embedding, flattened
  annotation positional features, image/sentence cosine and
  annotation/sentence cosine. The finished sample is moved to the CPU.

  References whose image or annotation features are missing are skipped, as
  are individual sentences whose embedding is missing.

  Args:
    ref_split: iterable of reference ids (keys of ``refer.Refs``).
    main_img_features: mapping image_id -> indexable whose item 0 is the
      image embedding tensor.
    ann_img_features: mapping ann_id -> indexable whose item 0 is the
      annotation embedding tensor and item 1 a NumPy array of positional
      features.
    sentence_features: mapping sent_id -> indexable whose item 0 is the
      sentence embedding tensor.
    tensor_list: list that receives the sample tensors (mutated in place).

  Returns:
    None. Results are delivered through ``tensor_list``.
  """
  for ref_id in tq.tqdm(ref_split):
    curr_ref = refer.Refs[ref_id]

    img_id = curr_ref['image_id']
    ann_id = curr_ref['ann_id']

    # Extract features from the appropriate dictionaries. Only "entry is
    # missing or unusable" errors are treated as skippable; a bare `except`
    # would also swallow KeyboardInterrupt and make the loop hard to stop.
    try:
      img_emb = main_img_features[img_id][0]
      ann_emb = ann_img_features[ann_id][0]
      ann_pos_feat = ann_img_features[ann_id][1]
    except (LookupError, TypeError):
      continue

    # Positional features go to the same device as the embeddings they are
    # concatenated with; torch.cat requires a single device.
    ann_pos_t = torch.flatten(torch.from_numpy(ann_pos_feat)).to(img_emb.device)

    # One positive sample per sentence that has an embedding.
    for s_id in curr_ref['sent_ids']:
      try:
        sent_embedding = sentence_features[s_id][0]
      except (LookupError, TypeError):
        # Sentence embedding not found. Skip this sentence.
        continue

      img_cos = torch.flatten(compute_cosine(img_emb, sent_embedding))
      ann_cos = torch.flatten(compute_cosine(ann_emb, sent_embedding))

      # All 6 features are joined into one tensor and kept on the CPU.
      sample = torch.cat((img_emb, ann_emb, sent_embedding, ann_pos_t, img_cos, ann_cos), 0)
      tensor_list.append(sample.to('cpu'))

In [None]:
# Build the positive samples for the train split; pos_size is the number of
# entries in all_samples after this call.
# NOTE(review): main_img_emb, ann_features, sentence_emb and all_samples are not
# defined in any visible cell of this notebook — presumably they came from a cell
# that was removed. Confirm where they are loaded before a Restart & Run All.
create_positive_samples(TRAIN_IDS, main_img_emb, ann_features, sentence_emb, all_samples) # len = 120264
pos_size = len(all_samples) # 120264 positive samples created.

HBox(children=(FloatProgress(value=0.0, max=42404.0), HTML(value='')))




# Negative Samples

In [None]:
def related_negative_samples(ref_split, main_img_features, ann_img_features, sentence_features, tensor_list, max_neg_anns=3):
  """Append negative samples built from other annotations of the same image.

  Each reference id is looked up in the notebook-level ``refer.Refs`` index.
  Its sentences are paired with annotations listed for the same image in
  ``refer.imgToAnns``, excluding the reference's own (matching) annotation.
  Each sample has the same layout as a positive sample: image embedding,
  annotation embedding, sentence embedding, flattened annotation positional
  features, image/sentence cosine and annotation/sentence cosine,
  concatenated along dim 0 and moved to the CPU.

  References without image features, sentences without an embedding and
  annotations without features are skipped. Skipped annotations do not count
  towards ``max_neg_anns``.

  Args:
    ref_split: iterable of reference ids (keys of ``refer.Refs``).
    main_img_features: mapping image_id -> indexable whose item 0 is the
      image embedding tensor.
    ann_img_features: mapping ann_id -> indexable whose item 0 is the
      annotation embedding tensor and item 1 a NumPy array of positional
      features.
    sentence_features: mapping sent_id -> indexable whose item 0 is the
      sentence embedding tensor.
    tensor_list: list that receives the sample tensors (mutated in place).
    max_neg_anns: maximum number of non-matching annotations used per
      reference. Defaults to 3, the limit that was previously hard-coded;
      ``None`` removes the limit.

  Returns:
    None. Results are delivered through ``tensor_list``.
  """
  for ref_id in tq.tqdm(ref_split):
    curr_ref = refer.Refs[ref_id]

    img_id = curr_ref['image_id']
    matching_ann_id = curr_ref['ann_id']

    # Get the main image embedding. Only "entry is missing or unusable" errors
    # are skippable; a bare `except` would also swallow KeyboardInterrupt.
    try:
      img_emb = main_img_features[img_id][0]
    except (LookupError, TypeError):
      continue

    # Load embeddings for the sentences tied to the current reference. The
    # image/sentence cosine does not depend on the annotation, so it is
    # computed once per sentence here instead of once per annotation.
    sent_feats = []
    for s_id in curr_ref['sent_ids']:
      try:
        sent_embedding = sentence_features[s_id][0]
      except (LookupError, TypeError):
        continue
      img_cos = torch.flatten(compute_cosine(img_emb, sent_embedding))
      sent_feats.append((sent_embedding, img_cos))

    ann_count = 0
    # Walk all annotations recorded for the current image.
    for related_ann in refer.imgToAnns[img_id]:
      if max_neg_anns is not None and ann_count >= max_neg_anns:
        break
      current_ann_id = related_ann['id']

      # Don't include the positive sample.
      if current_ann_id == matching_ann_id:
        continue

      # Attempt to pull out the annotation embedding and positional features.
      try:
        ann_emb = ann_img_features[current_ann_id][0]
        ann_pos_feat = ann_img_features[current_ann_id][1]
      except (LookupError, TypeError):
        continue

      # Positional features go to the same device as the embeddings they are
      # concatenated with; torch.cat requires a single device.
      ann_pos_t = torch.flatten(torch.from_numpy(ann_pos_feat)).to(img_emb.device)

      # Generate a negative sample for each sentence embedding.
      for sent_embedding, img_cos in sent_feats:
        ann_cos = torch.flatten(compute_cosine(ann_emb, sent_embedding))

        # All 6 features are joined into one tensor and kept on the CPU.
        sample = torch.cat((img_emb, ann_emb, sent_embedding, ann_pos_t, img_cos, ann_cos), 0)
        tensor_list.append(sample.to('cpu'))

      # Counts towards the per-reference limit even when no sentence was usable.
      ann_count += 1

In [None]:
# Append the train negatives to the same list, after the positives; neg_size is
# how many entries this call added.
related_negative_samples(TRAIN_IDS, main_img_emb, ann_features, sentence_emb, all_samples) 
neg_size = len(all_samples) - pos_size # 335668 negative samples created.
# Author's run notes: 4 minutes, 10.53 gb at end for 4 negative samples
# reached a high of 22.35/25 gb when saving tensors
# NOTE(review): the default limit in related_negative_samples is 3 annotations
# per reference, not 4 — confirm which limit the run above used.

HBox(children=(FloatProgress(value=0.0, max=42404.0), HTML(value='')))




In [None]:
import pickle

# Save the list of train sample tensors (all_samples) to Drive as a pickle.
with open('/content/gdrive/MyDrive/sample_list/train_data.pickle', 'wb') as handle:
  pickle.dump(all_samples, handle)

# To reload the list later:
#with open('/content/gdrive/MyDrive/sample_list/train_data.pickle', 'rb') as handle:
 # all_samples = pickle.load(handle)

# Test Set Sample Creation





In [None]:
# LOAD TEST EMBEDDINGS AND CREATE SAMPLES
# Reads the pickled image, sentence and annotation embeddings for the test split.
# NOTE: pickle.load can execute code embedded in the file; only load pickles
# that were produced by a trusted source.

with open('/content/gdrive/MyDrive/embeddings/test_img_embeddings.pickle', 'rb') as handle:
  test_img_emb = pickle.load(handle)

with open('/content/gdrive/MyDrive/embeddings/test_sentence_embeddings.pickle', 'rb') as handle:
  test_sent_emb = pickle.load(handle)

with open('/content/gdrive/MyDrive/embeddings/test_ann_embeddings.pickle', 'rb') as handle:
  test_ann_emb = pickle.load(handle)
  
# Accumulator for the test sample tensors (positives first, then negatives).
test_samples = []

In [None]:
# Test Positive Samples
# Fills test_samples in place; the count is kept because the label vector is
# later built from it.
create_positive_samples(TEST_IDS, test_img_emb, test_ann_emb, test_sent_emb, test_samples) 
test_pos_size = len(test_samples)
test_pos_size # 10730

HBox(children=(FloatProgress(value=0.0, max=3785.0), HTML(value='')))




10730

In [None]:
# Test Negative Samples
# Appended after the positives, so the negative count is the growth of the list.
related_negative_samples(TEST_IDS, test_img_emb, test_ann_emb, test_sent_emb, test_samples) 
test_neg_size = len(test_samples) - test_pos_size
test_neg_size # 30121

HBox(children=(FloatProgress(value=0.0, max=3785.0), HTML(value='')))




30121

In [None]:
# Test sample list to Tensor: stack the per-sample tensors along a new first dim.
test_x = torch.stack(test_samples)

# Create label vectors. Labels follow the order in which samples were appended:
# positives first (1), then negatives (0).
test_y = torch.ones(test_pos_size)
y_neg = torch.zeros(test_neg_size)
test_y = torch.cat((test_y, y_neg), 0)

# Check consistency before anything is written, so a stale count (for example
# from re-running a sample-creation cell) cannot produce mismatched files.
assert test_x.shape[0] == test_y.shape[0], "test_x and test_y disagree on the number of samples"

torch.save(test_x, '/content/gdrive/MyDrive/Final_Tensors/test_x.pt')
torch.save(test_y, '/content/gdrive/MyDrive/Final_Tensors/test_y.pt')

# Eval Set Sample Creation

In [None]:
# LOAD EVAL EMBEDDINGS AND CREATE SAMPLES
# Reads the pickled image, sentence and annotation embeddings for the eval split.
# NOTE: pickle.load can execute code embedded in the file; only load pickles
# that were produced by a trusted source.

with open('/content/gdrive/MyDrive/embeddings/eval_img_embeddings.pickle', 'rb') as handle:
  eval_img_emb = pickle.load(handle)

with open('/content/gdrive/MyDrive/embeddings/eval_sentence_embeddings.pickle', 'rb') as handle:
  eval_sent_emb = pickle.load(handle)

with open('/content/gdrive/MyDrive/embeddings/eval_ann_embeddings.pickle', 'rb') as handle:
  eval_ann_emb = pickle.load(handle)
  
# Accumulator for the eval sample tensors (positives first, then negatives).
eval_samples = []

In [None]:
# Eval Positive Samples
# Fills eval_samples in place; the count is kept because the label vector is
# later built from it.
create_positive_samples(EVAL_IDS, eval_img_emb, eval_ann_emb, eval_sent_emb, eval_samples) 
eval_pos_size = len(eval_samples)
eval_pos_size # 10829

HBox(children=(FloatProgress(value=0.0, max=3811.0), HTML(value='')))




10829

In [None]:
# Eval Negative Samples
# Appended after the positives, so the negative count is the growth of the list.
related_negative_samples(EVAL_IDS, eval_img_emb, eval_ann_emb, eval_sent_emb, eval_samples)
eval_neg_size = len(eval_samples) - eval_pos_size
eval_neg_size # 30530

HBox(children=(FloatProgress(value=0.0, max=3811.0), HTML(value='')))




30530

In [None]:
# Eval sample list to Tensor: stack the per-sample tensors along a new first dim.
eval_x = torch.stack(eval_samples)

# Create label vectors. Labels follow the order in which samples were appended:
# positives first (1), then negatives (0).
eval_y = torch.ones(eval_pos_size)
y_neg = torch.zeros(eval_neg_size)
eval_y = torch.cat((eval_y, y_neg), 0)
print(eval_y.shape)

# Check consistency before anything is written, so a stale count (for example
# from re-running a sample-creation cell) cannot produce mismatched files.
assert eval_x.shape[0] == eval_y.shape[0], "eval_x and eval_y disagree on the number of samples"

torch.save(eval_x, '/content/gdrive/MyDrive/Final_Tensors/eval_x.pt')
torch.save(eval_y, '/content/gdrive/MyDrive/Final_Tensors/eval_y.pt')

torch.Size([41359])


In [None]:
# Display both shapes side by side; their first dimensions should match.
eval_x.shape, eval_y.shape

(torch.Size([41359, 1545]), torch.Size([41359]))

# Creating Train Label Vectors

In [None]:
# Load files
# NOTE(review): no visible cell in this notebook writes sample_list/train_x.pt
# (the train samples are only pickled as a list to train_data.pickle) —
# presumably the list was stacked and saved elsewhere; confirm.
train_x = torch.load('/content/gdrive/MyDrive/sample_list/train_x.pt')
train_x.shape

torch.Size([455932, 1545])

In [None]:
# Create label vectors
# Sample counts recorded from the train sample-creation run (positives were
# appended first, then negatives).
TRAIN_POS_COUNT = 120264
TRAIN_NEG_COUNT = 335668

train_y = torch.ones(TRAIN_POS_COUNT)
train_y_neg = torch.zeros(TRAIN_NEG_COUNT)
train_y = torch.cat((train_y, train_y_neg), 0)

# The counts above are hard-coded, so verify them against the loaded samples
# before writing labels that might not line up with train_x.
assert train_y.shape[0] == train_x.shape[0], "train_y length does not match the number of rows in train_x"

torch.save(train_y, '/content/gdrive/MyDrive/Final_Tensors/train_y.pt')

In [None]:
# Display the label vector's shape. The trailing comma makes this a 1-tuple,
# so the shape is shown wrapped in parentheses.
train_y.shape,

(torch.Size([455932]),)