In [55]:
import numpy as np
import pandas as pd

import os
import json
import copy
import shutil
import PIL
from PIL import Image

In [57]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms, utils
from torchvision.transforms.functional import to_tensor, normalize

from tqdm import tqdm
from core.dataset import COCODataset, COCODatasetWithID
from core.model import Model
from core.metrics import AccuracyLogger

Read in the training, validation, and test data.

In [13]:
# Load the COCO-Search18 test fixation records (a JSON file) into a DataFrame.
paper_data_path_test = './debug/coco_search18_fixations_TP_test.json'
df_test = pd.read_json(paper_data_path_test)

In [17]:
# Build a combined "<name><task>" key so distinct (image, task) pairs can be counted.
# This adds a column to df_test in place.
# NOTE(review): the renamed image files written later use the opposite order ("<task><name>").
df_test['new_name'] = df_test['name'] + df_test['task']

In [18]:
len(df_test.new_name.unique())

612

In [15]:
len(df_test.name.unique())

586

In [None]:
# Load the annotation file found under annotations_UnRel_compatible/train.json
# (presumably the COCO-Stuff training annotations -- confirm) to inspect its layout.
with open('./COCOstuff/annotations_UnRel_compatible/train.json', "r") as f:
    project_json = json.load(f)
    
# Display the top-level keys of the loaded annotation dictionary.
project_json.keys()

In [None]:
project_json['categories']

In [None]:
df_train.head(5)

In [None]:
# Load ./debug/annotations.json. This rebinds project_json, replacing whatever
# was stored under that name before; later cells pass project_json to coco_dict
# as the template whose top-level keys are reused.
with open("./debug/annotations.json", "r") as f:
    project_json = json.load(f)
    
# Display the top-level keys of the loaded annotation dictionary.
project_json.keys()

In [21]:
# Build the COCO18 test dataset from the generated annotation file and image folder.
dataset = COCODatasetWithID(
    "./coco18/coco18_test.json",
    './coco18/coco18_test',
    image_size=(224, 224),
)

# Batch the dataset for iteration.
# NOTE(review): shuffle=True and drop_last=True are unusual for a test split; with
# the 612 annotations reported below and batch_size=32 the final partial batch
# (4 samples) would be dropped -- confirm this is intended.
dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    drop_last=True,
)

-------------------------------
Annotation Counts
-------------------------------
chair                        50
fork                         46
sink                         55
tv                           56
bowl                         28
car                          20
clock                        23
cup                          55
keyboard                     36
knife                        28
laptop                       24
mouse                        21
oven                         20
potted plant                 30
toilet                       31
bottle                       33
stop sign                    25
microwave                    31
Total                       612
-------------------------------



In [23]:
image, target_image, bbox_relative, label, annotation_id = dataset[0]

In [39]:
dataset.id2file[dataset.annotations[0]['image_id']]

'./coco18/coco18_test/000000340934.jpg'

In [58]:
# Open the sample test image, force three RGB channels, and convert it to a tensor.
image = to_tensor(
    Image.open('./coco18/coco18_test/000000340934.jpg').convert("RGB")
)

In [59]:
image.shape

torch.Size([3, 1050, 1680])

In [50]:
dataset.idx2label[label]

'chair'

In [53]:
dataset.idx2label

{0: 'bottle',
 1: 'bowl',
 2: 'car',
 3: 'chair',
 4: 'clock',
 5: 'cup',
 6: 'fork',
 7: 'keyboard',
 8: 'knife',
 9: 'laptop',
 10: 'microwave',
 11: 'mouse',
 12: 'oven',
 13: 'potted plant',
 14: 'sink',
 15: 'stop sign',
 16: 'toilet',
 17: 'tv'}

Extract the per-image records (name, task, bbox) for one subject and split them into unique and duplicated file names

In [None]:
def jsonfileprocessing(df, subject=1):
    """Keep one subject's records and split them by whether the file name repeats.

    The input frame is never modified: every step returns a new frame, which
    avoids the in-place ``drop`` / ``reset_index`` calls on derived frames that
    trigger pandas' SettingWithCopyWarning.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``name``, ``subject``, ``task``, ``bbox`` and
        ``correct``; any other columns are ignored.
    subject : optional
        Value of the ``subject`` column to keep. Defaults to ``1``, the value
        that used to be hard-coded.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_unique, df_duplicate)``. Both have the columns ``name``, ``task``
        and ``bbox`` and a fresh 0..n-1 index. ``df_unique`` holds rows whose
        ``name`` occurs exactly once among the kept rows; ``df_duplicate``
        holds every row whose ``name`` occurs more than once.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    records = (
        df[['name', 'subject', 'task', 'bbox', 'correct']]
        .loc[lambda frame: frame['subject'] == subject]
        .drop(columns=['subject', 'correct'])
        .reset_index(drop=True)
    )

    # File names that appear on more than one of the kept rows.
    name_counts = records['name'].value_counts()
    repeated_names = name_counts[name_counts > 1].index
    is_repeated = records['name'].isin(repeated_names)

    df_duplicate = records[is_repeated].reset_index(drop=True)
    df_unique = records[~is_repeated].reset_index(drop=True)
    return df_unique, df_duplicate

Convert the COCO18 JSON file into a COCO-Stuff-style JSON file

In [None]:
def coco_dict(template_dict):
    """Return an empty skeleton with the same top-level keys as ``template_dict``.

    Each key is mapped to a fresh, empty value of the same type as in the
    template (``list`` -> ``[]``, ``dict`` -> ``{}``, ``str`` -> ``''`` ...).
    The template is neither copied nor modified, so a large annotation file
    is not duplicated in memory just to be emptied again.

    Parameters
    ----------
    template_dict : dict
        Dictionary whose top-level keys should be reused. Values are expected
        to be JSON-style built-ins, i.e. types that can be constructed with no
        arguments.

    Returns
    -------
    dict
        New dictionary with the template's keys and empty values.
    """
    return {key: type(value)() for key, value in template_dict.items()}

In [None]:
def _coco18_entry(image_id, file_name, bbox, category_id):
    """Build the paired (annotation, image) records for a single image id."""
    annotation = {
        'area': 99999,  # fixed placeholder value; it is not computed from bbox
        'iscrowd': 0,
        'bbox': bbox,
        'image_id': image_id,
        'category_id': category_id,
    }
    image = {
        # Every image is recorded with the same fixed size.
        'height': 1050,
        'width': 1680,
        'file_name': file_name,
        'id': image_id,
    }
    return annotation, image


def coco18_json_construct(coco18_json, df, df_duplicate, info, categories, startingIdx=0):
    """Fill ``coco18_json`` with one image record and one annotation per row.

    Rows of ``df`` keep their file name; rows of ``df_duplicate`` get the file
    name ``task + name`` so that images shared between tasks stay distinct.
    Image ids are assigned consecutively from ``startingIdx`` in iteration
    order (``df`` first, then ``df_duplicate``). They do not depend on the
    frames' index labels, so they stay unique for frames whose index is not
    0..n-1. For frames with the default index the ids are the same as before.

    NOTE(review): annotation records carry no ``id`` of their own -- confirm
    that the consumer of the JSON does not require one.

    Parameters
    ----------
    coco18_json : dict
        Dictionary to fill; modified in place. ``images`` and ``annotations``
        lists are created if absent, otherwise appended to.
    df : pandas.DataFrame
        Rows with the columns ``name``, ``task`` and ``bbox`` (unique names).
    df_duplicate : pandas.DataFrame
        Same columns, for file names that occur under several tasks.
    info : dict
        Stored as-is under ``coco18_json['info']``.
    categories : list of dict
        Entries with ``id`` and ``name``; stored as-is under
        ``coco18_json['categories']`` and used to map ``task`` to a category id.
    startingIdx : int, optional
        First image id to assign (default ``0``).

    Returns
    -------
    dict
        The same ``coco18_json`` object.

    Raises
    ------
    KeyError
        If a row's ``task`` is not the ``name`` of any entry in ``categories``.
    """
    coco18_json['info'] = info
    coco18_json['categories'] = categories
    annotations = coco18_json.setdefault('annotations', [])
    images = coco18_json.setdefault('images', [])
    category_ids = {category['name']: category['id'] for category in categories}

    next_id = startingIdx
    for frame, prefix_with_task in ((df, False), (df_duplicate, True)):
        for _, row in frame.iterrows():
            if prefix_with_task:
                file_name = row['task'] + row['name']
            else:
                file_name = row['name']
            annotation, image = _coco18_entry(
                next_id, file_name, row['bbox'], category_ids[row['task']]
            )
            annotations.append(annotation)
            images.append(image)
            next_id += 1

    return coco18_json

In [None]:
def copyfiles(src, dest, df, df_duplicate):
    """Copy the images listed in ``df`` and ``df_duplicate`` from ``src`` into ``dest``.

    Source images are read from ``<src>/<task>/<name>``. Rows of ``df`` are
    copied under their own name; rows of ``df_duplicate`` are copied as
    ``<task><name>`` so that an image used by several tasks gets one file per
    task. Files that already exist in ``dest`` are left untouched, so the
    function can be re-run safely.

    Paths are built with ``os.path.join``, so ``src`` works with or without a
    trailing separator, and ``dest`` is created when it does not exist yet.

    Parameters
    ----------
    src : str
        Directory that contains one sub-directory per task.
    dest : str
        Directory that receives the copies.
    df : pandas.DataFrame
        Rows with the columns ``name`` and ``task`` (unique file names).
    df_duplicate : pandas.DataFrame
        Same columns, for file names that occur under several tasks.

    Raises
    ------
    FileNotFoundError
        If a source image does not exist.
    """
    # sourcery skip: use-fstring-for-concatenation
    os.makedirs(dest, exist_ok=True)

    for frame, prefix_with_task in ((df, False), (df_duplicate, True)):
        for _, row in frame.iterrows():
            source_path = os.path.join(src, row['task'], row['name'])
            if prefix_with_task:
                target_name = row['task'] + row['name']
            else:
                target_name = row['name']
            target_path = os.path.join(dest, target_name)
            if not os.path.exists(target_path):
                shutil.copy(source_path, target_path)
    

In [None]:
def dict2jsonsave(json_dict, type_data):
    """Write ``json_dict`` to ``coco18_<type_data>.json`` in the working directory.

    The dictionary is serialised with a 4-space indent before the file is
    opened, so a serialisation error leaves any existing file untouched.

    Parameters
    ----------
    json_dict : dict
        JSON-serialisable content to store.
    type_data : str
        Split label that is inserted into the file name (e.g. ``'train'``).
    """
    serialized = json.dumps(json_dict, indent=4)
    target_path = 'coco18_{}.json'.format(type_data)
    with open(target_path, 'w') as handle:
        handle.write(serialized)

In [None]:
# Reduce each split to the kept subject's records and separate rows with a unique
# file name from rows whose file name repeats.
# Each df_* name is rebound to the reduced "unique" frame, so this cell cannot be
# run twice: the reduced frames no longer carry the 'subject' column that
# jsonfileprocessing filters on.
# NOTE(review): no visible cell creates df_train or df_valid -- load them before
# running this cell.
df_train, df_train_duplicate = jsonfileprocessing(df_train)
df_valid, df_valid_duplicate = jsonfileprocessing(df_valid)
df_test, df_test_duplicate = jsonfileprocessing(df_test)

Process the training set

In [None]:
# Metadata shared by the train, validation and test exports.
info = {'description': 'COCO18 search dataset'}

# The 18 search targets as (category id, name) pairs.
# NOTE(review): the ids look like the standard COCO category ids -- confirm.
categories = [
    {'id': category_id, 'name': category_name}
    for category_id, category_name in (
        (44, 'bottle'),
        (51, 'bowl'),
        (3, 'car'),
        (62, 'chair'),
        (85, 'clock'),
        (47, 'cup'),
        (48, 'fork'),
        (76, 'keyboard'),
        (49, 'knife'),
        (73, 'laptop'),
        (78, 'microwave'),
        (74, 'mouse'),
        (79, 'oven'),
        (64, 'potted plant'),
        (81, 'sink'),
        (13, 'stop sign'),
        (70, 'toilet'),
        (72, 'tv'),
    )
]

source_folder = './coco18_images/'
destination_folder = './coco18_train'

# Start from an empty skeleton of the template and fill it; training ids start at 0.
coco18_train_json = coco18_json_construct(
    coco_dict(project_json),
    df_train,
    df_train_duplicate,
    info,
    categories,
)

# Copy the training images out of the per-task source folders, then write
# coco18_train.json into the working directory.
copyfiles(source_folder, destination_folder, df_train, df_train_duplicate)
dict2jsonsave(coco18_train_json, 'train')


Process the validation set

In [None]:
source_folder = './coco18_images/'
destination_folder = './coco18_valid'

# Validation image ids start at 10000, away from the training ids that start at 0.
coco18_valid_json = coco18_json_construct(
    coco_dict(project_json),
    df_valid,
    df_valid_duplicate,
    info,
    categories,
    10000,
)

# Copy the validation images, then write coco18_valid.json into the working directory.
copyfiles(source_folder, destination_folder, df_valid, df_valid_duplicate)
dict2jsonsave(coco18_valid_json, 'valid')

In [None]:
source_folder = './coco18_images/'
destination_folder = './coco18_test'

# Test image ids start at 20000, away from the training (0) and validation (10000) ranges.
coco18_test_json = coco18_json_construct(
    coco_dict(project_json),
    df_test,
    df_test_duplicate,
    info,
    categories,
    20000,
)

# Copy the test images, then write coco18_test.json into the working directory.
copyfiles(source_folder, destination_folder, df_test, df_test_duplicate)
dict2jsonsave(coco18_test_json, 'test')