In [1]:
import numpy as np
import pandas as pd

import os
import json
import copy
import shutil
import PIL

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms, utils

from tqdm import tqdm
from core.dataset import COCODataset, COCODatasetWithID
from core.model import Model
from core.metrics import AccuracyLogger

Read in the training, validation, and test data

In [2]:
# Locations of the three fixation JSON files (train / validation / test).
paper_data_path_train = './debug/coco_search18_fixations_TP_train_split1.json'
paper_data_path_valid = './debug/coco_search18_fixations_TP_validation_split1.json'
paper_data_path_test = './debug/coco_search18_fixations_TP_test.json'

# Read every split into its own DataFrame, in train / validation / test order.
df_train, df_valid, df_test = (
    pd.read_json(split_path)
    for split_path in (paper_data_path_train, paper_data_path_valid, paper_data_path_test)
)

In [3]:
# Load the annotation JSON into `project_json`, then display its top-level keys.
with open('./COCOstuff/annotations_UnRel_compatible/train.json', "r") as f:
    project_json = json.load(f)
    
project_json.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [7]:
# NOTE(review): repeats the key listing already shown right after loading, and its
# execution count (In [7]) is out of sequence — looks like a leftover re-run.
project_json.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [4]:
# Number of entries under the 'images' key of the loaded annotation file.
len(project_json['images'])

118287

In [5]:
# Display the category records (each has 'supercategory', 'id' and 'name').
project_json['categories']

[{'supercategory': 'person', 'id': 1, 'name': 'person'},
 {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
 {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
 {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
 {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
 {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
 {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
 {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
 {'supercategory': 'animal', 'id': 17, 'name': 'cat'},
 {'supercategory': 'animal', 'id': 18, 'name': 'dog'},
 {'supercategory': 'animal', 'id': 19, 'name': 'horse'},
 {'supercategory': 'animal', 'id': 22, 'name': 'elephant'},
 {'supercategory': 'animal', 'id': 25, 'name': 'giraffe'},
 {'supercategory': 'accessory', 'id': 28, 'name': 'umbrella'},
 {'superc

In [6]:
# Preview the first five rows of the training table.
df_train.head(5)

Unnamed: 0,name,subject,task,condition,bbox,X,Y,T,length,correct,RT,split
0,000000478726.jpg,2,bottle,present,"[1063, 68, 95, 334]","[848.2, 799.2, 731.1, 1114.4, 1121.5]","[517.2, 476.2, 383.4, 271.1, 205.9]","[73, 193, 95, 635, 592]",5,1,1159,train
1,000000488390.jpg,2,bottle,present,"[351, 282, 111, 377]","[847.7, 846.3, 474.2, 394.3, 384.5]","[523.0, 401.5, 425.2, 504.0, 538.4]","[149, 107, 109, 232, 417]",5,1,945,train
2,000000058864.jpg,2,bottle,present,"[1072, 121, 127, 337]","[845.8, 858.1, 1106.5, 1183.2]","[530.7, 467.4, 319.7, 295.0]","[60, 109, 297, 480]",4,1,835,train
3,000000142970.jpg,2,bottle,present,"[981, 0, 216, 173]","[849.8, 862.4, 1168.7, 601.0, 1106.3, 1186.8, ...","[532.8, 366.1, 124.4, 51.5, 92.7, 79.9, 72.4, ...","[243, 105, 192, 147, 211, 356, 264, 339, 683, ...",10,1,2952,train
4,000000481185.jpg,2,bottle,present,"[239, 36, 98, 187]","[848.8, 773.9, 776.2, 876.1, 1074.5, 450.8, 61...","[536.8, 191.3, 135.9, 141.0, 119.0, 137.0, 202...","[262, 148, 67, 199, 109, 283, 154, 281, 354, 3...",11,0,3184,train


In [None]:
# NOTE(review): this rebinds `project_json`, replacing the annotation file loaded
# earlier. Every later use of `project_json` (e.g. as the template handed to
# `coco_dict`) sees this debug file instead — confirm that is intended.
with open("./debug/annotations.json", "r") as f:
    project_json = json.load(f)
    
project_json.keys()

In [None]:
# NOTE(review): displays the whole 'images' entry; if it is a long list, a slice
# such as project_json['images'][:5] would keep the output readable.
project_json['images']

In [None]:
%%capture
# Build a dataset from "./coco18_test.json" and the './coco18_test' folder, then
# wrap it in a DataLoader. `%%capture` suppresses this cell's output.
# NOTE(review): both inputs are only produced by the last cell of this notebook
# (dict2jsonsave(..., 'test') and copyfiles into './coco18_test'), so on a fresh
# checkout this cell cannot run before that one.
# NOTE(review): shuffle=True / drop_last=True are unusual for a test split — confirm.
dataset = COCODatasetWithID("./coco18_test.json", './coco18_test', image_size =(224,224))
dataloader = DataLoader(dataset, batch_size=32, num_workers=4, shuffle=True, pin_memory=True, drop_last=True)

In [None]:
# Unpack the first dataset item into its five fields.
image, target_image, bbox_relative, label, annotation_id = dataset[0]

In [None]:
# Inspect the dataset's image size; presumably reflects the image_size=(224, 224)
# argument given at construction — verify in core.dataset.
dataset.image_size

In [None]:
# Convert the sample to a PIL image and display it.
# NOTE(review): `plt` is never imported in the visible cells; this presumably needs
# `import matplotlib.pyplot as plt` in the import cell to run on a fresh kernel.
plt.imshow(transforms.ToPILImage()(image)).figure

In [None]:
# Print the non-image fields of the first sample.
print(bbox_relative, label, annotation_id)

In [None]:
# 20000 is the startingIdx used for the test split, i.e. the first test image id.
# presumably `id2file` maps an image id to its file name — verify in core.dataset.
dataset.id2file[20000]

In [None]:
# Display the category records of the currently loaded `project_json`.
project_json['categories']

In [None]:
# NOTE(review): displays the whole 'annotations' entry; consider showing only a
# small slice if it is a long list.
project_json['annotations']

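Extract per-image records (file name, task, bounding box) for a single subject, split into unique and duplicated file names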

In [None]:
def jsonfileprocessing(df):
    """Keep one subject's trials and split them by file-name uniqueness.

    Only rows with ``subject == 1`` are kept, and only the ``name``, ``task``
    and ``bbox`` columns are returned. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``name``, ``subject``, ``task`` and ``bbox``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_unique, df_duplicate)``: rows whose ``name`` occurs exactly once
        among the kept rows, and rows whose ``name`` occurs more than once.
        Row order is preserved and both frames get a fresh ``0..n-1`` index.
    """
    # A single .loc selection yields an independent frame, so no in-place
    # edits on a slice (and no SettingWithCopyWarning) are needed.
    trials = df.loc[df['subject'] == 1, ['name', 'task', 'bbox']].reset_index(drop=True)

    # keep=False flags every occurrence of a repeated name, not just the later ones.
    is_duplicate = trials['name'].duplicated(keep=False)

    df_duplicate = trials[is_duplicate].reset_index(drop=True)
    df_unique = trials[~is_duplicate].reset_index(drop=True)
    return df_unique, df_duplicate

Convert the COCO18 JSON file into a COCO-Stuff-style JSON file

In [None]:
def coco_dict(template_dict):
    """Return an empty skeleton that has the same keys as ``template_dict``.

    Each key maps to a new, empty container of the same type as the
    template's value (``[]`` for a list, ``{}`` for a dict). The template
    itself is not modified.

    The skeleton is built directly instead of deep-copying the template and
    clearing the copy, which would duplicate every record only to discard it.

    Parameters
    ----------
    template_dict : dict
        Mapping whose keys (and value types) define the skeleton.

    Returns
    -------
    dict
        New dict with the template's keys, in the same order, and empty values.
    """
    return {key: type(value)() for key, value in template_dict.items()}

In [None]:
def _coco18_entry(image_id, file_name, bbox, category_id):
    """Build the (annotation, image) record pair for a single image."""
    annotation = {
        'area': 99999,  # fixed placeholder; not derived from the bbox
        'iscrowd': 0,
        'bbox': bbox,
        'image_id': image_id,
        'category_id': category_id,
    }
    image = {
        'height': 1050,  # the same fixed size is recorded for every image
        'width': 1680,
        'file_name': file_name,
        'id': image_id,
    }
    return annotation, image


def coco18_json_construct(coco18_json, df, df_duplicate, info, categories, startingIdx=0):
    """Fill a COCO-style dict with one image and one annotation record per row.

    Rows of ``df`` are added first, followed by rows of ``df_duplicate``.
    Image ids are consecutive, starting at ``startingIdx``. They are assigned
    by row position rather than by the DataFrame index labels, so they stay
    unique even when a frame does not carry a fresh ``0..n-1`` index.

    Parameters
    ----------
    coco18_json : dict
        Target dict; must already hold lists under ``'annotations'`` and
        ``'images'``. It is modified in place.
    df : pandas.DataFrame
        Rows with columns ``name``, ``task`` and ``bbox``; the image file
        name is ``name``.
    df_duplicate : pandas.DataFrame
        Same columns; the image file name is ``task + name``.
    info : dict
        Stored under ``'info'``.
    categories : list of dict
        Stored under ``'categories'``; each entry needs ``'name'`` and ``'id'``.
    startingIdx : int, optional
        Id given to the first image (default 0).

    Returns
    -------
    dict
        The same ``coco18_json`` object that was passed in.

    Raises
    ------
    KeyError
        If a row's ``task`` is not the ``name`` of any entry in ``categories``.
    """
    coco18_json['info'] = info
    coco18_json['categories'] = categories
    category_ids = {category['name']: category['id'] for category in categories}

    next_id = startingIdx
    for frame, prefix_with_task in ((df, False), (df_duplicate, True)):
        for name, task, bbox in zip(frame['name'], frame['task'], frame['bbox']):
            file_name = task + name if prefix_with_task else name
            annotation, image = _coco18_entry(next_id, file_name, bbox, category_ids[task])
            coco18_json['annotations'].append(annotation)
            coco18_json['images'].append(image)
            next_id += 1

    return coco18_json

In [None]:
def _copy_if_missing(srcfile, destfile):
    """Copy ``srcfile`` to ``destfile`` unless the destination already exists."""
    if not os.path.exists(destfile):
        shutil.copy(srcfile, destfile)


def copyfiles(src, dest, df, df_duplicate):
    """Copy the images listed in ``df`` and ``df_duplicate`` into ``dest``.

    Source images are read from ``<src>/<task>/<name>``. Rows of ``df`` are
    copied to ``<dest>/<name>``; rows of ``df_duplicate`` are copied to
    ``<dest>/<task><name>`` so that one file name used by several tasks does
    not collide. Files that already exist at the destination are skipped.

    Parameters
    ----------
    src : str
        Root folder of the per-task image folders; a trailing slash is optional.
    dest : str
        Destination folder; created (with parents) if it does not exist.
    df : pandas.DataFrame
        Rows with columns ``name`` and ``task``.
    df_duplicate : pandas.DataFrame
        Rows with columns ``name`` and ``task``.
    """
    # shutil.copy does not create missing directories.
    os.makedirs(dest, exist_ok=True)

    for name, task in zip(df['name'], df['task']):
        _copy_if_missing(os.path.join(src, task, name), os.path.join(dest, name))

    for name, task in zip(df_duplicate['name'], df_duplicate['task']):
        _copy_if_missing(os.path.join(src, task, name), os.path.join(dest, task + name))
    

In [None]:
def dict2jsonsave(json_dict, type_data):
    """Serialise ``json_dict`` to ``coco18_<type_data>.json`` in the working directory.

    The dict is rendered as JSON text with a 4-space indent and written to
    the file, replacing any existing file of that name.
    """
    serialized = json.dumps(json_dict, indent = 4)
    target_path = f'coco18_{type_data}.json'
    with open(target_path, 'w') as handle:
        handle.write(serialized)

In [None]:
# Reduce each split to its (unique-name, duplicate-name) pair of frames.
# NOTE(review): the results overwrite df_train / df_valid / df_test, so this cell is
# not re-runnable — jsonfileprocessing needs the 'subject' and 'correct' columns,
# which its returned frames no longer have. Distinct output names would fix that.
df_train, df_train_duplicate = jsonfileprocessing(df_train)
df_valid, df_valid_duplicate = jsonfileprocessing(df_valid)
df_test, df_test_duplicate = jsonfileprocessing(df_test)

Process the training set

In [None]:
# Metadata and category list reused by the validation and test cells.
info = {'description': 'COCO18 search dataset'}

# The 18 search-target categories as (id, name) pairs.
# presumably the ids follow the COCO category ids — verify against the annotation file.
categories = [
    {'id': category_id, 'name': category_name}
    for category_id, category_name in (
        (3, 'car'),
        (13, 'stop sign'),
        (44, 'bottle'),
        (47, 'cup'),
        (48, 'fork'),
        (49, 'knife'),
        (51, 'bowl'),
        (62, 'chair'),
        (64, 'potted plant'),
        (70, 'toilet'),
        (72, 'tv'),
        (73, 'laptop'),
        (74, 'mouse'),
        (76, 'keyboard'),
        (78, 'microwave'),
        (79, 'oven'),
        (81, 'sink'),
        (85, 'clock'),
    )
]

source_folder = './coco18_images/'
destination_folder = './coco18_train'

# Build the training JSON (image ids start at the default 0), copy the images,
# then write the JSON file.
coco18_train_json = coco_dict(project_json)
coco18_train_json = coco18_json_construct(
    coco18_train_json, df_train, df_train_duplicate, info, categories
)
copyfiles(source_folder, destination_folder, df_train, df_train_duplicate)
dict2jsonsave(coco18_train_json, 'train')


Process the validation set

In [None]:
source_folder = './coco18_images/'
destination_folder = './coco18_valid'

# Validation split: image ids start at 10000.
coco18_valid_json = coco_dict(project_json)
coco18_valid_json = coco18_json_construct(
    coco18_valid_json, df_valid, df_valid_duplicate, info, categories,
    startingIdx=10000,
)
copyfiles(source_folder, destination_folder, df_valid, df_valid_duplicate)
dict2jsonsave(coco18_valid_json, 'valid')

In [None]:
source_folder = './coco18_images/'
destination_folder = './coco18_test'

# Test split: image ids start at 20000.
coco18_test_json = coco_dict(project_json)
coco18_test_json = coco18_json_construct(
    coco18_test_json, df_test, df_test_duplicate, info, categories,
    startingIdx=20000,
)
copyfiles(source_folder, destination_folder, df_test, df_test_duplicate)
dict2jsonsave(coco18_test_json, 'test')