In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
import cv2
from tqdm import tqdm

In [3]:
# Anchor every dataset location on the notebook's working directory.
# Backslashes in the working directory are replaced with forward slashes so
# that the string concatenations below produce uniformly '/'-separated paths.
CURRENT_PATH = os.getcwd().replace('\\', '/')

# Raw dataset: annotation JSON folder plus one image folder per split.
DATASET_JSON_PATH = CURRENT_PATH + '/Dataset/annotations_trainval2017/annotations/'
DATASET_TRAIN_IMAGES_PATH = CURRENT_PATH + '/Dataset/train2017'
DATASET_VAL_IMAGES_PATH = CURRENT_PATH + '/Dataset/val2017'
DATASET_TEST_IMAGES_PATH = CURRENT_PATH + '/Dataset/test2017'

# Echo the resolved locations so a wrong working directory is spotted early.
for _label, _location in (
    ('CURRENT_PATH: ', CURRENT_PATH),
    ('DATASET_JSON_PATH: ', DATASET_JSON_PATH),
    ('DATASET_TRAIN_IMAGES_PATH: ', DATASET_TRAIN_IMAGES_PATH),
    ('DATASET_VAL_IMAGES_PATH: ', DATASET_VAL_IMAGES_PATH),
    ('DATASET_TEST_IMAGES_PATH: ', DATASET_TEST_IMAGES_PATH),
):
    print(_label, _location)

CURRENT_PATH:  c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer
DATASET_JSON_PATH:  c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer/Dataset/annotations_trainval2017/annotations/
DATASET_TRAIN_IMAGES_PATH:  c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer/Dataset/train2017
DATASET_VAL_IMAGES_PATH:  c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer/Dataset/val2017
DATASET_TEST_IMAGES_PATH:  c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer/Dataset/test2017


In [4]:
# Output folders for the preprocessed dataset, all under
# <working directory>/ProcessedDataset.
IMAGES_PATH, MASKS_PATH, INSTANCES_PATH = (
    CURRENT_PATH + '/ProcessedDataset/' + leaf
    for leaf in ('images', 'masks', 'instances')
)

# One path per line, in the order images / masks / instances.
print(IMAGES_PATH, MASKS_PATH, INSTANCES_PATH, sep='\n')

c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer/ProcessedDataset/images
c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer/ProcessedDataset/masks
c:/Users/david/OneDrive/Documents/GitHub/TextualSegFormer/ProcessedDataset/instances


In [5]:
# Annotation files inside DATASET_JSON_PATH, one (train, val) pair per
# annotation type. DATASET_JSON_PATH already ends with '/', so plain
# concatenation yields the full file path.
captions_path_train, captions_path_val = (
    DATASET_JSON_PATH + file_name
    for file_name in ('captions_train2017.json', 'captions_val2017.json')
)

instances_path_train, instances_path_val = (
    DATASET_JSON_PATH + file_name
    for file_name in ('instances_train2017.json', 'instances_val2017.json')
)

keypoints_path_train, keypoints_path_val = (
    DATASET_JSON_PATH + file_name
    for file_name in ('person_keypoints_train2017.json', 'person_keypoints_val2017.json')
)

In [6]:
def fill_zeros_id(id):
    """Return ``id`` as text, left-padded with '0' to at least 12 characters.

    The value is converted with ``str()`` first. Text that is already 12
    characters or longer is returned unchanged (nothing is truncated).
    """
    return str(id).rjust(12, '0')

def get_captions_dict(captions_json):
    """Group caption texts by zero-padded image id.

    Args:
        captions_json: iterable of records, each providing an 'image_id' and
            a 'caption' entry.

    Returns:
        dict mapping ``fill_zeros_id(image_id)`` to the list of that image's
        captions, in the order they appear in ``captions_json``.
    """
    grouped = {}
    for record in captions_json:
        key = fill_zeros_id(record['image_id'])
        # setdefault creates the per-image list the first time the id is seen.
        grouped.setdefault(key, []).append(record['caption'])
    return grouped

def get_categories_dict(categories_json):
    """Index category records by their 'id'.

    Args:
        categories_json: iterable of records, each providing 'id', 'name'
            and 'supercategory' entries.

    Returns:
        dict mapping each category id to
        ``{'name': ..., 'supercategory': ...}``. If an id occurs more than
        once, the last record wins.
    """
    return {
        entry['id']: {
            'name': entry['name'],
            'supercategory': entry['supercategory'],
        }
        for entry in categories_json
    }

def get_mask_count(instances_json):
    """Count how many annotation records each image has.

    Args:
        instances_json: iterable of records, each providing an 'image_id'.

    Returns:
        dict mapping ``fill_zeros_id(image_id)`` to the number of records
        seen for that image.
    """
    totals = {}
    for annotation in instances_json:
        key = fill_zeros_id(annotation['image_id'])
        totals[key] = totals.get(key, 0) + 1
    return totals

def join_mask_annotations(instances_json):
    """Group the annotations of an instances file by image and category.

    Args:
        instances_json: mapping with an 'annotations' sequence; every
            annotation provides 'image_id' and 'category_id'.

    Returns:
        A pair ``(images_annotations, mask_count)``:

        * ``images_annotations`` maps zero-padded image id ->
          category id -> list of that image's annotations of that category.
        * ``mask_count`` maps zero-padded image id -> list holding the
          category id of every annotation of the image (one entry per
          annotation, so its length is the image's annotation count).
    """
    images_annotations = {}
    mask_count = {}
    for annotation in instances_json['annotations']:
        image_key = fill_zeros_id(annotation['image_id'])
        category_id = annotation['category_id']

        per_category = images_annotations.setdefault(image_key, {})
        per_category.setdefault(category_id, []).append(annotation)

        mask_count.setdefault(image_key, []).append(annotation['category_id'])

    return images_annotations, mask_count

def _draw_segmentation(mask, segmentation):
    """Paint one annotation's segmentation onto ``mask`` in place (value 255)."""
    if isinstance(segmentation, list):
        # Polygon form: every entry is a flat [x0, y0, x1, y1, ...] list.
        for polygon in segmentation:
            pts = np.array(polygon, dtype=np.float32).reshape(-1, 2)
            if pts.shape[0] == 0:
                continue
            cv2.fillPoly(mask, [pts.astype(np.int32)], (255, 255, 255))
        return

    if not isinstance(segmentation, dict):
        return

    # Dict form. NOTE(review): presumably COCO's uncompressed RLE (used for
    # iscrowd annotations): 'size' = [height, width], 'counts' = run lengths
    # alternating background/foreground in column-major order, starting with
    # background - confirm against the dataset. Compressed (string) counts
    # are left undrawn because decoding them needs pycocotools.
    counts = segmentation.get('counts')
    size = segmentation.get('size')
    if not isinstance(counts, list) or size is None:
        return

    height, width = size
    flat = np.zeros(height * width, dtype=bool)
    position = 0
    for run_index, run_length in enumerate(counts):
        if run_index % 2 == 1:
            flat[position:position + run_length] = True
        position += run_length
    region = flat.reshape((height, width), order='F')
    if region.shape == mask.shape[:2]:
        mask[region] = 255


def preprocess_split(instances_json, captions_values, instance_categories,
                     DATASET_IMAGES_PATH, IMAGES_PATH, MASKS_PATH,
                     storeCaptions=True, save_files=False, output_size=(256, 256)):
    """Build the (image, mask, caption) table for one dataset split.

    One mask is defined per (image, category) pair; its file name is
    ``<padded image id>_<running index>.jpg``.

    Args:
        instances_json: loaded instances file (needs an 'annotations' list).
        captions_values: caption records ('image_id', 'caption').
        instance_categories: category records ('id', 'name', 'supercategory').
        DATASET_IMAGES_PATH: folder holding the source ``<padded id>.jpg`` files.
        IMAGES_PATH: output folder for resized images (created if missing).
        MASKS_PATH: output folder for mask images (created if missing).
        storeCaptions: when True and the image has more than one annotation,
            only the image's first caption is emitted for each mask;
            otherwise one row is emitted per caption.
        save_files: when True, write the resized image and every mask to
            IMAGES_PATH / MASKS_PATH. The default (False) only builds the
            table, which matches the previous behaviour where the write
            calls were commented out.
        output_size: ``(width, height)`` passed to ``cv2.resize`` for the
            written images and masks.

    Returns:
        pandas.DataFrame with columns 'image', 'mask', 'captions', 'label',
        'supercategory', 'category_id', 'mask_num' ('mask_num' is the number
        of annotations of the image).

    Images that are missing on disk, cannot be decoded, or have no captions
    are skipped with a printed message instead of raising.
    """
    columns = ['image', 'mask', 'captions', 'label', 'supercategory', 'category_id', 'mask_num']
    records = []

    captions_dict = get_captions_dict(captions_values)
    categories_dict = get_categories_dict(instance_categories)
    images_annotations, mask_count = join_mask_annotations(instances_json)

    os.makedirs(IMAGES_PATH, exist_ok=True)
    os.makedirs(MASKS_PATH, exist_ok=True)

    idx = 0  # running mask index, shared across all images of the split
    for img_key in tqdm(images_annotations):
        # Keys produced by join_mask_annotations are already zero-padded.
        image_str_name = img_key + '.jpg'
        img_path = DATASET_IMAGES_PATH + '/' + image_str_name

        if not os.path.exists(img_path):
            print('no existe alv')
            continue

        # cv2.imread returns None (it does not raise) for undecodable files.
        img = cv2.imread(img_path)
        if img is None:
            print('could not read image: ' + img_path)
            continue

        image_captions = captions_dict.get(img_key)
        if not image_captions:
            print('no captions for image: ' + image_str_name)
            continue

        annotation_total = len(mask_count[img_key])
        if annotation_total > 1 and storeCaptions:
            row_captions = image_captions[:1]
        else:
            row_captions = image_captions

        for cat_id, annotations in images_annotations[img_key].items():
            mask_file_name = img_key + '_' + str(idx) + '.jpg'

            if save_files:
                # The mask is the union of every instance of this category,
                # not just the first annotation in the group.
                mask = np.zeros_like(img)
                for annotation in annotations:
                    _draw_segmentation(mask, annotation['segmentation'])
                cv2.imwrite(MASKS_PATH + '/' + mask_file_name, cv2.resize(mask, output_size))

            category_info = categories_dict[cat_id]
            for cap in row_captions:
                records.append({
                    'image': image_str_name,
                    'mask': mask_file_name,
                    'captions': cap,
                    'label': category_info['name'],
                    'supercategory': category_info['supercategory'],
                    'category_id': cat_id,
                    'mask_num': annotation_total,
                })

            idx += 1

        if save_files:
            # img is still in OpenCV's BGR order, which is what imwrite expects.
            cv2.imwrite(IMAGES_PATH + '/' + image_str_name, cv2.resize(img, output_size))

    # Passing columns= keeps the schema even when no row was produced.
    return pd.DataFrame(records, columns=columns)

In [7]:
# Load the annotation files of both splits.
# The encoding is given explicitly: without it open() decodes with the
# platform's locale encoding, which is not UTF-8 on every system.
with open(instances_path_train, encoding='utf-8') as f:
    instances_train_json = json.load(f)
with open(captions_path_train, encoding='utf-8') as f:
    captions_train_json = json.load(f)

with open(instances_path_val, encoding='utf-8') as f:
    instances_val_json = json.load(f)
with open(captions_path_val, encoding='utf-8') as f:
    captions_val_json = json.load(f)

In [14]:
# Class-index split over the 171 ids 0..170: 15 held-out "novel" ids and the
# remaining 156 "base" ids.
# NOTE(review): these look like indices of a 171-class label set rather than
# the category ids found in the instances files - confirm where they are used.
novel_class = [19, 23, 28, 29, 36, 51, 76, 88, 94, 112, 133, 136, 137, 157, 160]
both_class = list(range(171))
base_class = [class_id for class_id in both_class if class_id not in novel_class]

# Category records of the training instances file.
instance_categories = instances_train_json['categories']

# Report how many category records the file declares.

print('Number of classes: ', len(instance_categories))

Number of classes:  80


In [9]:
# Inspect the validation split: record schemas, category list, and how many
# distinct images / records the instances and captions files cover.
instances_val = instances_val_json['annotations']
captions_val = captions_val_json['annotations']
instance_categories = instances_val_json['categories']

print('Instances', instances_val[0].keys())
print('Captions', captions_val[0].keys())
print('instance_categories', instance_categories)

# Distinct image ids referenced by each file.
instances_set = {annotation['image_id'] for annotation in instances_val}
captions_set = {annotation['image_id'] for annotation in captions_val}

for _label, _collection in (
    ('Len Instances set', instances_set),
    ('Len captions set', captions_set),
    ('Len Instances', instances_val),
    ('Len Captions', captions_val),
):
    print(_label, len(_collection))

# To rebuild the validation table, uncomment:
# df = preprocess_split(instances_val_json, captions_val, instance_categories, DATASET_VAL_IMAGES_PATH, IMAGES_PATH + '/val/', MASKS_PATH + '/val/')
# print(df.head())
# df.to_csv('val.csv', index=False)

Instances dict_keys(['segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id'])
Captions dict_keys(['image_id', 'id', 'caption'])
instance_categories [{'supercategory': 'person', 'id': 1, 'name': 'person'}, {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'}, {'supercategory': 'vehicle', 'id': 3, 'name': 'car'}, {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'}, {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'}, {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'}, {'supercategory': 'vehicle', 'id': 7, 'name': 'train'}, {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'}, {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'}, {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'}, {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'}, {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'}, {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'}, {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'}, {'supercat

In [10]:
# Inspect the training split: record schemas, category list, and how many
# distinct images / records the instances and captions files cover.
instances_train = instances_train_json['annotations']
captions_train = captions_train_json['annotations']
instance_categories = instances_train_json['categories']

print('Instances', instances_train[0].keys())
print('Captions', captions_train[0].keys())
print('instance_categories', instance_categories)

# Distinct image ids referenced by each file.
instances_set = {annotation['image_id'] for annotation in instances_train}
captions_set = {annotation['image_id'] for annotation in captions_train}

for _label, _collection in (
    ('Len Instances set', instances_set),
    ('Len captions set', captions_set),
    ('Len Instances', instances_train),
    ('Len Captions', captions_train),
):
    print(_label, len(_collection))

# To rebuild the training table, uncomment:
# df = preprocess_split(instances_train_json, captions_train, instance_categories, DATASET_TRAIN_IMAGES_PATH, IMAGES_PATH + '/train/', MASKS_PATH + '/train/')
# print(df.head())
# df.to_csv('train.csv', index=False)

Instances dict_keys(['segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id'])
Captions dict_keys(['image_id', 'id', 'caption'])
instance_categories [{'supercategory': 'person', 'id': 1, 'name': 'person'}, {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'}, {'supercategory': 'vehicle', 'id': 3, 'name': 'car'}, {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'}, {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'}, {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'}, {'supercategory': 'vehicle', 'id': 7, 'name': 'train'}, {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'}, {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'}, {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'}, {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'}, {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'}, {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'}, {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'}, {'supercat

In [11]:
# Visual sanity check on one validation image: print its captions, then show
# image / mask / overlay for each of its instance annotations.
SAMPLE_IMAGE_ID = 558840

for x in captions_val:
    if x['image_id'] == SAMPLE_IMAGE_ID:
        print(x['caption'])
        print(x)

sample_annotations = [x for x in instances_val if x['image_id'] == SAMPLE_IMAGE_ID]

if sample_annotations:
    # Read the image once instead of once per annotation.
    sample_path = DATASET_VAL_IMAGES_PATH + '/' + fill_zeros_id(SAMPLE_IMAGE_ID) + '.jpg'
    img = cv2.imread(sample_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when the file is
        # missing or cannot be decoded.
        print('could not read image: ' + sample_path)
        sample_annotations = []
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

for x in sample_annotations:
    print(x['image_id'], x['bbox'], x['area'], x['iscrowd'], x['category_id'], x['id'])

    segmentation = x['segmentation']
    if not isinstance(segmentation, list):
        # Dict-style (run-length) segmentations cannot be drawn as polygons.
        print('skipping non-polygon segmentation for annotation', x['id'])
        continue

    # Draw every polygon of the annotation, not only the first one.
    mask = np.zeros_like(img)
    for polygon in segmentation:
        pts = np.array(polygon, dtype=np.float32).reshape(-1, 2)
        cv2.fillPoly(mask, [pts.astype(np.int32)], (255, 255, 255))

    fig, axes = plt.subplots(1, 3, figsize=(10, 10))
    axes[0].imshow(img)
    axes[0].set_title('image')
    axes[1].imshow(mask)
    axes[1].set_title('mask')
    axes[2].imshow(img)
    axes[2].imshow(mask, alpha=0.5)
    axes[2].set_title('overlay')
    plt.show()
    plt.close(fig)  # release the figure; one is created per annotation