In [1]:
import pandas as pd
import json
import plotly.express as px
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from imantics import Polygons, Mask, BBox
from pycocotools import mask as maskUtils

from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

lemmatizer = WordNetLemmatizer()

import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm
tqdm.pandas()

# COCO

In [2]:
# Load the three COCO val2017 annotation files (instances, panoptic, stuff).
# Each file is opened in a context manager so its handle is closed as soon as
# the JSON has been parsed.
with open('coco/instances_val2017.json') as f:
    data = json.load(f)

with open('coco/panoptic_val2017.json') as f:
    data_panoptic = json.load(f)

with open('coco/stuff_val2017.json') as f:
    data_stuff = json.load(f)

In [3]:
# Flatten the panoptic annotations into one record per segment. Each record
# carries the segment's own fields, the file name and image id of the image it
# belongs to, and the four bbox components as separate x1..x4 entries.
# A new dict is built for every segment, so the loaded JSON in `data_panoptic`
# is left unmodified.
new_data_panoptic = [
    {
        **seg,
        "file_name": elem["file_name"],
        "image_id": elem["image_id"],
        "x1": seg["bbox"][0],
        "x2": seg["bbox"][1],
        "x3": seg["bbox"][2],
        "x4": seg["bbox"][3],
    }
    for elem in data_panoptic["annotations"]
    for seg in elem["segments_info"]
]

In [4]:
# Number of per-segment records collected in new_data_panoptic.
len(new_data_panoptic)

56728

In [5]:
# One row per panoptic segment; the keys of each record become the columns.
df_panoptic = pd.DataFrame(new_data_panoptic)
df_panoptic

Unnamed: 0,id,category_id,iscrowd,bbox,area,file_name,image_id,x1,x2,x3,x4
0,3226956,1,0,"[413, 158, 53, 138]",2840,000000000139.png,139,413,158,53,138
1,6979964,1,0,"[384, 172, 16, 36]",439,000000000139.png,139,384,172,16,36
2,3103374,62,0,"[413, 223, 30, 81]",1250,000000000139.png,139,413,223,30,81
3,2831194,62,0,"[291, 218, 62, 98]",1848,000000000139.png,139,291,218,62,98
4,3496593,62,0,"[412, 219, 10, 13]",90,000000000139.png,139,412,219,10,13
...,...,...,...,...,...,...,...,...,...,...,...
56723,8703197,52,0,"[467, 280, 173, 177]",22042,000000581781.png,581781,467,280,173,177
56724,1938563,52,0,"[137, 216, 297, 100]",13576,000000581781.png,581781,137,216,297,100
56725,1995653,52,1,"[0, 20, 640, 458]",24925,000000581781.png,581781,0,20,640,458
56726,1325130,122,0,"[0, 0, 640, 478]",83419,000000581781.png,581781,0,0,640,478


In [6]:
# One record per stuff annotation, with the four bbox components also exposed
# as separate x1..x4 entries. A new dict is built for every annotation, so the
# loaded JSON in `data_stuff` is left unmodified.
new_data_stuff = [
    {
        **elem,
        "x1": elem["bbox"][0],
        "x2": elem["bbox"][1],
        "x3": elem["bbox"][2],
        "x4": elem["bbox"][3],
    }
    for elem in data_stuff["annotations"]
]

In [7]:
# Stuff annotations as a frame. The id/category/bbox columns get a "_stuff"
# suffix so they stay distinguishable from the panoptic columns of the same name.
df_stuff = (
    pd.DataFrame(new_data_stuff)
    .rename(
        columns={
            "category_id": "category_id_stuff",
            "id": "id_stuff",
            "bbox": "bbox_stuff",
        }
    )
    .loc[lambda frame: frame["category_id_stuff"] != 183]  # drop the "other" class
)
df_stuff

Unnamed: 0,segmentation,area,iscrowd,image_id,bbox_stuff,category_id_stuff,id_stuff,x1,x2,x3,x4
0,{'counts': 'j19[6h1ZNXNf1h1ZNYNe1g1[NYNf1f1YNZ...,25483.0,0,139,"[0.0, 38.0, 549.0, 297.0]",98,20000000,0.0,38.0,549.0,297.0
1,{'counts': '`bh11Y=1O1O1O1O001O001O001O001O001...,20106.0,0,139,"[136.0, 0.0, 473.0, 116.0]",102,20000001,136.0,0.0,473.0,116.0
2,"{'counts': 'omh51Y=0ng31PXL01O10iW10ThN1PPY2',...",11.0,0,139,"[444.0, 226.0, 20.0, 11.0]",105,20000002,444.0,226.0,20.0,11.0
3,{'counts': '_:k2_:00000O1000000000000O10000000...,49754.0,0,139,"[0.0, 269.0, 564.0, 157.0]",118,20000003,0.0,269.0,564.0,157.0
4,{'counts': 'Qi\4:k<6O1bCAo;`0kC@M1W<n0O0N12O1O...,842.0,0,139,"[338.0, 166.0, 29.0, 50.0]",119,20000004,338.0,166.0,29.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...
32794,{'counts': '0Y1l>l3O10OO2M300N101O1N101O1N101N...,176577.0,0,581615,"[0.0, 0.0, 478.0, 640.0]",176,20032794,0.0,0.0,478.0,640.0
32796,{'counts': '0[6c0\Lf4e3ZKZLf4b1mJfM>g0e4U1^KQN...,83419.0,0,581781,"[0.0, 0.0, 640.0, 478.0]",122,20032796,0.0,0.0,640.0,478.0
32797,{'counts': '[6c0\>0O1O100O2N1O101N2N1O2N1O010O...,1167.0,0,581781,"[0.0, 203.0, 40.0, 40.0]",123,20032797,0.0,203.0,40.0,40.0
32798,{'counts': 'e9T4j:O1YL[ET3Y;N1O10001O000000010...,52789.0,0,581781,"[0.0, 0.0, 578.0, 478.0]",139,20032798,0.0,0.0,578.0,478.0


In [8]:
# Instance annotations: one row per annotation entry.
df_things = pd.DataFrame(data["annotations"])

# File name: the image id followed by ".jpg", left-padded with zeros to 16 characters.
df_things["file_name"] = df_things["image_id"].map(
    lambda image_id: (str(image_id) + ".jpg").rjust(16, "0")
)

# Spread the four bbox components over the x1..x4 columns, then order the rows
# by image and, within an image, by area.
df_things[["x1", "x2", "x3", "x4"]] = pd.DataFrame(
    df_things["bbox"].tolist(), index=df_things.index
)
df_things = df_things.sort_values(by=["image_id", "area"])

df_things

Unnamed: 0,segmentation,area,iscrowd,image_id,bbox,category_id,id,file_name,x1,x2,x3,x4
27924,"[[413.7, 220.47, 412.25, 231.06, 419.96, 231.5...",90.98725,0,139,"[412.25, 219.02, 9.63, 12.52]",62,1941808,000000000139.jpg,412.25,219.02,9.63,12.52
28442,"[[339.52, 201.72, 336.79, 216.23, 346.35, 214....",120.23200,0,139,"[336.79, 199.5, 9.73, 16.73]",86,2146548,000000000139.jpg,336.79,199.50,9.73,16.73
27503,"[[361.37, 229.69, 361.56, 226.09, 360.42, 220....",178.18510,0,139,"[350.76, 208.84, 11.37, 22.55]",86,1669970,000000000139.jpg,350.76,208.84,11.37,22.55
28441,"[[242.95, 212.06, 241.24, 199.54, 254.32, 194....",189.56010,0,139,"[241.24, 194.99, 14.22, 17.63]",86,2146194,000000000139.jpg,241.24,194.99,14.22,17.63
24602,"[[317.4, 219.24, 319.8, 230.83, 338.98, 230.03...",210.14820,0,139,"[317.4, 219.24, 21.58, 11.59]",62,110334,000000000139.jpg,317.40,219.24,21.58,11.59
...,...,...,...,...,...,...,...,...,...,...,...,...
19107,"[[140.71, 217.49, 147.16, 216.41, 201.94, 236....",13574.45810,0,581781,"[136.42, 216.41, 297.54, 99.9]",52,1544126,000000581781.jpg,136.42,216.41,297.54,99.90
19164,"[[40.91, 197.01, 37.68, 228.23, 3.23, 305.75, ...",14836.26035,0,581781,"[2.15, 184.09, 137.8, 159.34]",52,1547489,000000581781.jpg,2.15,184.09,137.80,159.34
19176,"[[587.56, 253.32, 556.41, 260.84, 540.3, 266.2...",16690.94945,0,581781,"[439.33, 94.35, 160.05, 171.86]",52,1547752,000000581781.jpg,439.33,94.35,160.05,171.86
19151,"[[532.91, 296.6, 570.59, 305.21, 589.96, 304.1...",22016.99120,0,581781,"[467.23, 280.45, 172.77, 177.63]",52,1546551,000000581781.jpg,467.23,280.45,172.77,177.63


In [9]:
# final df
# Pair every panoptic segment with the stuff annotation of the same image that
# has exactly the same four bbox components, keeping the panoptic area.
df_coco = pd.merge(df_panoptic, df_stuff, on=['image_id', "x1", "x2", "x3", "x4"])[["id","category_id", "bbox","area_x", "file_name", "image_id", "segmentation", "id_stuff", "category_id_stuff","x1", "x2", "x3", "x4"]].rename(columns={"area_x": "area"})
# Panoptic file names end in ".png"; switch them to ".jpg" like the instance rows.
# regex=False: ".png" is a literal string here, not a pattern in which "."
# matches any character, and the result no longer depends on the pandas
# version's default for `regex`.
df_coco["file_name"] = df_coco["file_name"].str.replace(".png", ".jpg", regex=False)
# Add the instance annotations, then keep one row per (category, image, bbox).
df_coco = pd.concat([df_coco, df_things]).sort_values(by=['image_id',"category_id"])
df_coco = df_coco.drop_duplicates(subset=["category_id", "image_id", "x1", "x2", "x3", "x4"])[["id", "category_id", "bbox", "area", "file_name", "image_id", "segmentation"]]

df_coco

Unnamed: 0,id,category_id,bbox,area,file_name,image_id,segmentation
24776,233201,1,"[384.43, 172.21, 15.12, 35.74]",435.14495,000000000139.jpg,139,"[[384.98, 206.58, 384.43, 199.98, 385.25, 193...."
24772,230831,1,"[412.8, 157.61, 53.05, 138.01]",2913.11040,000000000139.jpg,139,"[[428.19, 219.47, 430.94, 209.57, 430.39, 210...."
27924,1941808,62,"[412.25, 219.02, 9.63, 12.52]",90.98725,000000000139.jpg,139,"[[413.7, 220.47, 412.25, 231.06, 419.96, 231.5..."
24602,110334,62,"[317.4, 219.24, 21.58, 11.59]",210.14820,000000000139.jpg,139,"[[317.4, 219.24, 319.8, 230.83, 338.98, 230.03..."
24577,105328,62,"[413.2, 223.01, 30.17, 81.36]",1289.37345,000000000139.jpg,139,"[[436.06, 304.37, 443.37, 300.71, 436.97, 261...."
...,...,...,...,...,...,...,...
19176,1547752,52,"[439.33, 94.35, 160.05, 171.86]",16690.94945,000000581781.jpg,581781,"[[587.56, 253.32, 556.41, 260.84, 540.3, 266.2..."
19151,1546551,52,"[467.23, 280.45, 172.77, 177.63]",22016.99120,000000581781.jpg,581781,"[[532.91, 296.6, 570.59, 305.21, 589.96, 304.1..."
36740,905200581781,52,"[0, 20, 639, 457]",28911.00000,000000581781.jpg,581781,"{'counts': [468, 6, 444, 6, 13, 4, 1, 12, 1, 1..."
19097,1325130,122,"[0, 0, 640, 478]",83419.00000,000000581781.jpg,581781,{'counts': '0[6c0\Lf4e3ZKZLf4b1mJfM>g0e4U1^KQN...


In [10]:
# Number of distinct image file names present in df_coco.
len(set(df_coco["file_name"]))

4999

In [11]:
# Load the final COCO category table; the context manager closes the file
# once the JSON has been parsed.
with open('../coco/coco_categories_final.json') as f:
    df_coco_categories_final = pd.DataFrame(json.load(f))
df_coco_categories_final

Unnamed: 0,supercategory,id,name,count,isthing,original_class,dataset
0,person,1,person,262351,,"{'original_class': 'person', 'original_id': 1,...",COCO
1,wall,199,wall,43524,0.0,"{'original_class': 'wall-other-merged', 'origi...",COCO
2,vehicle,3,car,43510,,"{'original_class': 'car', 'original_id': 3, 'd...",COCO
3,plant,184,tree,41075,0.0,"{'original_class': 'tree-merged', 'original_id...",COCO
4,sky,187,sky,40119,0.0,"{'original_class': 'sky-other-merged', 'origin...",COCO
...,...,...,...,...,...,...,...
128,structural,138,net,1362,0.0,"{'original_class': 'net', 'original_id': 138, ...",COCO
129,outdoor,14,parking meter,1285,,"{'original_class': 'parking meter', 'original_...",COCO
130,animal,23,bear,1282,,"{'original_class': 'bear', 'original_id': 23, ...",COCO
131,appliance,80,toaster,224,,"{'original_class': 'toaster', 'original_id': 8...",COCO


In [12]:
# Attach the category metadata to every annotation row. The category table's
# "id" column is renamed so that it joins on the annotations' "category_id".
df_coco = (
    df_coco_categories_final
    .rename(columns={"id": "category_id"})
    .merge(df_coco, on="category_id")
)
df_coco

Unnamed: 0,supercategory,category_id,name,count,isthing,original_class,dataset,id,bbox,area,file_name,image_id,segmentation
0,person,1,person,262351,,"{'original_class': 'person', 'original_id': 1,...",COCO,233201,"[384.43, 172.21, 15.12, 35.74]",435.14495,000000000139.jpg,139,"[[384.98, 206.58, 384.43, 199.98, 385.25, 193...."
1,person,1,person,262351,,"{'original_class': 'person', 'original_id': 1,...",COCO,230831,"[412.8, 157.61, 53.05, 138.01]",2913.11040,000000000139.jpg,139,"[[428.19, 219.47, 430.94, 209.57, 430.39, 210...."
2,person,1,person,262351,,"{'original_class': 'person', 'original_id': 1,...",COCO,442619,"[280.79, 44.73, 218.7, 346.68]",27789.11055,000000000785.jpg,785,"[[353.37, 67.65, 358.15, 52.37, 362.92, 47.59,..."
3,person,1,person,262351,,"{'original_class': 'person', 'original_id': 1,...",COCO,559508,"[145.26, 100.67, 291.95, 457.35]",25759.04240,000000000872.jpg,872,"[[310.65, 112.18, 339.42, 100.67, 362.43, 106...."
4,person,1,person,262351,,"{'original_class': 'person', 'original_id': 1,...",COCO,560228,"[163.73, 126.42, 265.69, 480.4]",48091.75195,000000000872.jpg,872,"[[233.87, 606.82, 187.55, 602.85, 220.64, 580...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
55844,indoor,89,hair dryer,198,,"{'original_class': 'hair drier', 'original_id'...",COCO,1676045,"[0.0, 101.67, 163.96, 181.22]",22528.82730,000000350002.jpg,350002,"[[48.9, 274.26, 119.37, 282.89, 163.96, 242.62..."
55845,indoor,89,hair dryer,198,,"{'original_class': 'hair drier', 'original_id'...",COCO,1188212,"[315.51, 187.84, 83.23, 226.16]",5066.24570,000000384136.jpg,384136,"[[386.71, 211.78, 375.66, 192.14, 352.95, 187...."
55846,indoor,89,hair dryer,198,,"{'original_class': 'hair drier', 'original_id'...",COCO,1675983,"[139.83, 48.57, 28.71, 38.29]",592.16200,000000500464.jpg,500464,"[[168.54, 86.47, 168.35, 86.86, 161.71, 85.69,..."
55847,indoor,89,hair dryer,198,,"{'original_class': 'hair drier', 'original_id'...",COCO,2197655,"[155.44, 24.76, 34.97, 20.73]",302.02640,000000501368.jpg,501368,"[[155.44, 37.72, 177.46, 24.76, 190.41, 26.06,..."


In [13]:
# Number of distinct category ids left after the join with the category table.
len(set(df_coco["category_id"]))

133

In [14]:
# Row count per category name, most frequent first.
df_coco["name"].value_counts()

person        11006
wall           2363
car            1932
chair          1791
sky            1611
              ...  
microwave        55
net              53
scissors         36
hair dryer       11
toaster           9
Name: name, Length: 127, dtype: int64

# LVIS

In [15]:
# Load the LVIS v1 validation file; the context manager closes the handle
# once the JSON has been parsed.
with open('lvis/lvis_v1_val.json') as f:
    data_lvis = json.load(f)
df_lvis_annotations = pd.DataFrame(data_lvis["annotations"])
df_lvis_annotations

Unnamed: 0,area,id,segmentation,image_id,bbox,category_id
0,73297.48,1,"[[270.75, 598.57, 261.98, 598.57, 247.84, 598....",446522,"[83.08, 219.88, 301.69, 420.12]",232
1,70204.06,2,"[[0.0, 626.21, 90.53, 629.18, 97.12, 611.7, 10...",446522,"[0.0, 0.0, 132.41, 629.18]",421
2,289.97,3,"[[238.16, 386.84, 240.61, 387.95, 245.96, 385....",76261,"[238.16, 340.87, 66.44, 47.08]",99
3,238.39,4,"[[392.16, 220.43, 395.79, 219.28, 400.38, 216....",76261,"[368.84, 181.21, 51.42, 59.88]",99
4,100734.18,5,"[[23.83, 156.28, 0.0, 162.64, 0.0, 427.0, 166....",454750,"[0.0, 14.7, 413.66, 412.3]",1202
...,...,...,...,...,...,...
244702,4.72,244703,"[[209.04, 181.65, 208.15, 182.21, 207.92, 182....",358717,"[207.92, 181.65, 2.35, 2.8]",125
244703,15.41,244704,"[[72.17, 253.4, 74.78, 254.09, 75.99, 252.7, 7...",358717,"[71.65, 249.5, 4.6, 4.59]",125
244704,2.75,244705,"[[24.1, 257.12, 24.62, 257.47, 25.15, 257.64, ...",358717,"[24.1, 256.0, 2.17, 1.64]",125
244705,5.38,244706,"[[215.52, 186.01, 214.45, 185.69, 213.38, 185....",358717,"[212.74, 185.69, 2.86, 2.46]",125


In [16]:
# Number of distinct image ids referenced by the LVIS annotations.
len(set(df_lvis_annotations["image_id"]))

19626

In [17]:
# Load the final LVIS category table; the context manager closes the file
# once the JSON has been parsed.
with open('../lvis/lvis_categories_final.json') as f:
    df_lvis_categories_final = pd.DataFrame(json.load(f))
df_lvis_categories_final

Unnamed: 0,name,instance_count,def,synonyms,image_count,id,frequency,synset,original_class,dataset
0,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS
1,book,33353,a written work or composition that has been pu...,[book],1903,127,f,book.n.01,"{'original_class': 'book', 'original_id': 127,...",LVIS
2,carrot,18049,deep orange edible root of the cultivated carr...,[carrot],1222,217,f,carrot.n.01,"{'original_class': 'carrot', 'original_id': 21...",LVIS
3,apple,17451,fruit with red or yellow or green skin and swe...,[apple],1207,12,f,apple.n.01,"{'original_class': 'apple', 'original_id': 12,...",LVIS
4,pole,14276,a long (usually round) rod of wood or metal or...,"[pole, post]",1890,827,f,pole.n.01,"{'original_class': 'pole', 'original_id': 827,...",LVIS
...,...,...,...,...,...,...,...,...,...,...
586,animal,103,a domesticated animal kept for companionship o...,[pet],79,794,c,pet.n.01,"{'original_class': 'pet', 'original_id': 794, ...",LVIS
587,sled,102,a vehicle or flat object for transportation ov...,"[sled, sledge, sleigh]",56,970,c,sled.n.01,"{'original_class': 'sled', 'original_id': 970,...",LVIS
588,van,102,a recreational vehicle equipped for camping ou...,"[camper_(vehicle), camping_bus, motor_home]",40,191,c,camper.n.02,"{'original_class': 'camper_(vehicle)', 'origin...",LVIS
589,fruit,101,any of numerous inedible fruits with hard rinds,[gourd],6,509,r,gourd.n.02,"{'original_class': 'gourd', 'original_id': 509...",LVIS


In [18]:
# Number of distinct class names in the LVIS category table.
len(set(df_lvis_categories_final["name"].values))

521

In [19]:
# Join the category table with the annotations on the category id. On the
# annotation side "id" becomes "id_annotations" and "category_id" becomes "id",
# so the join key has the same name in both frames.
df_lvis = df_lvis_categories_final.merge(
    df_lvis_annotations.rename(
        columns={"id": "id_annotations", "category_id": "id"}
    ),
    on="id",
)
df_lvis

Unnamed: 0,name,instance_count,def,synonyms,image_count,id,frequency,synset,original_class,dataset,area,id_annotations,segmentation,image_id,bbox
0,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,216.73,558,"[[347.16, 146.34, 348.62, 148.35, 348.62, 150....",480275,"[344.41, 134.46, 21.78, 17.91]"
1,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,153.76,559,"[[454.44, 130.81, 458.22, 129.49, 461.43, 128....",480275,"[449.72, 128.35, 18.13, 14.72]"
2,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,366.66,560,"[[357.26, 131.01, 355.56, 133.39, 356.58, 134....",480275,"[355.56, 130.84, 41.34, 16.32]"
3,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,195.67,561,"[[437.8, 134.13, 441.01, 130.57, 444.93, 128.9...",480275,"[435.31, 127.72, 18.53, 20.48]"
4,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,24.90,562,"[[308.01, 174.28, 307.26, 175.4, 306.7, 176.16...",480275,"[297.86, 171.27, 10.15, 4.89]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240882,clock,100,a clock mounted on a wall,[wall_clock],48,1154,c,wall_clock.n.01,"{'original_class': 'wall_clock', 'original_id'...",LVIS,4933.66,120495,"[[201.58, 0.0, 201.16, 18.15, 199.46, 21.55, 2...",497558,"[186.71, 0.0, 67.98, 112.9]"
240883,clock,100,a clock mounted on a wall,[wall_clock],48,1154,c,wall_clock.n.01,"{'original_class': 'wall_clock', 'original_id'...",LVIS,1955.59,126976,"[[496.41, 110.32, 500.47, 105.59, 503.04, 100....",384793,"[462.01, 60.78, 44.0, 57.62]"
240884,clock,100,a clock mounted on a wall,[wall_clock],48,1154,c,wall_clock.n.01,"{'original_class': 'wall_clock', 'original_id'...",LVIS,3344.10,130543,"[[166.72, 44.26, 175.38, 43.07, 182.68, 41.71,...",65329,"[97.35, 0.0, 85.33, 44.94]"
240885,clock,100,a clock mounted on a wall,[wall_clock],48,1154,c,wall_clock.n.01,"{'original_class': 'wall_clock', 'original_id'...",LVIS,3799.83,130544,"[[482.2, 122.5, 443.61, 121.3, 440.4, 116.75, ...",65329,"[433.86, 46.37, 60.5, 76.13]"


In [20]:
# Number of distinct class names in the joined LVIS frame.
len(set(df_lvis["name"]))

521

Sistemo COCO per eliminare le annotazioni duplicate

In [21]:
# Class names that occur in the LVIS frame but not in the COCO frame.
unique_lvis_classes = list(set(df_lvis["name"].values) - set(df_coco["name"].values))
print("numero di classi presenti solo in LVIS:",len(unique_lvis_classes))
unique_lvis_classes

numero di classi presenti solo in LVIS: 437


['wire',
 'earring',
 'crumb',
 'scooter',
 'bar',
 'bandana',
 'coffee table',
 'watermelon',
 'bookcase',
 'stirrup',
 'marker',
 'card',
 'steering wheel',
 'tablecloth',
 'box',
 'street lamp',
 'egg',
 'carton',
 'steak',
 'weather vane',
 'shirt',
 'can',
 'vent',
 'sweater',
 'wristband',
 'saddle blanket',
 'swimsuit',
 'cauliflower',
 'bag',
 'sunglasses',
 'dress',
 'date (fruit)',
 'short',
 'saddle',
 'stroller',
 'paper towel',
 'camera',
 'water tank',
 'button',
 'beer bottle',
 'plate',
 'cracker',
 'brake light',
 'buoy',
 'mattress',
 'blouse',
 'street light',
 'parasail',
 'pastry',
 'home plate',
 'croissant',
 'blender',
 'trunk',
 'jacket',
 'knee pad',
 'pajama',
 'cork (bottle plug)',
 'barrel',
 'ham',
 'shower curtain',
 'computer monitor',
 'mailbox',
 'belt buckle',
 'awning',
 'magazine',
 'mound',
 'panda',
 'ski jacket',
 'tortilla',
 'dish',
 'trash can',
 'jersey',
 'cooler',
 'pencil',
 'toothpick',
 'shoe',
 'whipped cream',
 'pennant',
 'clock tower

In [22]:
# Class names that occur in the COCO frame but not in the LVIS frame.
unique_coco_classes = list(set(df_coco["name"].values) - set(df_lvis["name"].values))
print("numero di classi presenti solo in COCO:",len(unique_coco_classes))
unique_coco_classes

numero di classi presenti solo in COCO: 43


['house',
 'furniture',
 'door',
 'road',
 'sky',
 'sand',
 'grass',
 'keyboard',
 'couch',
 'river',
 'tent',
 'rug',
 'shelf',
 'laptop',
 'wall',
 'plant',
 'rock',
 'pavement',
 'staircase',
 'net',
 'fence',
 'building',
 'mountain',
 'cell phone',
 'railroad',
 'water',
 'tree',
 'remote',
 'hot dog',
 'tie',
 'cardboard',
 'snow',
 'sea',
 'floor',
 'ceiling',
 'platform',
 'dirt',
 'bridge',
 'light',
 'roof',
 'window',
 'mouse',
 'gravel']

In [23]:
# LVIS image metadata; "id" is renamed to "image_id", the name used for the
# image reference in the annotation frame.
df_lvis_images = pd.DataFrame(data_lvis["images"]).rename(columns={"id":"image_id"})
df_lvis_images

Unnamed: 0,date_captured,neg_category_ids,image_id,license,height,width,flickr_url,coco_url,not_exhaustive_category_ids
0,2013-11-14 17:02:52,"[279, 899, 127, 180, 1136, 725, 663]",397133,4,427,640,http://farm7.staticflickr.com/6116/6255196340_...,http://images.cocodataset.org/val2017/00000039...,"[914, 801, 566, 139, 1021]"
1,2013-11-14 20:55:31,"[1002, 434, 924, 928, 951, 378, 1012, 867, 108...",37777,1,230,352,http://farm9.staticflickr.com/8429/7839199426_...,http://images.cocodataset.org/val2017/00000003...,"[181, 45]"
2,2013-11-14 22:32:02,"[194, 899, 248, 587, 201, 838, 1112, 1061, 337...",252219,4,428,640,http://farm4.staticflickr.com/3446/3232237447_...,http://images.cocodataset.org/val2017/00000025...,[948]
3,2013-11-14 23:11:37,"[149, 713, 654, 1112, 753, 579]",87038,1,480,640,http://farm8.staticflickr.com/7355/8825114508_...,http://images.cocodataset.org/val2017/00000008...,"[94, 1043]"
4,2013-11-14 23:16:55,"[22, 369, 176, 653, 89, 11, 753, 474]",174482,6,388,640,http://farm8.staticflickr.com/7020/6478877255_...,http://images.cocodataset.org/val2017/00000017...,[1112]
...,...,...,...,...,...,...,...,...,...
19804,2013-11-24 13:20:54,"[1014, 1148, 857, 80, 660, 142, 13]",125910,4,640,480,http://farm8.staticflickr.com/7392/9308795953_...,http://images.cocodataset.org/train2017/000000...,[1122]
19805,2013-11-24 20:54:28,"[782, 1126, 608, 857, 1053, 486, 380, 992, 270]",342307,3,375,500,http://farm4.staticflickr.com/3076/3160532038_...,http://images.cocodataset.org/train2017/000000...,[]
19806,2013-11-24 21:59:45,"[21, 857, 1008, 57, 800, 531, 15, 1202, 1120]",432062,1,433,640,http://farm4.staticflickr.com/3114/2305083105_...,http://images.cocodataset.org/train2017/000000...,[]
19807,2013-11-24 22:39:24,"[335, 1085, 672, 190, 489]",347405,3,427,640,http://farm6.staticflickr.com/5511/9585778258_...,http://images.cocodataset.org/train2017/000000...,[]


In [24]:
# Attach the image metadata to every LVIS annotation row.
df_lvis = df_lvis.merge(df_lvis_images, on="image_id")
df_lvis

Unnamed: 0,name,instance_count,def,synonyms,image_count,id,frequency,synset,original_class,dataset,...,image_id,bbox,date_captured,neg_category_ids,license,height,width,flickr_url,coco_url,not_exhaustive_category_ids
0,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,...,480275,"[344.41, 134.46, 21.78, 17.91]",2013-11-19 18:49:01,"[29, 1128, 155, 36, 443, 68, 43, 895]",2,471,640,http://farm3.staticflickr.com/2740/4268400738_...,http://images.cocodataset.org/val2017/00000048...,"[1152, 45]"
1,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,...,480275,"[449.72, 128.35, 18.13, 14.72]",2013-11-19 18:49:01,"[29, 1128, 155, 36, 443, 68, 43, 895]",2,471,640,http://farm3.staticflickr.com/2740/4268400738_...,http://images.cocodataset.org/val2017/00000048...,"[1152, 45]"
2,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,...,480275,"[355.56, 130.84, 41.34, 16.32]",2013-11-19 18:49:01,"[29, 1128, 155, 36, 443, 68, 43, 895]",2,471,640,http://farm3.staticflickr.com/2740/4268400738_...,http://images.cocodataset.org/val2017/00000048...,"[1152, 45]"
3,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,...,480275,"[435.31, 127.72, 18.53, 20.48]",2013-11-19 18:49:01,"[29, 1128, 155, 36, 443, 68, 43, 895]",2,471,640,http://farm3.staticflickr.com/2740/4268400738_...,http://images.cocodataset.org/val2017/00000048...,"[1152, 45]"
4,banana,50552,elongated crescent-shaped yellow fruit with so...,[banana],1787,45,f,banana.n.02,"{'original_class': 'banana', 'original_id': 45...",LVIS,...,480275,"[297.86, 171.27, 10.15, 4.89]",2013-11-19 18:49:01,"[29, 1128, 155, 36, 443, 68, 43, 895]",2,471,640,http://farm3.staticflickr.com/2740/4268400738_...,http://images.cocodataset.org/val2017/00000048...,"[1152, 45]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240882,van,102,a recreational vehicle equipped for camping ou...,"[camper_(vehicle), camping_bus, motor_home]",40,191,c,camper.n.02,"{'original_class': 'camper_(vehicle)', 'origin...",LVIS,...,521221,"[110.08, 33.53, 156.22, 59.73]",2013-11-15 21:57:58,"[101, 501, 416, 591, 83, 31, 398, 491, 361, 252]",1,384,640,http://farm7.staticflickr.com/6223/6231820585_...,http://images.cocodataset.org/train2017/000000...,[191]
240883,van,102,a recreational vehicle equipped for camping ou...,"[camper_(vehicle), camping_bus, motor_home]",40,191,c,camper.n.02,"{'original_class': 'camper_(vehicle)', 'origin...",LVIS,...,521221,"[44.06, 45.0, 47.69, 22.51]",2013-11-15 21:57:58,"[101, 501, 416, 591, 83, 31, 398, 491, 361, 252]",1,384,640,http://farm7.staticflickr.com/6223/6231820585_...,http://images.cocodataset.org/train2017/000000...,[191]
240884,van,102,a recreational vehicle equipped for camping ou...,"[camper_(vehicle), camping_bus, motor_home]",40,191,c,camper.n.02,"{'original_class': 'camper_(vehicle)', 'origin...",LVIS,...,521221,"[6.77, 54.61, 46.28, 21.82]",2013-11-15 21:57:58,"[101, 501, 416, 591, 83, 31, 398, 491, 361, 252]",1,384,640,http://farm7.staticflickr.com/6223/6231820585_...,http://images.cocodataset.org/train2017/000000...,[191]
240885,clock,100,a clock mounted on a wall,[wall_clock],48,1154,c,wall_clock.n.01,"{'original_class': 'wall_clock', 'original_id'...",LVIS,...,233139,"[51.58, 172.71, 176.33, 173.26]",2013-11-24 11:49:20,"[661, 369, 259, 737, 1006, 740, 1079, 33, 494,...",4,427,640,http://farm8.staticflickr.com/7012/6449253229_...,http://images.cocodataset.org/val2017/00000023...,[]


In [25]:
def extract_coco_filename(url):
    """Return the file-name component of an image URL.

    The name is taken as everything after the last "/" instead of a fixed
    16-character tail, so the result stays correct when the file name is not
    exactly 16 characters long. For 16-character file names the result is
    identical to the fixed-tail slice.

    Parameters
    ----------
    url : str
        Image URL, e.g. "http://images.cocodataset.org/val2017/000000397133.jpg".

    Returns
    -------
    str
        The text following the final "/" (the whole string if it contains none).
    """
    return url.rsplit("/", 1)[-1]

In [26]:
# Derive the image file name of every LVIS row from its COCO URL.
df_lvis["file_name"] = df_lvis["coco_url"].map(extract_coco_filename)

In [27]:
# Number of distinct image file names in the LVIS frame.
len(set(df_lvis["file_name"]))

19548

In [28]:
# Image files that appear in the COCO frame but not in the LVIS frame.
coco_filename_not_in_lvis = list(set(df_coco["file_name"].values) - set(df_lvis["file_name"].values))
print(len(coco_filename_not_in_lvis))

278


In [29]:
# Image files that appear in the LVIS frame but not in the COCO frame.
lvis_filename_not_in_coco = list(set(df_lvis["file_name"].values) - set(df_coco["file_name"].values))
print(len(lvis_filename_not_in_coco))

14827


In [30]:
# Filter COCO, keeping only the classes that are not present in LVIS.
# The trailing comment on the next line is a disabled extra condition that
# would also keep rows from image files absent from LVIS.
df_coco = df_coco[(df_coco["name"].isin(unique_coco_classes))] #| (df_coco["file_name"].isin(coco_filename_not_in_lvis))]
# Number of distinct class names left after the filter.
len(set(df_coco["name"].values))

43

In [31]:
# Row count per remaining COCO class, most frequent first.
df_coco["name"].value_counts()

wall          2363
sky           1611
tree          1407
building       978
grass          955
floor          943
pavement       873
road           651
window         602
fence          554
light          448
dirt           444
ceiling        415
door           352
plant          343
sea            291
remote         283
house          277
rug            266
cell phone     262
couch          261
tie            254
mountain       247
laptop         231
cardboard      229
shelf          225
sand           212
rock           178
snow           174
roof           169
furniture      162
keyboard       153
hot dog        127
railroad       119
platform       113
gravel         113
mouse          106
staircase       92
river           88
bridge          87
water           79
tent            67
net             53
Name: name, dtype: int64

In [32]:
# Row count per LVIS class, most frequent first.
df_lvis["name"].value_counts()

banana      9156
book        7022
carrot      3762
apple       3116
pole        2913
            ... 
cube           7
eggplant       7
sled           7
windmill       5
candy          3
Name: name, Length: 521, dtype: int64

In [33]:
#df_lvis[df_lvis["name"]=="person"]

# ADE20K

In [34]:
# Load the ADE20K validation annotations; the context manager closes the file
# once the JSON has been parsed.
with open('./ade20k/ade_validation.json') as f:
    df_ade = pd.DataFrame(json.load(f))
df_ade

Unnamed: 0,id_annotations,name,original_class,dataset,bbox,file_name,image_id,segmentation
0,0,sky,"{'original_class': 'sky', 'original_id': 0, 'd...",ADE20K,"[0, 0, 1278, 675]",ADE_val_00000001.jpg,ADE_val_00000001.jpg,"[0, 0, 0, 588, 37, 588, 65, 588, 81, 604, 90, ..."
1,1,tree,"{'original_class': 'trees', 'original_id': 1, ...",ADE20K,"[2, 577, 323, 735]",ADE_val_00000001.jpg,ADE_val_00000001.jpg,"[3, 683, 2, 584, 10, 583, 30, 583, 45, 577, 52..."
2,2,wall,"{'original_class': 'wall', 'original_id': 2, '...",ADE20K,"[0, 718, 654, 816]",ADE_val_00000001.jpg,ADE_val_00000001.jpg,"[3, 816, 119, 808, 244, 809, 339, 801, 390, 80..."
3,3,building,"{'original_class': 'building', 'original_id': ...",ADE20K,"[312, 87, 1275, 807]",ADE_val_00000001.jpg,ADE_val_00000001.jpg,"[760, 783, 654, 797, 653, 724, 322, 713, 312, ..."
4,4,dormer,"{'original_class': 'dormer', 'original_id': 4,...",ADE20K,"[802, 477, 915, 561]",ADE_val_00000001.jpg,ADE_val_00000001.jpg,"[861, 549, 915, 487, 854, 477, 802, 490, 808, ..."
...,...,...,...,...,...,...,...,...
70862,15,cliff,"{'original_class': 'cliff', 'original_id': 15,...",ADE20K,"[163, 0, 255, 256]",ADE_val_00001999.jpg,ADE_val_00001999.jpg,"[255, 0, 212, 0, 209, 2, 208, 8, 206, 9, 202, ..."
70863,1,stair,"{'original_class': 'steps', 'original_id': 1, ...",ADE20K,"[135, 193, 189, 239]",ADE_val_00002000.jpg,ADE_val_00002000.jpg,"[162, 239, 189, 226, 185, 200, 158, 193, 153, ..."
70864,2,platform,"{'original_class': 'platform', 'original_id': ...",ADE20K,"[50, 165, 351, 261]",ADE_val_00002000.jpg,ADE_val_00002000.jpg,"[50, 261, 51, 182, 78, 170, 80, 192, 225, 233,..."
70865,3,wall,"{'original_class': 'wall', 'original_id': 3, '...",ADE20K,"[50, 38, 175, 181]",ADE_val_00002000.jpg,ADE_val_00002000.jpg,"[50, 38, 172, 38, 175, 124, 175, 130, 175, 136..."


In [35]:
# Number of distinct ADE20K image file names.
len(set(df_ade["file_name"]))

2000

# Union

In [36]:
# Bring the three frames onto the same column layout (COCO's "id" becomes
# "id_annotations", the name the other two frames already use) ...
df_coco = df_coco[["id", "name", "original_class", "image_id", "dataset", "bbox", "segmentation", "file_name"]].rename(columns={"id": "id_annotations"})
df_ade = df_ade[["id_annotations", "name", "original_class", "image_id", "dataset", "bbox", "segmentation", "file_name"]]
df_lvis = df_lvis[["id_annotations", "name", "original_class", "image_id", "dataset", "bbox", "segmentation", "file_name"]]
# ... and stack them in the order COCO, LVIS, ADE20K. pd.concat is used because
# DataFrame.append was deprecated and then removed in pandas 2.0; the stacked
# result is the same.
df_dataset_full = pd.concat([df_coco, df_lvis, df_ade])
# Row count per source dataset.
df_dataset_full.groupby("dataset").size()

dataset
ADE20K     70867
COCO       17857
LVIS      240887
dtype: int64

In [37]:
# Load the final vocabulary (class name and id); the context manager closes
# the file once the JSON has been parsed.
with open('../final_classes/final_vocabulary.json') as f:
    df_final_vocabulary = pd.DataFrame(json.load(f))[["class", "id"]]
# Attach the vocabulary id to every row by class name. pd.merge defaults to an
# inner join, so rows whose class is not in the vocabulary are dropped.
df_dataset_full = pd.merge(df_dataset_full.rename(columns={"name": "class"}), df_final_vocabulary, on=["class"])
df_dataset_full["split"] = "validation"
df_dataset_full = df_dataset_full[["id_annotations", "class", "id", "dataset", "original_class", "image_id", "file_name", "bbox", "segmentation", "split"]]
df_dataset_full

Unnamed: 0,id_annotations,class,id,dataset,original_class,image_id,file_name,bbox,segmentation,split
0,4224910,wall,5,COCO,"{'original_class': 'wall-other-merged', 'origi...",139,000000000139.jpg,"[0, 0, 640, 358]",{'counts': '0j19a4c4_K]Ka4c4_K^K`4b4`K^Ka4a4^K...,validation
1,7897479,wall,5,COCO,"{'original_class': 'wall-other-merged', 'origi...",802,000000000802.jpg,"[0, 0, 424, 640]",{'counts': '0P\98hWGb0a\OXObb0Z2iN<D=l^OlLb`0T...,validation
2,4074010,wall,5,COCO,"{'original_class': 'wall-other-merged', 'origi...",885,000000000885.jpg,"[0, 0, 640, 255]",{'counts': '0n7]5ZHdJ\7]5`HhJ_7X5^HkJb7U5^HkJb...,validation
3,2637857,wall,5,COCO,"{'original_class': 'wall-other-merged', 'origi...",1000,000000001000.jpg,"[342, 32, 254, 207]",{'counts': 'Q^P52\=OTD2aNNY=1UD5POJY<1eD7dNM0O...,validation
4,2697775,wall,5,COCO,"{'original_class': 'wall-other-merged', 'origi...",1268,000000001268.jpg,"[387, 268, 157, 59]",{'counts': 'meQ51Z=102@MWC04:^<=M3N20000O1O2O0...,validation
...,...,...,...,...,...,...,...,...,...,...
322757,13,decoration,387,ADE20K,"{'original_class': 'decoration', 'original_id'...",ADE_val_00001477.jpg,ADE_val_00001477.jpg,"[89, 1, 132, 48]","[99, 41, 89, 25, 89, 12, 91, 4, 117, 1, 127, 1...",validation
322758,16,decoration,387,ADE20K,"{'original_class': 'decoration', 'original_id'...",ADE_val_00001529.jpg,ADE_val_00001529.jpg,"[118, 135, 131, 151]","[120, 135, 118, 140, 119, 148, 123, 151, 130, ...",validation
322759,27,decoration,387,ADE20K,"{'original_class': 'decoration', 'original_id'...",ADE_val_00001859.jpg,ADE_val_00001859.jpg,"[11, 7, 715, 312]","[715, 88, 530, 7, 14, 11, 11, 102, 715, 312]",validation
322760,3,pond,497,ADE20K,"{'original_class': 'pond', 'original_id': 3, '...",ADE_val_00001621.jpg,ADE_val_00001621.jpg,"[1, 506, 941, 958]","[15, 587, 151, 566, 279, 551, 381, 530, 458, 5...",validation


In [38]:
# Number of distinct classes in the combined dataset after the vocabulary join.
len(set(df_dataset_full["class"]))

662

In [39]:
# Classes in the combined dataset that are missing from the final vocabulary.
set(df_dataset_full["class"].values) - set(df_final_vocabulary["class"].values)

set()

In [40]:
 # Vocabulary classes that have no row in the combined dataset.
 set(df_final_vocabulary["class"].values) - set(df_dataset_full["class"].values)

{'bouquet', 'bush', 'hedge', 'shrub'}

check per vedere se ci sono classi al di fuori di quelle di partenza

In [41]:
# Load the original class dictionaries; the context manager closes the file
# once the JSON has been parsed.
with open('../original_classes/VG-SGG-dicts-vgoi6-clipped.json') as f:
    original_classes = json.load(f)
print("original number of categories: ",len(original_classes["label_to_idx"]))

original number of categories:  1594


In [42]:
# Tabulate the "object_count" mapping of the original classes:
# one row per class name with its count.
df_original_classes = pd.DataFrame(
    list(original_classes["object_count"].items()),
    columns=["class", "count"],
)
df_original_classes.head()

Unnamed: 0,class,count
0,window,53848
1,tree,50339
2,man,47202
3,person,43031
4,shirt,38238


In [43]:
# Classes in the combined dataset that are not among the original classes.
set(df_dataset_full["class"].values) - set(df_original_classes["class"].values)

set()

Dubbi:
1. Come mi comporto con l'oversampling effettuato sul training? Riduco anche nel validation le classi ridotte?
2. Mancano 4 classi nel validation, sistemo o lascio così? (Ho rispettato la divisione originale dei dataset: bouquet proviene da LVIS, mentre le altre da ADE20K.)
3. LVIS e COCO hanno le immagini del validation e del train "mischiate": lascio così, oppure seguo la divisione di LVIS (19548 immagini), che ha più immagini, o quella di COCO (4618 immagini), che ne ha di meno?
4. Come mi comporto per il test? uso quello usato da Mask R-CNN? non c'è un benchmark usato per valutare i modelli che estraggono le features