In [1]:
import sys
import os
import pickle
import csv
import lmdb
import json

from io import BytesIO
import numpy as np

from rdflib import URIRef
from rdflib.namespace import ClosedNamespace


In [2]:
# Directory holding the AIDA Interchange Format Python helpers (Bounding_Box,
# aifutils). The AIDA_INTERCHANGE_PATH environment variable overrides the
# default so the notebook can run on machines with a different checkout path.
AIF_PYTHON_DIR = os.environ.get(
    "AIDA_INTERCHANGE_PATH",
    "/home/alireza/aida/tools/AIDA-Interchange-Format/python/aida_interchange",
)
# Guarded so that re-running this cell does not stack duplicate entries.
if AIF_PYTHON_DIR not in sys.path:
    sys.path.append(AIF_PYTHON_DIR)
from Bounding_Box import Bounding_Box
import aifutils


In [3]:
# Read the class ids of the 300-class ontology, one per line, with the
# surrounding whitespace (including the newline) stripped.
all_labels = []
with open('../../../wsod/metadata/ont_300/300-classes.csv', 'r') as fin:
    for raw_line in fin:
        all_labels.append(raw_line.strip())

In [4]:
# Map each OpenImages class id (first CSV column) to its name (second column).
with open('../../../../data/OpenImages/metadata-v4/class-names/class-descriptions.csv', 'r') as fin:
    mid2name = dict((record[0], record[1]) for record in csv.reader(fin))

In [5]:
# Sanity check: display the name recorded for the first ontology class id.
mid2name[all_labels[0]]

'Air force'

In [6]:
# Build the ontology term for every class id:
#   'm_' + <last '/'-separated piece of the id> + '_' + <class name>
# where spaces in the name become '_' and '&' becomes 'and'.
mid2term = {}
for mid in all_labels:
    mid_suffix = mid.split('/')[-1]
    name_slug = mid2name[mid].replace(' ', '_').replace('&', 'and')
    mid2term[mid] = 'm_' + mid_suffix + '_' + name_slug

In [7]:
# Closed namespace holding exactly one term per class id in all_labels.
vis_ontology_uri = URIRef("http://www.columbia.edu/AIDA/DVMM/Ontologies/OpenImagesSubset02#")
vis_ontology_terms = [mid2term[mid] for mid in all_labels]
VIS_ONTOLOGY = ClosedNamespace(uri=vis_ontology_uri, terms=vis_ontology_terms)


In [8]:
# Parse ere_link.csv into two lookups keyed by class id (first column):
#   mid_to_ere   -> ERE type name (second column), only when it is non-empty
#   mid_is_event -> True for 'Event', False for 'Entity' (third column)
mid_to_ere = {}
mid_is_event = {}
with open('../../../wsod/metadata/ont_300/ere_link.csv', 'r') as fin:
    reader = csv.reader(fin)
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        # csv.reader yields [] for blank lines; rows without a class id are
        # ignored as well.
        if not row or row[0] == '':
            continue
        if row[1] != '':
            mid_to_ere[row[0]] = row[1]
        if row[2] == 'Entity':
            mid_is_event[row[0]] = False
        elif row[2] == 'Event':
            mid_is_event[row[0]] = True
        else:
            # ValueError is still an Exception, so existing handlers keep
            # working; the message now names the real expectation and carries
            # the offending row instead of printing it separately.
            raise ValueError(
                f"Unexpected kind {row[2]!r} for {row[0]!r}: "
                f"expected 'Entity' or 'Event' (row: {row})"
            )

In [9]:
# Distinct ERE type names referenced by the link table, plus 'Commodity'.
ere_terms = np.unique([*mid_to_ere.values(), 'Commodity'])

In [10]:
# Display the ERE type names; np.unique returns them sorted and de-duplicated.
ere_terms

array(['Commodity', 'Conflict.Attack', 'Conflict.Demonstrate',
       'Contact.Meet', 'Facility', 'Life.Die', 'Life.Injure',
       'Life.Marry', 'Movement.TransportArtifact',
       'Movement.TransportPerson', 'Organization', 'Person', 'Vehicle',
       'Weapon'], dtype='<U26')

In [11]:
# Closed namespace restricted to the ERE type names held in ere_terms.
seedling_ontology_uri = URIRef("https://tac.nist.gov/tracks/SM-KBP/2018/ontologies/SeedlingOntology#")
SEEDLING_ONTOLOGY = ClosedNamespace(uri=seedling_ontology_uri, terms=ere_terms)


In [12]:
# Load the post-processed detection results: one pickle for still images and
# one for video keyframes.
jpg_results_path = '../../results/det_results_postproc_17.pkl'
vid_results_path = '../../results/det_results_postproc_19.pkl'

with open(jpg_results_path, 'rb') as fin:
    det_results_jpg = pickle.load(fin)

with open(vid_results_path, 'rb') as fin:
    det_results_vid = pickle.load(fin)
    


In [13]:
# Image sizes keyed by image id: start from the still-image table, then merge
# the keyframe table into it (keyframe entries win on duplicate keys).
with open('../../temp/background_jpg_imgsize_1.pkl', 'rb') as fin:
    image_shape = pickle.load(fin)
with open('../../temp/background_keyframe_imgsize_2.pkl', 'rb') as fin:
    keyframe_shapes = pickle.load(fin)
image_shape.update(keyframe_shapes)

In [14]:
# lmdb_path = '/home/alireza/aida/export/Hassan/background/instance_features_keyframe_background.lmdb'
# env = lmdb.open(lmdb_path, map_size=1e10, readonly=True, lock=False)
# txn = env.begin(write=False)
# cursor = txn.cursor()

In [15]:
# Parse parent_children.tab into two lookups:
#   root_to_leaf -> list of leaf ids for each root id, in file order
#   leaf_to_root -> root id of each leaf id
# Lines are split on arbitrary whitespace; the third field is taken as the
# root id and the fourth as the leaf id.
# NOTE(review): whitespace splitting misaligns columns if any field can be
# empty or contain spaces -- confirm against the .tab file format.
root_to_leaf = {}
leaf_to_root = {}
with open('../../../../data/background/parent_children.tab', 'r') as fin:
    next(fin, None)  # skip the header line (no-op on an empty file)
    for line in fin:
        row = line.split()
        if not row:
            # Blank (e.g. trailing) lines would otherwise raise IndexError.
            continue
        root_id, leaf_id = row[2], row[3]
        root_to_leaf.setdefault(root_id, []).append(leaf_id)
        leaf_to_root[leaf_id] = root_id

In [16]:
def add_detections_to_graph(g, detections, imgid, is_keyframe, min_score=0.10):
    """Add the usable detections of one image or keyframe to the AIF graph ``g``.

    Every detection is a mapping with the keys ``'label'``, ``'score'``,
    ``'bbox'`` and ``'model'``. A detection is skipped when its label is not
    in ``mid_is_event`` or its score is below ``min_score``. For each
    remaining detection the function, in this order:

    1. creates an entity or an event node (chosen by ``mid_is_event[label]``)
       and records it in the module-level ``entity_dict`` / ``event_dict``
       under its URI;
    2. marks its type with the ``SEEDLING_ONTOLOGY`` term named by
       ``mid_to_ere[label]``, falling back to ``'Commodity'``;
    3. attaches an image or keyframe-video justification with a bounding box
       (the detection box for entities, ``(0, 0)``..``image_shape[imgid]``
       for events);
    4. stores the ``VIS_ONTOLOGY`` term of the label as JSON private data.

    The system nodes are read from the module-level names ``system_co``,
    ``system_pa``, ``system_oi`` and ``system_ws``, which must be bound
    before this function is called.

    Args:
        g: Graph handed to every ``aifutils`` call.
        detections: Iterable of detection mappings (see above). The position
            of a detection in this iterable becomes the last URI segment,
            so skipped detections still consume an index.
        imgid: Image/keyframe id used in the URIs and justifications. For
            keyframes, the text before the first ``'_'`` is used as video id.
        is_keyframe: True to emit keyframe-video justifications, False to
            emit image justifications.
        min_score: Detections scoring below this value are ignored. Defaults
            to 0.10, the threshold that used to be hard-coded.

    Raises:
        ValueError: If a detection that passed the filters carries a model
            tag with no known system (this used to be a bare ``raise``,
            which surfaced as an uninformative ``RuntimeError``).
    """
    # Detector tag -> system node; the '/J' variants share the system of
    # their base model.
    systems_by_model = {
        'coco': system_co,
        'pascal': system_pa,
        'oi': system_oi,
        'ws': system_ws,
        'coco/J': system_co,
        'pascal/J': system_pa,
        'oi/J': system_oi,
        'ws/J': system_ws,
    }

    # Only keyframe justifications need the video id.
    vidid = imgid.split('_')[0] if is_keyframe else None

    for ii, det in enumerate(detections):
        label = det['label']
        score = det['score']
        bbox = det['bbox']
        model = det['model']

        if label not in mid_is_event:
            continue

        if score < min_score:
            continue

        if model not in systems_by_model:
            raise ValueError(
                f"Unknown detection model {model!r} for {imgid!r} "
                f"(detection {ii}); expected one of {sorted(systems_by_model)}"
            )
        # Named `system` rather than `sys` so the `sys` module is not shadowed.
        system = systems_by_model[model]

        labelrdf_fringe = VIS_ONTOLOGY.term(mid2term[label])
        # Labels without an explicit ERE link are typed as 'Commodity'.
        labelrdf_ere = SEEDLING_ONTOLOGY.term(mid_to_ere.get(label, 'Commodity'))

        is_event = mid_is_event[label]
        category = 'Events' if is_event else 'Entities'
        eid = f"http://www.columbia.edu/AIDA/DVMM/{category}/ObjectDetection/RUN00003/{imgid}/{ii}"

        if is_event:
            node = aifutils.make_event(g, eid, system)
            event_dict[eid] = node
        else:
            node = aifutils.make_entity(g, eid, system)
            entity_dict[eid] = node

        type_assertion = aifutils.mark_type(g, eid + "/ere", node, labelrdf_ere, system, score)

        if is_event:
            # Events are justified by a box from the origin to the stored
            # image size.
            # NOTE(review): presumably image_shape values are (width, height)
            # -- confirm against the code that wrote the pickles.
            bb = Bounding_Box((0, 0), image_shape[imgid])
        else:
            # NOTE(review): presumably bbox is (x1, y1, x2, y2) -- confirm
            # against the detector output format.
            bb = Bounding_Box((bbox[0], bbox[1]), (bbox[2], bbox[3]))

        if is_keyframe:
            aifutils.mark_keyframe_video_justification(g, [node, type_assertion], vidid, imgid, bb, system, score)
        else:
            aifutils.mark_image_justification(g, [node, type_assertion], imgid, bb, system, score)

        fringe_key = 'columbia_fringe_event_type' if is_event else 'columbia_fringe_entity_type'
        aifutils.mark_private_data(g, node, json.dumps({
            fringe_key: str(labelrdf_fringe),
            # Feature-vector fields stay disabled, as in the original code:
            #'columbia_vector_grounding_v1.0': np.frombuffer(txn.get(f"{imgid}/{ii}".encode('utf-8')), dtype='float32').tolist(),
            #'columbia_vector_insmatch_v1.0': np.frombuffer(txn.get(f"{imgid}/{ii}".encode('utf-8')), dtype='float32').tolist()
        }), system)
            

In [17]:
# Still-image ids: result keys with their last 9 characters removed (the
# length of the '.jpg.ldcc' suffix).
id_set_jpg = {result_key[:-9] for result_key in det_results_jpg}
# Video ids: the part of each keyframe id before the first '_'.
id_set_vid = {keyframe_id.split('_')[0] for keyframe_id in det_results_vid}

In [18]:
# Build one AIF graph per root document from the detections of its leaf
# documents (still images and video keyframes).
kb_dict = {}
entity_dict = {}
event_dict = {}

# Pair every keyframe id with its video id once instead of re-splitting the
# id for every root document; the order of det_results_vid is preserved.
keyframe_videos = [(imgid, imgid.split('_')[0]) for imgid in det_results_vid]

for root_doc, leaf_docs in root_to_leaf.items():

    # Set membership replaces a list scan per keyframe per root document.
    leaf_doc_set = set(leaf_docs)

    g = aifutils.make_graph()

    # add_detections_to_graph reads these four names as module-level globals,
    # so they are rebound for each new graph before it is called.
    system_pa = aifutils.make_system_with_uri(g, "http://www.columbia.edu/AIDA/USC/Systems/ObjectDetection/FasterRCNN/Pascal")
    system_co = aifutils.make_system_with_uri(g, "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/FasterRCNN-NASNet/COCO")
    system_oi = aifutils.make_system_with_uri(g, "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/FasterRCNN-InceptionResNet/OpenImages")
    system_ws = aifutils.make_system_with_uri(g, "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/MITWeaklySupervised-ResNet/OpenImages")

    for imgid in id_set_jpg & leaf_doc_set:
        image_key = imgid + '.jpg.ldcc'
        add_detections_to_graph(g, det_results_jpg[image_key], image_key, is_keyframe=False)

    for imgid, vidid in keyframe_videos:
        if vidid in leaf_doc_set:
            add_detections_to_graph(g, det_results_vid[imgid], imgid, is_keyframe=True)

    kb_dict[root_doc] = g


In [19]:
# Persist the graphs together with the entity and event lookups as one tuple.
aida_output = (kb_dict, entity_dict, event_dict)
with open('../../results/aida_output_26.pkl', 'wb') as fout:
    pickle.dump(aida_output, fout)

In [20]:
# Write every graph to <export_dir>/<root document id>.ttl in Turtle format.
export_dir = '../../results/aida_output_26'
# exist_ok avoids the check-then-create race of isdir() + makedirs().
os.makedirs(export_dir, exist_ok=True)
for root_doc, g in kb_dict.items():
    # Serialize before opening the target so a serialization failure does not
    # leave an empty .ttl file behind.
    serialization = BytesIO()
    g.serialize(destination=serialization, format='turtle')
    # The payload is decoded as UTF-8, so the file is written as UTF-8 too
    # instead of relying on the platform default encoding.
    with open(os.path.join(export_dir, root_doc + '.ttl'), 'w', encoding='utf-8') as fout:
        fout.write(serialization.getvalue().decode('utf-8'))
