In [1]:
from pprint import pprint

import os
import time
import json
import uuid

from nltk.tokenize import TreebankWordTokenizer

from representation.container import TemporalContainer, Ruler, TemporalRuler
from representation.entity import Friend, Person, Object, Gender, Emotion
from representation.util import Identifier, serializer
from representation.entity import Friend, Gender, Emotion
from representation.scenario import Scenario, ScenarioContext, Modality, Mention
from representation.scenario import ImageSignal, TextSignal, append_signal
from representation.mention import UtteranceAnnotation, PersonAnnotation, Token, Triple
from representation.util import serializer

Define time segments for the signals.

In [2]:
def scenario_data_path(scenario_id, file_name, modality=None, data_dir="data/test-scenes"):
    """Build the path of a file inside a scenario's data directory.

    Args:
        scenario_id: Identifier of the scenario; converted with ``str`` and used
            as the directory name below ``data_dir``.
        file_name: Name of the file, appended as the last path component.
        modality: Optional enum-like object exposing a ``name`` attribute. When
            given, its lower-cased name is inserted as a sub-directory.
        data_dir: Root directory that contains the scenario directories.

    Returns:
        The joined path as a string.
    """
    path = os.path.join(data_dir, str(scenario_id))
    # Compare against None explicitly: an enum member can be falsy (e.g. an
    # IntEnum member with value 0) and must still get its sub-directory.
    if modality is not None:
        path = os.path.join(path, modality.name.lower())

    return os.path.join(path, file_name)

def clean(scenario_id, data_dir="data/test-scenes"):
    """Delete every ``.json`` file found below the scenario's data directory.

    Removal is best-effort: a file that cannot be deleted is reported and
    skipped, so the remaining files are still processed.

    Args:
        scenario_id: Identifier of the scenario whose directory is cleaned.
        data_dir: Root directory that contains the scenario directories.
    """
    path = scenario_data_path(scenario_id, "", data_dir=data_dir)
    json_files = (os.path.join(root, file)
                  for root, _, files in os.walk(path)
                  for file in files if file.endswith(".json"))

    for file in json_files:
        try:
            os.remove(file)
        except OSError as error:
            # Keep going, but make the failure visible instead of hiding it.
            print("Could not clean", file, "-", error)
        else:
            print("Cleaned", file)


In [3]:
# Start from a clean slate: remove JSON files left over from a previous run.
scenario_id = "test_scenario"
clean(scenario_id)

# Speaker of the scenario; the last constructor argument is left as None here.
piek = Friend(None, "Piek", 59, Gender.MALE, None)
context = ScenarioContext("leolani", piek, [], [])

# Map each modality (lower-cased name) to the JSON file collecting its signals.
signals = {
    modality.name.lower(): scenario_data_path(scenario_id, modality.name.lower() + ".json")
    for modality in (Modality.IMAGE, Modality.TEXT)
}
scenario = Scenario(scenario_id, 1603139000, 1603150000, context, signals)

print(json.dumps(scenario, default=serializer, indent=4))

# Persist the scenario description, creating its directory when missing.
scenario_path = scenario_data_path(scenario_id, f"{scenario_id}.json")
scenario_dir = os.path.dirname(scenario_path)
os.makedirs(scenario_dir, exist_ok=True)
with open(scenario_path, 'w') as scenario_file:
    json.dump(scenario, scenario_file, default=serializer, indent=4)


Cleaned data/test-scenes/test_scenario/test_scenario.json
Cleaned data/test-scenes/test_scenario/image.json
Cleaned data/test-scenes/test_scenario/text.json
{
    "start_time": 1603139000,
    "end_time": 1603150000,
    "id": "test_scenario",
    "ruler": {
        "type": "TemporalRuler",
        "container_id": "test_scenario",
        "start": 1603139000,
        "end": 1603150000
    },
    "context": {
        "agent": "leolani",
        "speaker": {
            "id": "902ddf2c-64b1-4cd0-8fed-e7690f1b8e05",
            "name": "Piek",
            "age": 59,
            "gender": "MALE",
            "emotion": null
        },
        "persons": [],
        "objects": []
    },
    "signals": {
        "image": "data/test-scenes/test_scenario/image.json",
        "text": "data/test-scenes/test_scenario/text.json"
    }
}


Define the speaker and his/her emotion.

In [4]:
# Image signal showing the speaker, placed on the scenario's time line.
speaker_img_file = scenario_data_path(scenario_id, "piek-1.jpg", Modality.IMAGE)
speaker_img_time = scenario.ruler.get_time_segment(1603139705, 1603140000)
speaker_img_bounds = ((0, 550), (0, 550))
speaker_img_signal = ImageSignal(None, speaker_img_time, [speaker_img_file], speaker_img_bounds)

# Annotate the area of the image attributed to the speaker.
speaker_bounding_box = speaker_img_signal.ruler.get_area_bounding_box(10, 15, 521, 518)
# NOTE(review): this builds a new Friend for "Piek" rather than reusing the one
# stored in the scenario context; the recorded outputs show two different ids.
# Confirm that separate identities are intended.
piek = Friend(None, "Piek", 59, Gender.MALE, Emotion.JOY)
speaker = PersonAnnotation(piek, speaker_bounding_box, "face_recognition", time.time())
speaker_img_signal.mentions.append(speaker)

print(json.dumps(speaker_img_signal, default=serializer, indent=4))
image_signals_file = scenario.signals[Modality.IMAGE.name.lower()]
append_signal(image_signals_file, speaker_img_signal)

{
    "modality": "IMAGE",
    "time": {
        "type": "TemporalRuler",
        "container_id": "test_scenario",
        "start": 1603139705,
        "end": 1603140000
    },
    "files": [
        "data/test-scenes/test_scenario/image/piek-1.jpg"
    ],
    "mentions": [
        {
            "type": "PersonAnnotation",
            "segment": {
                "type": "MultiIndex",
                "container_id": "66fafca3-2c14-4b69-84b7-5fdaa06ffe0a",
                "bounds": [
                    [
                        10,
                        521
                    ],
                    [
                        15,
                        518
                    ]
                ]
            },
            "source": "face_recognition",
            "timestamp": 1604697979.097769,
            "person": {
                "id": "7cdd5e66-1143-418a-95d8-bcc0fa837af2",
                "name": "Piek",
                "age": 59,
                "gender": "MALE",
            

In [5]:
def tokenize(text):
    """Split ``text`` with the Treebank tokenizer, keeping character offsets.

    Returns:
        A tuple with one ``((start, end), token)`` pair per token, where
        ``token`` equals ``text[start:end]``.
    """
    spans = TreebankWordTokenizer().span_tokenize(text)

    return tuple(((begin, stop), text[begin:stop]) for begin, stop in spans)
    
pprint(list(enumerate(tokenize("That is my brother Jim"))))

[(0, ((0, 4), 'That')),
 (1, ((5, 7), 'is')),
 (2, ((8, 10), 'my')),
 (3, ((11, 18), 'brother')),
 (4, ((19, 22), 'Jim'))]


In [6]:
speaker = piek

# The utterance is the first line of the transcript file.
transcript = scenario_data_path(scenario_id, "chat1_utterance1.txt", Modality.TEXT)
with open(transcript) as transcript_file:
    utterance = transcript_file.readline()

utterance_time = scenario.ruler.get_time_segment(1603139850, 1603149890)
text_signal = TextSignal(None, utterance_time, [transcript], len(utterance))

# One Token mention per tokenizer span, positioned on the text signal's ruler.
tokens = tokenize(utterance)
token_annotations = [
    Token(value, text_signal.ruler.get_offset(*span), "treebank_tokenizer", time.time())
    for span, value in tokens
]
text_signal.mentions.extend(token_annotations)

utterance_annotation = UtteranceAnnotation(None, None, utterance, token_annotations, piek,
                                           Emotion.JOY, "annotator_1", time.time())
text_signal.mentions.append(utterance_annotation)

# The token at index 4 provides the name of the mentioned person.
token = tokens[4]
jim = Friend(None, token[1], 32, Gender.MALE, None)
jim_segment = utterance_annotation.ruler.get_offset(4, 4)
person_annotation = PersonAnnotation(jim, jim_segment, "annotator_1", time.time())
text_signal.mentions.append(person_annotation)

utt_ruler = utterance_annotation.ruler
triple_segments = tuple(utt_ruler.get_offset(index, index) for index in (2, 3, 4))
# NOTE(review): `triples` is built here but never added to `text_signal.mentions`
# in this cell. Confirm whether it should be attached before the signal is written.
triples = (
    Triple.from_friends(triple_segments, str(speaker.id), "brother-of", str(jim.id),
                        "annotator_1", time.time()),
)

print(json.dumps(text_signal, default=serializer, indent=4))
text_signals_file = scenario.signals[Modality.TEXT.name.lower()]
append_signal(text_signals_file, text_signal)

{
    "modality": "TEXT",
    "time": {
        "type": "TemporalRuler",
        "container_id": "test_scenario",
        "start": 1603139850,
        "end": 1603149890
    },
    "files": [
        "data/test-scenes/test_scenario/text/chat1_utterance1.txt"
    ],
    "mentions": [
        {
            "type": "Token",
            "segment": {
                "type": "Index",
                "container_id": "9b3b792c-797c-44a2-a340-9f64cd532c67",
                "start": 0,
                "stop": 4
            },
            "source": "treebank_tokenizer",
            "timestamp": 1604697979.1308181,
            "value": "That",
            "id": "0ee3d4f0-de33-4074-961f-4c8719e56012",
            "ruler": {
                "type": "AtomicRuler",
                "container_id": "0ee3d4f0-de33-4074-961f-4c8719e56012"
            }
        },
        {
            "type": "Token",
            "segment": {
                "type": "Index",
                "container_id": "9b3b792c-797c-4

Define the annotation that goes with a family picture

In [7]:
# Image signal for the family picture, placed on the scenario's time line.
family_img_file = scenario_data_path(scenario_id, "pexels-victoria-borodinova-1648358.jpg", Modality.IMAGE)
family_img_time = scenario.ruler.get_time_segment(1603139840, 1603149860)
family_img_bounds = ((0, 3456), (0, 5184))
family_img_signal = ImageSignal(None, family_img_time, [family_img_file], family_img_bounds)

# Annotate the area of the picture attributed to `jim`.
jim_bounding_box = family_img_signal.ruler.get_area_bounding_box(450, 1280, 1200, 1900)
jim_annotation = PersonAnnotation(jim, jim_bounding_box, "face_recognition", time.time())
family_img_signal.mentions.append(jim_annotation)

print(json.dumps(family_img_signal, default=serializer, indent=4))
image_signals_file = scenario.signals[Modality.IMAGE.name.lower()]
append_signal(image_signals_file, family_img_signal)

{
    "modality": "IMAGE",
    "time": {
        "type": "TemporalRuler",
        "container_id": "test_scenario",
        "start": 1603139840,
        "end": 1603149860
    },
    "files": [
        "data/test-scenes/test_scenario/image/pexels-victoria-borodinova-1648358.jpg"
    ],
    "mentions": [
        {
            "type": "PersonAnnotation",
            "segment": {
                "type": "MultiIndex",
                "container_id": "87505005-be32-40b9-90bd-8d175039be91",
                "bounds": [
                    [
                        450,
                        1200
                    ],
                    [
                        1280,
                        1900
                    ]
                ]
            },
            "source": "face_recognition",
            "timestamp": 1604697979.1626801,
            "person": {
                "id": "50767d79-1ac2-422a-a278-56e5daaccbd2",
                "name": "Jim",
                "age": 32,
              

Annotation of the communication about the picture(s)

In [8]:
# Call append_signal once per signal file with no signal and terminate=True.
for signal_path in scenario.signals.values():
    append_signal(signal_path, None, terminate=True)

### Convert to Leolani domain

In [9]:
# from integration.convert import convert, integrate_image_signal, integrate_text_signal

In [10]:
# leolani_context = convert(scenario)
# print(json.dumps(leolani_context, default=vars, indent=4))

In [11]:
# print(str([(topic, vars(ev)) for topic, ev in integrate_image_signal(speaker_img_signal)]))

In [12]:
# events, triples = integrate_text_signal(text_signal, leolani_context)
# print(str([(topic, vars(ev)) for topic, ev in events]))
# print()
# print(triples)

In [13]:
# print(json.dumps(leolani_context, default=vars, indent=4))