In [1]:
from pprint import pprint

import os
import json
import uuid

from nltk.tokenize import TreebankWordTokenizer

from signal_meta_data import Friend, Gender, Emotion, Triple, Mention, Modality
from signal_meta_data import OffsetSegment, BoundingBoxSegment, TimeSegment
from signal_meta_data import Scenario, ScenarioContext
from signal_meta_data import ImageSignal, TextSignal
from signal_meta_data import SpeakerAnnotation, UtteranceAnnotation
from signal_meta_data import append_signal, serializer

Define the time segments for the signals.

In [2]:
def scenario_data_path(scenario_id, file_name, modality=None, data_dir="data/test-scenes"):
    """Return the path of a file inside the data directory of a scenario.

    The path is ``<data_dir>/<scenario_id>/<file_name>``. When a modality is
    given, a sub-directory named after the lower-cased modality name is
    inserted in front of the file name.

    Args:
        scenario_id: Identifier of the scenario; converted with ``str``.
        file_name: Name of the file; an empty string yields the directory path.
        modality: Optional object with a ``name`` attribute.
        data_dir: Root directory that contains all scenarios.
    """
    segments = [data_dir, str(scenario_id)]
    if modality:
        segments.append(modality.name.lower())
    segments.append(file_name)

    return os.path.join(*segments)

def clean(scenario_id, data_dir="data/test-scenes"):
    """Delete every JSON file stored under the data directory of a scenario.

    The scenario directory (resolved with ``scenario_data_path``) is walked
    recursively and each file whose name ends in ``.json`` is removed. Every
    removed file is reported on stdout. A file that cannot be removed is
    reported too and then skipped, so a leftover file from an earlier run does
    not go unnoticed.

    Args:
        scenario_id: Identifier of the scenario to clean.
        data_dir: Root directory that contains all scenarios.
    """
    path = scenario_data_path(scenario_id, "", data_dir=data_dir)
    # Collect the files up front so the directory tree is not modified while
    # it is still being walked.
    json_files = [os.path.join(root, file)
                  for root, _, files in os.walk(path)
                  for file in files if file.endswith(".json")]

    for file in json_files:
        try:
            os.remove(file)
        except OSError as error:
            # Best effort: keep going, but make the failure visible.
            print("Could not clean", file, "-", error)
        else:
            print("Cleaned", file)


In [3]:
scenario_id = "test_scenario"
# Remove the JSON files of a previous run before writing new ones.
clean(scenario_id)
scenario_time = TimeSegment(1603139000, 1603150000)

piek = Friend(None, "Piek", 59, Gender.MALE, None)
context = ScenarioContext("leolani", piek, [], [])
# One signal file per modality, keyed by the lower-cased modality name.
signals = {
    modality.name.lower(): scenario_data_path(scenario_id, modality.name.lower() + ".json")
    for modality in (Modality.IMAGE, Modality.TEXT)
}
scenario = Scenario(scenario_id, scenario_time, context, signals)

print(json.dumps(scenario, default=serializer, indent=4))

# Persist the scenario description next to its signal files.
scenario_path = scenario_data_path(scenario_id, str(scenario_id) + ".json")
scenario_dir = os.path.dirname(scenario_path)
os.makedirs(scenario_dir, exist_ok=True)
with open(scenario_path, 'w') as scenario_file:
    json.dump(scenario, scenario_file, default=serializer, indent=4)


Cleaned data/test-scenes/test_scenario/test_scenario.json
Cleaned data/test-scenes/test_scenario/image.json
Cleaned data/test-scenes/test_scenario/text.json
{
    "id": "test_scenario",
    "time": {
        "end": 1603150000,
        "start": 1603139000
    },
    "context": {
        "agent": "leolani",
        "speaker": {
            "id": "5ae67b9c-dcde-4d37-bc86-ddd2fcbc05b4",
            "name": "Piek",
            "age": 59,
            "gender": "MALE",
            "emotion": null
        },
        "persons": [],
        "objects": []
    },
    "signals": {
        "image": "data/test-scenes/test_scenario/image.json",
        "text": "data/test-scenes/test_scenario/text.json"
    }
}


Define the speaker and his/her emotion.

In [4]:
# NOTE(review): this creates a second Friend object for Piek. Judging by the
# recorded outputs it gets a different generated id than the speaker stored in
# the scenario context - confirm that this is intended.
piek = Friend(None, "Piek", 59, Gender.MALE, Emotion.JOY)
bounding_box_piek = BoundingBoxSegment(10, 15, 521, 518)
speaker = SpeakerAnnotation(piek, bounding_box_piek)

speaker_img_file = scenario_data_path(scenario_id, "piek-1.jpg", Modality.IMAGE)
speaker_img_time = TimeSegment(1603139705, 1603140000)

speaker_img_signal = ImageSignal(
    None,
    speaker_img_time,
    [speaker_img_file],
    Emotion.HAPPINESS,
    [speaker],
    speaker,
)
print(json.dumps(speaker_img_signal, default=serializer, indent=4))

# Add the signal to the image signal file of the scenario.
append_signal(scenario.signals[Modality.IMAGE.name.lower()], speaker_img_signal)

{
    "id": "6745fc57-c40b-410e-acae-17d7222ec497",
    "modality": "IMAGE",
    "time": {
        "end": 1603140000,
        "start": 1603139705
    },
    "files": [
        "data/test-scenes/test_scenario/image/piek-1.jpg"
    ],
    "speaker": {
        "person": {
            "id": "0e182be6-29e9-4f44-aae5-3eb22a2dc071",
            "name": "Piek",
            "age": 59,
            "gender": "MALE",
            "emotion": "JOY"
        },
        "segment": {
            "bounding_box": [
                10,
                15,
                521,
                518
            ]
        }
    },
    "emotion": "HAPPINESS"
}


Define the annotation that goes with a family picture.

In [5]:
family_img_file = scenario_data_path(scenario_id, "pexels-victoria-borodinova-1648358.jpg", Modality.IMAGE)
family_img_time = TimeSegment(1603139840, 1603149860)

# The name is spelled out here: the `token` variable that holds "Jim" is only
# assigned in a later cell, so using it made this cell fail on a fresh kernel.
jim = Friend(None, "Jim", 32, Gender.MALE, None)
# NOTE(review): the speaker image signal is built with SpeakerAnnotation
# objects, whereas plain Friend objects are passed here - confirm which of
# the two ImageSignal expects.
family_img_signal = ImageSignal(None, family_img_time, [family_img_file], Emotion.HAPPINESS, [jim], piek)

print(json.dumps(family_img_signal, default=serializer, indent=4))
# Add the signal to the image signal file of the scenario.
append_signal(scenario.signals[Modality.IMAGE.name.lower()], family_img_signal)

{
    "id": "f931bded-80cd-427f-936c-0ef12019af73",
    "modality": "IMAGE",
    "time": {
        "end": 1603149860,
        "start": 1603139840
    },
    "files": [
        "data/test-scenes/test_scenario/image/pexels-victoria-borodinova-1648358.jpg"
    ],
    "speaker": null,
    "emotion": "HAPPINESS"
}


Annotation of the communication about the picture(s)

In [6]:
def tokenize(text):
    """Split a text into tokens together with their character offsets.

    Returns a tuple with one ``((start, end), token)`` entry per span produced
    by ``TreebankWordTokenizer.span_tokenize``, where ``token`` is
    ``text[start:end]``.
    """
    tokenizer = TreebankWordTokenizer()

    return tuple(((begin, stop), text[begin:stop])
                 for begin, stop in tokenizer.span_tokenize(text))
    
# Show each token of the example utterance together with its index.
pprint(list(enumerate(tokenize("That is my brother Jim"))))

[(0, ((0, 4), 'That')),
 (1, ((5, 7), 'is')),
 (2, ((8, 10), 'my')),
 (3, ((11, 18), 'brother')),
 (4, ((19, 22), 'Jim'))]


In [7]:
speaker = piek
utterance_time = TimeSegment(1603139850, 1603149890)

utterance = "That is my brother Jim"
transcript = scenario_data_path(scenario_id, "chat1_utterance1.txt", Modality.TEXT)
tokens = tokenize(utterance)

# The fifth token ("Jim") mentions a person; create a referent for it.
# NOTE(review): OffsetSegment(4, 5) presumably is the token-index range of
# that mention - confirm against signal_meta_data.
token = tokens[4]
referent_jim = Friend(None, token[1], 32, Gender.MALE, None)
mention = Mention(OffsetSegment(4, 5), token, referent_jim)

utterance_annotation = UtteranceAnnotation(
    None, None, utterance, tokens, speaker, Emotion.JOY, [mention]
)

# The utterance states a single relation between the speaker and the referent.
triples = (
    Triple.from_friends(str(speaker.id), "brother-of", str(referent_jim.id)),
)

text_signal = TextSignal(None, utterance_time, [transcript], [utterance_annotation], triples)

print(json.dumps(text_signal, default=serializer, indent=4))
append_signal(scenario.signals[Modality.TEXT.name.lower()], text_signal)

{
    "id": "2ec83de9-9701-44db-90cc-4df5ba41f93c",
    "modality": "TEXT",
    "time": {
        "end": 1603149890,
        "start": 1603139850
    },
    "files": [
        "data/test-scenes/test_scenario/text/chat1_utterance1.txt"
    ],
    "utterances": [
        {
            "chat_id": "9bdf5723-8bd1-4b57-b9a2-f61af07e6245",
            "utterance_id": "e419f966-6f7b-444b-aa30-378dbdeaf07a",
            "utterance": "That is my brother Jim",
            "tokens": [
                [
                    [
                        0,
                        4
                    ],
                    "That"
                ],
                [
                    [
                        5,
                        7
                    ],
                    "is"
                ],
                [
                    [
                        8,
                        10
                    ],
                    "my"
                ],
                [
                    [


In [8]:
# Make a final append_signal call for every signal file of the scenario.
# NOTE(review): passing no signal together with terminate=True presumably
# closes the JSON structure built up by the earlier append_signal calls -
# confirm in signal_meta_data.
for signal_file in scenario.signals.values():
    append_signal(signal_file, None, terminate=True)

### Convert to Leolani domain

In [9]:
from convert import convert, integrate_image_signal, integrate_text_signal

In [10]:
# Convert the scenario and show the attributes of the resulting context object.
leolani_context = convert(scenario)
leolani_context_json = json.dumps(leolani_context, default=vars, indent=4)
print(leolani_context_json)

{
    "_id": 59488721220420899366191374731082536200,
    "_name": "test_scenario",
    "_friends": [],
    "_chats": [],
    "_chat_start": null,
    "_chatting": false,
    "_people": {},
    "_current_people": [],
    "_objects": {
        "_object_observations": {}
    },
    "_intention": null,
    "_location": {
        "_id": 173329505591303661316720785455624394805,
        "_label": "Unknown",
        "_country": "Unknown",
        "_region": "Unknown",
        "_city": "Unknown"
    }
}


In [11]:
# Show the (topic, event attributes) pairs produced for the speaker image signal.
print([(topic, vars(event)) for topic, event in integrate_image_signal(speaker_img_signal)])

[('pepper.framework.sensor.api.face_detector.topic', {'payload': Face(Piek, 100%), 'metadata': None})]


In [12]:
# Integrate the text signal into the context, then show the resulting
# (topic, event attributes) pairs and the triples.
events, triples = integrate_text_signal(text_signal, leolani_context)
print([(topic, vars(event)) for topic, event in events])
print()
print(triples)

[('pepper.framework.context.topic.chat_enter', {'payload': 'Piek', 'metadata': None}), ('pepper.framework.context.topic.chat_turn', {'payload': <'That is my brother Jim' [100.00%]>, 'metadata': None})]

(http://cltl.nl/leolani/friends/0e182be6-29e9-4f44-aae5-3eb22a2dc071_http://cltl.nl/combot/predicate/brother-of_http://cltl.nl/leolani/friends/668c04e8-259f-4325-b6b2-c791aead2434 [friend_->_friend]),)


In [13]:
# The same dump succeeded before the text signal was integrated; afterwards
# json.dumps raises ValueError because it detects a circular reference while
# expanding the objects with `default=vars`. The error is caught and displayed
# so that running all cells does not stop here.
try:
    print(json.dumps(leolani_context, default=vars, indent=4))
except ValueError as error:
    print("ValueError:", error)

ValueError: Circular reference detected