# データ読み込み

In [2]:
# アクティビティ内のevent取得
import re
import pandas as pd
from SPARQLWrapper import SPARQLWrapper
from lib.episodes_loader import load_episodes_from_json
from lib.convert_answers import jsonify_answers

# IRI prefixes that are stripped from SPARQL result values further down,
# leaving only the short local names.
REPLACE_INSTANCE = "http://kgrc4si.home.kg/virtualhome2kg/instance/"
REPLACE_ONTOLOGY = "http://kgrc4si.home.kg/virtualhome2kg/ontology/"

# Client for the SPARQL endpoint; results are requested as JSON.
sparql = SPARQLWrapper(endpoint='https://morita.it.aoyama.ac.jp/homekg/sparql',
                        returnFormat='json')
# URL prefixes to remove from result values.
# NOTE(review): url_instance holds the same literal as REPLACE_INSTANCE, and
# neither url_instance nor url_action is referenced in the visible part of
# this file - confirm whether they are still needed.
url_instance = "http://kgrc4si.home.kg/virtualhome2kg/instance/"
url_action = "http://kgrc4si.home.kg/virtualhome2kg/ontology/action/"
# Pattern for extracting the event number: captures the digits that follow
# the literal text "event" (the capture may be empty because of `*`).
# NOTE(review): not referenced in the visible part of this file.
event_num_pattern = re.compile(r'event([0-9]*)')

# Load the episode data from the JSON file.
episodes = load_episodes_from_json("./data/Episodes.json")
# The bare string below is an expression, not a comment: in a notebook its
# value is echoed as the cell output. In English it says that each episode
# loaded from the JSON file carries:
#   id         - episode id (corresponds to the scenario in production)
#   title      - e.g. "Day1" (e.g. "Morning Rituals" in production)
#   scene      - the scene id only
#   activities - array of activities
"""
jsonファイルの場合，読み込まれるデータは以下の通り

id: エピソードのid 本番ではSenarioに該当
title: Day1など 本番では Morning Ritualsなど
scene: sceneのidのみが入る
activities: アクティビティの配列

"""

'\njsonファイルの場合，読み込まれるデータは以下の通り\n\nid: エピソードのid 本番ではSenarioに該当\ntitle: Day1など 本番では Morning Ritualsなど\nscene: sceneのidのみが入る\nactivities: アクティビティの配列\n\n'

# 関数定義

In [7]:

# Build one DataFrame (`df`) with a row per SPARQL binding returned for every
# activity of every episode.
#
# Resulting column order: scene, event, action, place, obj, duration, day,
# senario. `duration` is kept as the raw string returned by the endpoint.
df = pd.DataFrame()  # stays an empty frame if no activity is queried
section_count = 0  # number of SPARQL queries issued
columns = ["scene", "event", "action", "place", "obj", "duration"]

# Full column layout of every per-activity frame ("day"/"senario" come last).
_result_columns = columns + ["day", "senario"]
_action_prefix = REPLACE_ONTOLOGY + "action/"
# One frame per activity; concatenated once at the end instead of growing
# `df` inside the loop, which re-copied all accumulated rows on every query.
_activity_frames = []

for episode in episodes:

    print(f"{episode['id']}")

    # Issue one SPARQL query per activity of the episode.
    for activity in episode["activities"]:
        # Activity instances are named "<activity>_scene<scene id>".
        activity_name = (activity + "_scene" + str(episode["scene"])).strip()
        # NOTE(review): the query has no ORDER BY, so the order of rows inside
        # an activity is whatever the endpoint returns, while make_time()
        # accumulates durations over consecutive rows - confirm the order.
        # NOTE(review): the (:mainObject|:targetObject) alternative yields one
        # row per matching object, so an event having both appears twice.
        sparql.setQuery(f"""
        PREFIX ex: <http://kgrc4si.home.kg/virtualhome2kg/instance/>
        PREFIX : <http://kgrc4si.home.kg/virtualhome2kg/ontology/>
        PREFIX ac: <http://kgrc4si.home.kg/virtualhome2kg/ontology/action/>
        PREFIX ho: <http://www.owl-ontologies.com/VirtualHome.owl#>
        PREFIX time: <http://www.w3.org/2006/time#>
        select DISTINCT * where {{
            ex:{activity_name} :hasEvent ?event .
            ?event :action ?action .
            ?event (:mainObject|:targetObject) ?object .
            ?event :place ?place .
            ?event :time ?time.
            ?time time:numericDuration ?duration.
        }}
        """)
        results = sparql.query().convert()['results']['bindings']
        section_count += 1

        # Flatten each binding into a plain row, dropping the IRI prefixes.
        rows = [
            {
                "scene": episode["scene"],
                "event": result["event"]["value"].replace(REPLACE_INSTANCE, ""),
                "action": result["action"]["value"].replace(_action_prefix, ""),
                "place": result["place"]["value"].replace(REPLACE_INSTANCE, ""),
                "obj": result["object"]["value"].replace(REPLACE_INSTANCE, ""),
                "duration": result["duration"]["value"],
                "day": episode["title"],
                "senario": episode["id"],
            }
            for result in results
        ]
        _activity_frames.append(pd.DataFrame(rows, columns=_result_columns))

if _activity_frames:
    df = pd.concat(_activity_frames, ignore_index=True)

scene1_Day1
scene1_Day10
scene1_Day2
scene1_Day3
scene1_Day4
scene1_Day5
scene1_Day6
scene1_Day7
scene1_Day8
scene1_Day9
scene2_Day1
scene2_Day10
scene2_Day2
scene2_Day3
scene2_Day4
scene2_Day5
scene2_Day6
scene2_Day7
scene2_Day8
scene2_Day9
scene3_Day1
scene3_Day10
scene3_Day2
scene3_Day3
scene3_Day4
scene3_Day5
scene3_Day6
scene3_Day7
scene3_Day8
scene3_Day9
scene4_Day1
scene4_Day10
scene4_Day2
scene4_Day3
scene4_Day4
scene4_Day5
scene4_Day6
scene4_Day7
scene4_Day8
scene4_Day9
scene5_Day1
scene5_Day10
scene5_Day2
scene5_Day3
scene5_Day4
scene5_Day5
scene5_Day6
scene5_Day7
scene5_Day8
scene5_Day9
scene6_Day1
scene6_Day10
scene6_Day2
scene6_Day3
scene6_Day4
scene6_Day5
scene6_Day6
scene6_Day7
scene6_Day8
scene6_Day9
scene7_Day1
scene7_Day10
scene7_Day2
scene7_Day3
scene7_Day4
scene7_Day5
scene7_Day6
scene7_Day7
scene7_Day8
scene7_Day9


In [8]:
# Scene identifiers "scene1" through "scene7".
scenes = [f"scene{number}" for number in range(1, 8)]

In [9]:
def get_h_m_s(sec):
    """Format a number of seconds as ``"HHh-MMm-SSs"``.

    Args:
        sec: Elapsed time in seconds (int or float).

    Returns:
        A string such as ``"01h-47m-56s"``. Each part is truncated with
        ``int()`` and gets a leading "0" when its value is below 10.
    """
    minutes, seconds = divmod(sec, 60)
    hours, minutes = divmod(minutes, 60)

    def _two_digits(value):
        # Prefix a single "0" for values below ten, then drop any fraction.
        prefix = "0" if value < 10 else ""
        return prefix + str(int(value))

    return f"{_two_digits(hours)}h-{_two_digits(minutes)}m-{_two_digits(seconds)}s"


def make_time(df):
    """Add elapsed-time columns to ``df`` (in place) and return it.

    Two columns are written:

    * ``duration_total`` - sum of the ``duration`` values of the preceding
      rows within the current run of consecutive rows sharing the same
      ``scene``; the first row of each run gets ``0.0``.
    * ``time`` - ``duration_total`` formatted by ``get_h_m_s``.

    Rows are walked by position, so the result does not depend on the index
    labels being ``0..n-1``.

    Args:
        df: DataFrame with ``scene`` and ``duration`` columns; ``duration``
            values must be accepted by ``float()``.

    Returns:
        The same DataFrame object, with the two columns added. An empty
        frame also gets both columns (empty).
    """
    totals = []

    if len(df):
        running_total = 0.0
        previous_scene = None
        previous_duration = 0.0
        pairs = zip(df["scene"], df["duration"])
        for position, (scene, duration) in enumerate(pairs):
            if position == 0 or scene != previous_scene:
                # A new run of rows starts: its clock restarts at zero.
                running_total = 0.0
            else:
                # Each row starts when the previous row of the run ends.
                running_total += float(previous_duration)
            totals.append(running_total)
            previous_scene, previous_duration = scene, duration

    # Assign each column once instead of writing cell by cell.
    df["duration_total"] = pd.Series(totals, index=df.index, dtype="float64")
    df["time"] = pd.Series(
        [get_h_m_s(total) for total in totals], index=df.index, dtype=object
    )
    return df


def remove_suffix(df):
    """Strip scene markers and digits from the name columns of ``df``.

    For every row, the text ``"_scene"`` and every digit character are
    removed from the ``obj``, ``event`` and ``place`` values. The frame is
    modified in place.

    Args:
        df: DataFrame whose ``obj``, ``event`` and ``place`` columns hold
            strings.

    Returns:
        The same DataFrame object.
    """
    name_columns = ("obj", "event", "place")
    for row_label, row in df.iterrows():
        for column in name_columns:
            without_scene = row[column].replace("_scene", "")
            df.loc[row_label, column] = re.sub("[0-9]", "", without_scene)
    return df


def get_action(df, action_name, columns):
    """Return the rows of ``df`` whose ``action`` equals ``action_name``.

    Args:
        df: DataFrame with an ``action`` column.
        action_name: Action value to keep (compared with ``==``).
        columns: Column selection applied to the matching rows.

    Returns:
        The matching rows restricted to ``columns``; original index labels
        are kept.
    """
    is_requested_action = df["action"] == action_name
    matching_rows = df.loc[is_requested_action]
    return matching_rows[columns]

# 関数実行

In [10]:
# Add the elapsed-time columns, strip "_scene"/digits from the names, then
# keep only the rows whose action is "grab".
df_ = remove_suffix(make_time(df))
results = get_action(
    df_,
    "grab",
    ["scene", "day", "senario", "event", "place","obj", "time"],
)

# 結果出力

In [44]:
# Show the time, place and object of every selected row (cell output).
results.loc[:, ["time", "place", "obj"]]

Unnamed: 0,time,place,obj
3,00h-00m-16s,bedroom,pillow
9,00h-00m-53s,bathroom,towel
15,00h-01m-51s,bedroom,slippers
19,00h-02m-10s,bathroom,towel
28,00h-03m-00s,kitchen,alcohol
...,...,...,...
7863,01h-47m-56s,livingroom,book
7864,01h-47m-59s,livingroom,book
7867,01h-48m-14s,kitchen,creamybuns
7874,01h-48m-40s,kitchen,milk
