In [1]:
import numpy as np
from utils import key_frame_detection, generate_scene_graph
from dataloader import read_bag, load_pickle
from llm_engine import NarrationEngine
from rosbags.typesys import Stores, get_typestore, get_types_from_msg
import pickle
import json
import matplotlib.pyplot as plt 
from os import listdir
from os.path import isfile, join

# Read the whole constraints summary into memory as a single string.
constraints_path = 'config/constraints_summary.txt'
with open(constraints_path, 'r') as constraints_file:
    constraints = constraints_file.read()

ModuleNotFoundError: No module named 'dataloader'

In [2]:
# Message definition text registered below as smach_msgs/msg/SmachContainerStatus.
smach_container_status_msg = '''
std_msgs/Header header
string path
string[] initial_states
string[] active_states
uint8[] local_data
string info
'''

# Message definition text registered below as smach_msgs/msg/SmachContainerStructure.
smach_container_structure_msg = '''
std_msgs/Header header
string path
string[] children
string[] internal_outcomes
string[] outcomes_from
string[] outcomes_to
string[] container_outcomes
'''

# Map each custom message type name to its definition text; registration
# happens in insertion order (status first, then structure).
custom_msg_defs = {
    'smach_msgs/msg/SmachContainerStatus': smach_container_status_msg,
    'smach_msgs/msg/SmachContainerStructure': smach_container_structure_msg,
}

add_types = {}
for msg_type_name, msg_definition in custom_msg_defs.items():
    add_types.update(get_types_from_msg(msg_definition, msg_type_name))

# Extend the ROS 2 Humble typestore with the custom SMACH message types.
typestore = get_typestore(Stores.ROS2_HUMBLE)
typestore.register(add_types)

# Camera parameters attached to every loaded bag as data['camera_info'].
# NOTE(review): presumably pinhole intrinsics (principal point cx/cy and focal
# lengths fx/fy, in pixels) — confirm against the camera calibration.
camera_info = dict(
    cx=385.22119140625,
    cy=640.2664794921875,
    fx=907.1741333007812,
    fy=907.033447265625,
)

In [3]:
# Per-task settings: detector vocabulary, task title and free-text description.
task_configs = {
    'cup': {
        'vocab': ['cup', 'table', 'chair', 'person', 'sofa', 'stool', 'coffee table', 'gripper', 'sink'],
        'name': 'Pick and place the cup in the sink',
        'desc': 'The robot should navigate to the table with dirty cup on it, pick up the cup, find the sink, navigate to the sink and place the cup in the sink.',
    },
    'microwave': {
        'vocab': ['microwave', 'table', 'chair', 'person', 'sofa', 'stool', 'microwave door', 'microwave handle', 'gripper', 'sink'],
        'name': 'Microwave the lunchbox',
        'desc': '',
    },
}

# Active task; change this single key to switch every setting below.
task = 'microwave'

# The name `vocab_cup` is kept for the cells that read it, even though it holds
# the vocabulary of whichever task is active.
vocab_cup = task_configs[task]['vocab']
task_name = task_configs[task]['name']
task_desc = task_configs[task]['desc']

### Load data

In [8]:
def vis(data, log=True):
    """Print the recorded fields of every key frame and display its RGB image.

    Args:
        data: Mapping with the keys 'rgb', 'state', 'state_history', 'odom'
            and 'joint_state'; each value is indexed by key-frame number and
            the number of frames is taken from ``len(data['rgb'])``.
        log: When true (the default), print each frame's fields and show its
            image. When false, nothing is printed or drawn. This replaces a
            previously undefined global ``log`` that made the function raise
            NameError on a fresh kernel.

    Returns:
        None.
    """
    for keyframe in range(len(data['rgb'])):
        if log:
            print('frame:', keyframe)
            print('state:', data['state'][keyframe])
            print('state history:', data['state_history'][keyframe])
            print('odom:', data['odom'][keyframe])
            print('joint state:', data['joint_state'][keyframe])
            plt.imshow(data['rgb'][keyframe])
            # Flush the figure now; otherwise every frame is drawn onto the
            # same axes and only the last image ends up visible.
            plt.show()

In [9]:
# Directory holding the key-frame files of the selected task. The trailing
# slash matters: later cells build file paths with `path + file_name`.
path = '../data/keyframes/keyframes_only/' + task + '/'

# Collect the regular files whose name contains '.pkl', in sorted order.
files = []
for entry in listdir(path):
    if '.pkl' in entry and isfile(join(path, entry)):
        files.append(entry)
files.sort()
print(len(files))

32


### RONAR

In [10]:
# Narration engine shared by all summary and narration cells below.
narration_model = 'gpt-4o'
narr_engine = NarrationEngine(model=narration_model)

#### Environment

In [13]:
# Build one environment (RGB-D) summary per key frame of every bag and store
# the per-bag list next to the pickle as '<bag>_env_sum_35_turbo.json'.
for file_name in files:
    if '.json' in file_name:
        continue  # only the pickled key-frame bundles are processed

    print('File Name:', file_name)
    file_path = path + file_name
    data = load_pickle(file_path)
    data['camera_info'] = camera_info

    env_summary = []
    num_frames = len(data['rgb'])
    for i in range(num_frames):
        print('Key Frame:', str(i))
        # Slice the per-frame fields, then attach the shared camera parameters.
        keyframe = {key: data[key][i] for key in ('rgb', 'depth', 'odom', 'joint_state')}
        keyframe['camera_info'] = data['camera_info']

        env_rgbd_summary = narr_engine.summarize_env_rgbd(keyframe, vocab_list=vocab_cup)
        env_summary.append(env_rgbd_summary)

    # Written once per bag, after all of its key frames have been summarised.
    env_sum_path = file_path.replace('.pkl', '_env_sum_35_turbo.json')
    with open(env_sum_path, 'w') as f:
        json.dump({'env_sum': env_summary}, f)

File Name: rosbag2_2024_05_15-01_44_42.pkl
Key Frame: 0

0: 1280x704 2 chairs, 1 sofa, 22.9ms
Speed: 3.1ms preprocess, 22.9ms inference, 0.5ms postprocess per image at shape (1, 3, 1280, 704)

0: 1280x704 54 objects, 36.9ms
Speed: 1.8ms preprocess, 36.9ms inference, 1.8ms postprocess per image at shape (1, 3, 1280, 704)

0: 1280x704 54 objects, 36.7ms
Speed: 1.9ms preprocess, 36.7ms inference, 2.0ms postprocess per image at shape (1, 3, 1280, 704)

0: 1280x704 54 objects, 36.7ms
Speed: 2.2ms preprocess, 36.7ms inference, 1.9ms postprocess per image at shape (1, 3, 1280, 704)
Key Frame: 1

0: 1280x704 2 chairs, 1 sofa, 1 coffee table, 22.9ms
Speed: 1.8ms preprocess, 22.9ms inference, 0.5ms postprocess per image at shape (1, 3, 1280, 704)

0: 1280x704 52 objects, 36.7ms
Speed: 1.8ms preprocess, 36.7ms inference, 1.8ms postprocess per image at shape (1, 3, 1280, 704)

0: 1280x704 52 objects, 36.1ms
Speed: 2.2ms preprocess, 36.1ms inference, 1.9ms postprocess per image at shape (1, 3, 1280

#### Internal

In [11]:
# Summarise the robot's internal state for every key frame of every bag and
# store the per-bag list next to the pickle as '<bag>_internal_sum.json'.
internal_summaries = []  # stays defined even when `files` is empty
for file_name in files:
    if '.json' not in file_name:
        print('File Name:', file_name)
        file_path = path + file_name
        data = load_pickle(file_path)
        data['camera_info'] = camera_info
        # Start a fresh list for each bag. Accumulating across bags would write
        # earlier bags' frames into this bag's JSON and misalign the frame
        # indices with the per-bag environment summaries.
        internal_summaries = []
        for i in range(len(data['rgb'])):
            print('Key Frame:', str(i))
            keyframe = {'rgb': data['rgb'][i],
                        'depth': data['depth'][i],
                        'odom': data['odom'][i],
                        'joint_state': data['joint_state'][i],
                        'camera_info': data['camera_info']}
            internal_summary = narr_engine.summarize_internal(keyframe)
            internal_summaries.append(internal_summary)
            # Rewrite the file after every frame so an interrupted run still
            # leaves the frames summarised so far on disk.
            with open(file_path.replace('.pkl', '_internal_sum.json'), 'w') as f:
                json.dump({'internal_sum': internal_summaries}, f)

File Name: rosbag2_2024_05_15-01_10_46.pkl
Key Frame: 0
Key Frame: 1
Key Frame: 2
Key Frame: 3
Key Frame: 4
Key Frame: 5
Key Frame: 6
Key Frame: 7
Key Frame: 8
Key Frame: 9
Key Frame: 10
Key Frame: 11
Key Frame: 12
Key Frame: 13
Key Frame: 14
Key Frame: 15
Key Frame: 16
Key Frame: 17
Key Frame: 18
Key Frame: 19
Key Frame: 20
Key Frame: 21
Key Frame: 22
Key Frame: 23
Key Frame: 24
Key Frame: 25
Key Frame: 26
Key Frame: 27
Key Frame: 28
Key Frame: 29
Key Frame: 30
Key Frame: 31
Key Frame: 32
Key Frame: 33
Key Frame: 34
Key Frame: 35
Key Frame: 36
Key Frame: 37
Key Frame: 38
Key Frame: 39
Key Frame: 40
Key Frame: 41
Key Frame: 42
Key Frame: 43
Key Frame: 44
Key Frame: 45
File Name: rosbag2_2024_05_15-01_19_23.pkl
Key Frame: 0
Key Frame: 1
Key Frame: 2
Key Frame: 3
Key Frame: 4
Key Frame: 5
Key Frame: 6
Key Frame: 7
Key Frame: 8
Key Frame: 9
Key Frame: 10
Key Frame: 11
Key Frame: 12
Key Frame: 13
Key Frame: 14
Key Frame: 15
Key Frame: 16
Key Frame: 17
Key Frame: 18
Key Frame: 19
Key Frame:

KeyboardInterrupt: 

#### Planning

In [None]:
# Summarise the task-planning state for every key frame of every bag and store
# the per-bag list next to the pickle as '<bag>_planning_sum.json'.
planning_summaries = []  # stays defined even when `files` is empty
for file_name in files:
    if '.json' not in file_name:
        print('File Name:', file_name)
        file_path = path + file_name
        data = load_pickle(file_path)
        data['camera_info'] = camera_info
        # Start a fresh list for each bag. Accumulating across bags would write
        # earlier bags' frames into this bag's JSON and misalign the frame
        # indices with the per-bag environment summaries.
        planning_summaries = []
        for i in range(len(data['rgb'])):
            print('Key Frame:', str(i))
            keyframe = {'rgb': data['rgb'][i],
                        'depth': data['depth'][i],
                        'odom': data['odom'][i],
                        'joint_state': data['joint_state'][i],
                        'state': data['state'][i],
                        'state_history': data['state_history'][i],
                        'camera_info': data['camera_info']}

            planning_summary = narr_engine.summarize_planning(keyframe, task_name=task_name, task_desc=task_desc)
            planning_summaries.append(planning_summary)

            # Rewrite the file after every frame so an interrupted run still
            # leaves the frames summarised so far on disk.
            with open(file_path.replace('.pkl', '_planning_sum.json'), 'w') as f:
                json.dump({'planning_sum': planning_summaries}, f)

#### Narration (info)

In [None]:
task_name = 'Pick and place the cup in the sink'
# task_desc = 'The robot should navigate to the table with dirty cup on it, pick up the cup, find the sink, navigate to the sink and place the cup in the sink.'
# NOTE(review): `task_name` is reassigned here, but narrate_frame below receives
# `task` as its first argument — confirm which identifier the engine expects.


def _read_json(json_path):
    """Parse and return the JSON document stored at ``json_path``."""
    with open(json_path, 'r') as json_file:
        return json.load(json_file)


# Narrate the first bag for which environment, internal and planning summaries
# all exist on disk.
for file_name in files:
    if '.pkl' not in file_name:
        continue
    bag_name = file_name.replace('.pkl', '')

    # `files` only lists the pickles, so the summary JSONs must be looked up on
    # disk; testing membership in `files` would never match.
    env_sum_path = next(
        (candidate
         for candidate in (path + bag_name + '_env_sum.json',
                           path + bag_name + '_env_sum_35_turbo.json')
         if isfile(candidate)),
        None,
    )
    internal_sum_path = path + bag_name + '_internal_sum.json'
    planning_sum_path = path + bag_name + '_planning_sum.json'
    if env_sum_path is None or not (isfile(internal_sum_path) and isfile(planning_sum_path)):
        continue  # this bag has not been fully summarised yet

    print('File Name:', file_name)
    env_summaries = _read_json(env_sum_path)['env_sum']
    internal_summaries = _read_json(internal_sum_path)['internal_sum']
    planning_summaries = _read_json(planning_sum_path)['planning_sum']

    if not (len(env_summaries) == len(internal_summaries) == len(planning_summaries)):
        # zip() below stops at the shortest list instead of raising IndexError.
        print('Warning: summary lengths differ (env/internal/planning):',
              len(env_summaries), len(internal_summaries), len(planning_summaries))

    narration_history = []
    frame_summaries = zip(env_summaries, internal_summaries, planning_summaries)
    for i, (env_summary, internal_summary, planning_summary) in enumerate(frame_summaries):
        print('Frame # / Timestamp:', i)
        # NOTE(review): the constraints text loaded at the top of the notebook
        # is not passed here (constrains=None) — confirm this is intended.
        narration = narr_engine.narrate_frame(task, env_summary=env_summary, internal_summary=internal_summary, planning_summary=planning_summary, constrains=None, narration_history=narration_history, mode='info')
        narration_history.append(narration)
        print(narration)
        print('\n')
        # with open(path + bag_name + '_narration.json', 'w') as f:
        #     json.dump({'narration': narration_history}, f)

    # Stop after one narrated bag. The break sits after the narration, so bags
    # skipped for missing summaries no longer end the loop early.
    break