In [1]:
import os
import glob
import json
import warnings
import pymysql
import getpass as gp
import tensorflow as tf
from tensorflow.core.example import example_pb2
import warnings
warnings.filterwarnings('ignore')

2025-06-10 09:43:18.683325: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-10 09:43:18.710055: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749516198.717471   19339 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749516198.720139   19339 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-10 09:43:18.747908: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
# 계정 정보 사전
accounts = {
    'root':   {'host': '127.0.0.1'},
    'reader': {'host': '127.0.0.1'},
    'writer': {'host': '127.0.0.1'},
    'edward': {'host': '192.168.0.27'},
    # 여기에 계정 추가 가능
}

def connect_to_db():
    user = input("ID: ").strip()
    if user not in accounts:
        raise ValueError("등록되지 않은 사용자 ID")

    password = gp.getpass("비밀번호: ")
    host = accounts[user]['host']

    # DB 이름을 반드시 입력받도록
    while True:
        db_name = input("접속할 DB 이름을 입력하세요: ").strip()
        if db_name:
            break
        print("DB 이름은 비어 있을 수 없습니다. 다시 입력해주세요.")

    conn = pymysql.connect(
        host=host,
        user=user,
        password=password,
        db=db_name,
        charset='utf8mb4'
    )
    return conn


In [3]:
def get_bytes(feature, key):
    if key in feature and feature[key].bytes_list.value:
        return feature[key].bytes_list.value[0].decode('utf-8')
    return None

def get_floats(feature, key):
    return list(feature[key].float_list.value) if key in feature else []

In [4]:
conn = connect_to_db()
cur = conn.cursor()
cur.execute("USE droid_meta;")

0

In [5]:
cur.execute("""
CREATE TABLE IF NOT EXISTS dataset_versions (
  dataset_name   VARCHAR(255) PRIMARY KEY,
  version        VARCHAR(64) NOT NULL,
  file_format    VARCHAR(64),
  release_notes  TEXT
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
""")


0

In [6]:
cur.execute("""
CREATE TABLE IF NOT EXISTS splits (
  split_id     INT AUTO_INCREMENT PRIMARY KEY,
  dataset_name VARCHAR(255) NOT NULL,
  split_name   VARCHAR(255) NOT NULL,
  num_bytes    BIGINT,
  num_shards   INT,
  UNIQUE KEY ux_dataset_split (dataset_name, split_name),
  FOREIGN KEY (dataset_name)
    REFERENCES dataset_versions(dataset_name)
      ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
""")
cur.execute("""
CREATE TABLE IF NOT EXISTS shards (
  shard_id     INT AUTO_INCREMENT PRIMARY KEY,
  split_id     INT        NOT NULL,
  shard_index  INT        NOT NULL,
  num_examples INT        NOT NULL,
  filepath     TEXT       NOT NULL,
  UNIQUE KEY ux_split_shard (split_id, shard_index),
  FOREIGN KEY (split_id)
    REFERENCES splits(split_id)
      ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
""")


0

In [7]:
cur.execute("""
CREATE TABLE IF NOT EXISTS episodes (
  episode_id      VARCHAR(255) PRIMARY KEY,
  file_path       TEXT        NOT NULL,
  recording_path  TEXT        NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
""")
cur.execute("""
CREATE TABLE IF NOT EXISTS steps (
  step_id         BIGINT      PRIMARY KEY AUTO_INCREMENT,
  episode_id      VARCHAR(255) NOT NULL,
  step_index      INT         NOT NULL,
  discount        FLOAT,
  is_first        TINYINT(1),
  is_last         TINYINT(1),
  is_terminal     TINYINT(1),
  reward          FLOAT,
  lang_inst_1     TEXT,
  lang_inst_2     TEXT,
  lang_inst_3     TEXT,
  action          JSON,
  action_dict     JSON,
  obs_cart_pos    JSON,
  UNIQUE KEY ux_episode_step (episode_id, step_index),
  FOREIGN KEY (episode_id)
    REFERENCES episodes(episode_id)
      ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
""")


0

In [8]:
with open("dataset_info.json", "r") as f:
    ds_info = json.load(f)

dataset_name = ds_info["name"]
version      = ds_info["version"]
file_format  = ds_info["fileFormat"]
release_notes = ds_info["releaseNotes"].get(version, "")

In [9]:
cur.execute("""
INSERT INTO dataset_versions (dataset_name, version, file_format, release_notes)
VALUES (%s,%s,%s,%s)
ON DUPLICATE KEY UPDATE version=VALUES(version), file_format=VALUES(file_format)
""", (dataset_name, version, file_format, release_notes))

1

In [10]:
for split in ds_info["splits"]:
    name = split["name"]
    num_bytes = int(split.get("numBytes", 0))
    lengths = list(map(int, split["shardLengths"]))
    num_shards = len(lengths)

    cur.execute("""
    INSERT INTO splits (dataset_name, split_name, num_bytes, num_shards)
    VALUES (%s,%s,%s,%s)
    ON DUPLICATE KEY UPDATE num_bytes=VALUES(num_bytes), num_shards=VALUES(num_shards)
    """, (dataset_name, name, num_bytes, num_shards))

    cur.execute(
        "SELECT split_id FROM splits WHERE dataset_name=%s AND split_name=%s",
        (dataset_name, name)
    )
    split_id = cur.fetchone()[0]

    template = split["filepathTemplate"]
    for idx, cnt in enumerate(lengths):
        fp = template.replace("{DATASET}", dataset_name)\
                     .replace("{SPLIT}", name)\
                     .replace("{FILEFORMAT}", file_format)\
                     .replace("{SHARD_X_OF_Y}", f"{idx:05d}-of-{num_shards:05d}")
        cur.execute("""
        INSERT INTO shards (split_id, shard_index, num_examples, filepath)
        VALUES (%s,%s,%s,%s)
        ON DUPLICATE KEY UPDATE num_examples=VALUES(num_examples)
        """, (split_id, idx, cnt, fp))

In [11]:
shard_pattern = f"{dataset_name}-train.{file_format}-*-of-{num_shards:05d}"
shard_paths   = sorted(glob.glob(shard_pattern))
print(f"✔ Found {len(shard_paths)} shards for pattern: {shard_pattern}")

for shard_fp in shard_paths:
    ds = tf.data.TFRecordDataset(shard_fp)
    for i, raw in enumerate(ds):
        ex = example_pb2.Example()
        ex.ParseFromString(raw.numpy())
        f = ex.features.feature

        # episode_metadata
        ep_id = get_bytes(f, "episode_metadata/file_path")
        rec   = get_bytes(f, "episode_metadata/recording_folderpath")
        if ep_id and rec:
            cur.execute("""
            INSERT IGNORE INTO episodes (episode_id, file_path, recording_path)
            VALUES (%s,%s,%s)
            """, (ep_id, shard_fp, rec))

        # step-level metadata
        # scalars / bools
        discount = f["steps/discount"].float_list.value[0] if "steps/discount" in f else None
        is_first = int(f["steps/is_first"].int64_list.value[0])    if "steps/is_first" in f    else 0
        is_last  = int(f["steps/is_last"].int64_list.value[0])     if "steps/is_last" in f     else 0
        is_term  = int(f["steps/is_terminal"].int64_list.value[0]) if "steps/is_terminal" in f else 0
        reward   = f["steps/reward"].float_list.value[0] if "steps/reward" in f else None

        # text instructions
        li1 = get_bytes(f, "steps/language_instruction")
        li2 = get_bytes(f, "steps/language_instruction_2")
        li3 = get_bytes(f, "steps/language_instruction_3")

        # vectors
        action = json.dumps(get_floats(f, "steps/action"))

        # action_dict subfields
        adict = {}
        for key in f:
            if key.startswith("steps/action_dict/"):
                sub = key.split("/", 2)[-1]
                adict[sub] = get_floats(f, key)
        action_dict = json.dumps(adict)

        obs_cart = json.dumps(get_floats(f, "steps/observation/cartesian_position"))

        # insert step
        cur.execute("""
        INSERT INTO steps
          (episode_id, step_index, discount, is_first, is_last, is_terminal,
           reward, lang_inst_1, lang_inst_2, lang_inst_3,
           action, action_dict, obs_cart_pos)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        """, (
            ep_id, i, discount, is_first, is_last, is_term,
            reward, li1, li2, li3,
            action, action_dict, obs_cart
        ))
    print(f"→ Inserted shard: {os.path.basename(shard_fp)}")

print("✅ All metadata loaded into droid_meta DB!")

✔ Found 31 shards for pattern: r2d2_faceblur-train.tfrecord-*-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00000-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00001-of-00031


W0000 00:00:1749516213.514790   19339 gpu_device.cc:2433] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.
W0000 00:00:1749516213.518754   19339 gpu_device.cc:2433] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.
I0000 00:00:1749516213.602859   19339 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13188 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 5070 Ti, pci bus id: 0000:01:00.0, compute capability: 12.0
2025-06-10 09:43:33.699108: I tensorflow/core/kernels/data/tf_record_dataset_op.cc:370] TFRecordDataset `buffer_size` is unspecified, default to 262144
2025-06-10 09:43:33.729676: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End o

→ Inserted shard: r2d2_faceblur-train.tfrecord-00002-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00003-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00004-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00005-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00006-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00007-of-00031


2025-06-10 09:43:33.910087: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-06-10 09:43:34.052550: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


→ Inserted shard: r2d2_faceblur-train.tfrecord-00008-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00009-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00010-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00011-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00012-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00013-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00014-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00015-of-00031


2025-06-10 09:43:34.588171: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


→ Inserted shard: r2d2_faceblur-train.tfrecord-00016-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00017-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00018-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00019-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00020-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00021-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00022-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00023-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00024-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00025-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00026-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00027-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00028-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00029-of-00031
→ Inserted shard: r2d2_faceblur-train.tfrecord-00030-of-00031
✅ All metadata loaded into droid_meta DB!


In [12]:
cur.execute("SELECT COUNT(*) AS episode_count FROM episodes;")

1

In [13]:
cur.execute("SELECT * FROM episodes LIMIT 10;")

10

In [14]:
import pandas as pd

# 1) 전체 테이블 목록을 DataFrame 으로 가져오기
tables_df = pd.read_sql("SHOW TABLES;", conn)
display(tables_df)

# 2) 각 테이블을 DataFrame 으로 로드해서 보기
for tbl in tables_df.iloc[:,0]:
    print(f"--- Table: {tbl} ---")
    df = pd.read_sql(f"SELECT * FROM {tbl};", conn)
    display(df)   # 상위 5개 행만 표시


Unnamed: 0,Tables_in_droid_meta
0,dataset_versions
1,episodes
2,shards
3,splits
4,steps


--- Table: dataset_versions ---


Unnamed: 0,dataset_name,version,file_format,release_notes
0,r2d2_faceblur,1.0.0,tfrecord,Initial release.


--- Table: episodes ---


Unnamed: 0,episode_id,file_path,recording_path
0,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...,r2d2_faceblur-train.tfrecord-00023-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...
1,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...,r2d2_faceblur-train.tfrecord-00027-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...
2,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...,r2d2_faceblur-train.tfrecord-00001-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...
3,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...,r2d2_faceblur-train.tfrecord-00019-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...
4,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...,r2d2_faceblur-train.tfrecord-00007-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...
...,...,...,...
95,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...,r2d2_faceblur-train.tfrecord-00004-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...
96,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...,r2d2_faceblur-train.tfrecord-00013-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...
97,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...,r2d2_faceblur-train.tfrecord-00012-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...
98,/nfs/kun2/datasets/r2d2/r2d2-data-full/WEIRD/s...,r2d2_faceblur-train.tfrecord-00024-of-00031,/nfs/kun2/datasets/r2d2/r2d2-data-full/WEIRD/s...


--- Table: shards ---


Unnamed: 0,shard_id,split_id,shard_index,num_examples,filepath
0,63,3,0,2,r2d2_faceblur-train.tfrecord-00000-of-00031
1,64,3,1,5,r2d2_faceblur-train.tfrecord-00001-of-00031
2,65,3,2,4,r2d2_faceblur-train.tfrecord-00002-of-00031
3,66,3,3,2,r2d2_faceblur-train.tfrecord-00003-of-00031
4,67,3,4,3,r2d2_faceblur-train.tfrecord-00004-of-00031
5,68,3,5,2,r2d2_faceblur-train.tfrecord-00005-of-00031
6,69,3,6,1,r2d2_faceblur-train.tfrecord-00006-of-00031
7,70,3,7,4,r2d2_faceblur-train.tfrecord-00007-of-00031
8,71,3,8,3,r2d2_faceblur-train.tfrecord-00008-of-00031
9,72,3,9,1,r2d2_faceblur-train.tfrecord-00009-of-00031


--- Table: splits ---


Unnamed: 0,split_id,dataset_name,split_name,num_bytes,num_shards
0,3,r2d2_faceblur,train,2192594069,31


--- Table: steps ---


Unnamed: 0,step_id,episode_id,step_index,discount,is_first,is_last,is_terminal,reward,lang_inst_1,lang_inst_2,lang_inst_3,action,action_dict,obs_cart_pos
0,201,/nfs/kun2/datasets/r2d2/r2d2-data-full/RAIL/su...,0,1.0,1,0,0,0.0,Put the marker in the pot,Get the marker from the table and put it insid...,Put the marker inside the silver pot,"[0.3835748434066773, 0.07346952706575394, 0.55...","{""joint_position"": [0.14362852275371552, -0.59...","[0.3833988308906555, 0.07344377785921097, 0.54..."
1,202,/nfs/kun2/datasets/r2d2/r2d2-data-full/RPL/suc...,1,1.0,1,0,0,0.0,Put the candy bar on the left side of the firs...,,,"[0.42742159962654114, 0.17553342878818512, 0.5...","{""joint_position"": [0.11570542305707932, -0.29...","[0.42734912037849426, 0.17551840841770172, 0.5..."
2,203,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...,0,1.0,1,0,0,0.0,Put one green sachet in the grey bowl.,,,"[0.3836604654788971, -0.009820712730288506, 0....","{""joint_position"": [-0.005477741360664368, -0....","[0.3836599588394165, -0.00982198305428028, 0.4..."
3,204,/nfs/kun2/datasets/r2d2/r2d2-data-full/RPL/suc...,1,1.0,1,0,0,0.0,Place the pack of doritos inside the sink,,,"[0.38447052240371704, 0.07318933308124542, 0.5...","{""joint_position"": [0.11285721510648727, -0.53...","[0.384344220161438, 0.07318230718374252, 0.515..."
4,205,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...,2,1.0,1,0,0,0.0,Move the sharpie to the table,,,"[0.30847686529159546, 0.09625189006328584, 0.5...","{""joint_position"": [0.10213392227888109, -0.80...","[0.3084981143474579, 0.09617278724908827, 0.55..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,296,/nfs/kun2/datasets/r2d2/r2d2-data-full/TRI/suc...,1,1.0,1,0,0,0.0,,,,"[0.333864688873291, 0.05677780136466026, 0.403...","{""joint_position"": [0.0154561223462224, -0.573...","[0.3338628113269806, 0.056786153465509415, 0.4..."
96,297,/nfs/kun2/datasets/r2d2/r2d2-data-full/IRIS/su...,0,1.0,1,0,0,0.0,Get the marker from the table and place it in ...,Put the red marker in the cup,Take the marker from the table and put it in t...,"[0.3845632076263428, -0.1679576337337494, 0.52...","{""joint_position"": [-0.08209364861249924, -0.4...","[0.3845545053482056, -0.16795629262924194, 0.5..."
97,298,/nfs/kun2/datasets/r2d2/r2d2-data-full/IPRL/su...,1,1.0,1,0,0,0.0,Pick the two bottles on the stove and place th...,"Pick the bottles on the stove one by one, put ...",Move the spice bottles on the stove to the cou...,"[0.39935746788978577, 0.05866711959242821, 0.5...","{""joint_position"": [0.007885289378464222, -0.4...","[0.39935749769210815, 0.058667995035648346, 0...."
98,299,/nfs/kun2/datasets/r2d2/r2d2-data-full/AUTOLab...,2,1.0,1,0,0,0.0,Take the blocks out of the gift disappointed,Remove the blocks from the bowl one by one and...,Remove the colorful blocks from the black bowl...,"[0.3173839747905731, -0.017680292949080467, 0....","{""joint_position"": [0.06782697141170502, -0.72...","[0.3171532154083252, -0.017610549926757812, 0...."
