In [4]:
import requests
import json
import pyarrow as pa
import base64

In [116]:


# Ask the service listening on 127.0.0.1:11635 to create a new dataset table.
# The request carries the table name, the LanceDB location, and the Arrow
# schema (serialized to bytes, then base64-encoded so it can travel in JSON).
#
# NOTE: this cell is not idempotent. Re-running it once the table exists makes
# the server answer HTTP 500 with a "Dataset already exists" message.
url = "http://127.0.0.1:11635/create"

# Per-episode metadata columns plus one fixed-size embedding column.
schema = pa.schema(
    [
        pa.field("disclaimer", pa.string()),
        pa.field("file_path", pa.string()),
        pa.field("n_transitions", pa.int32()),
        pa.field("success", pa.bool_()),
        pa.field("success_labeled_by", pa.string()),
        pa.field("episode_path", pa.string()),
        # The embedding column has to be named "vector" and its dimension
        # has to be declared explicitly (768 here).
        pa.field("vector", pa.list_(pa.float32(), 768)),
        pa.field("language_instruction", pa.string()),
    ],
    # Maps a text column to an embedding model name.
    # NOTE(review): presumably the server uses this to decide which column to
    # embed into "vector" and with which model -- confirm against the server.
    metadata={
        "language_instruction": "nomic-ai/nomic-embed-text-v1.5",
    },
)

# bytes -> base64 text, because the payload below is sent as JSON.
serialized_schema = schema.serialize().to_pybytes()
encoded_schema = base64.b64encode(serialized_schema).decode("utf-8")

# Define the payload
data = {
    "dataset": "ucsd_pick_and_place_dataset_converted_externally_to_rlds",  # create a dataset table with this name
    "uri": "/Users/haoweichung/Development/CMU/Spring2024/FogROS2-RTX/FogX-Store/_datasets/dataset_db",  # load lancedb from this uri
    "schema": encoded_schema,  # user defined data schema
}

# Send the POST request. Without a timeout, requests waits forever on an
# unresponsive server; (connect, read) limits in seconds keep the cell from
# hanging. Raise the read limit if table creation is legitimately slower.
response = requests.post(url, json=data, timeout=(5, 60))

# Print the response
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not valid JSON (e.g. an HTML/plain-text error page);
    # show the raw text instead of hiding the server's message behind a
    # decode traceback.
    print(response.text)


500
{'message': 'Dataset already exists: /Users/haoweichung/Development/CMU/Spring2024/FogROS2-RTX/FogX-Store/_datasets/dataset_db/ucsd_pick_and_place_dataset_converted_externally_to_rlds.lance, /Users/runner/work/lance/lance/rust/lance/src/dataset.rs:484:27'}


In [117]:
# Display the Arrow schema built above to check its field types and metadata.
schema

disclaimer: string
file_path: string
n_transitions: int32
success: bool
success_labeled_by: string
episode_path: string
vector: fixed_size_list<item: float>[768]
  child 0, item: float
language_instruction: string
-- schema metadata --
language_instruction: 'nomic-ai/nomic-embed-text-v1.5'

In [118]:
import requests

# Ask the service listening on 127.0.0.1:11635 to append the contents of an
# RLDS dataset (local path or remote URI) to an existing dataset table.
url = "http://127.0.0.1:11635/write"

# Define the payload
data = {
    "ds_path": "gs://gresearch/robotics/ucsd_pick_and_place_dataset_converted_externally_to_rlds/0.1.0",  # local or remote rlds dataset location
    "dataset": "ucsd_pick_and_place_dataset_converted_externally_to_rlds",  # The dataset to append new data
    "uri": "/Users/haoweichung/Development/CMU/Spring2024/FogROS2-RTX/FogX-Store/_datasets/dataset_db",  # lancedb uri
}

# Send the POST request. Only the connection attempt is bounded (5 s) so the
# cell fails fast when the server is unreachable. The read side is left
# unbounded (None) because the time needed to ingest the remote dataset is
# not known here.
response = requests.post(url, json=data, timeout=(5, None))

# Print the response
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not valid JSON (e.g. an HTML/plain-text error page);
    # show the raw text instead of hiding the server's message behind a
    # decode traceback.
    print(response.text)


200
{'message': 'Dataset written successfully'}


# Let's read some data from the database

In [114]:
import lancedb

# Connect to the on-disk LanceDB directory and open the dataset table by name.
uri = "/Users/haoweichung/Development/CMU/Spring2024/FogROS2-RTX/FogX-Store/_datasets/dataset_db"
table_name = "ucsd_pick_and_place_dataset_converted_externally_to_rlds"

db = lancedb.connect(uri)
tbl = db.open_table(table_name)

In [119]:
# Convert the whole table to a pyarrow.Table and display its schema plus a
# preview of each column.
tbl.to_arrow()

pyarrow.Table
disclaimer: string
file_path: string
n_transitions: int32
success: bool
success_labeled_by: string
episode_path: string
vector: fixed_size_list<item: float>[768]
  child 0, item: float
language_instruction: string
----
disclaimer: [["b'reward is noisy for this dataset partition'"],["b'none'"],...,["b'reward is noisy for this dataset partition'"],["b'reward is noisy for this dataset partition'"]]
file_path: [["b'data/train/tabletop_uncurated/209.p'"],["b'data/train/tabletop_base/515.p'"],...,["b'data/train/tabletop_uncurated/333.p'"],["b'data/train/tabletop_uncurated/49.p'"]]
n_transitions: [[50],[50],...,[50],[50]]
success: [[true],[false],...,[false],[true]]
success_labeled_by: [["b'classifier'"],["b'classifier'"],...,["b'classifier'"],["b'classifier'"]]
episode_path: [["./../../_datasets/parquet/ucsd_pick_and_place_dataset_converted_externally_to_rlds/50b05ce3-2793-44cb-a0a0-fcf78ed371b2/steps.parquet"],["./../../_datasets/parquet/ucsd_pick_and_place_dataset_converted_e

# Find the 10 episodes most similar to a text query, keeping only those where success is true

In [120]:
from fastembed import TextEmbedding

# Embed the free-text query with the same model name that the table schema's
# metadata lists for "language_instruction".
# NOTE(review): the nomic-embed-text model card describes task prefixes such
# as "search_query: " -- confirm whether the stored vectors were built with a
# prefix before adding one here.
query_text = "pick up the red object from the table"

model = TextEmbedding(model_name="nomic-ai/nomic-embed-text-v1.5")

# embed() is consumed through list(); a single input yields a single vector.
embeddings = list(model.embed(query_text))
query = embeddings[0]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [121]:
# Nearest-neighbour search on the query embedding, restricted to successful
# episodes. prefilter=True applies the SQL-style filter before the vector
# search instead of filtering its results afterwards.
top_k = 10
success_filter = "success = true"

search_builder = tbl.search(query)
search_builder = search_builder.limit(top_k)
search_builder = search_builder.where(success_filter, prefilter=True)

result = search_builder.to_arrow()

In [122]:
# Display the search hits; besides the table's own columns the result carries
# a `_distance` column.
result

pyarrow.Table
disclaimer: string
file_path: string
n_transitions: int32
success: bool
success_labeled_by: string
episode_path: string
vector: fixed_size_list<item: float>[768]
  child 0, item: float
language_instruction: string
_distance: float
----
disclaimer: [["b'none'","b'reward is noisy for this dataset partition'","b'reward is noisy for this dataset partition'","b'reward is noisy for this dataset partition'","b'reward is noisy for this dataset partition'","b'none'","b'reward is noisy for this dataset partition'","b'reward is noisy for this dataset partition'","b'reward is noisy for this dataset partition'","b'reward is noisy for this dataset partition'"]]
file_path: [["b'data/train/tabletop_base/157.p'","b'data/train/tabletop_uncurated/494.p'","b'data/train/tabletop_uncurated/533.p'","b'data/train/tabletop_uncurated/76.p'","b'data/train/tabletop_uncurated/166.p'","b'data/train/tabletop_base/585.p'","b'data/train/tabletop_uncurated/442.p'","b'data/train/tabletop_uncurated/463.p'",

In [97]:
import pyarrow.parquet as pq

# Load the step-level parquet file of a single episode into a pyarrow Table.
path = "/Users/haoweichung/Development/CMU/Spring2024/FogROS2-RTX/FogX-Store/_datasets/parquet/ucsd_pick_and_place_dataset_converted_externally_to_rlds/2e6a9d51-1eb7-4e2d-a6ba-54a771e8c2f1/steps.parquet"
table = pq.read_table(source=path)

In [98]:
# Inspect the column names and types of the steps parquet file loaded above.
table.schema

action: list<element: float>
  child 0, element: float
discount: float
is_first: bool
is_last: bool
is_terminal: bool
language_embedding: list<element: float>
  child 0, element: float
language_instruction: string
image: binary
reward: float