In [2]:
import json
from tqdm import tqdm
from prompts import system_prompt
import datasets
import os
from PIL import Image

In [3]:
# Layout of the assistant reply used as the training target.
# The four `{}` placeholders are filled positionally with `str.format`, in
# section order: basic information, critical objects, decision analysis,
# meta action. The first two sit inside ```json fences.
str_template = """1. Basic Information
```json
{}
```

2. Critical Objects
```json
{}
```

3. Decision Analysis
{}

4. Meta Action
{}"""

In [4]:
def _normalize_bbox(bbox, width=3840, height=2160, scale=1000):
    """Rescale a flat list of pixel coordinates onto a ``0..scale`` integer grid.

    Values at even positions are divided by ``width`` and values at odd
    positions by ``height`` before being multiplied by ``scale`` and truncated
    with ``int``. A new list is returned; ``bbox`` itself is not modified.

    NOTE(review): the 3840x2160 defaults are presumably the resolution the
    annotations were drawn on -- confirm against the raw images.
    """
    return [
        int(value / (width if index % 2 == 0 else height) * scale)
        for index, value in enumerate(bbox)
    ]


results = []
json_file = "../../downloads/datasets/raw_annotations/positive/bus-only_lane/bus-only_lane_annotations_v5.json"
image_dir = "../../downloads/datasets/raw_annotations/positive/bus-only_lane/images"
output_dir = "images"

# Read the annotations through a context manager so the handle is closed, and
# pin the encoding instead of relying on the platform default.
with open(json_file, encoding="utf-8") as annotation_file:
    raw_data = json.load(annotation_file)

# The output folder only needs to be created once, not on every iteration.
os.makedirs(output_dir, exist_ok=True)

for current_data in tqdm(raw_data):
    # Only the first annotation entry of each sample is used.
    first_annotation = current_data["annotations"][0]
    image_id = first_annotation["image_id"]

    part1 = {
        "scene_summary": current_data["scene_summary"],
        "weather": current_data["weather"],
        "time": current_data["time"],
        "road_environment": current_data["road_environment"],
        "ego_lane_position": current_data["ego_lane_position"],
    }
    part2 = [
        {
            "object_name": ann["static_attributes"],
            # Build a fresh list so the loaded raw annotations stay untouched.
            "2d_bbox": _normalize_bbox(ann["2d_bbox"]),
            "description": ann["influence"],
        }
        for ann in first_annotation["key_object_infos"]
    ]
    part3 = current_data["decision_description"]
    part4 = current_data["meta_action"][0]

    # Sections 1 and 2 sit inside ```json fences in the template, so serialise
    # them as real JSON; formatting the dict/list directly would emit Python
    # repr (single quotes, None/True/False), which is not valid JSON.
    response = str_template.format(
        json.dumps(part1, ensure_ascii=False),
        json.dumps(part2, ensure_ascii=False),
        part3,
        part4,
    )

    image_path = os.path.join(image_dir, image_id)
    # Close the source file as soon as the converted, resized copy exists.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB").resize((448 * 2, 448))
    # save it to the output folder
    image.save(os.path.join(output_dir, image_id))

    messages = [
        {"role": "system", "content": [{"text": system_prompt, "type": "text"}]},
        {"role": "user", "content": [{"text": None, "type": "image"}]},
        {"role": "assistant", "content": [{"text": response, "type": "text"}]},
    ]
    results.append({"images": [image_id], "messages": messages})

100%|██████████| 57/57 [00:03<00:00, 17.06it/s]


In [5]:
# Wrap the list of samples under a single "train" key before building the
# dataset. `results` is rebound from a list to a dict here, so the guard keeps
# this cell safe to re-run on its own: wrapping twice would nest the records
# one level deeper.
if not isinstance(results, dict):
    results = {"train": results}
# `from_dict` maps each key to a column, so `d` has one column named "train"
# holding one record per sample.
d = datasets.Dataset.from_dict(results)

In [7]:
# Spot-check the first record: fetch row 0, then read its "train" field.
d[0]["train"]

{'images': ['20230811083159.060417_FrontCam02.jpeg'],
 'messages': [{'content': [{'text': 'You are a smart assistant for the autonomous driving car. Your task is to analyze the some needed information based on the user given photo taken from the camera on the ego car. The user will only provide you image without further information. You should read carefully to get the idea what to reply in this system prompt.\n\n# Task Requirement\n1. Basic Information\n- Scene Summary: Provide a brief summary of the scene in the photo.\n- Weather: weather condition of the environment in the photo. It should be one of the following: sunny, cloudy, rainy, snowy, foggy.\n- Time: time of the day in the photo. It should be described in string. For example: morning, afternoon, evening, night.\n- Road Environment: environment of the road in the photo. It should be one of the following: urban, rural, highway.\n- Ego Lane Position: You should describe the lane position of the ego car. Consider all the lanes a