## Import Libraries

In [39]:
import pandas as pd
from IPython.display import display, Image

import os
import json

from utils import *

## Important Paths

In [40]:
# LingoQA Paths
# Root folder of the LingoQA dataset; every LingoQA sub-path below is derived from it.
LINGO_DATASET_PATH = './datasets/lingoqa_dataset'
LINGO_ACTION_PATH = os.path.join(LINGO_DATASET_PATH, 'action')
LINGO_SCENERY_PATH = os.path.join(LINGO_DATASET_PATH, 'scenery')
LINGO_EVAL_PATH = os.path.join(LINGO_DATASET_PATH, 'evaluation')
LINGO_IMAGES_PATH = os.path.join(LINGO_DATASET_PATH, 'images')
LINGO_TRAIN_PATH = os.path.join(LINGO_DATASET_PATH, 'train')
LINGO_VAL_PATH = os.path.join(LINGO_DATASET_PATH, 'val')

# DriveGPT4 Paths
# Root folder of the DriveGPT4 (BDD-X) dataset and the folder holding its selected frames.
DRIVE_BDDX_DATASET_PATH = './datasets/drivegpt4_dataset'
DRIVE_BDDX_IMAGES_PATH = os.path.join(DRIVE_BDDX_DATASET_PATH, 'BDD_X_imgs_select')

# Our Paths
# Root folder for the datasets produced by this project.
OUR_DATASET_PATH = './our_datasets'
# Backward-compatible alias: the constant was originally spelled with a
# mixed-case suffix, so keep the old name working for any cell that still uses it.
OUR_DATASET_Path = OUR_DATASET_PATH
OUR_LINGO_DATASET_PATH = os.path.join(OUR_DATASET_PATH, 'lingoqa_dataset')

# Analyze Datasets

## Analyze LingoQA Dataset

### Analyze Action Data

In [3]:
# Load the LingoQA action training split, report its row count and preview two rows.
action_train_file = os.path.join(LINGO_ACTION_PATH, 'train.parquet')
df_action = pd.read_parquet(action_train_file)
print(len(df_action))
df_action.head(2)

265323


Unnamed: 0,question_id,segment_id,images,question,answer
0,959d64df1f47dd115fb4ed14997106b3,a9f0e311b0c6f46a9cc7cb923234e60a,[images/train/a9f0e311b0c6f46a9cc7cb923234e60a...,What are you currently doing and why?,I am starting as the zebra crossing becomes cl...
1,6c6e29403bbfc6761b7d9eafbf6d3040,a9f0e311b0c6f46a9cc7cb923234e60a,[images/train/a9f0e311b0c6f46a9cc7cb923234e60a...,What are you paying attention to and why?,I am paying attention to the zebra crossing to...


In [4]:
# Frame paths stored for the first action sample.
df_action['images'].iat[0]

array(['images/train/a9f0e311b0c6f46a9cc7cb923234e60a/0.jpg',
       'images/train/a9f0e311b0c6f46a9cc7cb923234e60a/1.jpg',
       'images/train/a9f0e311b0c6f46a9cc7cb923234e60a/2.jpg',
       'images/train/a9f0e311b0c6f46a9cc7cb923234e60a/3.jpg',
       'images/train/a9f0e311b0c6f46a9cc7cb923234e60a/4.jpg'],
      dtype=object)

In [5]:
# Question/answer pair of the first action sample, shown as a tuple.
df_action['question'].iat[0], df_action['answer'].iat[0]

('What are you currently doing and why?',
 'I am starting as the zebra crossing becomes clear once the pedestrians have finished crossing.')

In [11]:
# Inspect one action sample: print its QA pair, then show its frames.
index = 0
sample = df_action.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils);
# it receives the frame paths and the dataset folder they are relative to.
display_images(images, LINGO_ACTION_PATH)

Question: What are you currently doing and why?
Answer: I am starting as the zebra crossing becomes clear once the pedestrians have finished crossing.


In [12]:
# Inspect another action sample: print its QA pair, then show its frames.
index = 25
sample = df_action.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, LINGO_ACTION_PATH)

Question: What are you paying attention to and why?
Answer: I am paying attention to the bus lane on the left because I need to keep a safe distance from it.


In [13]:
# Inspect a third action sample: print its QA pair, then show its frames.
index = 250
sample = df_action.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, LINGO_ACTION_PATH)

Question: How should you adapt your speed in this situation?
Answer: I should maintain my speed as long as the gap in front of me remains clear.


### Analyze Scenery Data

In [14]:
# Load the LingoQA scenery training split, report its row count and preview two rows.
scenery_train_file = os.path.join(LINGO_SCENERY_PATH, 'train.parquet')
df_scenery = pd.read_parquet(scenery_train_file)
print(len(df_scenery))
df_scenery.head(2)

148506


Unnamed: 0,question_id,segment_id,images,question,answer
0,e0c57fef6e10d9d2a813b14cae5c9517,355a614a2263d30aac9cd1505852c4dd,[images/train/355a614a2263d30aac9cd1505852c4dd...,What action are you currently taking as the dr...,I am turning left as the road weaves in that d...
1,088a0fcd840c8e7e545adfc550aa1d9b,355a614a2263d30aac9cd1505852c4dd,[images/train/355a614a2263d30aac9cd1505852c4dd...,Why are you taking this action?,It’s necessary to follow the curvature of the ...


In [15]:
# Frame paths stored for the first scenery sample.
df_scenery['images'].iat[0]

array(['images/train/355a614a2263d30aac9cd1505852c4dd/0.jpg',
       'images/train/355a614a2263d30aac9cd1505852c4dd/1.jpg',
       'images/train/355a614a2263d30aac9cd1505852c4dd/2.jpg',
       'images/train/355a614a2263d30aac9cd1505852c4dd/3.jpg',
       'images/train/355a614a2263d30aac9cd1505852c4dd/4.jpg'],
      dtype=object)

In [16]:
# Question/answer pair of the first scenery sample, shown as a tuple.
df_scenery['question'].iat[0], df_scenery['answer'].iat[0]

('What action are you currently taking as the driver?',
 'I am turning left as the road weaves in that direction and continuing straight when the road is clear.')

In [17]:
# Inspect one scenery sample: print its QA pair, then show its frames.
index = 0
sample = df_scenery.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils);
# it receives the frame paths and the dataset folder they are relative to.
display_images(images, LINGO_SCENERY_PATH)

Question: What action are you currently taking as the driver?
Answer: I am turning left as the road weaves in that direction and continuing straight when the road is clear.


In [18]:
# Inspect another scenery sample: print its QA pair, then show its frames.
index = 150
sample = df_scenery.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, LINGO_SCENERY_PATH)

Question: Do you see any cycle lanes near you?
Answer: No, there are no cycle lanes here.


In [19]:
# Inspect a third scenery sample: print its QA pair, then show its frames.
index = 250
sample = df_scenery.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, LINGO_SCENERY_PATH)

Question: Can you describe the traffic ahead of you in your lane?
Answer: There is a white van crossing the traffic ahead of me in my lane.


### Analyze Evaluation Data

In [20]:
# Load the LingoQA evaluation (validation) split, report its row count and preview two rows.
eval_val_file = os.path.join(LINGO_EVAL_PATH, 'val.parquet')
df_eval = pd.read_parquet(eval_val_file)
print(len(df_eval))
df_eval.head(2)

1000


Unnamed: 0,question_id,segment_id,images,question,answer
0,1a938d25604410ccd63b60285919eaec,f2e4286e94457b8605069190e29f955a,[images/val/f2e4286e94457b8605069190e29f955a/0...,"Is there a traffic light? If yes, what color i...","Yes, green."
1,1a938d25604410ccd63b60285919eaec,f2e4286e94457b8605069190e29f955a,[images/val/f2e4286e94457b8605069190e29f955a/0...,"Is there a traffic light? If yes, what color i...","Yes, a temporary traffic light. It is showing ..."


In [21]:
# Frame paths stored for the first evaluation sample.
df_eval['images'].iat[0]

array(['images/val/f2e4286e94457b8605069190e29f955a/0.jpg',
       'images/val/f2e4286e94457b8605069190e29f955a/1.jpg',
       'images/val/f2e4286e94457b8605069190e29f955a/2.jpg',
       'images/val/f2e4286e94457b8605069190e29f955a/3.jpg',
       'images/val/f2e4286e94457b8605069190e29f955a/4.jpg'], dtype=object)

In [22]:
# Question/answer pair of the first evaluation sample, shown as a tuple.
df_eval['question'].iat[0], df_eval['answer'].iat[0]

('Is there a traffic light? If yes, what color is displayed?', 'Yes, green.')

In [23]:
# Inspect one evaluation sample: print its QA pair, then show its frames.
index = 0
sample = df_eval.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils);
# it receives the frame paths and the dataset folder they are relative to.
display_images(images, LINGO_EVAL_PATH)

Question: Is there a traffic light? If yes, what color is displayed?
Answer: Yes, green.


In [25]:
# Inspect another evaluation sample: print its QA pair, then show its frames.
index = 150
sample = df_eval.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, LINGO_EVAL_PATH)

Question: Are there any zebra crossings ahead?
Answer: There is a zebra crossing directly ahead of us.


In [26]:
# Inspect a third evaluation sample: print its QA pair, then show its frames.
index = 250
sample = df_eval.iloc[index]
question, answer, images = sample['question'], sample['answer'], sample['images']

print(f'Question: {question}\nAnswer: {answer}')
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, LINGO_EVAL_PATH)

Question: Is it possible for you to accelerate in this situation, and if so, why?
Answer: It is not possible to accelerate as there are pedestrians crossing the road.


## Analyze BDD-X DriveGPT4 Dataset

### Training Dataset

In [47]:
# Parse the BDD-X training label file into memory.
train_label_file = os.path.join(DRIVE_BDDX_DATASET_PATH, 'BDD_X_training_label.json')
with open(train_label_file, 'r') as label_fp:
    drive_bddx_train_data = json.load(label_fp)

In [48]:
# Show the first conversation turn of one training record together with its frames.
index = 0
record = drive_bddx_train_data[index]
idx_lst = record['idx_list']
# Frame file names are built as '<image name without .png>_<idx>.png', one per idx_list entry.
frame_stem = record['image'].replace('.png', '')
images = [frame_stem + f'_{i}.png' for i in idx_lst]

turns = record['conversations']
print('Question: {}'.format(turns[0]['value']))
print('Answer: {}'.format(turns[1]['value']))
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, DRIVE_BDDX_IMAGES_PATH)

Question: Current speed of the vehicle: 4.93m/s. The length of the video is 4.00 seconds.  What objects can you see in the video?
<video>
Answer:  In the video, I can see several objects in each frame. In Frame 0, there are multiple cars, a person, and some other cars in subsequent frames. Frame 1 also has cars, a person, and some more cars in the following frames. Frame 2 includes cars, a person, and additional cars and objects in the subsequent frames. Frame 3 has more cars, a person, and additional cars and objects in the following frames. Frame 4 shows cars, a person, and more cars and objects in the video. Frame 5 includes cars, a person, and additional cars and objects. Frame 6 has cars, a person, a bus, and more cars and objects. And finally, Frame 7 shows cars, a person, and additional cars in the video.


In [49]:
# Show the second question/answer exchange (conversation entries 2 and 3) of
# one training record together with its frames.
index = 2
record = drive_bddx_train_data[index]
idx_lst = record['idx_list']
# Frame file names are built as '<image name without .png>_<idx>.png', one per idx_list entry.
frame_stem = record['image'].replace('.png', '')
images = [frame_stem + f'_{i}.png' for i in idx_lst]

print('Question: {}'.format(record['conversations'][2]['value']))
print('Answer: {}'.format(record['conversations'][3]['value']))
# Printed so the generated frame file names can be checked by eye.
print(images)
# Use the shared path constant rather than a duplicated literal so this cell
# follows the paths cell like every other BDD-X cell does.
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, DRIVE_BDDX_IMAGES_PATH)

Question: What circumstances led to this vehicle's behavior?
Answer: since the light turns red and a crossing guard leads children across the street.
['training_07116ae0-e9b4fa4a_06930_0.png', 'training_07116ae0-e9b4fa4a_06930_3.png', 'training_07116ae0-e9b4fa4a_06930_7.png', 'training_07116ae0-e9b4fa4a_06930_11.png', 'training_07116ae0-e9b4fa4a_06930_15.png', 'training_07116ae0-e9b4fa4a_06930_19.png', 'training_07116ae0-e9b4fa4a_06930_23.png', 'training_07116ae0-e9b4fa4a_06930_27.png']


In [50]:
# Show the second question/answer exchange (conversation entries 2 and 3) of
# one training record together with its frames.
index = 70
record = drive_bddx_train_data[index]
idx_lst = record['idx_list']
# Frame file names are built as '<image name without .png>_<idx>.png', one per idx_list entry.
frame_stem = record['image'].replace('.png', '')
images = [frame_stem + f'_{i}.png' for i in idx_lst]

turns = record['conversations']
print('Question: {}'.format(turns[2]['value']))
print('Answer: {}'.format(turns[3]['value']))
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, DRIVE_BDDX_IMAGES_PATH)

Question:  Is the car turning or changing lanes in the video?
Answer:  No, the car in the video is not turning or changing lanes. The turning angle of the vehicle remains constant at 0 degrees throughout the entire video. This indicates that the car is driving in a straight line without any significant changes in direction.


#### Number of videos

In [51]:
# Count distinct videos in the training labels: the second '_'-separated field
# of each record's 'id' is used as the video key.
videos = [record['id'].split('_')[1] for record in drive_bddx_train_data]
len(set(videos))

3438

### Testing Dataset

In [52]:
# Parse the BDD-X testing label file into memory.
test_label_file = os.path.join(DRIVE_BDDX_DATASET_PATH, 'BDD_X_testing_label.json')
with open(test_label_file, 'r') as label_fp:
    drive_bddx_test_data = json.load(label_fp)

In [58]:
# Show the first conversation turn of one testing record together with its frames.
index = 1
record = drive_bddx_test_data[index]
idx_lst = record['idx_list']
# Frame file names are built as '<image name without .png>_<idx>.png', one per idx_list entry.
frame_stem = record['image'].replace('.png', '')
images = [frame_stem + f'_{i}.png' for i in idx_lst]

turns = record['conversations']
print('Question: {}'.format(turns[0]['value']))
print('Answer: {}'.format(turns[1]['value']))
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, DRIVE_BDDX_IMAGES_PATH)

Question: Current speed of the vehicle: 3.09m/s. The length of the video is 3.00 seconds. What action is the vehicle performing in this video at the moment?
<video>
Answer: The car slows down to a stop


In [57]:
# Show the first conversation turn of another testing record together with its frames.
index = 15
record = drive_bddx_test_data[index]
idx_lst = record['idx_list']
# Frame file names are built as '<image name without .png>_<idx>.png', one per idx_list entry.
frame_stem = record['image'].replace('.png', '')
images = [frame_stem + f'_{i}.png' for i in idx_lst]

turns = record['conversations']
print('Question: {}'.format(turns[0]['value']))
print('Answer: {}'.format(turns[1]['value']))
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, DRIVE_BDDX_IMAGES_PATH)

Question: Current speed of the vehicle: 0.20m/s. The length of the video is 2.00 seconds. How is the vehicle behaving at this point in the video?
<video>
Answer: The car has stopped


In [56]:
# Show the first conversation turn of a third testing record together with its frames.
index = 50
record = drive_bddx_test_data[index]
idx_lst = record['idx_list']
# Frame file names are built as '<image name without .png>_<idx>.png', one per idx_list entry.
frame_stem = record['image'].replace('.png', '')
images = [frame_stem + f'_{i}.png' for i in idx_lst]

turns = record['conversations']
print('Question: {}'.format(turns[0]['value']))
print('Answer: {}'.format(turns[1]['value']))
# display_images is not defined in this notebook (presumably provided by utils).
display_images(images, DRIVE_BDDX_IMAGES_PATH)

Question: Current speed of the vehicle: 8.90m/s. The length of the video is 16.00 seconds. What is the vehicle doing right now in this video?
<video>
Answer: The car is travelling down the road


#### Number of videos

In [108]:
# Count distinct videos in the testing labels: the second '_'-separated field
# of each record's 'id' is used as the video key.
videos = [record['id'].split('_')[1] for record in drive_bddx_test_data]
len(set(videos))

502

## Analyze BDD-X Original Dataset

## Analyze NuInstruct Dataset