### Basic Installation

In [None]:
!python3 -m pip install gcsfs waymo-open-dataset-tf-2-11-0==1.6.1
!python3 -m pip install "notebook>=5.3" "ipywidgets>=7.5"

### Python Version

In [2]:
# Prints the version of the `python` executable found on the shell PATH.
# NOTE(review): this may not be the interpreter the kernel itself runs on — confirm if it matters.
!python --version

Python 3.9.18


### Install TensorFlow

In [None]:
!pip install tensorflow

### Initial Setup

In [6]:
#@title Initial setup
from typing import Optional
import warnings
# Silence FutureWarning messages; the filter is installed before the imports
# below so it also applies while those libraries load.
warnings.simplefilter(action='ignore', category=FutureWarning)

import tensorflow as tf
import dask.dataframe as dd
from waymo_open_dataset import v2

# Root directory of the dataset on disk; component files are looked up under it.
dataset_dir = '/scratch/gpfs/ms90/dataset_prep'

# NOTE(review): not referenced by any cell visible in this notebook —
# presumably a segment identifier; confirm it is still needed.
context_name = '10017090168044687777_6380_000_6400_000'

def read(tag: str, file_name: str) -> dd.DataFrame:
  """Loads the parquet file(s) of one dataset component into a Dask DataFrame.

  Args:
    tag: Name of the component sub-directory under `dataset_dir`.
    file_name: File name (or glob pattern) inside that sub-directory.

  Returns:
    A Dask DataFrame built from every path that matches
    `{dataset_dir}/{tag}/{file_name}`. The matched paths are printed first.
  """
  pattern = f'{dataset_dir}/{tag}/{file_name}'
  matched_paths = tf.io.gfile.glob(pattern)
  print(matched_paths)
  return dd.read_parquet(matched_paths)


### Image Processing

In [None]:
from PIL import Image
import io
import os
import itertools

# Export a small sample of camera images from every parquet file under
# `camera_image/` to PNG files, grouped into one folder per image resolution.
#
# NOTE(review): `read()` resolves files relative to `dataset_dir`, while this
# cell lists them from `base_folder_path`; both currently hold the same path and
# must stay in sync.
base_folder_path = '/scratch/gpfs/ms90/dataset_prep'

folder_path = os.path.join(base_folder_path, 'camera_image')
output_base_path = os.path.join(base_folder_path, 'waymo_images')

# exist_ok=True replaces the check-then-create pattern and keeps the cell re-runnable.
os.makedirs(output_base_path, exist_ok=True)

files = os.listdir(folder_path)

for file_name in files:
    # Skip hidden entries (names starting with a dot).
    if file_name.startswith('.'):
        continue

    print(file_name)
    cam_image_df = read('camera_image', file_name)

    # Take every `step`-th row so that about five images are exported per file.
    # The step is clamped to 1: for files with fewer than five rows the integer
    # division yields 0, and itertools.islice rejects a step of 0 with ValueError.
    num_rows = cam_image_df.shape[0].compute()
    step = max(1, num_rows // 5)

    for i, row in itertools.islice(cam_image_df.iterrows(), None, None, step):
        # Assumes the index label is a string that uses ';' as a separator —
        # TODO confirm; the separators are replaced to build the file name.
        img_key = i.replace(';', '_')
        cam_image_raw = row['[CameraImageComponent].image']
        im = Image.open(io.BytesIO(cam_image_raw))

        # Group the output by resolution, e.g. `<output_base_path>/<width>x<height>/`.
        img_width, img_height = im.size
        dimension_folder_name = f"{img_width}x{img_height}"
        dimension_folder_path = os.path.join(output_base_path, dimension_folder_name)
        os.makedirs(dimension_folder_path, exist_ok=True)

        im.save(os.path.join(dimension_folder_path, f'cam_img_{img_key}.png'))
