In [1]:
import time
import json
import ast
import os
import datetime
import io
from collections import defaultdict

import imageio
import boto3
import pandas as pd
import numpy as np
import imageio
import matplotlib.pyplot as plt
import seaborn as sns

from brtdevkit.core.db.athena import AthenaClient
from brtdevkit.data import Dataset
from timezonefinder import TimezoneFinderL
import pytz

from aletheia_dataset_creator.dataset_tools.aletheia_dataset_helpers import imageids_to_dataset
from aletheia_dataset_creator.config.dataset_config import LEFT_CAMERAS, ALL_CAMERA_PAIRS_LIST
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from pathlib import Path

# Allow up to 500 rows when a frame is displayed.
pd.set_option('display.max_rows', 500)

# Service handles; `athena` runs the queries in the loading cells.
athena = AthenaClient()
s3 = boto3.resource('s3')
tf = TimezoneFinderL()

# Local directory that holds the parquet caches of the query results.
home = Path(os.path.expanduser('~'))
data_path = home / 'data' / 'dust_dataset'
# Create it up front: the loading cells write their parquet caches here and
# would fail on a machine where the directory does not exist yet.
data_path.mkdir(parents=True, exist_ok=True)

In [7]:
# Load the image rows for the three listed hard drives into `df_sequences`.
# Athena is queried only when no local parquet cache exists; the result is then
# written to the cache so later runs read it from disk.
sequences_cache = data_path / 'df_sequences.parquet'
if not os.path.exists(sequences_cache):
    query = """SELECT ij.id, hard_drive_name, robot_name, collected_on,
        bag_name, operating_field_name, operation_time, latitude, longitude, geohash, camera_location, 
        bundle, group_id, s3_bucket, s3_key, special_notes
    FROM image_jupiter AS ij
    JOIN "image_artifact_jupiter" ON ij."id" = "image_artifact_jupiter"."image"
    WHERE "hard_drive_name" IN ('JUPD-004_2023-7-19', 'JUPD-006_2023-7-19', 'JUPD-007_2023-7-11')
    """
    df_sequences: pd.DataFrame = athena.get_df(query) # type: ignore
    df_sequences.to_parquet(sequences_cache)
else:
    df_sequences = pd.read_parquet(sequences_cache)


In [8]:
# Show the first row to see which columns the frame carries and what the
# values look like.
df_sequences.iloc[0]

id                                               64bee4dd4010e725e44ceed7
hard_drive_name                                        JUPD-006_2023-7-19
robot_name                                                      loamy_731
collected_on                                   2023-07-14 01:08:43.273000
bag_name                                              07_14_2023-01_08_03
operating_field_name                                              Tract 1
operation_time                                                    daytime
latitude                                                        27.817392
longitude                                                       -97.56502
geohash                                                      9ufw7b5243b0
camera_location                                        front-center-right
bundle                                                               6524
group_id                                 87c504e01ed0471b8e8cb844a1db2888
s3_bucket                             

In [5]:
# Load the image rows for hard drive 'JUPD-153_2023-6-29' into `df_dusty`,
# reading the local parquet cache when it exists and querying Athena otherwise.
#
# Both branches must bind `df_dusty`. The cached branch used to assign to
# `df_sequences`, which overwrote the sequence frame and left `df_dusty`
# undefined whenever the cache file was present.
if os.path.exists(data_path / 'df_dusty.parquet'):
    df_dusty = pd.read_parquet(data_path / 'df_dusty.parquet')
else:
    # Same column list as the sequence query, minus `special_notes`.
    query = """SELECT ij.id, hard_drive_name, robot_name, collected_on,
        bag_name, operating_field_name, operation_time, latitude, longitude, geohash, camera_location, 
        bundle, group_id, s3_bucket, s3_key
    FROM image_jupiter AS ij
    JOIN "image_artifact_jupiter" ON ij."id" = "image_artifact_jupiter"."image"
    WHERE "hard_drive_name" IN ('JUPD-153_2023-6-29')
    """
    df_dusty: pd.DataFrame = athena.get_df(query) # type: ignore
    # Cache the result so later runs skip the query.
    df_dusty.to_parquet(data_path / 'df_dusty.parquet')

In [6]:
# S3 bucket recorded on the first row (pick the column, then the row).
df_sequences['s3_bucket'].iloc[0]

'brt-mesa-jupiter-images-eng'

In [8]:
# Show the first row of `df_sequences` again.
# NOTE(review): the saved output below lists columns (sensor_type, created_at)
# that neither query in this notebook selects, so it looks stale. Re-run from a
# fresh kernel to refresh it.
df_sequences.iloc[0]

id                                                64b6ec4fc84fb119ea6debf9
hard_drive_name                                         JUPD-007_2023-7-11
robot_name                                                       loamy_731
collected_on                                    2023-07-15 01:23:42.814000
bag_name                                               07_15_2023-01_22_50
operating_field_name                                               Tract 1
operation_time                                                     unknown
latitude                                                               0.0
longitude                                                              0.0
geohash                                                       7zzzzzzzzzzz
camera_location                                            side-right-left
sensor_type                                                           <NA>
created_at                                      2023-07-18 19:47:27.131000
bundle                   

In [9]:
# brtdevkit S3 helper; `client` is used by the download cell that follows.
# NOTE(review): this import sits outside the notebook's import cell. Consider
# moving it there so all dependencies are visible in one place.
from brtdevkit.util.aws.s3 import S3
client = S3()

In [13]:
# Fetch one sample image through the S3 helper. The call is the cell's last
# expression, so its return value is displayed.
# NOTE(review): arguments are presumably (bucket, key, local destination);
# confirm against the brtdevkit S3 helper.
client.download_file(
    'brt-mesa-jupiter-images-eng',
    '2023/07/18/e4777c3ef4b147c79b224a88a67ed0e0-datapoint_side-right-left_debayeredrgb.png',
    'out.png',
)

True

In [None]:
# OK. First, let's look through these sequences.

In [16]:
# Group the sequence rows by their `special_notes` value.
# NOTE(review): with pandas' default `dropna=True`, rows whose special_notes is
# null are left out of the groups. Confirm that is intended.
# NOTE(review): the saved output under this cell (56) cannot come from a bare
# assignment; it is probably left over from an earlier version of the cell.
df_groups = df_sequences.groupby(by='special_notes')

56