In [None]:
import time
import json
import ast
import os
import datetime
import io
from collections import defaultdict

import imageio
import boto3
import pandas as pd
import numpy as np
import imageio
import matplotlib.pyplot as plt
import seaborn as sns

from brtdevkit.core.db.athena import AthenaClient
from brtdevkit.data import Dataset
from timezonefinder import TimezoneFinderL
import pytz

from aletheia_dataset_creator.dataset_tools.aletheia_dataset_helpers import imageids_to_dataset
from aletheia_dataset_creator.config.dataset_config import LEFT_CAMERAS, ALL_CAMERA_PAIRS_LIST
%matplotlib inline
# Let pandas render up to 500 rows before truncating displayed DataFrames.
pd.set_option('display.max_rows', 500)

In [None]:
# --- Service handles shared by the cells below ---
athena = AthenaClient()       # used below to run the exposure query
s3 = boto3.resource('s3')
tf = TimezoneFinderL()

# --- Filesystem locations ---
home = os.path.expanduser('~')
# OUTPUT_PATH must be set in the environment; a missing variable raises KeyError here.
outpath = os.environ['OUTPUT_PATH']
data_path = f"{home}/data"

In [None]:
# Load the exposure-outlier image list into `skip_df`.
# The result is cached as a CSV under `outpath`, so the Athena query only runs
# when the cache file is missing.
df_cache = outpath + "/shortlong_exposure.csv"
try:
    skip_df = pd.read_csv(df_cache)
except FileNotFoundError:
    print("file not found")
    # Make sure the cache directory exists *before* running the query; otherwise
    # a missing OUTPUT_PATH directory would only surface in to_csv(), after the
    # query has already been paid for, and the result would be lost.
    os.makedirs(os.path.dirname(df_cache), exist_ok=True)
    # Selects id, camera_location and the exposure_ms JSON value for images from
    # the listed cameras whose '$.exposure_ms.1' value is > 0.2 or < 0.02.
    # NOTE(review): LIMIT without ORDER BY — which 1000 rows come back is up to
    # the query engine, so the cached CSV pins one arbitrary sample.
    query = """
    SELECT
        image_jupiter.id, image_jupiter.camera_location AS camera_location, json_extract(image_jupiter.calibration_data__json, '$.exposure_ms') exposure_ms
    FROM image_jupiter
    WHERE
        image_jupiter.camera_location IN ('T01', 'T02', 'T03', 'T04', 'T05', 'T06', 'T07', 'T08', 'T09', 'T10', 'T11', 'T12', 'T13', 'T14', 'T15', 'T16', 'I01', 'I03', 'I05', 'I07', 'I02', 'I04', 'I06', 'I08')
        AND (cast(json_extract_scalar(image_jupiter.calibration_data__json, '$.exposure_ms.1') AS double)  > 0.2
        OR cast(json_extract_scalar(image_jupiter.calibration_data__json, '$.exposure_ms.1') AS double)  < 0.02)
    LIMIT 1000
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long query.
    start = time.perf_counter()
    skip_df = athena.get_df(query)
    end = time.perf_counter()
    print(end - start)  # query duration in seconds
    skip_df.to_csv(df_cache, index=False)

In [None]:
# Combine the exposure-outlier ids from `skip_df` with an optional extra list of
# image ids stored as JSON (presumably a Labelbox export — confirm the source).
fname = os.path.expanduser("~/data/labelbox_slice_ids.json")
if os.path.exists(fname):
    with open(fname, 'r') as f:
        labelbox_image_ids = json.load(f)
else:
    # Without this fallback the name would be unbound on a fresh kernel
    # (NameError below) or, worse, silently reuse a stale value from an
    # earlier run of this cell.
    print(f"{fname} not found; continuing without extra image ids")
    labelbox_image_ids = []
image_ids = list(skip_df['id']) + labelbox_image_ids

In [None]:
# Sanity check: total number of image ids collected above.
print(len(image_ids))

In [None]:
from brtdevkit.data import Dataset

# Metadata for the dataset built from the combined image id list.
dataset_name = "bad_iq_halo_labelbox_plus_exposure"
dataset_description = "Images with a very high or very low exposure time"
dataset_kind = Dataset.KIND_IMAGE

# Hand the ids to the dataset helper in 'stereo' mode.
imageids_to_dataset(
    image_ids,
    dataset_name,
    dataset_description,
    dataset_kind=dataset_kind,
    mode='stereo',
)

In [None]:
# Show only a short preview; dumping the full id list bloats the notebook output.
image_ids[:10]

In [None]:
# Register the dataset directly from the collected image ids.
# NOTE(review): an earlier cell already passes the same dataset_name to
# imageids_to_dataset(); confirm that both calls are intended and that they
# do not conflict on the name.
dataset_create_kwargs = {
    "name": dataset_name,
    "description": dataset_description,
    "kind": Dataset.KIND_IMAGE,
    "image_ids": image_ids,
}
Dataset.create(**dataset_create_kwargs)