# Data Query (specific groupMetadataID)


#### **ONLY RUN IF MASSIVE AMOUNTS OF DATA IS YOUR GOAL**

Runs a query for the chosen groupMetadataID, retrieving chassis and best_pose data.
Then runs a query for metadata.


In [4]:
import plotly.express as px
import pandas as pd
import numpy as np
import os

# Import plotly.express to utilize map

### Import your groupMetadataIDs below and choose to query data or not


In [16]:
# select a groupMetadataID
gmID = '5976b77a-a504-11ee-88ec-eb6a8d5269b4'

# set to False if data is stored as .csv file.
# otherwise, set to True
queryPose = True
queryChassis = True
queryMeta = True

In [10]:
from boto3.dynamodb.conditions import Key, Attr
import boto3

if queryPose or queryChassis or queryMeta:
    # Get the service resource
    dynamodb = boto3.resource('dynamodb')
    # Instantiate a table resource object
    table = dynamodb.Table('ads_passenger_processed')
    table_meta = dynamodb.Table('ads_passenger_processed_metadata')

### Query and save Best_pose data


In [5]:
if queryPose:

    # query arguments for best_pose data
    keywords = dict(
        IndexName='topic-index',
        KeyConditionExpression=Key('topic').eq(
            '/apollo/sensor/gnss/best_pose'),
        ProjectionExpression="groupMetadataID, #t, solStatus, solType, latitudeStdDev, longitudeStdDev, numSatsTracked, numSatsMulti, numSatsInSolution, differentialAge, solutionAge, latitude, longitude",
        Limit=1500,
        ExpressionAttributeNames={'#t': 'time'},
        FilterExpression=Key('groupMetadataID').eq(f'{gmID}')

    )

    # run initial query and coerce results into a dataframe
    res = table.query(**keywords)
    df_pose = pd.DataFrame.from_dict(
        pd.json_normalize(res['Items']), orient='columns')

    done = False
    leek = res["LastEvaluatedKey"]

    while not done:
        try:
            leek = res["LastEvaluatedKey"]
            keywords["ExclusiveStartKey"] = leek
            res = table.query(**keywords)
            tmpDF = pd.DataFrame.from_dict(
                pd.json_normalize(res['Items']), orient='columns')
            df_pose = pd.concat([df_pose, tmpDF])
        except KeyError:
            print("Done querying best_pose data")
            done = True

    df_pose.to_csv(f"./data/best_pose/{gmID}.csv", index=False)
    print(f"Saved best_pose data at ./data/best_pose/{gmID}.csv")


else:
    df_pose = pd.read_csv(f"./data/best_pose/{gmID}.csv")
    print("Loaded best_pose data")

df_pose.shape

done querying best_pose data


### Query and save Chassis data


Warning: This query can take awhile. frequently over 5 minutes


In [8]:
if queryChassis:
    keywords = dict(
        IndexName='topic-index',
        KeyConditionExpression=Key('topic').eq(
            '/apollo/canbus/chassis'),
        Limit=2000,
        FilterExpression=Key('groupMetadataID').eq(gmID)
    )

    res = table.query(**keywords)
    df_chassis = pd.DataFrame.from_dict(
        pd.json_normalize(res['Items']), orient='columns')

    done = False
    leek = res["LastEvaluatedKey"]

    while not done:
        try:
            leek = res["LastEvaluatedKey"]
            keywords["ExclusiveStartKey"] = leek
            res = table.query(**keywords)
            tmpDF = pd.DataFrame.from_dict(
                pd.json_normalize(res['Items']), orient='columns')
            df_chassis = pd.concat([df_chassis, tmpDF])
            print(leek)

        except KeyError:
            print(f"done querying for chassis data")
            done = True

    df_chassis.to_csv(f"./data/chassis/{gmID}.csv", index=False)
    print(f"Saved chassis data at ./data/chassis/{gmID}.csv")
else:
    df_chassis = pd.read_csv(f"./data/chassis/{gmID}.csv")
    print("Loaded chassis data")

print(df_chassis.shape)

{'_id': '6015180a-d999-11ee-a158-97f8443fd730', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1685640696339576516')}
{'_id': '54d8c5f0-a4f7-11ee-88ec-eb6a8d5269b4', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1695055458882433416')}
{'_id': '1b1fec39-a608-11ee-88ec-eb6a8d5269b4', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1697474605685134410')}
{'_id': '4765f392-a52c-11ee-88ec-eb6a8d5269b4', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1696265475822805899')}
{'_id': '96adc501-a512-11ee-88ec-eb6a8d5269b4', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1695932384754554560')}
{'_id': '0bc4a59d-d2ea-11ee-b437-336917683bb8', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1706300311314840820')}
{'_id': '811f0269-d34a-11ee-b437-336917683bb8', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1698256011076222180')}
{'_id': '9a06d7b8-d361-11ee-b437-336917683bb8', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1698348426105658694')}
{'_id': '79202b4

### Query and save Metadata


In [13]:
if queryMeta:

    res = table_meta.query(
        IndexName='groupMetadataID-index',
        KeyConditionExpression=Key("groupMetadataID").eq(gmID),
        ProjectionExpression="groupMetadataID, #o.Weather, #o.#m, #o.Notes",
        Limit=1500,
        ExpressionAttributeNames={"#o": "other", "#m": "Map"},
    )

    df_meta = pd.DataFrame.from_dict(
        pd.json_normalize(res['Items']), orient='columns').drop_duplicates()

    df_meta.to_csv(f"./data/metadata/{gmID}.csv", index=False)
    print(f"Saved metadata data at ./data/metadata/{gmID}.csv")

else:
    df_meta = pd.read_csv(f"./data/metadata/{gmID}.csv")
    print("Loaded metadata")


print(df_meta.shape)

Saved metadata data at ./data/metadata/5976b77a-a504-11ee-88ec-eb6a8d5269b4.csv
(1, 4)
