# Data Query (specific groupMetadataID)


#### **ONLY RUN IF YOU NEED MASSIVE AMOUNTS OF DATA**

Runs a query for the chosen groupMetadataID, retrieving chassis and best_pose data.
Then runs a query for metadata.


In [1]:
import plotly.express as px
import pandas as pd
import numpy as np
import os

# Import plotly.express to utilize map

### Select your groupMetadataID below and choose whether to query the data or load it from saved .csv files


In [2]:
# groupMetadataID of the run whose data should be fetched
gmID = "3a2a78cc-db21-11ee-a158-97f8443fd730"

# Per-dataset switches:
#   True  -> query DynamoDB and (re)write ./data/<dataset>/<gmID>.csv
#   False -> load the previously saved .csv instead of querying
queryChassis = True
queryPose = True
queryMeta = True

In [3]:
from boto3.dynamodb.conditions import Key, Attr
import boto3

# The DynamoDB handles are only created when at least one dataset is going to
# be queried; when everything is loaded from .csv no AWS access is attempted.
if any((queryPose, queryChassis, queryMeta)):
    # Service resource shared by both tables
    dynamodb = boto3.resource("dynamodb")
    # Table queried for best_pose and chassis records
    table = dynamodb.Table("ads_passenger_processed")
    # Table queried for the per-group metadata
    table_meta = dynamodb.Table("ads_passenger_processed_metadata")

### Query and save Best_pose data


In [4]:
# Make sure the output directory exists (no-op when it is already there).
os.makedirs("./data/best_pose", exist_ok=True)

if queryPose:

    # Query arguments for best_pose data.
    # DynamoDB applies `Limit` to the items read from the index *before* the
    # filter runs, so individual pages may legitimately come back empty.
    # NOTE(review): this reads the whole best_pose topic partition and filters
    # by groupMetadataID afterwards; a 'groupMetadataID-index' is used
    # elsewhere in this notebook and may be much cheaper here -- confirm that
    # it projects the attributes listed below before switching.
    keywords = dict(
        IndexName='topic-index',
        KeyConditionExpression=Key('topic').eq(
            '/apollo/sensor/gnss/best_pose'),
        ProjectionExpression="groupMetadataID, #t, solStatus, solType, latitudeStdDev, longitudeStdDev, numSatsTracked, numSatsMulti, numSatsInSolution, differentialAge, solutionAge, latitude, longitude",
        Limit=1500,
        ExpressionAttributeNames={'#t': 'time'},
        # Attr (not Key) is the documented builder for filter conditions.
        FilterExpression=Attr('groupMetadataID').eq(gmID),
    )

    # Page through the results. The last page carries no "LastEvaluatedKey",
    # which is checked explicitly so that a single-page result does not raise
    # and unrelated KeyErrors are no longer mistaken for "done".
    items = []
    while True:
        res = table.query(**keywords)
        items.extend(res['Items'])

        leek = res.get("LastEvaluatedKey")
        if leek is None:
            break
        print(leek)  # progress indicator: where the next page starts
        keywords["ExclusiveStartKey"] = leek
    print("Done querying best_pose data")

    # Build the frame once from all records instead of concatenating per page
    # (avoids quadratic copying and duplicated index labels).
    df_pose = pd.json_normalize(items)

    df_pose.to_csv(f"./data/best_pose/{gmID}.csv", index=False)
    print(f"Saved best_pose data at ./data/best_pose/{gmID}.csv")

else:
    # Re-use the .csv written by a previous run of this cell.
    df_pose = pd.read_csv(f"./data/best_pose/{gmID}.csv")
    print("Loaded best_pose data")

df_pose.shape

{'_id': '9b6e9c6a-d9b0-11ee-a158-97f8443fd730', 'topic': '/apollo/sensor/gnss/best_pose', 'time': Decimal('1690479137037723809')}
{'_id': 'e87c004b-cbb2-11ee-909c-e1dc60cf66f9', 'topic': '/apollo/sensor/gnss/best_pose', 'time': Decimal('1697743591043257131')}
{'_id': 'c4d75fe6-cb59-11ee-909c-e1dc60cf66f9', 'topic': '/apollo/sensor/gnss/best_pose', 'time': Decimal('1694719523048517973')}
{'_id': 'ff200a4f-c876-11ee-a7fc-dd032dba19e8', 'topic': '/apollo/sensor/gnss/best_pose', 'time': Decimal('1695228578043383965')}
{'_id': 'a25c6bed-d296-11ee-b437-336917683bb8', 'topic': '/apollo/sensor/gnss/best_pose', 'time': Decimal('1698252366307861572')}
{'_id': 'cb99f220-db2d-11ee-a158-97f8443fd730', 'topic': '/apollo/sensor/gnss/best_pose', 'time': Decimal('1692288267030524101')}
{'_id': '4cff14b9-d997-11ee-a158-97f8443fd730', 'topic': '/apollo/sensor/gnss/best_pose', 'time': Decimal('1685640411033819963')}
{'_id': '4938c427-d299-11ee-b437-336917683bb8', 'topic': '/apollo/sensor/gnss/best_pose', 

(2001, 13)

### Query and save Chassis data


Warning: This query can take a while, frequently over 5 minutes.


In [11]:
# Make sure the output directory exists (no-op when it is already there).
os.makedirs("./data/chassis", exist_ok=True)

if queryChassis:
    # Query arguments for chassis data: key on the selected groupMetadataID,
    # then keep only the chassis topic. DynamoDB applies `Limit` before the
    # filter, so a page can hold fewer than 2000 (or zero) matching items.
    keywords = dict(
        IndexName='groupMetadataID-index',
        KeyConditionExpression=Key('groupMetadataID').eq(gmID),
        # Attr (not Key) is the documented builder for filter conditions.
        FilterExpression=Attr('topic').eq(
            '/apollo/canbus/chassis'),
        Limit=2000,
    )

    # Page through the results. The last page carries no "LastEvaluatedKey",
    # which is checked explicitly so that a single-page result does not raise
    # and unrelated KeyErrors are no longer mistaken for "done".
    items = []
    while True:
        res = table.query(**keywords)
        items.extend(res['Items'])

        leek = res.get("LastEvaluatedKey")
        if leek is None:
            break
        print(leek)  # progress indicator: where the next page starts
        keywords["ExclusiveStartKey"] = leek
    print("done querying for chassis data")

    # Build the frame once from all records instead of concatenating per page
    # (avoids quadratic copying and duplicated index labels).
    df_chassis = pd.json_normalize(items)

    df_chassis.to_csv(f"./data/chassis/{gmID}.csv", index=False)
    print(f"Saved chassis data at ./data/chassis/{gmID}.csv")
else:
    # Re-use the .csv written by a previous run of this cell.
    df_chassis = pd.read_csv(f"./data/chassis/{gmID}.csv")
    print("Loaded chassis data")

print(df_chassis.shape)

{'_id': 'c84745b2-db2a-11ee-a158-97f8443fd730', 'groupMetadataID': '3a2a78cc-db21-11ee-a158-97f8443fd730', 'time': Decimal('1692287240262890136')}
{'_id': '4f41c300-db22-11ee-a158-97f8443fd730', 'groupMetadataID': '3a2a78cc-db21-11ee-a158-97f8443fd730', 'time': Decimal('1692285431614024838')}
{'_id': '78664106-db28-11ee-a158-97f8443fd730', 'groupMetadataID': '3a2a78cc-db21-11ee-a158-97f8443fd730', 'time': Decimal('1692286734939994237')}
{'_id': 'dabc9d88-db28-11ee-a158-97f8443fd730', 'groupMetadataID': '3a2a78cc-db21-11ee-a158-97f8443fd730', 'time': Decimal('1692286828657149421')}
{'_id': 'b4989100-db2a-11ee-a158-97f8443fd730', 'groupMetadataID': '3a2a78cc-db21-11ee-a158-97f8443fd730', 'time': Decimal('1692287221166969522')}
{'_id': '5caea969-db25-11ee-a158-97f8443fd730', 'groupMetadataID': '3a2a78cc-db21-11ee-a158-97f8443fd730', 'time': Decimal('1692286109514029735')}
{'_id': 'eeb89e34-db27-11ee-a158-97f8443fd730', 'groupMetadataID': '3a2a78cc-db21-11ee-a158-97f8443fd730', 'time': Dec

### Query and save Metadata


In [13]:
# Make sure the output directory exists (no-op when it is already there).
os.makedirs("./data/metadata", exist_ok=True)

if queryMeta:
    # Query arguments for the metadata of the selected groupMetadataID.
    # "#o" / "#m" are placeholders for the attribute names "other" and "Map".
    keywords = dict(
        IndexName='groupMetadataID-index',
        KeyConditionExpression=Key("groupMetadataID").eq(gmID),
        ProjectionExpression="groupMetadataID, #o.Weather, #o.#m, #o.Notes",
        Limit=1500,
        ExpressionAttributeNames={"#o": "other", "#m": "Map"},
    )

    # Follow "LastEvaluatedKey" until it is absent so that results spanning
    # more than one page are not silently truncated.
    items = []
    while True:
        res = table_meta.query(**keywords)
        items.extend(res['Items'])

        leek = res.get("LastEvaluatedKey")
        if leek is None:
            break
        keywords["ExclusiveStartKey"] = leek

    # Flatten the nested "other" map into columns and collapse identical rows.
    df_meta = pd.json_normalize(items).drop_duplicates()

    df_meta.to_csv(f"./data/metadata/{gmID}.csv", index=False)
    print(f"Saved metadata data at ./data/metadata/{gmID}.csv")

else:
    # Re-use the .csv written by a previous run of this cell.
    df_meta = pd.read_csv(f"./data/metadata/{gmID}.csv")
    print("Loaded metadata")


print(df_meta.shape)

Saved metadata data at ./data/metadata/5976b77a-a504-11ee-88ec-eb6a8d5269b4.csv
(1, 4)
