# Starter Transition-Mapping Template Notebook


In [59]:
import plotly.express as px
import pandas as pd

import os

### Import a chosen groupMetadataID below.

If we already have the data, load it.


In [60]:
gmID = '3a2a78cc-db21-11ee-a158-97f8443fd730'

# set to False if data is stored as .csv file.
# otherwise, set to True

if os.path.isfile(f"./data/chassis/{gmID}.csv"):
    queryChassis = False
    print("chassis csv file found!")
else:
    queryChassis = True
    print("no chassis csv. Query for data necessary.")

if os.path.isfile(f"./data/best_pose/{gmID}.csv"):
    queryPose = False
    print("best_pose csv file found!")
else:
    queryPose = True
    print("no best_ose csv. Query for data necessary.")

if os.path.isfile(f"./data/metadata/{gmID}.csv"):
    queryMeta = False
    print("metadata csv file found!")
else:
    queryMeta = True
    print("no metadata csv. Query for data necessary.")

no chassis csv. Query for data necessary.
best_pose csv file found!
metadata csv file found!


### Connect to database and query


In [61]:
from boto3.dynamodb.conditions import Key, Attr
import boto3

if queryPose or queryChassis or queryMeta:
    # Get the service resource
    dynamodb = boto3.resource('dynamodb')
    # Instantiate a table resource object
    table = dynamodb.Table('ads_passenger_processed')
    table_meta = dynamodb.Table('ads_passenger_processed_metadata')

In [62]:
if not os.path.exists(f"./data/best_pose"):
        os.makedirs(f"./data/best_pose")
if queryPose:

    # query arguments for best_pose data
    keywords = dict(
        IndexName='topic-index',
        KeyConditionExpression=Key('topic').eq(
            '/apollo/sensor/gnss/best_pose'),
        ProjectionExpression="groupMetadataID, #t, solStatus, solType, latitudeStdDev, longitudeStdDev, numSatsTracked, numSatsMulti, numSatsInSolution, differentialAge, solutionAge, latitude, longitude",
        Limit=1500,
        ExpressionAttributeNames={'#t': 'time'},
        FilterExpression=Key('groupMetadataID').eq(f'{gmID}')

    )

    # run initial query and coerce results into a dataframe
    res = table.query(**keywords)
    df_pose = pd.DataFrame.from_dict(
        pd.json_normalize(res['Items']), orient='columns')

    done = False
    leek = res["LastEvaluatedKey"]

    while not done:
        try:
            leek = res["LastEvaluatedKey"]
            keywords["ExclusiveStartKey"] = leek
            res = table.query(**keywords)
            tmpDF = pd.DataFrame.from_dict(
                pd.json_normalize(res['Items']), orient='columns')
            df_pose = pd.concat([df_pose, tmpDF])
        except KeyError:
            print("Done querying best_pose data")
            done = True

    df_pose.to_csv(f"./data/best_pose/{gmID}.csv")
    print(f"Saved best_pose data at ./data/best_pose/{gmID}.csv")


else:
    df_pose = pd.read_csv(f"./data/best_pose/{gmID}.csv")
    print("Loaded best_pose data")

df_pose.shape

Loaded best_pose data


(2001, 13)

In [64]:
if not os.path.exists(f"./data/chassis"):
        os.makedirs(f"./data/chassis")

if queryChassis:
    keywords = dict(
        IndexName='topic-index',
        KeyConditionExpression=Key('topic').eq(
            '/apollo/canbus/chassis'),
        Limit=2000,
        FilterExpression=Key('groupMetadataID').eq(gmID)
    )

    res = table.query(**keywords)
    df_chassis = pd.DataFrame.from_dict(
        pd.json_normalize(res['Items']), orient='columns')

    done = False
    leek = res["LastEvaluatedKey"]

    while not done:
        try:
            leek = res["LastEvaluatedKey"]
            print(leek)
            keywords["ExclusiveStartKey"] = leek
            res = table.query(**keywords)
            tmpDF = pd.DataFrame.from_dict(
                pd.json_normalize(res['Items']), orient='columns')
            df_chassis = pd.concat([df_chassis, tmpDF])

        except KeyError:
            print(f"done querying for chassis data")
            done = True

    df_chassis.to_csv(f"./data/chassis/{gmID}.csv")
    print(f"Saved chassis data at ./data/chassis/{gmID}.csv")
else:
    df_chassis = pd.read_csv(f"./data/chassis/{gmID}.csv")
    print("Loaded chassis data")

print(df_chassis.shape)

{'_id': 'cc82e50c-db4a-11ee-a158-97f8443fd730', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1692301504388532934')}
{'_id': '15fa05e3-d2a4-11ee-b437-336917683bb8', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1698253449947109077')}
{'_id': '70806656-a520-11ee-88ec-eb6a8d5269b4', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1695933146014667852')}
{'_id': '94f1f0b4-cb84-11ee-909c-e1dc60cf66f9', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1696533741090559411')}
{'_id': '2b7bfbf6-cb92-11ee-909c-e1dc60cf66f9', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1697138242295307523')}
{'_id': '86676011-cb83-11ee-909c-e1dc60cf66f9', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1696533657411429261')}
{'_id': '27327ff6-a51a-11ee-88ec-eb6a8d5269b4', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1695932808855487870')}
{'_id': '1c9d1502-c867-11ee-a7fc-dd032dba19e8', 'topic': '/apollo/canbus/chassis', 'time': Decimal('1694625122048416187')}
{'_id': '1e2dcbe

KeyboardInterrupt: 

In [None]:
if not os.path.exists(f"./data/metadata"):
        os.makedirs(f"./data/metadata")

if queryMeta:

    res = table_meta.query(
        IndexName='groupMetadataID-index',
        KeyConditionExpression=Key("groupMetadataID").eq(gmID),
        ProjectionExpression="groupMetadataID, #o.Weather, #o.#m, #o.Notes",
        Limit=1500,
        ExpressionAttributeNames={"#o": "other", "#m": "Map"},
    )

    df_meta = pd.DataFrame.from_dict(
        pd.json_normalize(res['Items']), orient='columns').drop_duplicates()

    df_meta.to_csv(f"./data/metadata/{gmID}.csv")
    print(f"Saved metadata data at ./data/metadata/{gmID}.csv")

else:
    df_meta = pd.read_csv(f"./data/metadata/{gmID}.csv")
    print("Loaded metadata")


print(df_meta.shape)

Loaded metadata
(1, 8)


The output of this box will be messed up. It automatically sets row 0 to True. Not sure why, but it doesn't count as a transition in the end.
We are also counting emergency mode as MANUAL


In [None]:
# read the timeSorted csv
pts = df_chassis.copy()
pts = pts.sort_values('time')
# reduce df to only time and drivingMode
pts = pts[["time", "drivingMode"]]
# function to tell us what state transition it is


def getState(old, new):
    if old == "COMPLETE_AUTO_DRIVE" and new == "COMPLETE_MANUAL":
        return "A->M"
    elif old == "COMPLETE_AUTO_DRIVE" and new == "EMERGENCY_MODE":
        return "A->M"
    elif old == "COMPLETE_MANUAL" and new == "COMPLETE_AUTO_DRIVE":
        return "M->A"
    elif old == "EMERGENCY_MODE" and new == "COMPLETE_AUTO_DRIVE":
        return "M->A"
    else:
        return "NONE"


# we shift the drivingMode column down by 1
pts["shift"] = pts["drivingMode"].shift()
# create transition field
pts['transition'] = (pts["drivingMode"] != pts["shift"])
# set Transition type to NONE for all
pts['transitionType'] = "NONE"
# Set first transition to False, since it was auto True
pts.loc[0, 'transition'] = False
# print(pts['transition'])
# Show counts for transitions
print(pts['transition'].value_counts())
# iterate over all rows and find the ones where transition is true. Then we compare shift and dM to see what
# transition Type we have.
for index, row in pts.iterrows():
    if pts['transition'][index] == True:
        old = pts['shift'][index]
        new = pts['drivingMode'][index]
        pts.loc[index, "transitionType"] = getState(old, new)
print(pts['transitionType'].value_counts())
pts = pts[['time', 'drivingMode', 'transition', 'transitionType']]
pts = pts.sort_values('time')
pts['time'] = pts['time'].astype(float)
pts['time'] = pts['time'].astype('datetime64[ns]')
pts['time'] = pts['time'].astype('datetime64[s]')
pts[['time']]

# show(pts)

transition
False    33483
True        15
Name: count, dtype: int64


transitionType
NONE    33487
A->M        6
M->A        5
Name: count, dtype: int64


Unnamed: 0,time
229,2023-05-31 13:55:02
6170,2023-05-31 13:55:02
2348,2023-05-31 13:55:02
12706,2023-05-31 13:55:02
4063,2023-05-31 13:55:02
...,...
947,2023-05-31 14:06:12
26339,2023-05-31 14:06:12
21380,2023-05-31 14:06:12
7061,2023-05-31 14:06:12


In [None]:
ll = df_pose.copy()
ll = ll.sort_values('time')
ll['time'] = ll['time'].astype(float)
ll['time'] = ll['time'].astype('datetime64[ns]')
ll['time'] = ll['time'].astype('datetime64[s]')
ll = ll[["groupMetadataID", 'time', 'latitude', 'longitude']]
merged_df = pd.merge(pts, ll, on='time')

In [None]:
merged_df = merged_df.drop_duplicates()
merged_df

Unnamed: 0,time,drivingMode,transition,transitionType,groupMetadataID,latitude,longitude
0,2023-05-31 13:55:02,COMPLETE_AUTO_DRIVE,True,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.325937,-82.103313
1,2023-05-31 13:55:02,COMPLETE_AUTO_DRIVE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.325937,-82.103313
30,2023-05-31 13:55:03,COMPLETE_AUTO_DRIVE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.326024,-82.103266
80,2023-05-31 13:55:04,COMPLETE_AUTO_DRIVE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.326106,-82.103222
130,2023-05-31 13:55:05,COMPLETE_AUTO_DRIVE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.326184,-82.103181
...,...,...,...,...,...,...,...
33269,2023-05-31 14:06:08,EMERGENCY_MODE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.331453,-82.126043
33319,2023-05-31 14:06:09,EMERGENCY_MODE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.331453,-82.126043
33369,2023-05-31 14:06:10,EMERGENCY_MODE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.331453,-82.126043
33419,2023-05-31 14:06:11,EMERGENCY_MODE,False,NONE,ba6e1072-9524-11ee-956e-9da2d070324c,39.331453,-82.126043


In [None]:
merged_notes_df = pd.merge(merged_df, df_meta, on="groupMetadataID")

### Mapping


In [None]:
# Define your map with your dataframe name first and keep everything else the same
fig = px.scatter_mapbox(merged_df, lat="latitude",
                        lon="longitude",
                        hover_data=["time", "drivingMode", "transitionType"],
                        color="drivingMode",
                        zoom=13,
                        height=1000,
                        width=1200,
                        # color_discrete_map={
                        #    "COMPLETE_MANUAL": "#0e3f75",
                        #    "COMPLETE_AUTO_DRIVE": "#009969",
                        #    "EMERGENCY_MODE": "#c12637",
                        # }
                        color_discrete_map={
                            "COMPLETE_MANUAL": "#601A4A",
                            "COMPLETE_AUTO_DRIVE": "#63ACBE",
                            "EMERGENCY_MODE": "#EE442F",
                        }

                        )

# Layout settings
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

fig.update_traces(marker=dict(size=8))

# Display the map
fig.show()



