In [None]:
import json

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Load the arrival dump, parse it as JSON and build a DataFrame with datetime columns.

# Read as UTF-8 explicitly so decoding does not depend on the platform's default locale.
with open("data/arrivals_dump.json", encoding="utf-8") as dump_file:
    dump_lines = dump_file.readlines()

# The first three and last two lines are discarded and an opening "[" is put back by hand
# (presumably header/footer rows of the export -- TODO confirm against the file).
arrival_payload = "[" + "".join(dump_lines[3:-2])
# Every line loses a trailing "+" before parsing
# (looks like a line-continuation marker written by the export tool -- TODO confirm).
arrival_json = "\n".join(row.removesuffix("+") for row in arrival_payload.split("\n"))
arrival_records = json.loads(arrival_json)

# The set_index/reset_index round-trip moves "id" to the first column.
arrival_frame = pd.DataFrame(arrival_records).set_index("id").reset_index()

# Convert the timestamp columns in a single pass.
datetime_cols = ["generatedOn", "arrivalAt", "createdAt", "updatedAt"]
arrival_frame = arrival_frame.assign(
    **{name: pd.to_datetime(arrival_frame[name]) for name in datetime_cols}
)
arrival_frame.info()

In [None]:
# Load the vehicle dump, parse it as JSON and build a DataFrame with datetime columns.

# Read as UTF-8 explicitly so decoding does not depend on the platform's default locale.
with open("data/vehicle_dump.json", encoding="utf-8") as dump_file:
    dump_lines = dump_file.readlines()

# The first three and last two lines are discarded and an opening "[" is put back by hand
# (presumably header/footer rows of the export -- TODO confirm against the file).
vehicle_payload = "[" + "".join(dump_lines[3:-2])
# Every line loses a trailing "+" before parsing
# (looks like a line-continuation marker written by the export tool -- TODO confirm).
vehicle_json = "\n".join(row.removesuffix("+") for row in vehicle_payload.split("\n"))
vehicle_records = json.loads(vehicle_json)

# The set_index/reset_index round-trip moves "id" to the first column.
vehicle_frame = pd.DataFrame(vehicle_records).set_index("id").reset_index()

# Convert the timestamp columns in a single pass.
datetime_cols = ["createdAt", "updatedAt"]
vehicle_frame = vehicle_frame.assign(
    **{name: pd.to_datetime(vehicle_frame[name]) for name in datetime_cols}
)
vehicle_frame.info()

In [None]:
# Scatter of arrival estimates: generation time on x, estimated arrival time on y,
# coloured by stop, with one panel per side of the "2023-07-21" comparison.

sns.set_theme(style="darkgrid")
frame = arrival_frame

# Boolean facet variable; False sorts first, so it lands in the left-hand panel.
generated_after_cutoff = frame["generatedOn"] > "2023-07-21"
independent_axes = {'sharey': False, 'sharex': False}

g = sns.relplot(
    data=frame,
    x="generatedOn",
    y="arrivalAt",
    hue="stopId",
    col=generated_after_cutoff,
    aspect=1.3,
    facet_kws=independent_axes,
)

# Replace the default facet titles with the dates the two panels stand for.
for panel_idx, panel_title in enumerate(("2023-07-20", "2023-07-21")):
    g.axes[0, panel_idx].set_title(panel_title)

_ = g.figure.suptitle("Arrival estimates across all stops", y=1.05)

In [None]:
# Plot vehicle location data: positions and creation dates for the whole data set,
# then positions and creation hours restricted to a single day.

frame = vehicle_frame
sns.set_theme(style="darkgrid")

# --- All records ---
g = sns.jointplot(
    data=frame,
    x="lat",
    y="long",
)
_ = g.figure.suptitle("All data", y=1.05)

g = sns.displot(
    data=frame,
    x=frame["createdAt"].dt.date,
    aspect=1.5,
)
_ = g.figure.suptitle("Distribution of date", y=1.05)

# --- Single day ---
# Build the one-day subset once so that every single-day plot draws from the same rows.
target_day = "2023-07-18"
day_frame = frame.loc[frame["createdAt"].dt.date.astype(str) == target_day]

g = sns.jointplot(
    data=day_frame,
    x="lat",
    y="long",
)
_ = g.figure.suptitle(f"Location data for {target_day}", y=1.05)

# The hour vector must come from the filtered subset: seaborn plots a vector passed as
# `x` as-is, so taking it from the full frame would show every date's hours here.
g = sns.displot(
    data=day_frame,
    x=day_frame["createdAt"].dt.hour,
)
_ = g.figure.suptitle(f"Distribution of hour created for {target_day}", y=1.05)