In [30]:
import pandas as pd
import numpy as np

# Load the raw runtime observations that every later cell filters down from.
# NOTE(review): the file carries a ".gzip" suffix but is read as Parquet;
# presumably it is gzip-compressed Parquet — confirm.
runtime = pd.read_parquet("../raw-data/runtimeSeveralRoutesOctWithLags_all.gzip")

# Generate prediction result for demo data

Form a table with:

- Rows representing the active trips at "2022-10-26 08:30:16".
- Columns containing information of:
  - Route
  - Direction
  - Current stop ID and stop name
  - Stop IDs and stop names 11-20 stops ahead

In [95]:
# Select the demo subset: routes 21, 47 and 33 on service date 2022-10-26.
query = "(routeId.isin(['21', '47', '33'])) & (serviceDate == '2022-10-26')"
# .copy() yields an independent frame, so later edits never touch `runtime`.
runtime_sel = runtime.query(query).copy()

In [96]:
# Time span of every trip in the selection: the earliest and latest observed
# stop-arrival time, indexed by tripId.
arrivals_by_trip = runtime_sel.groupby("tripId")["observedToStopArrivalTime"]
trips = arrivals_by_trip.agg(["min", "max"])

In [97]:
# Snapshot instant that the demo treats as "now".
desired_time = "2022-10-26 08:30:16"

# A trip is active when the snapshot lies strictly between its first and last
# observed arrival (the `min` / `max` columns of `trips`).
active_trip_filter = "min < @desired_time < max"
trips_sel = trips.query(active_trip_filter).copy()
tripIds_sel = trips_sel.index.tolist()

In [98]:
# Keep every observation that belongs to a trip active at the snapshot.
is_active_trip = runtime_sel["tripId"].isin(tripIds_sel)
runtime_used = runtime_sel[is_active_trip].copy()

In [99]:
# Derive when the bus left the previous stop: the arrival time at the "to"
# stop minus the observed runtime of the segment, given in seconds.
segment_runtime = pd.to_timedelta(runtime_used["observedRuntimeSeconds"], unit="s")
runtime_used["observedFromStopDepartureTime"] = (
  runtime_used["observedToStopArrivalTime"] - segment_runtime
)

In [100]:
# Narrow the frame to the columns needed to work out which stops lie
# 11-20 stops ahead of each observation.
lookahead_cols = [
    "routeId", "directionId", "tripId",
    "toStopId", "toStopName", "toStopSequence",
    "observedFromStopDepartureTime", "observedToStopArrivalTime",
]
runtime_used = runtime_used[lookahead_cols]

In [101]:
# Order rows by route, direction, trip and stop sequence so that the per-trip
# shift in the following cell walks forward along each trip's stops.
# NOTE(review): the name `sorted` shadows Python's built-in sorted() for the
# rest of the notebook; renaming it requires updating the later cells that
# read it as well.
sorted = runtime_used.sort_values(
    ["routeId", "directionId", "tripId", "toStopSequence"]
)

In [102]:
# For every look-ahead distance 11..20, add the ID and name of the stop that
# many rows further along the same trip. Rows too close to the end of the
# trip get a missing value.
trip_keys = ["routeId", "directionId", "tripId"]
for step in range(11, 21):
  for source_col, suffix in (("toStopId", "stopId"), ("toStopName", "stopName")):
    sorted[f"next_{step}_{suffix}"] = (
      sorted.groupby(trip_keys)[source_col].shift(-step)
    )

In [103]:
# Keep only the segment each bus is travelling at the snapshot: it has left
# the previous stop but has not yet arrived at the next one (both bounds strict).
in_transit_filter = (
  "observedFromStopDepartureTime < @desired_time < observedToStopArrivalTime"
)
final = sorted.query(in_transit_filter).copy()

Next, join prediction results

In [89]:
# Load the precomputed demo prediction results; they are joined onto the
# active-trip rows in the cells below.
prediction = pd.read_csv("../raw-data/demo-prediction-results.csv")

In [92]:
# Keep only the join keys plus every prediction column (names starting "pred_").
key_cols = ["routeId", "directionId", "tripId", "toStopId"]
pred_cols = [name for name in prediction.columns if name.startswith("pred_")]
prediction = prediction[key_cols + pred_cols]

In [117]:
# Data types: cast the join keys (string IDs, 32-bit integer tripId),
# presumably to match the corresponding columns of `final` — confirm.
# One astype() mapping returns a new frame; the previous attribute-style
# assignments wrote into a column-subset frame, which can raise
# SettingWithCopyWarning.
prediction = prediction.astype({
  "toStopId": str,
  "routeId": str,
  "directionId": str,
  "tripId": "int32",
})


In [127]:
# Start the joined table from the active-trip rows. This only binds a second
# name to the same DataFrame (no copy); the merge loop in the next cell rebinds
# `with_prediction` to new frames, so `final` itself is not modified.
with_prediction = final

In [128]:
joinby_cols = ["routeId", "directionId", "tripId"]

# Restart from `final` so that re-running this cell rebuilds the table from
# scratch instead of merging a second set of pred_* columns (suffixed _x/_y)
# onto the previous result.
with_prediction = final

for step in range(11, 21):
  stop_col = f"next_{step}_stopId"
  # Rename so the prediction's stop ID lines up with this step's look-ahead
  # stop column and the predicted value gets a step-specific name.
  to_join = prediction[joinby_cols + ["toStopId", f"pred_{step}"]].rename(
    columns={
      f"pred_{step}": f"pred_{step}_prediction",
      "toStopId": stop_col,
    }
  )
  # Left join: every active-trip row is kept; rows without a matching
  # prediction for this step get a missing value.
  with_prediction = with_prediction.merge(
    to_join,
    how="left",
    on=joinby_cols + [stop_col],
  )

In [130]:
# Persist the joined table; index=False leaves the row index out of the CSV.
with_prediction.to_csv("../raw-data/demo-prediction.csv", index=False)

# Generate demo bus location (fake TransitView) data

Should produce data that looks like this:

```
{
  "bus": [
    {
      "lat": "39.952599999999997",
      "lng": "-75.165199999999999",
      "route_id": "21",
      "trip": "203196",
      "Direction": "EastBound",
      "next_stop_id": null,
      "next_stop_name": null,
      "next_stop_sequence": null
    },
    {
      "lat": "39.952843000000001",
      "lng": "-75.191413999999995",
      "route_id": "21",
      "trip": "203297",
      "Direction": "WestBound",
      "next_stop_id": "21361",
      "next_stop_name": "Walnut St & 36th St",
      "next_stop_sequence": 34
    }
  ]
}
```

In [169]:
# Project the active-trip rows onto the fields of the bus-location payload and
# rename them to the payload's keys. directionId keeps its name here; a later
# cell turns it into the "Direction" label.
realtime_cols = [
  "routeId", "directionId", "tripId", "toStopId", "toStopName", "toStopSequence"
]
payload_names = {
  "toStopId": "next_stop_id",
  "toStopName": "next_stop_name",
  "toStopSequence": "next_stop_sequence",
  "routeId": "route_id",
  "tripId": "trip",
}
realtime = final[realtime_cols].rename(columns=payload_names)

In [145]:
# Compass label for each route, keyed by integer route number and then by
# integer direction code (0 or 1).
directionDict = {
  route: dict(enumerate(labels))
  for route, labels in (
    (21, ("Eastbound", "Westbound")),
    (33, ("Southbound", "Northbound")),
    (47, ("Southbound", "Northbound")),
  )
}

In [171]:
# Translate (route, direction code) into a compass label. Both values are cast
# to int because directionDict is keyed by integers.
# A comprehension is used instead of DataFrame.apply(axis=1): on an empty frame
# (no active trips) apply returns a DataFrame, which cannot be assigned to a
# single column and made this cell raise.
realtime["Direction"] = [
  directionDict[int(route_id)][int(direction_id)]
  for route_id, direction_id in zip(realtime["route_id"], realtime["directionId"])
]
realtime = realtime.drop(columns="directionId")

In [173]:
import geopandas as gpd
# Read the stop table and store stop IDs as strings, presumably so they
# compare equal to the string stop IDs used as the merge key later — confirm.
stops = gpd.read_file("../../db/stops-all.geojson")

stops["StopId"] = stops["StopId"].astype(str)

In [174]:
# Keep only the stop ID and coordinates, renamed to the payload's keys so the
# ID column can serve directly as the merge key.
stop_field_names = {"StopId": "next_stop_id", "Lon": "lng", "Lat": "lat"}
stops = stops[list(stop_field_names)].rename(columns=stop_field_names)

In [176]:
# Keep the first row for each stop ID so the left merge that follows cannot
# multiply bus rows.
first_occurrence = ~stops.duplicated(subset=["next_stop_id"])
stops = stops[first_occurrence]

In [177]:
# Attach coordinates of the next stop to each bus row. The left join keeps
# buses whose stop ID has no match; their lat/lng are then missing.
realtime = pd.merge(realtime, stops, how="left", on="next_stop_id")

In [186]:
import json

routes = ["21", "33", "47"]

# Write one file per route, shaped as {"bus": [ {...}, ... ]}.
for route in routes:
  subset = realtime.query("route_id == @route").copy()
  # Missing values (for example lat/lng of a stop with no match in the left
  # merge) would be written by json.dump as the bare token NaN, which is not
  # valid JSON; emit null instead.
  records = [
    {key: (None if pd.isna(value) else value) for key, value in record.items()}
    for record in subset.to_dict(orient="records")
  ]
  json_data = {"bus": records}

  with open(f"../../db/demo-transit-view/{route}.json", "w") as f:
    json.dump(json_data, f)

# Generate fake prediction data

In [195]:
# Preview the first rows of the joined table that the loop below turns into
# one JSON file per trip.
with_prediction.head()

Unnamed: 0,routeId,directionId,tripId,toStopId,toStopName,toStopSequence,observedFromStopDepartureTime,observedToStopArrivalTime,next_11_stopId,next_11_stopName,next_12_stopId,next_12_stopName,next_13_stopId,next_13_stopName,next_14_stopId,next_14_stopName,next_15_stopId,next_15_stopName,next_16_stopId,next_16_stopName,next_17_stopId,next_17_stopName,next_18_stopId,next_18_stopName,next_19_stopId,next_19_stopName,next_20_stopId,next_20_stopName,pred_11_prediction,pred_12_prediction,pred_13_prediction,pred_14_prediction,pred_15_prediction,pred_16_prediction,pred_17_prediction,pred_18_prediction,pred_19_prediction,pred_20_prediction
0,33,0,64882,10255,Market St & 10th St,47,2022-10-26 08:29:03.941406250,2022-10-26 08:30:27,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,33,0,64883,31348,19th St & JFK Blvd,41,2022-10-26 08:30:04.046875000,2022-10-26 08:30:57,14930.0,Market St & 4th St,21318.0,Market St & 3rd St,10288.0,Market St & 2nd St,7657.0,Market St & Front St,593.0,Penn's Landing - 1,,,,,,,,,,,False,,,,,,,,,
2,33,1,64900,2769,19th St & Oxford St,38,2022-10-26 08:29:58.558593750,2022-10-26 08:30:46,3160.0,22nd St & Sedgley Av,3131.0,22nd St & Huntingdon St,596.0,22nd St & Lehigh Av,3163.0,22nd St & Somerset St,3109.0,22nd St & Cambria St,3133.0,22nd St & Indiana Av,3116.0,22nd St & Clearfield St,597.0,22nd St & Allegheny Av,2847.0,22nd St & Westmoreland St,3170.0,22nd St & Tioga St,False,False,False,False,False,False,,,,
3,33,1,64907,3163,22nd St & Somerset St,53,2022-10-26 08:29:49.937500000,2022-10-26 08:30:37,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,47,0,69540,724,6th St & Erie Av,27,2022-10-26 08:28:38.394531250,2022-10-26 08:30:36,16267.0,6th St & Huntingdon St,26048.0,6th St & Cumberland St,16268.0,6th St & York St,16269.0,6th St & Dauphin St,16270.0,6th St & Susquehanna Av,21133.0,Susquehanna Av & 7th St,725.0,8th St & Susquehanna Av - FS,30192.0,8th St & Diamond St,16273.0,8th St & Norris St,16274.0,8th St & Berks St,False,False,False,False,False,False,False,,,


In [197]:
def _nan_to_none(value):
  """Return None for a missing value (NaN/None/NaT), otherwise the value itself."""
  return None if pd.isna(value) else value


# Write one file per active trip, named <route>-<direction>-<trip>.json, holding
# the stop and prediction for each look-ahead distance 11..20.
for _, data in with_prediction.iterrows():
  route = data.routeId
  direction = data.directionId
  trip = data.tripId
  # Look-ahead stops past the end of the trip, and stops without a matching
  # prediction, are missing values. json.dump would write them as the bare
  # token NaN, which is not valid JSON, so they are converted to null.
  json_data = {
    "prediction": [
      {
        "stop_id": _nan_to_none(data[f"next_{step}_stopId"]),
        "stop_name": _nan_to_none(data[f"next_{step}_stopName"]),
        "prediction": _nan_to_none(data[f"pred_{step}_prediction"]),
      }
      for step in range(11, 21)
    ]
  }
  with open(f"../../db/demo-prediction/{route}-{direction}-{trip}.json", "w") as f:
    json.dump(json_data, f)