In [30]:
import pandas as pd
import numpy as np
# Do not truncate wide frames when they are displayed: show every column.
pd.set_option('display.max_columns', None)
# Load the source table. The file is read as Parquet even though its name ends in ".gzip".
runtime = pd.read_parquet("../raw-data/runtimeSeveralRoutesOctWithLags_all.gzip")

# Generate prediction result for demo data

Form a table with:

- Rows representing the active trips at "2022-10-26 08:30:16".
- Columns containing information of:
  - Route
  - Direction
  - Current stop ID and stop name
  - Stop IDs and stop names 11-20 stops ahead

In [95]:
# Select the rows for routes 21, 47 and 33 on service date 2022-10-26.
# `.copy()` detaches the result from `runtime` so it can be modified independently.
query = "(routeId.isin(['21', '47', '33'])) & (serviceDate == '2022-10-26')"
runtime_sel = runtime.query(query).copy()

In [96]:
# One row per trip: the earliest ("min") and latest ("max") observed stop arrival,
# i.e. the time range each trip covers on this day.
arrivals_by_trip = runtime_sel.groupby('tripId')["observedToStopArrivalTime"]
trips = arrivals_by_trip.agg(["min", "max"])

In [97]:
# Snapshot instant for the demo.
desired_time = "2022-10-26 08:30:16"

# A trip counts as active when the snapshot lies strictly between its first and
# last observed stop arrival ("min" / "max" are the columns built by the agg above).
trips_sel = trips.query("min < @desired_time < max").copy()
# The index of `trips` is the tripId, so this is the list of active trip ids.
tripIds_sel = trips_sel.index.tolist()

In [98]:
# Keep every observation row that belongs to one of the active trips.
runtime_used = runtime_sel.query("tripId in @tripIds_sel").copy()

In [99]:
# Derive when the bus left the previous stop: arrival time at the "to" stop
# minus the observed runtime of the segment (given in seconds).
segment_runtime = pd.to_timedelta(runtime_used["observedRuntimeSeconds"], unit='s')
runtime_used["observedFromStopDepartureTime"] = (
  runtime_used["observedToStopArrivalTime"] - segment_runtime
)

In [100]:
# First, understand what the next 10-20 stops are for each instance
# Narrow the frame to the trip identifiers, the stop being approached, and the
# departure/arrival times that bracket each segment.
keep_cols = [
    "routeId", "directionId", "tripId",
    "toStopId", "toStopName", "toStopSequence",
    "observedFromStopDepartureTime", "observedToStopArrivalTime",
]
runtime_used = runtime_used[keep_cols]

In [101]:
# Order the rows by stop sequence within each (route, direction, trip).
# NOTE(review): the name `sorted` shadows Python's built-in `sorted()` for the rest
# of the kernel session. It is kept here because later cells reference it; consider
# renaming it everywhere in one pass (e.g. `runtime_sorted`).
sorted = runtime_used.sort_values(
    ["routeId", "directionId", "tripId", "toStopSequence"]
)

In [102]:
# For each look-ahead distance (11..20 stops), add the id and the name of the stop
# that lies `step` rows further along the same trip. Rows with fewer than `step`
# rows remaining in their trip get missing values from the shift.
trip_keys = ["routeId", "directionId", "tripId"]
lookahead_sources = (("stopId", "toStopId"), ("stopName", "toStopName"))

for step in range(11, 21):
  for suffix, source_col in lookahead_sources:
    sorted[f"next_{step}_{suffix}"] = (
      sorted.groupby(trip_keys)[source_col].shift(-step)
    )

In [103]:
# Now select the instances that fall within the desired timeframe: keep the rows
# whose segment strictly contains `desired_time`, i.e. the bus has departed the
# previous stop but has not yet arrived at `toStopId`.
final = sorted.query(
  "observedFromStopDepartureTime < @desired_time < observedToStopArrivalTime"
).copy()

Next, join prediction results

In [89]:
# Load the prediction results that will be joined onto the active-trip table.
prediction = pd.read_csv("../raw-data/demo-prediction-results.csv")

In [92]:
# Keep only the join keys plus every prediction column (names starting with "pred_").
id_cols = ["routeId", "directionId", "tripId", "toStopId"]
pred_cols = [name for name in prediction.columns if name.startswith("pred_")]
prediction = prediction[id_cols + pred_cols]

In [117]:
# data types: cast the join-key columns in one step.
# `astype` with a column -> dtype mapping returns a new frame, which avoids the
# attribute-style column assignment (`prediction.col = ...`) on a frame that was
# produced by column selection (a SettingWithCopyWarning source on pandas < 3).
# NOTE(review): presumably these dtypes mirror the key dtypes of `final` so the
# merge below can match rows — confirm against the parquet schema.
prediction = prediction.astype({
  "toStopId": str,
  "routeId": str,
  "directionId": str,
  "tripId": "int32",
})


In [127]:
# Starting point for the joins below. This binds a second name to the same frame
# as `final` (no copy is made); the merges that follow rebind `with_prediction`
# to new frames rather than modifying `final`.
with_prediction = final

In [128]:
joinby_cols = ["routeId", "directionId", "tripId"]

# For every look-ahead distance, left-join the matching prediction column onto the
# active trips. The prediction's `toStopId` is renamed to `next_{step}_stopId` so
# it lines up with the stop that lies `step` stops ahead of each trip.
for step in range(11, 21):
  stop_col = f"next_{step}_stopId"
  merge_keys = joinby_cols + [stop_col]
  renames = {
    f"pred_{step}": f"pred_{step}_prediction",
    "toStopId": stop_col,
  }
  to_join = prediction[joinby_cols + ["toStopId", f"pred_{step}"]].rename(
    columns=renames
  )
  with_prediction = with_prediction.merge(
    to_join, how="left", left_on=merge_keys, right_on=merge_keys
  )

In [130]:
# Persist the joined table; the row index is not written.
with_prediction.to_csv("../raw-data/demo-prediction.csv", index=False)

# Generate demo bus location (fake TransitView) data

Should produce data that looks like this:

```
{
  "bus": [
    {
      "lat": "39.952599999999997",
      "lng": "-75.165199999999999",
      "route_id": "21",
      "trip": "203196",
      "Direction": "EastBound",
      "next_stop_id": null,
      "next_stop_name": null,
      "next_stop_sequence": null
    },
    {
      "lat": "39.952843000000001",
      "lng": "-75.191413999999995",
      "route_id": "21",
      "trip": "203297",
      "Direction": "WestBound",
      "next_stop_id": "21361",
      "next_stop_name": "Walnut St & 36th St",
      "next_stop_sequence": 34
    },
  ]
}
```

In [169]:
# Build the bus-location table from the active segments, renaming columns to the
# field names used in the sample payload above.
realtime_source_cols = [
  "routeId", "directionId", "tripId", "toStopId", "toStopName", "toStopSequence"
]
realtime_field_names = {
  "toStopId": "next_stop_id",
  "toStopName": "next_stop_name",
  "toStopSequence": "next_stop_sequence",
  "routeId": "route_id",
  "tripId": "trip"
}
realtime = final[realtime_source_cols].rename(columns=realtime_field_names)

In [145]:
# Direction label per route (outer key, int) and direction id (inner key, int).
# Routes 33 and 47 use the same labels; each gets its own dict.
_north_south_labels = {0: "Southbound", 1: "Northbound"}

directionDict = {
  21: {0: "Eastbound", 1: "Westbound"},
  33: dict(_north_south_labels),
  47: dict(_north_south_labels),
}

In [171]:
# Translate each bus's (route, direction id) pair into a direction label.
# Both values are cast to int because `directionDict` is keyed by ints.
# A comprehension over the two columns is used instead of a row-wise `apply`:
# on an empty frame (no active trips) `apply(axis=1)` returns an empty DataFrame,
# which cannot be assigned to a single column, whereas an empty list can.
realtime["Direction"] = [
  directionDict[int(route_id)][int(direction_id)]
  for route_id, direction_id in zip(realtime["route_id"], realtime["directionId"])
]
# The raw direction id is not part of the output payload.
# Note: this makes the cell non-idempotent — re-running it needs `realtime` rebuilt.
realtime = realtime.drop("directionId", axis=1)

In [173]:
import geopandas as gpd
# Load the stops GeoJSON and store the stop ids as strings.
stops = gpd.read_file("../../db/stops-all.geojson")
stops["StopId"] = stops["StopId"].astype(str)

In [174]:
# Keep only the id and coordinates, renamed to the payload's field names
# (`next_stop_id` doubles as the join key for the merge with `realtime`).
stop_field_names = {
  "StopId": "next_stop_id",
  "Lon": "lng",
  "Lat": "lat"
}
stops = stops[list(stop_field_names)].rename(columns=stop_field_names)

In [176]:
# Keep the first row per stop id so the left merge below cannot multiply bus rows.
stops = stops.drop_duplicates(subset=["next_stop_id"])

In [177]:
# Attach coordinates of the next stop to every bus. It is a left join, so buses
# whose stop id is absent from `stops` are kept with missing lng/lat.
realtime = pd.merge(realtime, stops, on="next_stop_id", how="left")

In [186]:
import json

routes = ["21", "33", "47"]

# Write one fake TransitView file per route, shaped as {"bus": [ {...}, ... ]}.
for route in routes:
  subset = realtime.query("route_id == @route")
  # Missing values (e.g. lng/lat for a stop that was not found in `stops`) would be
  # serialised by `json.dump` as the bare token NaN, which is not valid JSON.
  # Casting to object first lets `where` substitute a real None, which is emitted
  # as `null` (the form used for missing fields in the sample payload).
  subset = subset.astype(object).where(subset.notna(), None)
  json_data = {"bus": subset.to_dict(orient="records")}

  with open(f"../../db/demo-transit-view/{route}.json", "w") as f:
    json.dump(json_data, f)

In [181]:
# Inspect the generated rows for route 21 (display only; nothing is assigned).
realtime.query("route_id == '21'")

Unnamed: 0,route_id,trip,next_stop_id,next_stop_name,next_stop_sequence,Direction,lng,lat
20,21,60643,593,Penn's Landing - 1,71,Eastbound,-75.140231,39.948716
21,21,60644,6067,Chestnut St & 23rd St,48,Eastbound,-75.178276,39.952591
22,21,60645,516,Chestnut St & 44th St,31,Eastbound,-75.209866,39.956488
23,21,60702,6062,Chestnut St & 18th St,53,Eastbound,-75.170462,39.951602
24,21,60773,21390,Market St & Chatham Rd,72,Westbound,-75.256477,39.962175
25,21,60774,434,Walnut St & 52nd St,53,Westbound,-75.225394,39.956987


In [182]:
# Payload for all routes at once, in the same {"bus": [...]} shape as the per-route files.
json_data = {"bus": realtime.to_dict(orient="records")}

In [183]:
import json

{'bus': [{'route_id': '33',
   'trip': 64882,
   'next_stop_id': '10255',
   'next_stop_name': 'Market St & 10th St',
   'next_stop_sequence': 47,
   'Direction': 'Southbound',
   'lng': -75.157213,
   'lat': 39.951512},
  {'route_id': '33',
   'trip': 64883,
   'next_stop_id': '31348',
   'next_stop_name': '19th St & JFK Blvd',
   'next_stop_sequence': 41,
   'Direction': 'Southbound',
   'lng': -75.17142,
   'lat': 39.954559},
  {'route_id': '33',
   'trip': 64900,
   'next_stop_id': '2769',
   'next_stop_name': '19th St & Oxford St',
   'next_stop_sequence': 38,
   'Direction': 'Northbound',
   'lng': -75.166113,
   'lat': 39.978199},
  {'route_id': '33',
   'trip': 64907,
   'next_stop_id': '3163',
   'next_stop_name': '22nd St & Somerset St',
   'next_stop_sequence': 53,
   'Direction': 'Northbound',
   'lng': -75.167178,
   'lat': 39.997113},
  {'route_id': '47',
   'trip': 69540,
   'next_stop_id': '724',
   'next_stop_name': '6th St & Erie Av',
   'next_stop_sequence': 27,
   '