In [595]:
%pip install geopandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [596]:
import os, glob, io, zipfile, webbrowser
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString, Point
from pathlib import Path
import folium
from folium.plugins import MarkerCluster

In [597]:
FOLDER = Path("./bus_gtfs")  # or change to another working path
print("FOLDER exists?", FOLDER.exists())

FOLDER exists? True


In [598]:
### Verify the paths found in FOLDER
zip_paths = sorted(FOLDER.glob("gtfs_*.zip"))
print("Found:", [p.name for p in zip_paths])
assert zip_paths, f"No GTFS zips found in {FOLDER}/gtfs_*.zip"


Found: ['gtfs_b.zip', 'gtfs_busco.zip', 'gtfs_bx.zip', 'gtfs_m.zip', 'gtfs_q.zip', 'gtfs_si.zip']


In [599]:
# Set the pattern of the zipped filenames
ZIP_PATTERN = "gtfs_*.zip"
REQUIRED_FILES = ["shapes.txt", "stops.txt", "routes.txt", "trips.txt"]
buckets = {k: [] for k in REQUIRED_FILES}

zips = sorted(glob.glob(os.path.join(FOLDER, ZIP_PATTERN)))
assert zips, f"No GTFS zips found in {FOLDER}/{ZIP_PATTERN}"

for zp in zips:
    feed_name = os.path.splitext(os.path.basename(zp))[0]  # e.g., 'gtfs_m'
    with zipfile.ZipFile(zp) as z:
        names = set(z.namelist())
        for fn in REQUIRED_FILES:
            if fn in names:
                df = pd.read_csv(z.open(fn), dtype=str, low_memory=False)
                df["borough_feed"] = feed_name
                buckets[fn].append(df)
            else:
                print(f"[WARN] {fn} missing in {feed_name}")


In [600]:
# concat and normalize dtypes
shapes = pd.concat(buckets["shapes.txt"], ignore_index=True)
stops  = pd.concat(buckets["stops.txt"],  ignore_index=True)
routes = pd.concat(buckets["routes.txt"], ignore_index=True)
trips  = pd.concat(buckets["trips.txt"],  ignore_index=True)


In [601]:
# cast numeric columns
for col in ["shape_pt_lat", "shape_pt_lon"]:
    shapes[col] = shapes[col].astype(float)
shapes["shape_pt_sequence"] = shapes["shape_pt_sequence"].astype(int)

stops["stop_lat"] = stops["stop_lat"].astype(float)
stops["stop_lon"] = stops["stop_lon"].astype(float)


In [602]:
# make a collision-proof shape key (shape_id can repeat across feeds)
shapes["shape_uid"] = shapes["borough_feed"] + "_" + shapes["shape_id"]

In [603]:
# Mapping for shapes and route labels (short/long name)
# Merge trips to routes
shape2route = (
    trips[["route_id", "shape_id", "borough_feed"]].dropna()
    .drop_duplicates(["shape_id", "borough_feed"])
    .merge(
        routes[["route_id", "route_short_name", "route_long_name", "route_color", "borough_feed"]],
        on=["route_id", "borough_feed"], how="left"
    )
)
shape2route["shape_uid"] = shape2route["borough_feed"] + "_" + shape2route["shape_id"]


In [604]:
# build LineStrings per shapes (shape_uid)
shapes_sorted = shapes.sort_values(["shape_uid", "shape_pt_sequence"])
lines = (
    shapes_sorted
      .groupby("shape_uid")[["shape_pt_lon", "shape_pt_lat"]]
      .apply(lambda df: LineString(df.to_numpy()))
      .to_frame("geometry")
      .reset_index()
)



In [605]:
# Merge shapes with routes geodataframe 
routes_gdf = gpd.GeoDataFrame(lines, geometry="geometry", crs="EPSG:4326")
routes_gdf = (
    routes_gdf
    .merge(
        shape2route[["shape_uid", "route_id", "route_short_name", "route_long_name", "route_color", "borough_feed"]],
        on="shape_uid", how="left"
    )
)


In [606]:
# filter for few specific routes if needed (specially If the map feels slow)
CUNY_buses = ["M15", "M98", "M101", "M103", "BX10", "BX28", "BX22", "BX25", "B11", "B41", "B49", "B103", "S93", "S61", "S94", "S59", "Q25","Q34","Q17","Q44+", "SIM7", "M20", "M9", "M22", "B1", "B49"]
routes_gdf = routes_gdf[routes_gdf["route_id"].isin(CUNY_buses)]

# Get stops GeoDataFrame (keep borough_feed to avoid ID ambiguity)
stops_gdf = gpd.GeoDataFrame(
    stops[["stop_id", "stop_name", "stop_lat", "stop_lon", "borough_feed"]],
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs="EPSG:4326"
)


In [607]:
# Create base folium map

m = folium.Map(tiles="cartodbpositron", zoom_start=11, prefer_canvas=True)

folium.TileLayer("cartodbpositron", overlay=True).add_to(m)




<folium.raster_layers.TileLayer at 0x1aa1a12b890>

In [608]:
# Fit to route bounds
minx, miny, maxx, maxy = routes_gdf.total_bounds
m.fit_bounds([[miny, minx], [maxy, maxx]])



In [609]:

# Create explicit panes so stops are ABOVE routes
folium.map.CustomPane("routes", z_index=400).add_to(m)
# folium.map.CustomPane("stops",  z_index=650).add_to(m)


<folium.map.CustomPane at 0x1aa0ce95950>

In [610]:
# draw each shape (LineString) as a polyline
def line_to_latlon_coords(geom):
    # geom is a shapely LineString or MultiLineString
    if geom.geom_type == "LineString":
        return [(lat, lon) for lon, lat in geom.coords]
    elif geom.geom_type == "MultiLineString":
        coords = []
        for part in geom.geoms:
            coords.extend([(lat, lon) for lon, lat in part.coords])
        return coords
    else:
        return []


In [611]:
# color by route (simple cycle)
# Challenge: Use route_color from routes_gdf
palette = [
    "red","blue","green","purple","orange","darkred","lightred","mediumgreen",
    "darkblue","darkgreen","cadetblue","darkpurple","brown","pink","lightblue",
    "lightgreen","gray","navy","lightgray", "maroon", "mediumyellow"
]
color_map = {
  "M15": "purple",
  "M98": "purple",
  "M101": "purple",
  "M103": "purple",
  "BX10": "gold",
  "BX28": "gold",
  "BX22": "gold",
  "BX25": "gold",
  "B11": "maroon",
  "B41": "maroon",
  "B49": "maroon",
  "B103": "maroon",
  "S93": "lightblue",
  "S61": "lightblue",
  "S94": "lightblue",
  "S59": "lightblue",
  "Q25": "red",
  "Q34": "red",
  "Q17": "red",
  "Q44+": "red",
  "SIM7": "orange",
  "M20": "orange",
  "M9": "orange",
  "M22": "orange",
  "B1": "blue"
}


In [612]:
# list of ace routes until i get wifi lol
ace_routes = set(["BX28", "B41",  "M101", "M15"])

nrml_route = folium.FeatureGroup(name="Normal Routes")
ace_route = folium.FeatureGroup(name="ACE Routes")

In [613]:
# Tooltip fields if present
tooltip_fields = [f for f in ["route_id","route_long_name"] if f in routes_gdf.columns]

for i, row in routes_gdf.iterrows():
    route = row.get("route_id") or row.get("route_short_name") or "route"
    
    coords = line_to_latlon_coords(row.geometry)
    if coords and route in ace_routes:
        folium.PolyLine(
            locations=coords,
            color=color_map[route],
            weight=2,
            opacity=.5,
            tooltip=f"ACE Route ID: {route}",
        ).add_to(ace_route)
    elif coords:
        folium.PolyLine(
            locations=coords,
            color=color_map[route],
            weight=2,
            opacity=0.9,
            tooltip=f"Route ID: {route}",
        ).add_to(nrml_route)

ace_route.add_to(m)
nrml_route.add_to(m)

<folium.map.FeatureGroup at 0x1aa1a12bdd0>

In [614]:
# Hunter
folium.Marker(
    location=[40.7678, -73.9645],
   #  tooltip="Click me!",
    popup="Hunter College",
    icon=folium.Icon(color="purple"),
).add_to(m)

# Lehman
folium.Marker(
    location=[40.8729, -73.8945],
   #  tooltip="Click me!",
    popup="Lehman College",
    icon=folium.Icon(color="yellow"),
).add_to(m)

# BMCC
folium.Marker(
    location=[40.7179, -74.0120],
   #  tooltip="Click me!",
    popup="Borough of Manhattan Community College",
    icon=folium.Icon(color="orange"),
).add_to(m)

#Brooklyn College
folium.Marker(
    location=[40.6309, -73.9515],
   #  tooltip="Click me!",
    popup="Brooklyn College",
    icon=folium.Icon(color="maroon"),
).add_to(m)

# Kingsborough Community College
folium.Marker(
    location=[40.5787, -73.9351],
   #  tooltip="Click me!",
    popup="Kingsborough Community College",
    icon=folium.Icon(color="blue"),
).add_to(m)

# Queens College
folium.Marker(
    location=[40.7367, -73.8203],
   #  tooltip="Click me!",
    popup="Queens College",
    icon=folium.Icon(color="red"),
).add_to(m)

# College of Staten Island
folium.Marker(
    location=[40.6022, -74.1504],
   #  tooltip="Click me!",
    popup="College of Staten Island",
    icon=folium.Icon(color="lightblue"),
).add_to(m)

  icon=folium.Icon(color="yellow"),
  icon=folium.Icon(color="maroon"),


<folium.map.Marker at 0x1aa0ce95f90>

In [None]:
ace_violation_ds = "MTA_Bus_Automated_Camera_Enforcement_Violations__Beginning_October_2019_20250918.csv"

#georeference?

# GET A SMALLER CSV FILE DAMN!!!!!
violations = pd.read_csv(ace_violation_ds, dtype={'Vehicle ID': str, 'First Occurrence':str, "Last Occurrence": str, "Violation Status": str,           
"Violation Type":str, 'Bus Route ID': str, 'Stop Name': str})


# B11 = violations[violations['Bus Route ID'] == 'B11']

violations.dropna(subset=['Bus Route ID'], inplace=True)
violations = violations[violations['Bus Route ID'].isin(CUNY_buses)]

Unnamed: 0,Violation ID,Vehicle ID,First Occurrence,Last Occurrence,Violation Status,Violation Type,Bus Route ID,Violation Latitude,Violation Longitude,Stop ID,Stop Name,Bus Stop Latitude,Bus Stop Longitude,Violation Georeference,Bus Stop Georeference
3502547,320280727,d0ea36bdbe16a5f585ca7c429f52e5ed25a243d9e2dd76...,05/28/2022 02:05:24 PM,05/28/2022 02:12:05 PM,VIOLATION ISSUED,MOBILE BUS LANE,,40.720833,-73.989316,405320,ALLEN ST/GRAND ST,40.717171,-73.991222,POINT (-73.989316 40.7208326666667),POINT (-73.991222 40.717171)
3502548,320280726,4bb426b65c6cc2cd84b8341a3c1110d01fb7485d90c4fe...,05/28/2022 02:05:19 PM,05/28/2022 02:12:02 PM,VIOLATION ISSUED,MOBILE BUS LANE,,40.720637,-73.989413,405320,ALLEN ST/GRAND ST,40.717171,-73.991222,POINT (-73.9894125 40.7206366666667),POINT (-73.991222 40.717171)
3502549,320280722,d124ee51ac69c43e8c4ffaa6b2b5509f8ae384b9749c8f...,05/28/2022 04:44:02 PM,05/28/2022 05:08:02 PM,EXEMPT - EMERGENCY VEHICLE,MOBILE BUS LANE,,40.730797,-73.982646,903093,1 AV/E 1 ST,40.724012,-73.987802,POINT (-73.9826458333333 40.7307973333333),POINT (-73.987802 40.724012)
3502550,320280715,1a6645f96834b5c90554123335c8c1799c5a18180e2f64...,05/28/2022 07:35:14 AM,05/28/2022 07:52:34 AM,VIOLATION ISSUED,MOBILE BUS LANE,,40.635362,-73.947982,303477,NOSTRAND AV/NEWKIRK AV,40.640085,-73.948517,POINT (-73.9479825 40.6353616666667),POINT (-73.948517 40.640085)
3502551,320280714,b66fa433bb53693528021bc441267c07e45ca482979f49...,05/28/2022 07:35:21 AM,05/28/2022 07:52:40 AM,VIOLATION ISSUED,MOBILE BUS LANE,,40.634748,-73.947907,303477,NOSTRAND AV/NEWKIRK AV,40.640085,-73.948517,POINT (-73.9479075 40.6347476666667),POINT (-73.948517 40.640085)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3769215,219521764,85bafa84779ae3e45b751483eb1479ad732d9c41c30a9f...,11/14/2019 08:57:21 AM,11/14/2019 10:47:21 AM,TECHNICAL ISSUE/OTHER,MOBILE BUS LANE,,40.677394,-73.952795,901280,ROGERS AV/SAINT JOHNS PLACE,40.671592,-73.952889,POINT (-73.95279467 40.67739367),POINT (-73.952889 40.671592)
3769216,219521754,f827fd57d750b9adb11c22905b21cbabe0196c877fdd6a...,11/14/2019 10:47:20 AM,11/14/2019 10:57:59 AM,TECHNICAL ISSUE/OTHER,MOBILE BUS LANE,,40.677238,-73.952687,901280,ROGERS AV/SAINT JOHNS PLACE,40.671592,-73.952889,POINT (-73.95268667 40.67723833),POINT (-73.952889 40.671592)
3769217,219521737,05d72d9df3226a0c434811ce4e75417d967cd28d8ff111...,11/14/2019 10:08:36 AM,11/14/2019 10:47:19 AM,TECHNICAL ISSUE/OTHER,MOBILE BUS LANE,,40.677201,-73.952760,901280,ROGERS AV/SAINT JOHNS PLACE,40.671592,-73.952889,POINT (-73.95275967 40.67720133),POINT (-73.952889 40.671592)
3771007,218854922,b56b50d3e645d802969e020a2c5a3955aed62e58083b2f...,11/08/2019 03:12:14 PM,11/08/2019 03:43:11 PM,TECHNICAL ISSUE/OTHER,MOBILE BUS LANE,,40.677344,-73.952704,901280,ROGERS AV/SAINT JOHNS PLACE,40.671592,-73.952889,POINT (-73.95270383 40.67734433),POINT (-73.952889 40.671592)


In [None]:
B11

In [None]:
violations['First Occurrence'] = pd.to_datetime(violations['First Occurrence'], format='%m/%d/%Y %I:%M:%S %p')
violations['Last Occurrence'] = pd.to_datetime(violations['Last Occurrence'], format='%m/%d/%Y %I:%M:%S %p')

In [None]:
start_date = "2025-08-06"
end_date = "2025-08-20"

violations = violations[(violations['First Occurrence'] >= start_date) & (violations['First Occurrence'] <= end_date)]


In [None]:
# get different violation types
# make a heatmap

violations.head(9)

In [None]:
violation_type = violations["Violation Type"].unique()

print(violation_type)

In [None]:
violation_status = violations["Violation Status"].unique()

print(violation_status)

In [None]:
# violations.apply(lambda row:folium.CircleMarker(location=[row["Violation Latitude"], row["Violation Longitude"]], 
#                                                radius=10, popup=f"{row['Violation Type']} Status: {row["Violation Status"]}").add_to(m), axis=1)

In [None]:
rows, columns = violations.shape
print(f"Rows: {rows}, Columns: {columns}")

In [None]:
# ['MOBILE BUS STOP' 'MOBILE BUS LANE' 'MOBILE DOUBLE PARKED']
bus_stop = violations[violations["Violation Type"] == 'MOBILE BUS STOP']
bus_lane = violations[violations["Violation Type"] == 'MOBILE BUS LANE']
double_parked = violations[violations["Violation Type"] == 'MOBILE DOUBLE PARKED'] 

In [None]:
from folium.plugins import HeatMap

heat_data = [[row['Violation Latitude'], row['Violation Longitude']] for index, row in bus_stop.iterrows()]

HeatMap(heat_data, radius=15, blur=15, max_val=10).add_to(folium.FeatureGroup(name='Bus Stop Violations', show=False).add_to(m))

In [None]:
heat_data = [[row['Violation Latitude'], row['Violation Longitude']] for index, row in bus_lane.iterrows()]

HeatMap(heat_data, radius=15, blur=15, max_val=10).add_to(folium.FeatureGroup(name='Bus Lane Violation', show=False).add_to(m))

In [None]:
heat_data = [[row['Violation Latitude'], row['Violation Longitude']] for index, row in double_parked.iterrows()]

HeatMap(heat_data, radius=15, blur=15, max_val=10).add_to(folium.FeatureGroup(name='Double Parked Violation', show=False).add_to(m))

In [None]:
folium.LayerControl().add_to(m)

In [None]:
m