In [858]:
%pip install geopandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [859]:
import os, glob, io, zipfile, webbrowser
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString, Point
from pathlib import Path
import folium
from folium.plugins import MarkerCluster

In [860]:
# Path to the ACE violations export (CSV) analysed in this notebook.
ace_violation_ds = "MTA_Bus_Automated_Camera_Enforcement_Violations__Beginning_October_2019_20250918.csv"

# Read the listed columns as strings; every other column keeps pandas' inferred dtype.
violations = pd.read_csv(
    ace_violation_ds,
    dtype=dict.fromkeys(
        [
            "Vehicle ID",
            "First Occurrence",
            "Last Occurrence",
            "Violation Status",
            "Violation Type",
            "Bus Route ID",
            "Stop Name",
        ],
        str,
    ),
)

# Number of rows per violation status.
status_counts = violations["Violation Status"].value_counts()
print(status_counts)

Violation Status
VIOLATION ISSUED                2312878
TECHNICAL ISSUE/OTHER            320912
EXEMPT - EMERGENCY VEHICLE       286253
DRIVER/VEHICLE INFO MISSING      273968
EXEMPT - COMMERCIAL UNDER 20     257374
EXEMPT - BUS/PARATRANSIT         190192
EXEMPT - OTHER                   136991
Name: count, dtype: int64


In [None]:
# Total number of rows loaded from the violations CSV.
print(violations.shape[0])

In [None]:
# Directory searched for the zipped GTFS feeds (relative to the working directory).
FOLDER = Path("./bus_gtfs")  # or change to another working path
# Sanity check before looking for archives inside it.
print("FOLDER exists?", FOLDER.exists())

In [None]:
# List the GTFS archives found in FOLDER and fail fast when there are none.
zip_paths = sorted(FOLDER.glob("gtfs_*.zip"))
archive_names = [archive.name for archive in zip_paths]
print("Found:", archive_names)
assert zip_paths, f"No GTFS zips found in {FOLDER}/gtfs_*.zip"


In [None]:
# Read the required GTFS tables out of every feed archive in FOLDER.
# Set the pattern of the zipped filenames
ZIP_PATTERN = "gtfs_*.zip"
REQUIRED_FILES = ["shapes.txt", "stops.txt", "routes.txt", "trips.txt"]
# One list of per-feed DataFrames for each required GTFS file.
buckets = {k: [] for k in REQUIRED_FILES}

zips = sorted(glob.glob(os.path.join(FOLDER, ZIP_PATTERN)))
assert zips, f"No GTFS zips found in {FOLDER}/{ZIP_PATTERN}"

for zp in zips:
    feed_name = os.path.splitext(os.path.basename(zp))[0]  # e.g., 'gtfs_m'
    with zipfile.ZipFile(zp) as z:
        names = set(z.namelist())
        for fn in REQUIRED_FILES:
            if fn not in names:
                print(f"[WARN] {fn} missing in {feed_name}")
                continue
            # Open the archive member in a `with` block so its handle is closed as
            # soon as pandas has read it (read_csv does not close handles it is given).
            with z.open(fn) as member:
                df = pd.read_csv(member, dtype=str, low_memory=False)
            # Tag every row with its source feed so IDs can be disambiguated later.
            df["borough_feed"] = feed_name
            buckets[fn].append(df)


In [None]:
# Stack the per-feed frames into a single DataFrame per GTFS table.
shapes, stops, routes, trips = (
    pd.concat(buckets[gtfs_file], ignore_index=True)
    for gtfs_file in ("shapes.txt", "stops.txt", "routes.txt", "trips.txt")
)


In [None]:
# Everything was read as text; convert the coordinate columns to float
# and the shape point ordering to int.
for frame, float_cols in (
    (shapes, ("shape_pt_lat", "shape_pt_lon")),
    (stops, ("stop_lat", "stop_lon")),
):
    for col in float_cols:
        frame[col] = frame[col].astype(float)

shapes["shape_pt_sequence"] = shapes["shape_pt_sequence"].astype(int)


In [None]:
# shape_id can repeat across feeds, so prefix it with the feed name to get a
# key that is unique across all feeds.
feed_prefix = shapes["borough_feed"] + "_"
shapes["shape_uid"] = feed_prefix + shapes["shape_id"]

In [None]:
# Lookup from shape to route labels (short/long name, colour).
# trips supplies one (shape_id, feed) -> route_id row per shape; the route
# attributes are then joined in from that same feed's routes table.
shape2route = (
    trips.loc[:, ["route_id", "shape_id", "borough_feed"]]
    .dropna()
    .drop_duplicates(subset=["shape_id", "borough_feed"])
    .merge(
        routes.loc[:, ["route_id", "route_short_name", "route_long_name", "route_color", "borough_feed"]],
        how="left",
        on=["route_id", "borough_feed"],
    )
    # Same feed-prefixed key as on the shapes table.
    .assign(shape_uid=lambda t: t["borough_feed"] + "_" + t["shape_id"])
)


In [None]:
# One LineString per shape_uid, built from its points in sequence order.
shapes_sorted = shapes.sort_values(["shape_uid", "shape_pt_sequence"])
lines = (
    shapes_sorted
    .groupby("shape_uid")[["shape_pt_lon", "shape_pt_lat"]]
    .apply(lambda pts: LineString(pts.to_numpy()))  # (lon, lat) order = (x, y)
    .reset_index(name="geometry")
)



In [None]:
# Wrap the shape lines in a GeoDataFrame (EPSG:4326) and attach each shape's
# route attributes via shape_uid; shapes without a match keep NaN attributes.
routes_gdf = gpd.GeoDataFrame(lines, geometry="geometry", crs="EPSG:4326").merge(
    shape2route.loc[
        :,
        ["shape_uid", "route_id", "route_short_name", "route_long_name", "route_color", "borough_feed"],
    ],
    how="left",
    on="shape_uid",
)


In [None]:
# Restrict the map to the routes of interest (also keeps the map responsive).
CUNY_buses = [
    "M15", "M98", "M101", "M103",
    "BX10", "BX28", "BX22", "BX25",
    "B11", "B41", "B49", "B103",
    "S93", "S61", "S94", "S59",
    "Q25", "Q34", "Q17", "Q44+",
    "SIM7", "M20", "M9", "M22",
    "B1", "B49",
]
routes_gdf = routes_gdf.loc[routes_gdf["route_id"].isin(CUNY_buses)]

# Stops as point geometries; borough_feed is kept so stop IDs stay unambiguous.
stops_gdf = gpd.GeoDataFrame(
    stops[["stop_id", "stop_name", "stop_lat", "stop_lon", "borough_feed"]],
    geometry=gpd.points_from_xy(x=stops["stop_lon"], y=stops["stop_lat"]),
    crs="EPSG:4326",
)


In [None]:
# Create base folium map
# "cartodbpositron" is supplied once, as the map's base tiles. The same tile set
# used to be added a second time as an overlay, which only drew identical tiles
# twice and put a redundant entry in the layer control.
m = folium.Map(tiles="cartodbpositron", zoom_start=11, prefer_canvas=True)




In [None]:
# Fit to route bounds
# total_bounds is unpacked as (minx, miny, maxx, maxy); fit_bounds is given
# the two corners with each pair flipped to [y, x], i.e. [lat, lon].
minx, miny, maxx, maxy = routes_gdf.total_bounds
southwest_corner = [miny, minx]
northeast_corner = [maxy, maxx]
m.fit_bounds([southwest_corner, northeast_corner])



In [None]:

# Create an explicit "routes" pane at z-index 400.
# NOTE(review): the stated goal was to keep stops ABOVE routes, but the "stops"
# pane below is commented out and the polylines drawn later in this notebook do
# not pass pane="routes" - confirm whether this pane is still needed.
folium.map.CustomPane("routes", z_index=400).add_to(m)
# folium.map.CustomPane("stops",  z_index=650).add_to(m)


In [None]:
# draw each shape (LineString) as a polyline
def line_to_latlon_coords(geom):
    """Convert a line geometry into a flat list of ``(lat, lon)`` tuples.

    Parameters
    ----------
    geom
        An object exposing ``geom_type`` plus ``coords`` (for a "LineString")
        or ``geoms`` (for a "MultiLineString"), e.g. a shapely geometry.
        ``None`` and empty geometries are accepted.

    Returns
    -------
    list[tuple[float, float]]
        Coordinates with the axis order flipped from (x=lon, y=lat) to
        (lat, lon). Any geometry type other than the two above, ``None`` and
        empty geometries yield ``[]``.

    Notes
    -----
    The parts of a MultiLineString are concatenated into one list, so a single
    polyline drawn from the result joins the end of each part to the start of
    the next.
    """
    if geom is None or getattr(geom, "is_empty", False):
        return []

    def _flip(points):
        # Index instead of unpacking two values so that coordinates carrying a
        # third (z) component do not raise; the extra component is ignored.
        return [(pt[1], pt[0]) for pt in points]

    if geom.geom_type == "LineString":
        return _flip(geom.coords)
    if geom.geom_type == "MultiLineString":
        return [pair for part in geom.geoms for pair in _flip(part.coords)]
    return []


In [None]:
# color by route (simple cycle)
# Challenge: Use route_color from routes_gdf
palette = [
    "red", "blue", "green", "purple", "orange", "darkred", "lightred",
    "mediumgreen", "darkblue", "darkgreen", "cadetblue", "darkpurple",
    "brown", "pink", "lightblue", "lightgreen", "gray", "navy",
    "lightgray", "maroon", "mediumyellow",
]

# Routes grouped by the colour they are drawn in; flattened below into the
# route -> colour lookup used when plotting.
_routes_by_color = [
    ("purple", ["M15", "M98", "M101", "M103"]),
    ("gold", ["BX10", "BX28", "BX22", "BX25"]),
    ("maroon", ["B11", "B41", "B49", "B103"]),
    ("lightblue", ["S93", "S61", "S94", "S59"]),
    ("red", ["Q25", "Q34", "Q17", "Q44+"]),
    ("orange", ["SIM7", "M20", "M9", "M22"]),
    ("blue", ["B1"]),
]
color_map = {
    route_name: color_name
    for color_name, route_names in _routes_by_color
    for route_name in route_names
}


In [None]:
# Routes in the ACE program, read from a local CSV export
# (per the author's note, a stand-in until the list can be fetched online).
ace_routes = (
    pd.read_csv(
        'MTA_Bus_Automated_Camera_Enforced_Routes__Beginning_October_2019_20250921.csv',
        dtype={"Route": str, "Program": str},
    )
    .loc[lambda enforced: enforced['Program'] == 'ACE', 'Route']
    .unique()
)

# One toggleable map layer per route category.
ace_route = folium.FeatureGroup(name="ACE Routes")
nrml_route = folium.FeatureGroup(name="Normal Routes")

In [None]:
# Draw every route shape as a polyline, on the ACE or the normal layer.
# Tooltip fields if present
tooltip_fields = [f for f in ["route_id","route_long_name"] if f in routes_gdf.columns]

# Set for membership tests instead of scanning the array for every row.
ace_route_ids = set(ace_routes)
# Colour used for any route that has no entry in color_map (avoids a KeyError).
FALLBACK_ROUTE_COLOR = "gray"

for _, row in routes_gdf.iterrows():
    # First usable label: NaN is truthy, so an `or` chain would never fall
    # through to the next field for a missing value; test explicitly instead.
    route = next(
        (
            row[field]
            for field in ("route_id", "route_short_name")
            if field in row.index and pd.notna(row[field]) and row[field] != ""
        ),
        "route",
    )

    coords = line_to_latlon_coords(row.geometry)
    if not coords:
        continue  # nothing drawable for this shape

    is_ace = route in ace_route_ids
    folium.PolyLine(
        locations=coords,
        color=color_map.get(route, FALLBACK_ROUTE_COLOR),
        weight=2,
        # ACE routes are drawn fainter than normal routes.
        opacity=.5 if is_ace else 0.9,
        tooltip=f"ACE Route ID: {route}" if is_ace else f"Route ID: {route}",
    ).add_to(ace_route if is_ace else nrml_route)

ace_route.add_to(m)
nrml_route.add_to(m)

In [None]:
# CUNY campus markers: (popup label, [lat, lon], icon colour).
# folium.Icon only accepts a fixed list of colour names; "yellow" and "maroon"
# are not on it, so Lehman uses "beige" and Brooklyn College uses "darkred"
# as the closest supported choices.
CUNY_CAMPUSES = [
    ("Hunter College", [40.7678, -73.9645], "purple"),
    ("Lehman College", [40.8729, -73.8945], "beige"),
    ("Borough of Manhattan Community College", [40.7179, -74.0120], "orange"),
    ("Brooklyn College", [40.6309, -73.9515], "darkred"),
    ("Kingsborough Community College", [40.5787, -73.9351], "blue"),
    ("Queens College", [40.7367, -73.8203], "red"),
    ("College of Staten Island", [40.6022, -74.1504], "lightblue"),
]

for campus_name, campus_latlon, icon_color in CUNY_CAMPUSES:
    folium.Marker(
        location=campus_latlon,
        popup=campus_name,
        icon=folium.Icon(color=icon_color),
    ).add_to(m)

In [None]:
ace_violation_ds = "MTA_Bus_Automated_Camera_Enforcement_Violations__Beginning_October_2019_20250918.csv"

#georeference?

# `violations` was already read from this same CSV, with the same dtypes, in the
# first data cell of the notebook. Parsing the multi-million-row file a second
# time only costs time and memory, so the existing frame is filtered instead.
# B11 = violations[violations['Bus Route ID'] == 'B11']

# Keep only rows on the CUNY routes. No inplace mutation, and an explicit copy
# so that later column assignments act on an independent frame rather than on
# a slice of the full table.
violations = (
    violations
    .dropna(subset=['Bus Route ID'])
    .loc[lambda v: v['Bus Route ID'].isin(CUNY_buses)]
    .copy()
)

In [None]:
# Parse both occurrence columns from "MM/DD/YYYY hh:mm:ss AM/PM" text into datetimes.
for occurrence_col in ('First Occurrence', 'Last Occurrence'):
    violations[occurrence_col] = pd.to_datetime(
        violations[occurrence_col],
        format='%m/%d/%Y %I:%M:%S %p',
    )

In [None]:
# start_date = "2025-08-06"
# end_date = "2025-08-20"

# violations = violations[(violations['First Occurrence'] >= start_date) & (violations['First Occurrence'] <= end_date)]


In [None]:
# TODO: get different violation types
# TODO: make a heatmap

# Preview the first nine rows of the violations table.
violations.head(9)

In [None]:
# Distinct violation types, in order of first appearance.
violation_type = pd.unique(violations["Violation Type"])

print(violation_type)

In [None]:
# Distinct violation statuses, in order of first appearance.
violation_status = pd.unique(violations["Violation Status"])

print(violation_status)

In [None]:
# Report the table's dimensions.
rows = len(violations)
columns = len(violations.columns)
print(f"Rows: {rows}, Columns: {columns}")

In [None]:
# Violation types seen in the data:
# ['MOBILE BUS STOP' 'MOBILE BUS LANE' 'MOBILE DOUBLE PARKED']

# One subset per exemption status; each is mapped as its own heat layer later on.
emergency = violations.loc[violations["Violation Status"].eq('EXEMPT - EMERGENCY VEHICLE')]
bus_para = violations.loc[violations["Violation Status"].eq('EXEMPT - BUS/PARATRANSIT')]
commercial = violations.loc[violations["Violation Status"].eq('EXEMPT - COMMERCIAL UNDER 20')]
other = violations.loc[violations["Violation Status"].eq('EXEMPT - OTHER')]

emergency.head(9)


# double_parked = violations[violations["Violation Status"] == 'TECHNICAL ISSUE/OTHER']
# double_parked = violations[violations["Violation Status"] == 'DRIVER/VEHICLE INFO MISSING'] 
# double_parked = violations[violations["Violation Status"] == 'EXEMPT - COMMERCIAL UNDER 20']  
# double_parked = violations[violations["Violation Status"] == 'VIOLATION ISSUED']  

In [None]:
# The seven CUNY campus markers (Hunter, Lehman, BMCC, Brooklyn College,
# Kingsborough, Queens College, College of Staten Island) are already added to
# `m` by the earlier campus-marker cell. Repeating the same calls here stacked a
# second, identical marker on every campus, so this cell intentionally adds nothing.

In [None]:
from folium.plugins import HeatMap

# Heat layer for emergency-vehicle exemptions (created with show=False).
# Coordinates are pulled column-wise instead of row-by-row with iterrows(), and
# rows missing a latitude or longitude are dropped because HeatMap rejects NaNs.
heat_data = (
    emergency[['Violation Latitude', 'Violation Longitude']]
    .dropna()
    .to_numpy()
    .tolist()
)

# `max_val` is omitted: current folium ignores it (with a warning) and works out
# the largest intensity itself.
HeatMap(heat_data, radius=15, blur=15).add_to(folium.FeatureGroup(name='Emergency Exemption', show=False).add_to(m))

In [None]:
# Heat layer for bus/paratransit exemptions (created with show=False).
# Column-wise extraction replaces iterrows(); rows missing a coordinate are
# dropped because HeatMap rejects NaNs.
heat_data = (
    bus_para[['Violation Latitude', 'Violation Longitude']]
    .dropna()
    .to_numpy()
    .tolist()
)

# `max_val` is omitted: current folium ignores it (with a warning).
HeatMap(heat_data, radius=15, blur=15).add_to(folium.FeatureGroup(name='Bus/Para Exemption', show=False).add_to(m))

In [None]:
# Heat layer for "other" exemptions (created with show=False).
# Column-wise extraction replaces iterrows(); rows missing a coordinate are
# dropped because HeatMap rejects NaNs.
heat_data = (
    other[['Violation Latitude', 'Violation Longitude']]
    .dropna()
    .to_numpy()
    .tolist()
)

# `max_val` is omitted: current folium ignores it (with a warning).
HeatMap(heat_data, radius=15, blur=15).add_to(folium.FeatureGroup(name='Other Exemption', show=False).add_to(m))

In [None]:
# Heat layer for commercial-vehicle exemptions (created with show=False).
# Column-wise extraction replaces iterrows(); rows missing a coordinate are
# dropped because HeatMap rejects NaNs.
heat_data = (
    commercial[['Violation Latitude', 'Violation Longitude']]
    .dropna()
    .to_numpy()
    .tolist()
)

# `max_val` is omitted: current folium ignores it (with a warning).
HeatMap(heat_data, radius=15, blur=15).add_to(folium.FeatureGroup(name='Commercial Exemption', show=False).add_to(m))

In [None]:
# Add the layer control last, after the route and heat-map layers have been attached to the map.
folium.LayerControl().add_to(m)

In [None]:
# Display the finished map as the cell's output.
m

In [None]:
# Status counts again; by this point `violations` holds only the rows whose
# 'Bus Route ID' is in CUNY_buses, so the numbers differ from the first tally.
print(violations['Violation Status'].value_counts())