In [16]:
import pandas as pd # data wrangler library, dataframes are used to display and manipulate data
import seaborn as sns # data graphing library, built on top of matplotlib
import matplotlib.pyplot as plt # graphing library, used for titles and customization
import urllib.parse # library to parse URLs for querying
import folium # library to create interactive maps
import folium.plugins as plugins # plugins for folium, used for clustering map points
import geopandas as gpd # library to handle geospatial data

In [17]:
def encode_soql_query(query:str) -> str:
    """Turn a SoQL statement into a URL query-string suffix.

    Every newline in ``query`` is swapped for a single space, the result is
    percent-encoded with ``urllib.parse.quote``, and the ``?$query=`` prefix is
    prepended so the return value can be appended directly to an endpoint URL.

    Args:
        query: SoQL text, possibly spread over several lines.

    Returns:
        ``'?$query='`` followed by the percent-encoded, single-line query.
    """
    # split/join swaps each newline for exactly one space in a single pass
    single_line = ' '.join(query.split('\n'))

    # percent-encode the statement and attach the prefix SoQL queries need
    return '?$query=' + urllib.parse.quote(single_line)

In [18]:
# CSV endpoint (resource id kh8p-hcbm on data.ny.gov); the encoded SoQL query is appended to this URL
ace_violations_api = "https://data.ny.gov/resource/kh8p-hcbm.csv"

In [19]:
# SoQL text: up to 500 rows with status 'VIOLATION ISSUED', newest last_occurrence first
latest_violations_query = """
SELECT  * 
WHERE violation_status = 'VIOLATION ISSUED'
order by last_occurrence DESC
LIMIT 500
"""

# URL-encode the statement, then glue it onto the dataset endpoint
encoded_query = encode_soql_query(latest_violations_query)
api_query = f"{ace_violations_api}{encoded_query}"

In [20]:
# pandas downloads and parses the CSV directly from the query URL (network call)
violations_df = pd.read_csv(api_query)
# preview the first rows of the result
violations_df.head()

Unnamed: 0,violation_id,vehicle_id,first_occurrence,last_occurrence,violation_status,violation_type,bus_route_id,violation_latitude,violation_longitude,stop_id,stop_name,bus_stop_latitude,bus_stop_longitude,violation_georeference,bus_stop_georeference
0,488479043,94d2d34b5fd7eff7ff5bebb262409c0630419eee5490e8...,2025-08-13T08:02:49.000,2025-08-13T09:36:21.000,VIOLATION ISSUED,MOBILE BUS LANE,M15+,40.754109,-73.965926,401701,1 AV/MITCHELL PL,40.753591,-73.966311,POINT (-73.965926 40.754109),POINT (-73.966311 40.753591)
1,488478069,50b892e1365b3d9819eb9a47632568d95414abfabbe745...,2025-08-13T09:17:08.000,2025-08-13T09:31:48.000,VIOLATION ISSUED,MOBILE BUS LANE,M15+,40.769208,-73.958054,401756,2 AV/E 78 ST,40.772922,-73.955434,POINT (-73.958054 40.769208),POINT (-73.955434 40.772922)
2,488477643,c83fa1cbb96db6ed7ca17e96297213fac1601fa4a72b98...,2025-08-13T09:20:43.000,2025-08-13T09:30:03.000,VIOLATION ISSUED,MOBILE BUS LANE,M101,40.769135,-73.961101,402688,3 AV/E 67 ST,40.767113,-73.962513,POINT (-73.961101 40.769135),POINT (-73.962513 40.767113)
3,488477681,549f0fdcd4bf9d233edc9420cbaa69aac740c1dad6d64d...,2025-08-13T09:19:43.000,2025-08-13T09:29:39.000,VIOLATION ISSUED,MOBILE BUS LANE,B82+,40.609278,-73.957117,300470,AVENUE P/OCEAN AV,40.610932,-73.954068,POINT (-73.957117 40.609278),POINT (-73.954068 40.610932)
4,488477700,bec69fb8644705a826a91d8052f6646661d224724aa241...,2025-08-13T09:22:05.000,2025-08-13T09:28:11.000,VIOLATION ISSUED,MOBILE BUS LANE,B25,40.68808,-73.978874,302367,FULTON ST/ASHLAND PL,40.68785,-73.97845,POINT (-73.978874 40.68808),POINT (-73.97845 40.68785)


In [21]:
# Build one point per row from the bus stop coordinates (x = longitude, y = latitude).
# NOTE(review): violation_latitude / violation_longitude are not used here, so every
# row is located at its bus stop — confirm that this is the intended position.
geometry = gpd.points_from_xy(violations_df.bus_stop_longitude, violations_df.bus_stop_latitude)

# Declare the CRS explicitly: the coordinates are lon/lat degrees, and the other
# GeoDataFrames built in this notebook are tagged EPSG:4326 too. Without it the
# frame has no CRS and cannot be reprojected or safely combined with them.
geo_df = gpd.GeoDataFrame(
    violations_df, geometry=geometry, crs="EPSG:4326"
)
geo_df.head()

Unnamed: 0,violation_id,vehicle_id,first_occurrence,last_occurrence,violation_status,violation_type,bus_route_id,violation_latitude,violation_longitude,stop_id,stop_name,bus_stop_latitude,bus_stop_longitude,violation_georeference,bus_stop_georeference,geometry
0,488479043,94d2d34b5fd7eff7ff5bebb262409c0630419eee5490e8...,2025-08-13T08:02:49.000,2025-08-13T09:36:21.000,VIOLATION ISSUED,MOBILE BUS LANE,M15+,40.754109,-73.965926,401701,1 AV/MITCHELL PL,40.753591,-73.966311,POINT (-73.965926 40.754109),POINT (-73.966311 40.753591),POINT (-73.96631 40.75359)
1,488478069,50b892e1365b3d9819eb9a47632568d95414abfabbe745...,2025-08-13T09:17:08.000,2025-08-13T09:31:48.000,VIOLATION ISSUED,MOBILE BUS LANE,M15+,40.769208,-73.958054,401756,2 AV/E 78 ST,40.772922,-73.955434,POINT (-73.958054 40.769208),POINT (-73.955434 40.772922),POINT (-73.95543 40.77292)
2,488477643,c83fa1cbb96db6ed7ca17e96297213fac1601fa4a72b98...,2025-08-13T09:20:43.000,2025-08-13T09:30:03.000,VIOLATION ISSUED,MOBILE BUS LANE,M101,40.769135,-73.961101,402688,3 AV/E 67 ST,40.767113,-73.962513,POINT (-73.961101 40.769135),POINT (-73.962513 40.767113),POINT (-73.96251 40.76711)
3,488477681,549f0fdcd4bf9d233edc9420cbaa69aac740c1dad6d64d...,2025-08-13T09:19:43.000,2025-08-13T09:29:39.000,VIOLATION ISSUED,MOBILE BUS LANE,B82+,40.609278,-73.957117,300470,AVENUE P/OCEAN AV,40.610932,-73.954068,POINT (-73.957117 40.609278),POINT (-73.954068 40.610932),POINT (-73.95407 40.61093)
4,488477700,bec69fb8644705a826a91d8052f6646661d224724aa241...,2025-08-13T09:22:05.000,2025-08-13T09:28:11.000,VIOLATION ISSUED,MOBILE BUS LANE,B25,40.68808,-73.978874,302367,FULTON ST/ASHLAND PL,40.68785,-73.97845,POINT (-73.978874 40.68808),POINT (-73.97845 40.68785),POINT (-73.97845 40.68785)


In [22]:
# Base map: CartoDB Positron tiles, centred on [40.730610, -73.935242] at zoom level 11.
# NOTE(review): the name `map` shadows Python's built-in map() for the rest of the notebook.
map = folium.Map(location=[40.730610, -73.935242], tiles="CartoDB Positron", zoom_start=11)

In [23]:
# display the base map before any markers are added
map


In [24]:
# unique colors
unique_stops = geo_df.bus_route_id.nunique()
color_palette = sns.color_palette("hls", unique_stops).as_hex()
unique_values = geo_df.bus_route_id.unique()
color_map = dict(zip(unique_values, color_palette))

In [25]:
# [latitude, longitude] pair for every row — folium expects latitude first
geo_df_list = [[point.xy[1][0], point.xy[0][0]] for point in geo_df.geometry]

# Walk the coordinates and the popup columns in lockstep. Iterating the columns
# positionally (instead of looking rows up by a hand-kept counter used as an index
# label) keeps each marker paired with its own row even when geo_df's index is not
# the default 0..n-1, e.g. after filtering or sorting.
# NOTE: re-running this cell adds the same markers to `map` again; recreate the map first.
marker_rows = zip(
    geo_df_list,
    geo_df.bus_route_id,
    geo_df.stop_name,
    geo_df.violation_type,
)
for coordinates, route, stop_name, violation_type in marker_rows:
    # Place the marker with its popup label; the icon glyph is tinted per route
    map.add_child(
        folium.Marker(
            location=coordinates,
            popup=f"""
            Route: {route} <br>
            Stop Name: {stop_name} <br>
            Violation Type: {violation_type} <br>
            """,
            icon=folium.Icon(color='white', icon_color=color_map[route], icon="info-sign"),
        )
    )

In [26]:
# display the map with the violation markers added
map

In [27]:
from shapely.geometry import LineString
from pathlib import Path

In [28]:
FOLDER = Path("../data/gtfs_subway")
print(f"Loading GTFS data from: {FOLDER.resolve()}")

# Load the four GTFS tables. Everything is read as text so ids keep their exact
# form; the numeric columns are converted explicitly further down.
shapes_path = FOLDER / "shapes.txt"
stops_path = FOLDER / "stops.txt"
routes_path = FOLDER / "routes.txt"
trips_path = FOLDER / "trips.txt"

shapes = pd.read_csv(shapes_path, dtype=str, low_memory=False)
stops = pd.read_csv(stops_path, dtype=str, low_memory=False)
routes = pd.read_csv(routes_path, dtype=str, low_memory=False)
trips = pd.read_csv(trips_path, dtype=str, low_memory=False)
print("Successfully loaded GTFS text files.")

# Tag every table with the feed it came from; the tag is part of the join keys below.
feed_name = "subway"
for df in [shapes, stops, routes, trips]:
    df["feed_name"] = feed_name

# Coordinates and point order must be numeric for geometry building and sorting.
for col in ["shape_pt_lat", "shape_pt_lon"]:
    shapes[col] = shapes[col].astype(float)
shapes["shape_pt_sequence"] = shapes["shape_pt_sequence"].astype(int)
stops["stop_lat"] = stops["stop_lat"].astype(float)
stops["stop_lon"] = stops["stop_lon"].astype(float)

# Feed-qualified shape id, shared by shapes and trips.
shapes["shape_uid"] = shapes["feed_name"] + "_" + shapes["shape_id"]
trips["shape_uid"] = trips["feed_name"] + "_" + trips["shape_id"]

# One row per shape carrying the attributes of a route that uses it
# (the first trip/route match is kept when several trips share a shape).
shape_to_route = trips.merge(
    routes, on=["route_id", "feed_name"]
).drop_duplicates("shape_uid")

# Turn each shape's ordered (lon, lat) points into a single LineString.
lines = (
    shapes.sort_values(["shape_uid", "shape_pt_sequence"])
    .groupby("shape_uid")[["shape_pt_lon", "shape_pt_lat"]]
    .apply(lambda pts: LineString(pts.to_numpy()))
    .to_frame("geometry")
    .reset_index()
)

# Left merge: a shape with no matching trip/route is kept, with null route columns.
routes_gdf = gpd.GeoDataFrame(lines, geometry="geometry", crs="EPSG:4326").merge(
    shape_to_route, on="shape_uid", how="left"
)

stops_gdf = gpd.GeoDataFrame(
    stops,
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs="EPSG:4326"
)

# Dedicated panes control draw order (higher z_index is drawn on top).
# The "stops" pane is created here but no layer in this cell is assigned to it.
# NOTE: re-running this cell adds the panes, route layer and layer control to `map` again.
folium.map.CustomPane("routes", z_index=400).add_to(map)
folium.map.CustomPane("stops", z_index=650).add_to(map)


def _route_line_style(feature):
    """Return the line style for one route feature, coloured by its route_color property."""
    # dict.get()'s default only applies when the key is absent. A shape without a
    # matching route (left merge above) or with a blank route_color still has the
    # key, holding a null value, which previously produced the invalid colour "#None".
    route_color = feature['properties'].get('route_color')
    if not isinstance(route_color, str) or not route_color.strip():
        route_color = '000000'
    return {
        'color': f"#{route_color}",
        'weight': 3,
        'opacity': 0.7,
    }


folium.GeoJson(
    routes_gdf,
    name="Subway Routes",
    style_function=_route_line_style,
    tooltip=folium.GeoJsonTooltip(fields=["route_short_name", "route_long_name"]),
    pane="routes"
).add_to(map)

folium.LayerControl(collapsed=False).add_to(map)

Loading GTFS data from: C:\Users\drodr\coding\MTA-MHC-Datatahon\data\gtfs_subway
Successfully loaded GTFS text files.


<folium.map.LayerControl at 0x1b4571b8190>

In [29]:
# display the map with the subway route layer and layer control added
map

In [30]:
# Write the interactive map to a standalone HTML file.
# The target folder is created first so the save also works on a fresh checkout
# where ../data/cleaned does not exist yet (folium does not create it).
output_path = Path('../data/cleaned/latest_violations_map.html')
output_path.parent.mkdir(parents=True, exist_ok=True)
map.save(str(output_path))