In [49]:
import pandas as pd 
import geopandas as gpd
from shapely.geometry import Point

In [57]:
# Load the ACE violations export into a DataFrame.
# NOTE(review): the previous comment described this data as covering
# 2023-01-01 through 2023-06-30 (a six-month period), while the filename
# says Jan-Aug 2023 -- confirm which date range the file actually holds.
ACE_VIOLATIONS_CSV = "Datasets/ACE_Violations_Jan-Aug2023.csv"
df = pd.read_csv(ACE_VIOLATIONS_CSV)

In [58]:
# Preview the first three rows. Leaving the frame as the cell's last
# expression lets Jupyter render it as a table; print() falls back to
# plain text that wraps wide frames across several column groups.
df.head(3)

   Violation ID                                         Vehicle ID  \
0     373687616  d1e3f8e8c8c1756ca7f9e2cf69d09ae20ddd07ff7fa934...   
1     373494041  a7491a0a20a5b042412f2e706c737ed14e9aab009696d6...   
2     373493941  ab6829022aff77c11808799667448ad3b0949d849f7079...   

         First Occurrence         Last Occurrence  Violation Status  \
0  06/16/2023 10:16:52 AM  06/16/2023 10:26:04 AM  VIOLATION ISSUED   
1  06/27/2023 10:10:32 AM  06/27/2023 10:24:01 AM  VIOLATION ISSUED   
2  06/27/2023 08:22:24 AM  06/27/2023 08:27:26 AM  VIOLATION ISSUED   

    Violation Type Bus Route ID  Violation Latitude  Violation Longitude  \
0  MOBILE BUS LANE         BX36           40.849195           -73.917176   
1  MOBILE BUS LANE         M14+           40.722795           -73.976605   
2  MOBILE BUS LANE         M14+           40.722012           -73.977398   

   Stop ID               Stop Name  Bus Stop Latitude  Bus Stop Longitude  \
0   100151  UNIVERSITY AV/W 176 ST          40.84921

In [59]:
# Turn the violations table into a GeoDataFrame with one point per row.
# points_from_xy takes x first, then y, so longitude is passed as x and
# latitude as y.
violation_points = gpd.points_from_xy(
    x=df["Violation Longitude"],
    y=df["Violation Latitude"],
)

# The CRS tells geopandas how the coordinates are interpreted on Earth.
gdf = gpd.GeoDataFrame(df, geometry=violation_points, crs="EPSG:4326")

In [60]:
# Read the boundary of the Central Business District congestion zone
# from its GeoJSON file.
CBD_BOUNDARY_FILE = "map.geojson"
region = gpd.read_file(CBD_BOUNDARY_FILE)

In [61]:
# Inner spatial join: keep only the violation points that fall within a
# geometry of the congestion-zone boundary. Matching rows also pick up the
# columns of `region`.
violations_in_region = gdf.sjoin(region, how="inner", predicate="within")

In [62]:
# Print headline numbers: overall size of the dataset, how many violations
# fall inside the region, and which bus routes they belong to.
unique_buses = violations_in_region["Bus Route ID"].unique()
print("Total violations in dataset:", len(df))
print("Violations inside region:", len(violations_in_region))
print("Buses with violations in CBD:", unique_buses)

# Count violations per route in a single grouped pass instead of re-filtering
# the whole frame once per route.
#   sort=False   -> routes come out in order of first appearance, the same
#                   order .unique() reports above.
#   dropna=False -> rows with a missing route ID form their own group; an
#                   equality filter against NaN never matches, so such rows
#                   would otherwise be reported as 0.
for bus, bus_violations in violations_in_region.groupby(
    "Bus Route ID", sort=False, dropna=False
):
    print(f"Total violations on {bus} route: {len(bus_violations)}")

Total violations in dataset: 97003
Violations inside region: 7669
Buses with violations in CBD: ['M14+' 'M15+' 'M23+' 'M34+']
Total violations on M14+ route: 2529
Total violations on M15+ route: 4832
Total violations on M23+ route: 195
Total violations on M34+ route: 113


In [63]:
import folium

# Centre the map on the mean position of the in-region violations. This is
# the average of the violation coordinates, not the centroid of the boundary
# polygon.
center_lat = violations_in_region["Violation Latitude"].mean()
center_lon = violations_in_region["Violation Longitude"].mean()

# Create a folium map
m = folium.Map(location=[center_lat, center_lon], zoom_start=14)

# Add polygon boundary (CBD)
folium.GeoJson("map.geojson", name="CBD").add_to(m)

# Add one circle marker per violation. Only the two coordinate columns are
# needed, so iterate over them directly rather than materialising a full row
# Series for every record with iterrows().
for lat, lon in zip(
    violations_in_region["Violation Latitude"],
    violations_in_region["Violation Longitude"],
):
    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color='blue',
        fill=True,
        fill_opacity=0.5
    ).add_to(m)

# Display map
m