In [53]:
#Hello

In [58]:
#Import Libraries
import pandas as pd
import os

In [59]:
# Raw input files live in ../data/raw, relative to the notebook.
DATA_DIR = os.path.join("..", "data", "raw")

# Read the ACE violations CSV from the raw-data folder into a DataFrame.
ace_violations_csv = os.path.join(DATA_DIR, "ACE_violations.csv")
ace_violations = pd.read_csv(ace_violations_csv)
#ace_violations.head()

In [None]:
import json
import math

# Columns copied into each feature's "properties"; any that are absent from
# the frame are silently left out.
PROPERTY_COLUMNS = [
    "Violation ID",
    "Vehicle ID",
    "First Occurrence",
    "Last Occurrence",
    "Violation Status",
    "Violation Type",
    "Bus Route ID",
    "Stop ID",
    "Stop Name",
]

# Output location relative to the notebook (sibling of ../data/raw) instead of
# an absolute, user-specific path, so the notebook runs on any checkout.
OUTPUT_PATH = os.path.join("..", "data", "processed", "violations.geojson")

# Which property columns exist is the same for every row, so resolve it once
# rather than re-testing membership inside the loop.
available_props = [col for col in PROPERTY_COLUMNS if col in ace_violations.columns]

features = []

for _, row in ace_violations.iterrows():
    try:
        v_lat, v_lon = row["Violation Latitude"], row["Violation Longitude"]

        # Skip rows whose coordinates are missing or not finite numbers.
        if (
            pd.isna(v_lat) or pd.isna(v_lon) or
            not (math.isfinite(v_lat) and math.isfinite(v_lon))
        ):
            continue

        # Properties are stringified so every value is JSON-serialisable.
        props = {col: str(row[col]) for col in available_props}

        features.append({
            "type": "Feature",
            "properties": props,
            "geometry": {
                "type": "Point",
                # GeoJSON positions are ordered [longitude, latitude].
                "coordinates": [float(v_lon), float(v_lat)]
            }
        })
    except Exception as e:
        # Best-effort export: a malformed row is reported and skipped.
        print(f"Skipping row due to error: {e}")

geojson = {
    "type": "FeatureCollection",
    "features": features
}

# Create the target folder on first run so the write below cannot fail on a
# fresh checkout.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    json.dump(geojson, f, indent=2, ensure_ascii=False)

# Report the path actually written (the old message named a different file).
print(f"✅ Saved {len(features)} point features to {OUTPUT_PATH}")



In [None]:
# Small FeatureCollection holding only the first three features, handy for
# eyeballing the export without dumping the full list.
preview = dict(type="FeatureCollection", features=features[:3])

#print(json.dumps(preview, indent=2, ensure_ascii=False))


In [None]:
#Bus Speeds

In [67]:
#data/raw/bus_speed_2020_2024.csv
#data/raw/bus_speed_2025.csv

In [71]:
# Path to the raw-data folder, relative to the notebook.
# This repeats the value assigned to DATA_DIR in the earlier violations-loading cell.
DATA_DIR = os.path.join("..", "data", "raw")

In [73]:
# Load the two segment-speed extracts. The first file is the 2023-2024 extract
# (per its filename), so the frame is named to match what it actually holds.
segment_speed_2023_2024 = pd.read_csv(os.path.join(DATA_DIR, "segment_speed_2023_2024.csv"))
segment_speed_2025 = pd.read_csv(os.path.join(DATA_DIR, "segment_speed_2025.csv"))

# Backward-compatible alias: the frame was previously bound to this (misleading)
# name, so keep it available for any later cell that still refers to it.
segment_speed_2020_2024 = segment_speed_2023_2024

# Concatenate vertically (stack rows) with a fresh 0..n-1 index.
segment_speed_all = pd.concat([segment_speed_2023_2024, segment_speed_2025], ignore_index=True)

# Sanity check: combined shape and the first few rows.
print(segment_speed_all.shape)
print(segment_speed_all.head())

(19480, 24)
   Year  Month               Timestamp Day of Week  Hour of Day Route ID  \
0  2023      3  03/01/2023 09:00:00 AM      Friday            9    BX12+   
1  2023      3  03/01/2023 05:00:00 PM      Friday           17    BX12+   
2  2023      3  03/01/2023 08:00:00 AM      Friday            8    BX12+   
3  2023      3  03/01/2023 06:00:00 PM      Friday           18    BX12+   
4  2023      3  03/01/2023 07:00:00 AM      Friday            7    BX12+   

  Direction Borough Route Type  Stop Order  ...  Next Timepoint Stop ID  \
0         W   Bronx        SBS           1  ...                  103255   
1         W   Bronx        SBS           1  ...                  103255   
2         W   Bronx        SBS           1  ...                  103255   
3         W   Bronx        SBS           1  ...                  103255   
4         W   Bronx        SBS           1  ...                  103255   

  Next Timepoint Stop Name  Next Timepoint Stop Latitude  \
0       EDSON AV/BAR

In [76]:
# Grouping keys: one output row per year, hour of day, and start/end stop pair.
GROUP_KEYS = [
    "Year",
    "Hour of Day",
    "Timepoint Stop Georeference",
    "Next Timepoint Stop Georeference",
]

# Scale applied to distance / travel time.
# NOTE(review): presumably converts a per-minute rate to per-hour (i.e. travel
# time is in minutes) — confirm against the dataset's data dictionary.
SPEED_SCALE = 60

# Total distance and total travel time per group.
agg_df = segment_speed_all.groupby(GROUP_KEYS, as_index=False).agg({
    "Road Distance": "sum",
    "Average Travel Time": "sum"
})

# A group whose summed travel time is not positive has no meaningful speed;
# mask it to NaN so the division yields NaN instead of inf.
travel_time = agg_df["Average Travel Time"].where(agg_df["Average Travel Time"] > 0)

# Speed from the group totals (total distance over total travel time).
agg_df["Average Speed"] = (agg_df["Road Distance"] / travel_time) * SPEED_SCALE

# Inspect the first few aggregated rows.
print(agg_df.head())


   Year  Hour of Day   Timepoint Stop Georeference  \
0  2023            5  POINT (-73.826752 40.853265)   
1  2023            5  POINT (-73.826763 40.853265)   
2  2023            5  POINT (-73.827298 40.865989)   
3  2023            5  POINT (-73.827301 40.852176)   
4  2023            5    POINT (-73.83312 40.86805)   

  Next Timepoint Stop Georeference  Road Distance  Average Travel Time  \
0     POINT (-73.827623 40.865666)         62.145           237.370062   
1     POINT (-73.827623 40.865666)         20.730            76.575798   
2       POINT (-73.83312 40.86805)         43.380           307.586292   
3     POINT (-73.920569 40.867837)        375.534          1969.783128   
4     POINT (-73.827301 40.852176)         73.980           308.396682   

   Average Speed  
0      15.708384  
1      16.242730  
2       8.462016  
3      11.438843  
4      14.393151  
