In [1]:
# --- Step 1: Setup and Load Data ---

# Core libraries
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 120)

# --- Load Capital Bikeshare Data ---
# Replace with your local path or URL
bike_path = "../../Data/sample_dc_bikeshare/SAMPLE_DC_BIKE.csv"
bike = pd.read_csv(bike_path)

# Inspect structure
print("Bikeshare data shape:", bike.shape)
print(bike.head())

# --- Load Pedestrian & Cycle Counts Data ---
ped_path = "../../Data/sample_dc_foot/dc_foot_data.csv"
ped = pd.read_csv(ped_path)

print("\nPedestrian data shape:", ped.shape)
print(ped.head())


  bike = pd.read_csv(bike_path)


Bikeshare data shape: (2749881, 14)
            ride_id  rideable_type           started_at             ended_at             start_station_name  \
0  13A48BD20CC3DD85   classic_bike  2021-01-08 17:58:48  2021-01-08 18:25:23   Connecticut Ave & Yuma St NW   
1  CA68C580B32EEE66  electric_bike  2021-01-08 22:05:51  2021-01-08 22:23:52                  8th & D St NW   
2  5A925307814D5C2B   classic_bike  2021-01-21 17:20:31  2021-01-21 17:23:24  Wilson Blvd. & N. Vermont St.   
3  B6B2DB54A2B233EE   classic_bike  2021-01-28 10:06:30  2021-01-28 10:20:54  Franklin St & S Washington St   
4  921A142D5C8A9759   classic_bike  2021-01-11 12:09:45  2021-01-11 12:20:15      St. Asaph & Montgomery St   

  start_station_id               end_station_name end_station_id  start_lat  start_lng    end_lat    end_lng  \
0          31318.0    Georgia Ave & Emerson St NW        31405.0  38.947156 -77.065115  38.949662 -77.027333   
1          31270.0                 14th & D St SE        31663.0  38.8948

In [None]:
from bikeshare_pedestrian_analysis import main
# =============================================================================
# Run analysis - pedestrian coords are built-in!
# =============================================================================
analyzer = main(bike, ped, distance_threshold=800)

# =============================================================================
# Access results
# =============================================================================
print("\n" + "="*70)
print("RESULTS SUMMARY")
print("="*70)

# Correlation results
print(f"\nAnalyzed {len(analyzer.correlation_results)} pedestrian locations")
print(f"Mean correlation: {analyzer.correlation_results['pearson_r'].mean():.3f}")
print(f"Median correlation: {analyzer.correlation_results['pearson_r'].median():.3f}")

# Top 5 correlations
print("\nTop 5 strongest correlations:")
top5 = analyzer.correlation_results.nlargest(5, 'pearson_r')
for _, row in top5.iterrows():
    print(f"  {row['ped_station'][:50]:50s} r={row['pearson_r']:.3f}")


Cleaning bikeshare data...
Bikeshare data cleaned: 2416630 trips
Cleaning pedestrian data...
Pedestrian data cleaned: 404766 observations

Processing station coordinates...
Loaded 1538 cached geocodes

Creating spatial matches (threshold: 800m)...
  Bikeshare stations with coordinates: 1520
  Pedestrian counters with coordinates: 18
  Found 276 spatial matches
    16 pedestrian stations matched
    89 bikeshare stations matched

  Example matches:
    Wharf Classic - Maine Ave Cycle Track    <-> 4th & M St SW                            (553m)
    Wharf Classic - Maine Ave Cycle Track    <-> L'Enfant Plaza / 7th & C St SW           (585m)
    Wharf Classic - Maine Ave Cycle Track    <-> 4th & C St SW                            (658m)

Computing correlations...

Aggregating bikeshare data to hourly resolution...
  Created hourly aggregation: 1515397 station-hour combinations

Correlation analysis complete for 7 locations

Saved correlation summary plot to RQ1_PLOTS/correlation_summary.pn