In [1]:
# --- Step 1: Setup and Load Data ---

# Core libraries
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

# Display settings: never elide DataFrame columns, and let printed frames
# use up to 120 characters per line before wrapping.
pd.options.display.max_columns = None
pd.options.display.width = 120

# --- Load NYC Bikeshare Data ---
# Replace with your local path or URL
bike_path = "../datasets/SAMPLE_NYC_BIKE.csv"
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, so a column cannot end up with mixed types.
# NOTE(review): the saved output of this cell contains a warning fragment that
# points at this read_csv call, presumably a DtypeWarning about mixed-type
# columns. Once the offending columns are confirmed, an explicit `dtype=`
# mapping would be the more precise (and more memory-friendly) fix.
bike = pd.read_csv(bike_path, low_memory=False)

# Inspect structure
print("Bikeshare data shape:", bike.shape)
print(bike.head())

# --- Load Bi-Annual Pedestrian Counts Data ---
ped_path = "../datasets/Bi-Annual_Pedestrian_Counts.csv"
ped = pd.read_csv(ped_path)

print("\nPedestrian data shape:", ped.shape)
print(ped.head())


  bike = pd.read_csv(bike_path)


Bikeshare data shape: (6000000, 14)
            ride_id  rideable_type               started_at                 ended_at       start_station_name  \
0  ACCC919B5A3CD9AD  electric_bike  2025-01-01 14:52:26.542  2025-01-01 14:59:53.427          W 20 St & 7 Ave   
1  1FABDF3EE40FCB0E   classic_bike  2025-01-10 05:03:13.646  2025-01-10 05:13:13.331          W 20 St & 7 Ave   
2  88F0F3CFCBC79652   classic_bike  2025-01-13 13:40:17.630  2025-01-13 13:47:05.817  St James Pl & Oliver St   
3  6FDE4E191D58E453  electric_bike  2025-01-10 08:29:16.996  2025-01-10 08:34:49.360  St James Pl & Oliver St   
4  E9B03B9F77A85455  electric_bike  2025-01-11 18:59:48.427  2025-01-11 19:13:21.292          E 33 St & 1 Ave   

  start_station_id         end_station_name end_station_id  start_lat  start_lng    end_lat    end_lng member_casual  \
0          6182.02          E 10 St & 2 Ave        5746.02  40.742388 -73.997262  40.729708 -73.986598        casual   
1          6182.02          E 25 St & 1 Ave  

In [2]:
from bikeshare_pedestrian_analysis_nyc import main

# -----------------------------------------------------------------------------
# Run the analysis on the two raw frames. Pedestrian coordinates are built in,
# so no separate coordinate input is passed here.
# -----------------------------------------------------------------------------
# Spatial matching threshold; the analysis log reports this value in metres.
DISTANCE_THRESHOLD = 1600
analyzer = main(bike, ped, distance_threshold=DISTANCE_THRESHOLD)

# -----------------------------------------------------------------------------
# Summarise the per-location correlation table
# -----------------------------------------------------------------------------
banner = "=" * 70
print("\n" + banner)
print("RESULTS SUMMARY")
print(banner)

# Look the table up once and reuse it for every statistic below.
corr_table = analyzer.correlation_results
pearson_values = corr_table['pearson_r']

print(f"\nAnalyzed {len(corr_table)} pedestrian locations")
print(f"Mean correlation: {pearson_values.mean():.3f}")
print(f"Median correlation: {pearson_values.median():.3f}")

# The five locations with the largest Pearson r, names cut/padded to 50 chars
print("\nTop 5 strongest correlations:")
top5 = corr_table.nlargest(5, 'pearson_r')
for station_name, r_value in zip(top5['ped_station'], top5['pearson_r']):
    print(f"  {station_name[:50]:50s} r={r_value:.3f}")


Cleaning bikeshare data...
Bikeshare data cleaned: 5977425 trips
Cleaning NYC pedestrian data...
NYC pedestrian data cleaned: 9,690 rows (long format)

Processing station coordinates (using provided lat/lon data)...

Creating spatial matches (threshold: 1600m)...
    76 pedestrian stations matched
    1896 bikeshare stations matched

>>> Calling prepare_pedestrian_long() now...

Preparing pedestrian data in long format...
ped_long prepared:
          station_name   latitude  longitude  datetime  pedestrian_count
0             Broadway  40.879199 -73.904591  May07_AM              1189
1    East 161st Street  40.826628 -73.921884  May07_AM              1511
2    East Fordham Road  40.862155 -73.895358  May07_AM              1832
3   East Gun Hill Road  40.881287 -73.878925  May07_AM               764
4  East Tremont Avenue  40.844637 -73.889564  May07_AM               650
Total rows: 9690

Computing correlations...

Aggregating bikeshare data to hourly resolution...
  Created hourly aggr