In [13]:
import requests


# Layer name -> NYC Open Data GeoJSON endpoint that is downloaded for that layer.
NYC_OPEN_DATA_URLS = dict(
    nyc_bike_lanes="https://data.cityofnewyork.us/resource/mzxg-pwib.geojson",
)

In [14]:
import os
from supabase import create_client
import dotenv
# Load settings from a local .env file (python-dotenv's default lookup) so the
# SUPABASE_* variables are available in the environment before they are read.
dotenv.load_dotenv()

# Module-level Supabase client; upload_to_supabase() below uses it directly.
url = os.environ.get("SUPABASE_URL")
key = os.environ.get("SUPABASE_ANON_KEY")
supabase = create_client(url, key)

def upload_to_supabase(file_path, bucket_name, destination_path=None, upsert=False):
    """
    Uploads a local file to a Supabase Storage bucket.

    Args:
        file_path: Path of the local file to upload.
        bucket_name: Name of the target storage bucket.
        destination_path: Object path to store the file under inside the
            bucket. Defaults to ``file_path`` (the previous behavior), which
            means a relative local path such as ``./geo_layers/x.pmtiles`` is
            used verbatim as the object path.
        upsert: When True, pass the ``upsert`` file option so an object that
            already exists at the destination is overwritten instead of the
            upload being rejected. Useful when re-running the notebook.
            Defaults to False (the previous behavior).

    Returns:
        Whatever the storage client's ``upload`` call returns.
    """
    object_path = file_path if destination_path is None else destination_path

    # Only send file_options when explicitly requested so the default call is
    # exactly the one this helper has always made.
    upload_kwargs = {}
    if upsert:
        # The Supabase docs pass option values as strings, hence "true" not True.
        upload_kwargs["file_options"] = {"upsert": "true"}

    # Keep the file open for the duration of the upload; the handle is streamed
    # to the storage client and closed on exit from the with-block.
    with open(file_path, "rb") as file:
        return supabase.storage.from_(bucket_name).upload(
            object_path, file, **upload_kwargs
        )

# Example usage
# upload_to_supabase('./layer_outputs/nj_rail_stations.pmtiles', 'your_bucket_name')

In [15]:
import requests
import json
import subprocess
import os


def _fetch_all_features(geojson_url, page_size, timeout):
    """Pages through a GeoJSON endpoint with $limit/$offset and returns all features."""
    from urllib.parse import parse_qs, urlsplit

    if page_size < 1:
        raise ValueError(f"page_size must be a positive integer, got {page_size!r}")

    # Paging without an explicit sort is not guaranteed to return rows in a
    # stable order, so pages could overlap or skip rows. Sort by the internal
    # row id unless the caller's URL already specifies its own $order.
    existing_query = parse_qs(urlsplit(geojson_url).query)
    base_params = {} if "$order" in existing_query else {"$order": ":id"}

    all_features = []
    offset = 0

    while True:
        print(f"  Fetching records {offset} to {offset + page_size}...")
        # params= lets requests merge the paging arguments with any query
        # string already present in geojson_url (a hand-built "?..." suffix
        # would produce an invalid URL in that case). The timeout keeps a
        # stalled connection from hanging the notebook forever.
        response = requests.get(
            geojson_url,
            params={**base_params, "$limit": page_size, "$offset": offset},
            timeout=timeout,
        )
        response.raise_for_status()

        features = response.json().get("features", [])
        if not features:
            break

        all_features.extend(features)
        print(f"  Total features collected: {len(all_features)}")

        # A short page means the dataset is exhausted.
        if len(features) < page_size:
            break

        offset += page_size

    return all_features


def nyc_opendata_to_pmtiles(
    name,
    geojson_url,
    output_dir="./geo_layers",
    page_size=50000,
    timeout=120,
):
    """
    Downloads GeoJSON data from NYC Open Data portal and converts to PMTiles.

    All pages of the dataset are fetched, written to
    ``{output_dir}/{name}.geojson`` and then converted with the ``tippecanoe``
    command-line tool into ``{output_dir}/{name}.pmtiles``.

    Args:
        name: Output filename (without extension)
        geojson_url: Direct URL to the GeoJSON endpoint. It may already carry
            a query string (for example a filter); it should not set $limit
            or $offset itself because those are supplied here.
        output_dir: Directory for the .geojson and .pmtiles files; created if
            missing. Defaults to "./geo_layers".
        page_size: Records requested per page. Defaults to 50000, the
            per-request maximum the original implementation assumed.
        timeout: Timeout in seconds handed to ``requests.get`` for each page.

    Returns:
        Path of the generated .pmtiles file.

    Raises:
        ValueError: If ``page_size`` is not positive.
        requests.HTTPError: If a page request returns an error status.
        subprocess.CalledProcessError: If tippecanoe exits with a non-zero status.
    """
    print(f"Downloading {name} from NYC Open Data...")

    all_features = _fetch_all_features(geojson_url, page_size, timeout)

    # Construct complete GeoJSON
    complete_geojson = {
        "type": "FeatureCollection",
        "features": all_features,
    }

    print(f"✓ Downloaded {len(all_features)} total features")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Save GeoJSON (plain "/" join keeps the printed paths identical to before)
    geojson_path = f"{output_dir}/{name}.geojson"
    with open(geojson_path, "w") as f:
        json.dump(complete_geojson, f)

    print(f"✓ Saved to {geojson_path}")

    # Convert to PMTiles using tippecanoe
    pmtiles_path = f"{output_dir}/{name}.pmtiles"

    print("Converting to PMTiles...")
    subprocess.run(
        [
            "tippecanoe",
            "-o",
            pmtiles_path,
            "-zg",  # let tippecanoe choose the max zoom from the data
            "--drop-densest-as-needed",
            geojson_path,
            "--force",  # overwrite an existing output file on re-runs
        ],
        check=True,
    )

    print(f"✓ Created {pmtiles_path}")
    return pmtiles_path


# # Download NYC bike lanes
# nyc_opendata_to_pmtiles(
#     "nyc_bike_lanes",
#     "https://data.cityofnewyork.us/resource/mzxg-pwib.geojson"
# )

In [18]:
# Destination Supabase Storage bucket for the generated tile archives.
BUCKET_NAME = "citi-bike-data-bucket"

# Build and upload one PMTiles archive per configured layer.
# The loop variable is deliberately not named `url`: that name already holds
# the Supabase project URL at module level, and rebinding it here would leave
# the wrong value behind for any cell that reads `url` afterwards.
for layer_name, geojson_url in NYC_OPEN_DATA_URLS.items():
    nyc_opendata_to_pmtiles(layer_name, geojson_url)
    # Must match the path nyc_opendata_to_pmtiles writes its output to.
    file_path = f"./geo_layers/{layer_name}.pmtiles"
    upload_to_supabase(file_path, BUCKET_NAME)

Downloading nyc_bike_lanes from NYC Open Data...
  Fetching records 0 to 50000...
  Total features collected: 28983
✓ Downloaded 28983 total features
✓ Saved to ./geo_layers/nyc_bike_lanes.geojson
Converting to PMTiles...


For layer 0, using name "nyc_bike_lanes"
28983 features, 4139298 bytes of geometry and attributes, 341046 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes
Choosing a maxzoom of -z11 for features typically 297 feet (91 meters) apart, and at least 50 feet (15 meters) apart
Choosing a maxzoom of -z12 for resolution of about 65 feet (20 meters) within features
  98.6%  12/1206/1539  
  100.0%  12/1203/1543  

✓ Created ./geo_layers/nyc_bike_lanes.pmtiles
