# NYC Taxi Zones Heatmap

Geografische Visualisierung der Pickup- und Dropoff-Häufigkeiten nach Taxi-Zonen

## Setup

In [5]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import json

# Plotly Renderer für Notebooks konfigurieren
import plotly.io as pio
pio.renderers.default = "jupyterlab"

## Daten laden

In [6]:
# Taxi-Daten laden
df = pd.read_csv('../data/Taxi_final_1M_clean.csv')
zones = pd.read_csv('../taxi_zones.csv')

# GeoJSON für NYC Taxi Zones laden
print("Lade GeoJSON-Daten...")
gdf = gpd.read_file('../NYC-Taxi-Zones.geojson')

print(f"Datensätze geladen:")
print(f"  - Taxi-Fahrten: {len(df):,}")
print(f"  - Taxi-Zonen (CSV): {len(zones)}")
print(f"  - Taxi-Zonen (GeoJSON): {len(gdf)}")
print(f"\nGeoJSON Spalten: {list(gdf.columns)}")

Lade GeoJSON-Daten...
Datensätze geladen:
  - Taxi-Fahrten: 967,493
  - Taxi-Zonen (CSV): 265
  - Taxi-Zonen (GeoJSON): 263

GeoJSON Spalten: ['shape_area', 'objectid', 'shape_leng', 'location_id', 'zone', 'borough', 'geometry']


## Daten aggregieren und mergen

In [7]:
# Pickup und Dropoff Counts aggregieren
pickup_all = df['PULocationID'].value_counts().reset_index()
pickup_all.columns = ['LocationID', 'pickup_count']

dropoff_all = df['DOLocationID'].value_counts().reset_index()
dropoff_all.columns = ['LocationID', 'dropoff_count']

# Mit taxi_zones.csv joinen für Borough und Zone Namen
pickup_merged = pickup_all.merge(zones, on='LocationID', how='left')
dropoff_merged = dropoff_all.merge(zones, on='LocationID', how='left')

print(f"Pickup Locations: {len(pickup_merged)} eindeutige Zonen")
print(f"Dropoff Locations: {len(dropoff_merged)} eindeutige Zonen")

Pickup Locations: 246 eindeutige Zonen
Dropoff Locations: 259 eindeutige Zonen


In [8]:
# LocationID Feld im GeoJSON identifizieren
if 'LocationID' in gdf.columns:
    location_field = 'LocationID'
elif 'location_id' in gdf.columns:
    location_field = 'location_id'
elif 'objectid' in gdf.columns:
    location_field = 'objectid'
else:
    location_field = gdf.select_dtypes(include=['int64', 'int32']).columns[0]

print(f"Verwende Feld '{location_field}' für den Merge")

# Typen angleichen (zu int konvertieren)
gdf[location_field] = gdf[location_field].astype(int)
pickup_merged['LocationID'] = pickup_merged['LocationID'].astype(int)
dropoff_merged['LocationID'] = dropoff_merged['LocationID'].astype(int)

# Merge mit GeoDataFrame durchführen
gdf_pickup = gdf.merge(
    pickup_merged,
    left_on=location_field,
    right_on='LocationID',
    how='left'
)

gdf_dropoff = gdf.merge(
    dropoff_merged,
    left_on=location_field,
    right_on='LocationID',
    how='left'
)

# NaN mit 0 füllen (Zonen ohne Trips)
gdf_pickup['pickup_count'] = gdf_pickup['pickup_count'].fillna(0).astype(int)
gdf_dropoff['dropoff_count'] = gdf_dropoff['dropoff_count'].fillna(0).astype(int)

print(f"\nMerge erfolgreich:")
print(f"  - Pickup GeoDataFrame: {len(gdf_pickup)} Zonen")
print(f"  - Dropoff GeoDataFrame: {len(gdf_dropoff)} Zonen")
print(f"  - Summe Pickups: {gdf_pickup['pickup_count'].sum():,}")
print(f"  - Summe Dropoffs: {gdf_dropoff['dropoff_count'].sum():,}")

Verwende Feld 'location_id' für den Merge

Merge erfolgreich:
  - Pickup GeoDataFrame: 263 Zonen
  - Dropoff GeoDataFrame: 263 Zonen
  - Summe Pickups: 959,080
  - Summe Dropoffs: 955,576


## Pickup Heatmap

In [9]:
# GeoDataFrame zu GeoJSON konvertieren
geojson_pickup = json.loads(gdf_pickup.to_json())

# Quadratwurzel-Skalierung für bessere Farbbalance
gdf_pickup['pickup_count_sqrt'] = np.sqrt(gdf_pickup['pickup_count'])

fig_pickup = px.choropleth_mapbox(
    gdf_pickup,
    geojson=geojson_pickup,
    locations=gdf_pickup.index,
    color='pickup_count_sqrt',
    color_continuous_scale='YlOrRd',
    range_color=[0, gdf_pickup['pickup_count_sqrt'].quantile(0.95)],  # Cap bei 95. Perzentil
    mapbox_style='carto-positron',
    center={'lat': 40.7128, 'lon': -74.0060},
    zoom=9.5,
    opacity=0.7,
    hover_name='zone' if 'zone' in gdf_pickup.columns else 'Zone',
    hover_data={
        'borough' if 'borough' in gdf_pickup.columns else 'Borough': True,
        'pickup_count': ':,.0f',
        'pickup_count_sqrt': False,
    },
    labels={'pickup_count_sqrt': 'Anzahl Pickups'},
    title='<b>NYC Yellow Taxi Pickup Locations</b><br>Häufigkeit nach Taxi-Zone'
)

fig_pickup.update_layout(
    height=700,
    margin={'r': 0, 't': 60, 'l': 0, 'b': 0},
    font=dict(size=12),
    title_font_size=16
)

# Als HTML speichern
fig_pickup.write_html('../output/pickup_heatmap.html')
print("Pickup-Karte gespeichert: output/pickup_heatmap.html")


*choropleth_mapbox* is deprecated! Use *choropleth_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



Pickup-Karte gespeichert: output/pickup_heatmap.html


## Dropoff Heatmap

In [10]:
# GeoDataFrame zu GeoJSON konvertieren
geojson_dropoff = json.loads(gdf_dropoff.to_json())

# Quadratwurzel-Skalierung für bessere Farbbalance
gdf_dropoff['dropoff_count_sqrt'] = np.sqrt(gdf_dropoff['dropoff_count'])

fig_dropoff = px.choropleth_mapbox(
    gdf_dropoff,
    geojson=geojson_dropoff,
    locations=gdf_dropoff.index,
    color='dropoff_count_sqrt',
    color_continuous_scale='YlGnBu',
    range_color=[0, gdf_dropoff['dropoff_count_sqrt'].quantile(0.95)],  # Cap bei 95. Perzentil
    mapbox_style='carto-positron',
    center={'lat': 40.7128, 'lon': -74.0060},
    zoom=9.5,
    opacity=0.7,
    hover_name='zone' if 'zone' in gdf_dropoff.columns else 'Zone',
    hover_data={
        'borough' if 'borough' in gdf_dropoff.columns else 'Borough': True,
        'dropoff_count': ':,.0f',
        'dropoff_count_sqrt': False,
    },
    labels={'dropoff_count_sqrt': 'Anzahl Dropoffs'},
    title='<b>NYC Yellow Taxi Dropoff Locations</b><br>Häufigkeit nach Taxi-Zone'
)

fig_dropoff.update_layout(
    height=700,
    margin={'r': 0, 't': 60, 'l': 0, 'b': 0},
    font=dict(size=12),
    title_font_size=16
)

# Als HTML speichern
fig_dropoff.write_html('../output/dropoff_heatmap.html')
print("Dropoff-Karte gespeichert: output/dropoff_heatmap.html")


*choropleth_mapbox* is deprecated! Use *choropleth_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



Dropoff-Karte gespeichert: output/dropoff_heatmap.html


## Top 10 Pickup und Dropoff Zonen

In [11]:
# Top 10 Pickup Zonen
top_pickup = gdf_pickup[gdf_pickup['pickup_count'] > 0].nlargest(10, 'pickup_count')[[
    'zone', 'borough', 'pickup_count'
]]
print("Top 10 Pickup Zonen:")
print(top_pickup.to_string(index=False))

print("\n" + "="*60 + "\n")

# Top 10 Dropoff Zonen
top_dropoff = gdf_dropoff[gdf_dropoff['dropoff_count'] > 0].nlargest(10, 'dropoff_count')[[
    'zone', 'borough', 'dropoff_count'
]]
print("Top 10 Dropoff Zonen:")
print(top_dropoff.to_string(index=False))

Top 10 Pickup Zonen:
                        zone   borough  pickup_count
                 JFK Airport    Queens         49758
       Upper East Side South Manhattan         45624
              Midtown Center Manhattan         45006
       Upper East Side North Manhattan         41096
                Midtown East Manhattan         34569
Penn Station/Madison Sq West Manhattan         33505
           LaGuardia Airport    Queens         33296
   Times Sq/Theatre District Manhattan         32216
         Lincoln Square East Manhattan         31993
                 Murray Hill Manhattan         28849


Top 10 Dropoff Zonen:
                     zone   borough  dropoff_count
    Upper East Side North Manhattan          42838
    Upper East Side South Manhattan          41187
           Midtown Center Manhattan          37826
Times Sq/Theatre District Manhattan          29670
              Murray Hill Manhattan          28907
             Midtown East Manhattan          27706
      Lincoln S