In [1]:
import pandas as pd
import json
import numpy as np

In [2]:
# Load the lookup table; later cells read its Borough, Neighborhood and ZipCode columns.
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='nyc-zip-codes.csv')

In [3]:
# Display the loaded table to check its columns and contents.
df

Unnamed: 0,Borough,Neighborhood,ZipCode
0,Bronx,Central Bronx,10453
1,Bronx,Central Bronx,10457
2,Bronx,Central Bronx,10460
3,Bronx,Bronx Park and Fordham,10458
4,Bronx,Bronx Park and Fordham,10467
...,...,...,...
173,Staten Island,South Shore,10312
174,Staten Island,Stapleton and St. George,10301
175,Staten Island,Stapleton and St. George,10304
176,Staten Island,Stapleton and St. George,10305


In [4]:
# Lookup table: borough -> neighbourhood name -> CDTA code.
# (CDTA presumably stands for NYC "Community District Tabulation Area" -- confirm.)
#
# Each borough lists its (neighbourhood, code) pairs; the comprehension turns
# every list into a dict, preserving the listing order.
# NOTE(review): on Staten Island both "Port Richmond" and
# "Stapleton and St. George" map to "SI01" -- confirm this is intended.
cdta_mapping = {
    borough: dict(pairs)
    for borough, pairs in {
        "Bronx": [
            ("Bronx Park and Fordham", "BX07"),
            ("Central Bronx", "BX06"),
            ("High Bridge and Morrisania", "BX05"),
            ("Hunts Point and Mott Haven", "BX01"),
            ("Kingsbridge and Riverdale", "BX08"),
            ("Northeast Bronx", "BX12"),
            ("Southeast Bronx", "BX09"),
        ],
        "Brooklyn": [
            ("Borough Park", "BK12"),
            ("Bushwick and Williamsburg", "BK04"),
            ("Canarsie and Flatlands", "BK18"),
            ("Central Brooklyn", "BK08"),
            ("East New York and New Lots", "BK05"),
            ("Flatbush", "BK14"),
            ("Greenpoint", "BK01"),
            ("Northwest Brooklyn", "BK02"),
            ("Southern Brooklyn", "BK15"),
            ("Southwest Brooklyn", "BK10"),
            ("Sunset Park", "BK07"),
        ],
        "Manhattan": [
            ("Central Harlem", "MN10"),
            ("Chelsea and Clinton", "MN04"),
            ("East Harlem", "MN11"),
            ("Gramercy Park and Murray Hill", "MN06"),
            ("Greenwich Village and Soho", "MN02"),
            ("Inwood and Washington Heights", "MN12"),
            ("Lower East Side", "MN03"),
            ("Lower Manhattan", "MN01"),
            ("Upper East Side", "MN08"),
            ("Upper West Side", "MN07"),
        ],
        "Queens": [
            ("Central Queens", "QN08"),
            ("Jamaica", "QN12"),
            ("North Queens", "QN07"),
            ("Northeast Queens", "QN11"),
            ("Northwest Queens", "QN01"),
            ("Rockaways", "QN14"),
            ("Southeast Queens", "QN13"),
            ("Southwest Queens", "QN09"),
            ("West Central Queens", "QN05"),
            ("West Queens", "QN04"),
        ],
        "Staten Island": [
            ("Mid-Island", "SI02"),
            ("Port Richmond", "SI01"),
            ("South Shore", "SI03"),
            ("Stapleton and St. George", "SI01"),
        ],
    }.items()
}

In [5]:
# Map CDTA
# Flatten the nested borough -> neighbourhood -> code table into a single
# (borough, neighbourhood) -> code lookup, so each row is resolved with one
# dictionary access instead of a row-wise DataFrame.apply.
cdta_lookup = {
    (borough, neighborhood): code
    for borough, neighborhoods in cdta_mapping.items()
    for neighborhood, code in neighborhoods.items()
}
row_keys = list(zip(df['Borough'], df['Neighborhood']))

# Fail fast and report every unmapped pair (in first-seen order) instead of
# surfacing a bare KeyError for the first offending row only.
unmapped = [key for key in dict.fromkeys(row_keys) if key not in cdta_lookup]
if unmapped:
    raise KeyError(f"No CDTA code for (Borough, Neighborhood) pairs: {unmapped}")

# Adds the 'CDTA' column to `df` in place; values are assigned positionally.
df['CDTA'] = [cdta_lookup[key] for key in row_keys]


In [6]:
# Collect the distinct ZIP codes of every (Borough, Neighborhood, CDTA) combination.
# `unique()` produces one collection of ZIP codes per group; `reset_index()` then
# moves the three group keys back into ordinary columns.
grouped = (
    df
    .groupby(['Borough', 'Neighborhood', 'CDTA'])['ZipCode']
    .unique()
    .reset_index()
)

In [7]:
# Convert to list
# Turn every cell of 'ZipCode' into a plain Python list. Going through
# np.asarray keeps this cell safe to re-run: it accepts the arrays produced by
# `unique()` as well as the lists left behind by a previous execution (plain
# lists have no `.tolist()`, so the direct call would raise AttributeError).
# `.tolist()` also converts NumPy scalars into built-in Python values, which
# the json module needs when the result is saved later.
grouped['ZipCode'] = grouped['ZipCode'].apply(lambda zips: np.asarray(zips).tolist())

In [8]:
# Display the grouped table to verify the CDTA codes and the per-group ZIP code lists.
grouped

Unnamed: 0,Borough,Neighborhood,CDTA,ZipCode
0,Bronx,Bronx Park and Fordham,BX07,"[10458, 10467, 10468]"
1,Bronx,Central Bronx,BX06,"[10453, 10457, 10460]"
2,Bronx,High Bridge and Morrisania,BX05,"[10451, 10452, 10456]"
3,Bronx,Hunts Point and Mott Haven,BX01,"[10454, 10455, 10459, 10474]"
4,Bronx,Kingsbridge and Riverdale,BX08,"[10463, 10471]"
5,Bronx,Northeast Bronx,BX12,"[10466, 10469, 10470, 10475]"
6,Bronx,Southeast Bronx,BX09,"[10461, 10462, 10464, 10465, 10472, 10473]"
7,Brooklyn,Borough Park,BK12,"[11204, 11218, 11219, 11230]"
8,Brooklyn,Bushwick and Williamsburg,BK04,"[11206, 11221, 11237]"
9,Brooklyn,Canarsie and Flatlands,BK18,"[11234, 11236, 11239]"


In [9]:
# Build the nested output structure:
#   borough -> neighbourhood -> CDTA code -> list of ZIP codes
# A single pass over the rows is enough. `setdefault` creates each nesting
# level the first time it is seen, so key order follows the row order of
# `grouped`; no per-borough / per-neighbourhood re-filtering of the frame is needed.
result = {}
for borough, neighborhood, cdta, zip_codes in zip(
    grouped['Borough'],
    grouped['Neighborhood'],
    grouped['CDTA'],
    grouped['ZipCode'],
):
    result.setdefault(borough, {}).setdefault(neighborhood, {})[cdta] = zip_codes


In [11]:
# Save
# Serialise before opening the file: if `result` contains a value the json
# module cannot encode, the error is raised while any existing output file is
# still untouched, instead of leaving a truncated / half-written JSON file.
serialized = json.dumps(result, indent=2)
# Explicit encoding so the written bytes do not depend on the platform default.
with open('borough_neighbourhood.json', 'w', encoding='utf-8') as f:
    f.write(serialized)