Make sure all required directories are here

In [1]:
from pathlib import Path

original_path = Path('./../data/1_original')
intermediate_path = Path('./../data/2_intermediate')
prepared_path = Path('./../data/3_prepared')
result_path = Path('./../data/4_result')

original_path.mkdir(parents=True, exist_ok=True)
intermediate_path.mkdir(parents=True, exist_ok=True)
prepared_path.mkdir(parents=True, exist_ok=True)
result_path.mkdir(parents=True, exist_ok=True)

borough_boundaries_path = intermediate_path / 'Borough_Boundaries.geojson'
brooklyn_border_path = intermediate_path / 'brooklyn_border.geojson'
brooklyn_borders_all_path = intermediate_path / 'brooklyn_borders_all.geojson'

Download all files

In [2]:
import requests

files = {
    'https://data.cityofnewyork.us/api/views/bkjd-kr4k/rows.csv?accessType=DOWNLOAD':'brooklyn_schools.csv',
    'https://www1.nyc.gov/assets/finance/downloads/pdf/rolling_sales/rollingsales_brooklyn.xls': 'rollingsales_brooklyn.xls',
    'https://data.cityofnewyork.us/api/views/uip8-fykc/rows.csv?accessType=DOWNLOAD':'NYPD_Arrest_Data_Year_to_Date.csv',
    'https://data.cityofnewyork.us/api/views/5ucz-vwe8/rows.csv?accessType=DOWNLOAD': 'NYPD_Shooting_Incident_Data_Year_To_Date.csv',
    'https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=Shapefile':'Borough_Boundaries.zip',
    'https://data.cityofnewyork.us/download/i8iw-xf4u/application%2Fzip':'ZIP_codes.zip'
     }

for url, dest_name in files.items():
    dest_path = original_path / dest_name
    if dest_path.exists():
         print(f"'{dest_name}' is present")    
    else:
        print(f"Downloading '{dest_name}'")    
        response = requests.get(url, allow_redirects=True)

        with open(dest_path, 'wb') as file:
            file.write(response.content)

print("done")


'brooklyn_schools.csv' is present
'rollingsales_brooklyn.xls' is present
'NYPD_Arrest_Data_Year_to_Date.csv' is present
'NYPD_Shooting_Incident_Data_Year_To_Date.csv' is present
Downloading 'Borough_Boundaries.zip'
Downloading 'ZIP_codes.zip'
done


Unzip all zip archives and delete the archive

In [3]:
import zipfile
from pathlib import Path

def unzip(file: Path):
    unziped_dir = file.parent / file.stem
    if not unziped_dir.exists():
        print(f"unzipping '{file.name}'")
        with zipfile.ZipFile(file, 'r') as zip_ref:
            zip_ref.extractall(unziped_dir)
    else:
        print(f'{unziped_dir} already exists')


for path in original_path.glob('*.zip'):
    if path.is_file() and path.suffix == '.zip':
        unzip(path)
        path.unlink()
        

../data/1_original/Borough_Boundaries already exists
../data/1_original/ZIP_codes already exists


Convert the shape files to geo-jsons

In [4]:
from pathlib import Path
import geopandas as gpd

# copied from https://geopandas.org/projections.html#setting-a-projection 
def shapefile2geojson(infile, outfile, fieldname='geometry'):
    '''Translate a shapefile to GEOJSON.'''
    shp_file = gpd.read_file(infile)
    shp_file.to_crs(epsg=4326).to_file(outfile, driver='GeoJSON')


for shp_file_path in original_path.glob('**/*.shp'):
    dest_file_path = intermediate_path / f"{shp_file_path.parent.name}.geojson"
    if not dest_file_path.exists():
        print(f"converting '{dest_file_path.name}'")
        shapefile2geojson(str(shp_file_path), str(dest_file_path))
    else:
        print(f"{dest_file_path.name} already exists")

print("done")

Borough_Boundaries.geojson already exists
ZIP_codes.geojson already exists
done


Create a geo-json file with the border of Brooklyn

In [5]:
import json
from shapely.geometry.polygon import Polygon

def get_largest_polygon(polygons):
    """ Pick the polygon with the largest area.
    ATTENTION: do not pass None or empty polygons in!
    ATTENTION: This is calculated on a plane, coordinates are not consideret to be polar coordinates!
    For the case at hand this works fine because the most of brooklyn is one large polygon and all others are small islands"""
    areas = [Polygon(bds[0]).area for bds in polygons]
    index_of_largest_polygon = areas.index(max(areas))
    return polygons[index_of_largest_polygon]


def write_geojson(polygon, name, file_path):
    """ Writes a geojson file with one feature, a polygon"""
    geom = {'type': 'Polygon', 'coordinates':polygon}
    result = {
                'type': 'Feature',
                'properties':{'name': name},
                'geometry': geom
            }

    with open(file_path, 'w') as fp:
        json.dump(result, fp)

if (brooklyn_border_path.exists()):
    print(f"{brooklyn_border_path.name} exists")
else:
    print(f"extracting {borough_boundaries_path.name}")
    with open(borough_boundaries_path) as file:
        data = json.load(file)
        for feature in data['features']:

            # pick Brooklyn
            if feature['properties']['boro_name'] == 'Brooklyn':

                # pick the largest polygon
                largest = get_largest_polygon(feature['geometry']['coordinates'])
                write_geojson(largest, "largest polygon of Brooklyn", brooklyn_border_path)
                break

print("done")

brooklyn_border.geojson exists
done


Bonus cell: show all polygons from Brooklyn, the 'main' one green and the other red

In [6]:
# write all_brooklyn_borders.geojson
with open(borough_boundaries_path) as file:
    data = json.load(file)
    for feature in data['features']:
        if feature['properties']['boro_name'] == 'Brooklyn':
            new_features = []

            i = 0
            for coordinate in feature['geometry']['coordinates']:
                geom = {'type': 'Polygon', 'coordinates': coordinate}
                new_feature = {'type': 'Feature', 'properties':{'number':i}, 'geometry':geom}
                
                new_features.append(new_feature)          
                i = i+1
            new = {
                'type': 'FeatureCollection',
                'features':new_features
                }

            
            with open(brooklyn_borders_all_path, 'w') as fp:
                json.dump(new, fp)


import folium

m = folium.Map(
    location=(40.64558018290456, -73.94975979616603), 
    zoom_start=12,
    tiles='Stamen Toner'
) 

def style_function(x):
    number = x['properties']['number']
    return {
        'color': '#339933' if number==27 else '#CC0000',
        'opacity': 0.5,
        'weigth': 1,
        'fillOpacity': 0.5,
        'fill':'#00FF00'
    }

folium.GeoJson(
    str(brooklyn_borders_all_path),
    name = "All Brooklyn borders",
    tooltip=folium.features.GeoJsonTooltip(fields=['number'], aliases=['#']),
    style_function=style_function
).add_to(m) 

m           