# Download Datasets

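This notebook downloads the datasets listed below into the local `datasets/` directory. Files that already exist there are skipped; delete a file to download it again.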

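You will need a Socrata app token (see https://dev.socrata.com/docs/app-tokens.html). The notebook reads it from the `DATA_NOLA_GOV_APPTOKEN` environment variable, which can be set in a `.env` file.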



In [1]:
import os
from datetime import datetime
import requests
from dotenv import load_dotenv
import math
import json
import zipfile

## Datasets below

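The entries in the `datasets` list come from https://data.nola.gov/. The files in the `rawDownload` list are fetched directly from USGS servers (pubs.usgs.gov and rockyweb.usgs.gov).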

In [2]:
# (output file name, GeoJSON resource URL) pairs; each one is passed to
# getDataSet(). Commented-out entries are currently disabled.
datasets = [
    # ("streets.geojson", "https://data.nola.gov/resource/fdkj-rjrv.geojson"),
    # ("greenLocations.geojson", "https://data.nola.gov/resource/n4z3-4iyi.geojson"),
    (
        "Neighborhoods.geojson",
        "https://data.nola.gov/resource/exvn-jeh2.geojson",
    ),
]



In [3]:
# (output file name, source URL) pairs that are fetched with a single GET and
# saved as-is. Entries whose name ends in ".zip" are extracted by a later cell.
rawDownload = [
    (
        "WATER2.zip",
        "https://pubs.usgs.gov/of/1998/of98-805/lpdata/arcview/water.zip",
    ),
    # Disabled: individual files of the Upper Delta Plain tile-index shapefile.
    #('UpperDeltaTileIndex.CPG', 'https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/metadata/LA_UpperDeltaPlain_2015/LA_UpperDeltaPlain_2017/shapefiles/Tile_Index/TileLayout_UpperDeltaPlainLA_Lidar_NAD83_2011_UTM15_meters.CPG'),
    #('UpperDeltaTileIndex.dbf', 'https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/metadata/LA_UpperDeltaPlain_2015/LA_UpperDeltaPlain_2017/shapefiles/Tile_Index/TileLayout_UpperDeltaPlainLA_Lidar_NAD83_2011_UTM15_meters.dbf'),
    #('UpperDeltaTileIndex.prj', 'https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/metadata/LA_UpperDeltaPlain_2015/LA_UpperDeltaPlain_2017/shapefiles/Tile_Index/TileLayout_UpperDeltaPlainLA_Lidar_NAD83_2011_UTM15_meters.prj'),
    #('UpperDeltaTileIndex.shp', 'https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/metadata/LA_UpperDeltaPlain_2015/LA_UpperDeltaPlain_2017/shapefiles/Tile_Index/TileLayout_UpperDeltaPlainLA_Lidar_NAD83_2011_UTM15_meters.shp'),
    #('UpperDeltaTileIndex.xml', 'https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/metadata/LA_UpperDeltaPlain_2015/LA_UpperDeltaPlain_2017/shapefiles/Tile_Index/TileLayout_UpperDeltaPlainLA_Lidar_NAD83_2011_UTM15_meters.shp.xml'),
    #('UpperDeltaTileIndex.shx', 'https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/metadata/LA_UpperDeltaPlain_2015/LA_UpperDeltaPlain_2017/shapefiles/Tile_Index/TileLayout_UpperDeltaPlainLA_Lidar_NAD83_2011_UTM15_meters.shx'),
    (
        "NolaTileIndex.zip",
        "https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/metadata/LA_2021GreaterNewOrleans_C22/LA_2021GNO_1_C22/spatial_metadata/USGS/USGS_LA_2021GNO_1_C22_TileIndex.zip",
    ),
]

In [4]:
def lp(v):
    """Print ``v`` on one line, prefixed with the current timestamp in square brackets."""
    timestamp = datetime.now()
    print(f"[{timestamp}] {v}")


lp("Starting...")

[2025-03-24 14:53:13.984176] Starting...


In [5]:
# Call python-dotenv's load_dotenv() (which reads a local .env file into the
# process environment) and log the value it returns.
lp(f"load_dotenv:  {load_dotenv()}")

[2025-03-24 14:53:13.988116] load_dotenv:  True


In [6]:
# Read the Socrata app token from the process environment.
lp("Loading APP Token")
appToken = os.environ.get("DATA_NOLA_GOV_APPTOKEN")
if not appToken:
    # os.environ.get() returns None when the variable is missing, which would
    # otherwise surface below as an opaque "NoneType has no len()" TypeError.
    raise RuntimeError(
        "DATA_NOLA_GOV_APPTOKEN is not set; export it or add it to your .env file "
        "(see https://dev.socrata.com/docs/app-tokens.html)"
    )
# Log only a mask of the same length so the secret never lands in saved output.
lp(f"App token {'*' * len(appToken)} loaded")

[2025-03-24 14:53:13.991322] Loading APP Token
[2025-03-24 14:53:13.991607] App token ************************* loaded


In [7]:
# Download directory, relative to the current working directory.
# The empty last component makes os.path.join append a trailing separator,
# which the other cells rely on when they build paths as f"{dataPath}{name}".
dataPath = os.path.join(os.curdir, "datasets", "")
lp(f"Path:  {dataPath}")

[2025-03-24 14:53:13.995812] Path:  ./datasets/


In [8]:
# Make sure the download directory exists before anything is written to it.
if os.path.exists(dataPath):
    lp(f"{dataPath} already exists")
else:
    lp(f"Creating path {dataPath}")
    os.makedirs(dataPath)
    

[2025-03-24 14:53:14.000359] ./datasets/ already exists


In [9]:
# One shared HTTP session; every request made through it carries the app token
# and asks for a JSON response.
lp("Creating sessions")
session = requests.Session()
session.headers['X-App-Token'] = appToken
session.headers['Accept'] = 'application/json'

[2025-03-24 14:53:14.004139] Creating sessions


In [10]:
def getDataSet(outFile, url, limit = 1000, maxCycle = math.inf, timeout = 60):
    """Download a paged GeoJSON resource and save it as one FeatureCollection.

    Pages are requested through the notebook-level ``session`` with the
    ``$limit`` / ``$offset`` / ``$order`` query parameters until a page comes
    back with no features or ``maxCycle`` pages have been fetched. The features
    of all pages are concatenated and written to ``{dataPath}{outFile}``.

    Parameters
    ----------
    outFile : str
        Output file name; it is appended to the notebook-level ``dataPath``.
    url : str
        URL of the GeoJSON resource to page through.
    limit : int
        Number of features requested per page.
    maxCycle : int or float
        Maximum number of pages to request (unbounded by default).
    timeout : float or tuple
        Timeout in seconds passed to ``session.get`` so a stalled connection
        raises instead of hanging the notebook.

    Returns
    -------
    dict or bool
        The combined FeatureCollection dict, or ``False`` when the output file
        already exists and nothing was downloaded.

    Raises
    ------
    requests.HTTPError
        If any page request returns an error status.
    KeyError
        If a response body has no ``features`` member.
    """
    outPath = f"{dataPath}{outFile}"

    if os.path.exists(outPath):
        lp(f"{outPath} already exists delete this file if you wish to redownload it")
        return False

    lp(f"Writting to {outPath}")

    totalLen = 0
    lastLen = 1  # non-zero so the loop body runs at least once
    offset = 0
    cycle = 0
    fullData = { "type": "FeatureCollection", "features":[]}
    while lastLen > 0 and cycle < maxCycle:

        lp(f"Getting content from {url}  limit={limit} offset={offset}")
        resp = session.get(
            url,
            params={'$limit':limit,'$offset':offset,'$order':':id'},
            timeout=timeout,
        )
        resp.raise_for_status()

        # resp.json() decodes the body directly, without the charset guessing
        # that resp.text performs on responses lacking an explicit encoding.
        outData = resp.json()
        features = outData['features']

        fullData['features'] += features

        lastLen = len(features)
        totalLen += lastLen

        lp(f"{len(resp.content)} bytes downloaded {lastLen} features, total features downloaded {totalLen}")

        cycle += 1
        offset += limit

    # Write to a temporary name and rename afterwards: an interrupted write must
    # not leave a truncated file that the existence check above would then
    # treat as a finished download.
    tmpPath = f"{outPath}.part"
    try:
        with open(tmpPath, "w") as f:
            json.dump(fullData, f)
        os.replace(tmpPath, outPath)
    finally:
        # After a successful replace the temporary file is already gone.
        if os.path.exists(tmpPath):
            os.remove(tmpPath)

    lp(f"Finished writing {totalLen} rows to {outFile}")

    return fullData
    

In [11]:
# Fetch every configured dataset; each tuple is unpacked into getDataSet's
# positional arguments.
for datasetSpec in datasets:
    resp = getDataSet(*datasetSpec)

[2025-03-24 14:53:14.014812] ./datasets/Neighborhoods.geojson already exists delete this file if you wish to redownload it


In [12]:
# Download each (file name, URL) entry of rawDownload as-is into dataPath,
# skipping files that are already present.
for dl in rawDownload:
    targetPath = f"{dataPath}{dl[0]}"

    if os.path.exists(targetPath):
        lp(f"{targetPath} already exists delete this file if you wish to redownload it")
        continue

    lp(f"Downloading {dl[1]} -> {dl[0]}")
    # Uses plain requests.get rather than `session`, so the X-App-Token header
    # set on the session is not sent to these hosts. The timeout keeps a
    # stalled connection from hanging the notebook.
    resp = requests.get(dl[1], timeout=60)
    resp.raise_for_status()

    lp(f"{len(resp.content)} bytes downloaded")

    # Write to a temporary name and rename afterwards, so an interrupted write
    # cannot leave a truncated file that the existence check would skip.
    partPath = f"{targetPath}.part"
    try:
        with open(partPath, "wb") as f:
            f.write(resp.content)
        os.replace(partPath, targetPath)
    finally:
        # After a successful replace the temporary file is already gone.
        if os.path.exists(partPath):
            os.remove(partPath)
    

[2025-03-24 14:53:14.021592] ./datasets/WATER2.zip already exists delete this file if you wish to redownload it
[2025-03-24 14:53:14.021810] ./datasets/NolaTileIndex.zip already exists delete this file if you wish to redownload it


In [13]:
# Unpack every raw download whose file name ends in ".zip" into dataPath.
for dl in rawDownload:
    if not dl[0].endswith('.zip'):
        continue
    lp(f"Extracting {dl[0]}")
    with zipfile.ZipFile(f"{dataPath}{dl[0]}", mode='r') as zf:
        zf.extractall(path=dataPath)
            

[2025-03-24 14:53:14.025879] Extracting WATER2.zip
[2025-03-24 14:53:14.030517] Extracting NolaTileIndex.zip


In [14]:
# Final timestamped log line marking the end of the notebook run.
lp('Done')

[2025-03-24 14:53:14.046062] Done
