# Download Datasets

This notebook downloads the datasets listed below from data.nola.gov as GeoJSON files and saves them in the `datasets` directory.

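You will need a Socrata app token (see https://dev.socrata.com/docs/app-tokens.html). The notebook reads it from the `DATA_NOLA_GOV_APPTOKEN` environment variable, which can be set in a `.env` file.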



In [1]:
import os
from datetime import datetime
import requests
from dotenv import load_dotenv
import math
import json

In [2]:
# Output file name -> GeoJSON endpoint to download it from.
_datasetUrls = {
    "paradeRoutes.geojson": "https://data.nola.gov/resource/tkeu-9s5e.geojson",
    "toiletLocation.geojson": "https://data.nola.gov/resource/ekd9-snag.geojson",
    "recyclingLocation.geojson": "https://data.nola.gov/resource/955d-h748.geojson",
    "lostChildren.geojson": "https://data.nola.gov/resource/diq2-v2kc.geojson",
    # Disabled: "neighborhoods.geojson": "https://data.nola.gov/resource/c8ew-p2c8.geojson",
    "streets.geojson": "https://data.nola.gov/resource/fdkj-rjrv.geojson",
}

# List of (file name, url) tuples, in the order they are downloaded.
datasets = list(_datasetUrls.items())



In [3]:
def lp(v):
    """Print ``v`` prefixed with the current local timestamp in square brackets."""
    stamp = datetime.now()
    print("[{}] {}".format(stamp, v))

lp("Starting...")

[2025-02-20 18:40:50.688891] Starting...


In [4]:
# Load variables from a .env file and log whether load_dotenv reported success.
dotenvLoaded = load_dotenv()
lp(f"load_dotenv:  {dotenvLoaded}")

[2025-02-20 18:40:50.693983] load_dotenv:  True


In [5]:
lp("Loading APP Token")
appToken = os.environ.get("DATA_NOLA_GOV_APPTOKEN")
if not appToken:
    # Fail with an actionable message instead of a TypeError from len(None) below.
    raise RuntimeError(
        "DATA_NOLA_GOV_APPTOKEN is not set; define it in the environment or in a .env file "
        "(see https://dev.socrata.com/docs/app-tokens.html)"
    )
# Log only a mask of the same length so the token itself never appears in the output.
lp(f"App token {'*' * len(appToken)} loaded")

[2025-02-20 18:40:50.697431] Loading APP Token
[2025-02-20 18:40:50.697597] App token ************************* loaded


In [6]:
# Directory the downloads are written to; the empty last component keeps the trailing separator.
dataPath = os.path.join(os.curdir, "datasets", "")
lp(f"Path:  {dataPath}")

[2025-02-20 18:40:50.701010] Path:  ./datasets/


In [7]:
# Make sure the output directory exists before anything is written to it.
if os.path.exists(dataPath):
    lp(f"{dataPath} already exists")
else:
    lp(f"Creating path {dataPath}")
    os.makedirs(dataPath)
    

[2025-02-20 18:40:50.705272] ./datasets/ already exists


In [8]:
lp("Creating sessions")
# One shared session so every request carries the app token and Accept header.
session = requests.Session()
session.headers['X-App-Token'] = appToken
session.headers['Accept'] = 'application/json'

[2025-02-20 18:40:50.710186] Creating sessions


In [11]:
def getDataSet(outFile, url, limit = 1000, maxCycle = math.inf):
    """Download a paged GeoJSON resource and save it as one FeatureCollection.

    Pages are requested from ``url`` with the ``$limit``/``$offset``/``$order``
    query parameters until a page comes back with no features or ``maxCycle``
    requests have been made. Whatever was collected is then written to
    ``dataPath + outFile`` (so a run cut short by ``maxCycle`` writes a partial
    collection).

    Args:
        outFile: File name, relative to ``dataPath``, to write.
        url: GeoJSON endpoint to page through.
        limit: Number of features requested per page.
        maxCycle: Maximum number of page requests; unlimited by default.

    Returns:
        The merged FeatureCollection dict, or ``False`` when the output file
        already exists and nothing was downloaded.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        KeyError: If a page has no ``features`` member.
    """
    outPath = f"{dataPath}{outFile}"

    if os.path.exists(outPath):
        lp(f"{outPath} already exists delete this file if you wish to redownload it")
        return False

    lp(f"Writting to {outPath}")

    totalLen = 0
    lastLen = 1  # non-zero so the loop body runs at least once
    offset = 0
    cycle = 0
    fullData = { "type": "FeatureCollection", "features":[]}
    while lastLen > 0 and cycle < maxCycle:

        lp(f"Getting content from {url}  limit={limit} offset={offset}")
        resp = session.get(url, params={'$limit':limit,'$offset':offset,'$order':':id'})
        resp.raise_for_status()

        # Decode from the raw bytes so the result does not depend on the
        # character-set guess behind resp.text.
        outData = json.loads(resp.content)
        features = outData['features']

        fullData['features'].extend(features)

        lastLen = len(features)
        totalLen += lastLen

        lp(f"{len(resp.content)} bytes downloaded {lastLen} features, total features downloaded {totalLen}")

        cycle += 1
        # Advance by what was actually received so no records are skipped if
        # the server returns fewer than `limit` features on a non-final page.
        offset += lastLen

    # Write to a temporary file and rename it into place: an interrupted write
    # must not leave a truncated file that the exists-check above would then
    # treat as a finished download.
    tmpPath = f"{outPath}.part"
    with open(tmpPath, "w", encoding="utf-8") as f:
        json.dump(fullData, f)
    os.replace(tmpPath, outPath)

    # totalLen counts features, not bytes.
    lp(f"Finished writing {totalLen} features to {outFile}")

    return fullData
    

In [12]:
# Fetch every configured dataset in turn; resp keeps the result of the last call.
for fileName, sourceUrl in datasets:
    resp = getDataSet(fileName, sourceUrl)

[2025-02-20 18:45:02.486595] ./datasets/paradeRoutes.geojson already exists delete this file if you wish to redownload it
[2025-02-20 18:45:02.487321] ./datasets/toiletLocation.geojson already exists delete this file if you wish to redownload it
[2025-02-20 18:45:02.487341] ./datasets/recyclingLocation.geojson already exists delete this file if you wish to redownload it
[2025-02-20 18:45:02.487353] ./datasets/lostChildren.geojson already exists delete this file if you wish to redownload it
[2025-02-20 18:45:02.487369] Writting to ./datasets/streets.geojson
[2025-02-20 18:45:02.487379] Getting content from https://data.nola.gov/resource/fdkj-rjrv.geojson  limit=1000 offset=0
[2025-02-20 18:45:03.565776] 980510 bytes downloaded 1000 features, total features downloaded 1000
[2025-02-20 18:45:03.565855] Getting content from https://data.nola.gov/resource/fdkj-rjrv.geojson  limit=1000 offset=1000
[2025-02-20 18:45:04.155641] 978556 bytes downloaded 1000 features, total features downloaded 2

In [None]:
# Final timestamped log line marking the end of the notebook run.
lp("Done")