# Download Datasets

This notebook downloads the datasets listed below into the local `datasets/` directory. Files that already exist there are skipped.

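You will need a Socrata app token (see https://dev.socrata.com/docs/app-tokens.html). Store it in the `DATA_NOLA_GOV_APPTOKEN` environment variable, for example in a `.env` file next to this notebook.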



In [1]:
import os
from datetime import datetime
import requests
from dotenv import load_dotenv
import math
import json

## Datasets below

The datasets below all come from https://data.nola.gov/

In [2]:
# (output file name, GeoJSON endpoint) pairs; each pair is passed to getDataSet.
datasets = [
    ("paradeRoutes.geojson", "https://data.nola.gov/resource/tkeu-9s5e.geojson"),
    ("toiletLocation.geojson", "https://data.nola.gov/resource/ekd9-snag.geojson"),
    ("recyclingLocation.geojson", "https://data.nola.gov/resource/955d-h748.geojson"),
    ("lostChildren.geojson", "https://data.nola.gov/resource/diq2-v2kc.geojson"),
    # Currently disabled:
    # ("neighborhoods.geojson", "https://data.nola.gov/resource/c8ew-p2c8.geojson"),
    ("streets.geojson", "https://data.nola.gov/resource/fdkj-rjrv.geojson"),
    ("firstAidLocations.geojson", "https://data.nola.gov/resource/qa73-irdh.geojson"),
    ("neighborhoodDemographics.geojson", "https://data.nola.gov/resource/2w4d-xaw2.geojson"),
]



In [3]:
# (output file name, URL) pairs for files that are saved exactly as served.
rawDownload = [
    (
        "MississippiRiver.zip",
        "https://pubs.usgs.gov/of/1998/of98-805/lpdata/arcview/water.zip",
    ),
]

In [4]:
def lp(v):
    """Print ``v`` on one line, prefixed with the current local time in brackets."""
    stamp = datetime.now()
    print(f"[{stamp}] {v}")

lp("Starting...")

[2025-02-20 21:27:58.205300] Starting...


In [5]:
# Run load_dotenv() and log the boolean it returns.
dotenvLoaded = load_dotenv()
lp(f"load_dotenv:  {dotenvLoaded}")

[2025-02-20 21:27:58.209184] load_dotenv:  True


In [6]:
# Read the Socrata app token from the environment (populated from .env by load_dotenv).
lp("Loading APP Token")
appToken = os.environ.get("DATA_NOLA_GOV_APPTOKEN")

# os.environ.get returns None when the variable is unset; fail with an actionable
# message instead of the TypeError that len(None) would raise below.
if not appToken:
    raise RuntimeError(
        "DATA_NOLA_GOV_APPTOKEN is not set. Add it to your environment or .env file "
        "(see https://dev.socrata.com/docs/app-tokens.html)."
    )

# Log only a mask of the same length so the token itself never reaches the output.
lp(f"App token {'*' * len(appToken)} loaded")

[2025-02-20 21:27:58.212583] Loading APP Token
[2025-02-20 21:27:58.212792] App token ************************* loaded


In [7]:
# Directory (relative to the working directory) that receives every download.
# The trailing empty component makes os.path.join end the path with a separator,
# which callers rely on when they append a file name directly.
dataPath = os.path.join(os.curdir, "datasets", "")
lp(f"Path:  {dataPath}")

[2025-02-20 21:27:58.216648] Path:  ./datasets/


In [8]:
# Make sure the download directory exists before anything is written into it.
if os.path.exists(dataPath):
    lp(f"{dataPath} already exists")
else:
    lp(f"Creating path {dataPath}")
    os.makedirs(dataPath)
    

[2025-02-20 21:27:58.220204] ./datasets/ already exists


In [9]:
# One shared HTTP session so every dataset request carries the same headers.
lp("Creating sessions")
session = requests.Session()
session.headers['X-App-Token'] = appToken
session.headers['Accept'] = 'application/json'

[2025-02-20 21:27:58.224133] Creating sessions


In [10]:
def getDataSet(outFile, url, limit = 1000, maxCycle = math.inf, timeout = 60):
    """Download a paged GeoJSON dataset and cache it as a single file.

    Pages are requested from ``url`` through the shared ``session`` using the
    ``$limit`` / ``$offset`` / ``$order`` query parameters until a page comes
    back with no features or ``maxCycle`` pages have been fetched. The features
    of all pages are merged into one FeatureCollection, which is written to
    ``dataPath + outFile``.

    Args:
        outFile: File name, appended to ``dataPath``, to write the result to.
        url: GeoJSON endpoint to page through.
        limit: Number of features requested per page.
        maxCycle: Maximum number of pages to request.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        ``False`` when the output file already exists (nothing is downloaded),
        otherwise the merged FeatureCollection dict.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        requests.Timeout: If a page request exceeds ``timeout``.
        KeyError: If a response body has no ``features`` member.
    """
    outPath = f"{dataPath}{outFile}"

    if os.path.exists(outPath):
        lp(f"{outPath} already exists delete this file if you wish to redownload it")
        return False

    lp(f"Writting to {outPath}")

    totalLen = 0
    lastLen = 1  # non-zero so the loop body runs at least once
    offset = 0
    cycle = 0
    fullData = { "type": "FeatureCollection", "features":[]}
    while lastLen > 0 and cycle < maxCycle:

        lp(f"Getting content from {url}  limit={limit} offset={offset}")
        # A timeout keeps a stalled connection from hanging the notebook forever.
        resp = session.get(
            url,
            params={'$limit':limit,'$offset':offset,'$order':':id'},
            timeout=timeout,
        )
        resp.raise_for_status()

        outData = resp.json()
        pageFeatures = outData['features']

        fullData['features'] += pageFeatures

        lastLen = len(pageFeatures)
        totalLen += lastLen

        lp(f"{len(resp.content)} bytes downloaded {lastLen} features, total features downloaded {totalLen}")

        cycle += 1
        offset += limit

    # Write to a temporary name and rename on success, so an interrupted write
    # never leaves a truncated file that the exists-check above would later
    # mistake for a finished download. A leftover .part file is simply
    # overwritten on the next run.
    tmpPath = f"{outPath}.part"
    with open(tmpPath, "w") as f:
        json.dump(fullData,f)
    os.replace(tmpPath, outPath)

    lp(f"Finished writing {totalLen} rows to {outFile}")

    return fullData
    

In [11]:
# Fetch every configured dataset; getDataSet skips files that are already on disk.
for dataset in datasets:
    outFile, url = dataset
    resp = getDataSet(outFile, url)

[2025-02-20 21:27:58.235282] ./datasets/paradeRoutes.geojson already exists delete this file if you wish to redownload it
[2025-02-20 21:27:58.235336] ./datasets/toiletLocation.geojson already exists delete this file if you wish to redownload it
[2025-02-20 21:27:58.235351] ./datasets/recyclingLocation.geojson already exists delete this file if you wish to redownload it
[2025-02-20 21:27:58.235362] ./datasets/lostChildren.geojson already exists delete this file if you wish to redownload it
[2025-02-20 21:27:58.235372] ./datasets/streets.geojson already exists delete this file if you wish to redownload it
[2025-02-20 21:27:58.235382] ./datasets/firstAidLocations.geojson already exists delete this file if you wish to redownload it
[2025-02-20 21:27:58.235391] ./datasets/neighborhoodDemographics.geojson already exists delete this file if you wish to redownload it


In [14]:
# Download each raw file exactly as served. requests.get is used instead of the
# shared session, so the X-App-Token header is not sent with these requests.
for dl in rawDownload:
    fileName, url = dl
    outPath = f"{dataPath}{fileName}"

    if os.path.exists(outPath):
        lp(f"{outPath} already exists delete this file if you wish to redownload it")
        continue

    lp(f"Downloading {url} -> {fileName}")
    # A timeout keeps a stalled connection from hanging the notebook forever.
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()

    lp(f"{len(resp.content)} bytes downloaded")

    # Write to a temporary name and rename on success, so an interrupted write
    # never leaves a truncated file that the exists-check above would later
    # mistake for a finished download.
    tmpPath = f"{outPath}.part"
    with open(tmpPath, "wb") as f:
        f.write(resp.content)
    os.replace(tmpPath, outPath)
    

[2025-02-20 21:28:28.692993] ./datasets/MississippiRiver.zip already exists delete this file if you wish to redownload it


In [13]:
# Final log line marking the end of the notebook run.
lp("Done")

[2025-02-20 21:27:58.245496] Done
