# Download Datasets

This notebook downloads the datasets listed below and saves them as local files.

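You will need a Socrata app token (see https://dev.socrata.com/docs/app-tokens.html). The notebook reads it from the `DATA_NOLA_GOV_APPTOKEN` environment variable, which can be set in a `.env` file.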



In [1]:
import os
from datetime import datetime
import requests
from dotenv import load_dotenv
import math
import json

## Datasets below

The datasets below all come from https://data.nola.gov/

In [2]:
# Each entry is an (output file name, source URL) pair; the pair is unpacked
# into the positional arguments of getDataSet.
datasets = [
    (
        "Parcels.geojson",
        "https://data.nola.gov/resource/v9q5-fz7t.geojson",
    ),
]



In [3]:
def lp(v):
    """Print ``v`` to stdout, prefixed with the current local time in brackets."""
    stamp = datetime.now()
    print("[{}] {}".format(stamp, v))


lp("Starting...")

[2025-03-23 11:13:28.605250] Starting...


In [4]:
# Call load_dotenv (python-dotenv) and log the value it returns.
lp("load_dotenv:  {}".format(load_dotenv()))

[2025-03-23 11:13:28.609126] load_dotenv:  True


In [5]:
lp("Loading APP Token")
# os.environ.get returns None when the variable is unset; fail here with an
# explicit message instead of an opaque TypeError from len(None) below.
appToken = os.environ.get("DATA_NOLA_GOV_APPTOKEN")
if not appToken:
    raise RuntimeError(
        "DATA_NOLA_GOV_APPTOKEN is not set; add it to the environment or a .env file "
        "(see https://dev.socrata.com/docs/app-tokens.html)"
    )
# Log only a mask of the same length so the token itself never reaches the output.
lp(f"App token {'*' * len(appToken)} loaded")

[2025-03-23 11:13:28.612398] Loading APP Token
[2025-03-23 11:13:28.612654] App token ************************* loaded


In [6]:
# Relative output directory built with the platform separator; the empty last
# component leaves a trailing separator on the resulting string.
dataPath = os.sep.join((os.curdir, "datasets", ""))
lp("Path:  {}".format(dataPath))

[2025-03-23 11:13:28.616627] Path:  ./datasets/


In [7]:
# Make sure the output directory is present, logging which case applied.
if os.path.exists(dataPath):
    lp(f"{dataPath} already exists")
else:
    lp(f"Creating path {dataPath}")
    os.makedirs(dataPath)
    

[2025-03-23 11:13:28.619850] ./datasets/ already exists


In [8]:
lp("Creating sessions")
# One shared Session whose default headers carry the app token and the
# requested response type.
session = requests.Session()
session.headers['X-App-Token'] = appToken
session.headers['Accept'] = 'application/json'

[2025-03-23 11:13:28.622924] Creating sessions


In [9]:
def getDataSet(outFile, url, limit = 1000, maxCycle = math.inf, timeout = 60):
    """Download a paged GeoJSON resource and save it as one FeatureCollection.

    Pages are requested from ``url`` with ``$limit``/``$offset`` (ordered by
    ``:id``) until a page comes back with no features or ``maxCycle`` pages
    have been fetched. The features of every page are merged and written as
    JSON to ``dataPath + outFile``.

    Args:
        outFile: File name to create inside ``dataPath``.
        url: Endpoint to query; each response must be a JSON object with a
            ``features`` list.
        limit: Page size sent as ``$limit`` and used as the offset step.
        maxCycle: Maximum number of pages to request (unbounded by default).
        timeout: Value passed to ``session.get`` as ``timeout`` so a stalled
            connection raises instead of hanging the notebook.

    Returns:
        ``False`` if the output file already exists (nothing is downloaded),
        otherwise the merged FeatureCollection ``dict`` that was written.

    Raises:
        requests.HTTPError: via ``raise_for_status`` on an error response.
        KeyError: if a response has no ``features`` key.
    """
    outPath = f"{dataPath}{outFile}"

    if os.path.exists(outPath):
        lp(f"{outPath} already exists delete this file if you wish to redownload it")
        return False

    lp(f"Writting to {outPath}")

    totalLen = 0
    lastLen = 1  # non-zero so the emptiness check does not skip the first page
    offset = 0
    cycle = 0
    fullData = { "type": "FeatureCollection", "features":[]}
    while lastLen > 0 and cycle < maxCycle:

        lp(f"Getting content from {url}  limit={limit} offset={offset}")
        resp = session.get(
            url,
            params={'$limit':limit,'$offset':offset,'$order':':id'},
            timeout=timeout,
        )
        resp.raise_for_status()

        outData = json.loads(resp.text)
        pageFeatures = outData['features']
        fullData['features'].extend(pageFeatures)

        lastLen = len(pageFeatures)
        totalLen += lastLen

        lp(f"{len(resp.content)} bytes downloaded {lastLen} features, total features downloaded {totalLen}")

        cycle += 1
        offset += limit

    # Write to a temporary name and move it into place only once the dump has
    # finished. A half-written file at outPath would otherwise make every later
    # run take the "already exists" early return above.
    tmpPath = f"{outPath}.part"
    try:
        with open(tmpPath, "w") as f:
            json.dump(fullData, f)
        os.replace(tmpPath, outPath)
    finally:
        # After a successful replace the temporary file is gone; this only
        # cleans up when the dump or the move failed part-way.
        if os.path.exists(tmpPath):
            os.remove(tmpPath)

    lp(f"Finished writing {totalLen} rows to {outFile}")

    return fullData
    

In [10]:
# Download every configured dataset; each tuple is unpacked into getDataSet's
# positional parameters. `resp` is overwritten on every iteration, so after the
# loop it holds only the result for the last dataset.
for dataset in datasets:
    resp = getDataSet(*dataset)

[2025-03-23 11:13:28.635466] ./datasets/Parcels.geojson already exists delete this file if you wish to redownload it


In [11]:
# Final timestamped log line, emitted once all cells above have run.
lp("Done")

[2025-03-23 11:13:28.639415] Done
