<a href="https://colab.research.google.com/github/CargoCultScientist/ny-opendata/blob/main/Task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Task 2 — Active Corporations (NY Open Data)
# A) Print the 10 most recent additions (sorted by Initial DOS Filing Date)
# B) Download the full JSON snapshot to disk
#
# Requirements: store your credentials in Colab Secrets as:
#   NY_API_KEY_ID        → the API Key ID
#   NY_API_KEY_SECRET    → the API Key Secret

import json
import os
from datetime import datetime

import pandas as pd  # Import pandas for table output
import requests
from google.colab import userdata
from requests.auth import HTTPBasicAuth

# ---- Secrets and config ------------------------------------------------------
# Both halves of the API key come from Colab Secrets; stop right here if either
# one comes back empty, before any request is built.
API_ID, API_SECRET = (
    userdata.get(secret_name)
    for secret_name in ("NY_API_KEY_ID", "NY_API_KEY_SECRET")
)
if not (API_ID and API_SECRET):
    raise RuntimeError("Missing secrets. Add NY_API_KEY_ID and NY_API_KEY_SECRET in Runtime → Secrets.")

# HTTP Basic credentials reused by the requests made further down.
auth = HTTPBasicAuth(API_ID, API_SECRET)

# Portal host and dataset identifier (Active Corporations: Beginning 1800).
DOMAIN, DATASET_ID = "data.ny.gov", "n9v6-gdp6"

# Columns to request, in the order they should be selected.
FIELDS = [
    "dos_id",
    "current_entity_name",
    "initial_dos_filing_date",
    "county",
    "jurisdiction",
    "entity_type",
]

# ---- A) Fetch 10 newest by filing date ----------------------------------------
# The API returns initial_dos_filing_date as an ISO-8601 timestamp
# (e.g. 2025-10-25T00:00:00.000), so ordering on that column DESC on the server
# is enough; no date parsing is done here or in the query.
# NOTE(review): many records share the same filing date and $order has no
# tie-breaker, so which same-day records make the top 10 is up to the server.
resource_url = f"https://{DOMAIN}/resource/{DATASET_ID}.json"
params = {
    "$select": ",".join(FIELDS),
    "$order": "initial_dos_filing_date DESC",
    "$limit": 10,
}

resp = requests.get(resource_url, params=params, auth=auth, timeout=60)
resp.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error body
rows = resp.json()

# Create a pandas DataFrame for better display.
# Passing columns=FIELDS pins the column set and order: JSON records may omit
# keys for empty values, and without it the table layout would depend on which
# keys happen to be present (an empty result would have no columns at all).
df = pd.DataFrame(rows, columns=FIELDS)

# Print results as a table
print("10 newest by Initial DOS Filing Date:")
display(df)

# Save the 10 records exactly as returned by the API (handy for submission or checks)
with open("/content/latest_10_active_corporations.json", "w", encoding="utf-8") as f:
    json.dump(rows, f, ensure_ascii=False, indent=2)
print("Saved: /content/latest_10_active_corporations.json")

# ---- B) Download full dataset snapshot ---------------------------------------
# This endpoint returns the entire dataset export as JSON.
snapshot_url = f"https://{DOMAIN}/api/views/{DATASET_ID}/rows.json?accessType=DOWNLOAD"
out_path = "/content/active_corporations_latest.json"

# Stream into a sibling temp file and move it into place only after the last
# chunk has been written, so an interrupted or failed download never leaves a
# truncated file sitting at out_path looking like a complete snapshot.
tmp_path = out_path + ".part"

try:
    with requests.get(snapshot_url, auth=auth, stream=True, timeout=300) as r2:
        r2.raise_for_status()
        with open(tmp_path, "wb") as f:
            # Read in 1 MiB chunks so the export is never held in memory at once.
            for chunk in r2.iter_content(chunk_size=1 << 20):
                if chunk:
                    f.write(chunk)
    os.replace(tmp_path, out_path)
finally:
    # After a successful replace the temp file is gone; if anything above
    # raised, remove the partial download before the error propagates.
    if os.path.exists(tmp_path):
        os.remove(tmp_path)

print(f"Saved full dataset snapshot to: {out_path}")

10 newest by Initial DOS Filing Date:


Unnamed: 0,dos_id,current_entity_name,initial_dos_filing_date,county,jurisdiction,entity_type
0,7743016,104 SAG NO MORE LLC,2025-10-25T00:00:00.000,Essex,New York,DOMESTIC LIMITED LIABILITY COMPANY
1,7742952,1780 SK LLC,2025-10-25T00:00:00.000,Westchester,New York,DOMESTIC LIMITED LIABILITY COMPANY
2,7742958,1908 DELI INC.,2025-10-25T00:00:00.000,Nassau,New York,DOMESTIC BUSINESS CORPORATION
3,7742945,1914 HUTCHINSON PKWY LLC,2025-10-25T00:00:00.000,Westchester,New York,DOMESTIC LIMITED LIABILITY COMPANY
4,7742966,2514 DELI INC.,2025-10-25T00:00:00.000,Nassau,New York,DOMESTIC BUSINESS CORPORATION
5,7742980,312 STRATEGIES LLC,2025-10-25T00:00:00.000,New York,New York,DOMESTIC LIMITED LIABILITY COMPANY
6,7742949,44-46 ELMWOOD LLC,2025-10-25T00:00:00.000,Westchester,New York,DOMESTIC LIMITED LIABILITY COMPANY
7,7742938,44TH STREET FILMS LLC,2025-10-25T00:00:00.000,Queens,New York,DOMESTIC LIMITED LIABILITY COMPANY
8,7743026,88 JZC CHINA WOK LLC,2025-10-25T00:00:00.000,Kings,New York,DOMESTIC LIMITED LIABILITY COMPANY
9,7743040,A.M SATTAR CORPORATION,2025-10-25T00:00:00.000,Suffolk,New York,DOMESTIC BUSINESS CORPORATION


Saved: /content/latest_10_active_corporations.json
Saved full dataset snapshot to: /content/active_corporations_latest.json
