In [1]:
# Import Python packages and set from and to dates

import requests
import json
import pandas as pd
import numpy as np
import time


# ---- date window ----
# Both values are passed unchanged to fetch_proceedings / fetch_decisions as the
# from/to date query parameters, and are embedded in the names of the saved JSON files.
# NOTE(review): whether the service treats the bounds as inclusive is not visible here — confirm.
FROM_DATE = "2025-08-01"   # YYYY-MM-DD
TO_DATE   = "2025-08-20"   # YYYY-MM-DD (today or last Friday, etc.)



In [4]:
# Import PTAB Proceedings

import time
BASE_PROCS = "https://developer.uspto.gov/ptab-api/proceedings"

def fetch_proceedings(from_date, to_date, page_size=200):
    """Download every proceeding record in a date window, one page at a time.

    Sends GET requests to ``BASE_PROCS`` with an increasing
    ``recordStartNumber`` until the service answers with an empty (or missing)
    ``results`` list.

    Parameters
    ----------
    from_date, to_date : str
        Sent verbatim as ``proceedingFromDate`` / ``proceedingToDate``.
    page_size : int, default 200
        Sent as ``recordTotalQuantity`` (records requested per call).

    Returns
    -------
    dict
        ``{"results": [...]}`` holding the records of all pages, concatenated
        in the order they were received.

    Raises
    ------
    requests.HTTPError
        From ``raise_for_status()`` when a response has an error status.
        Other ``requests`` errors (timeouts, invalid JSON) propagate unchanged.
    """
    # NOTE(review): the run recorded in this notebook printed 199271 proceedings
    # for a 20-day window — confirm the service really applies these date
    # parameters (names/format) rather than ignoring them.
    all_results, start = [], 0
    while True:
        params = {
            "proceedingFromDate": from_date,
            "proceedingToDate": to_date,
            "recordStartNumber": start,
            "recordTotalQuantity": page_size,
        }
        r = requests.get(BASE_PROCS, params=params, timeout=60)
        r.raise_for_status()
        page = r.json().get("results", [])
        if not page:
            break
        all_results.extend(page)
        # Advance by the number of records actually received, not the number
        # requested: if the service returns a shorter page than asked for,
        # stepping by page_size would silently skip the records in between.
        start += len(page)
        time.sleep(0.02)  # polite throttle
    return {"results": all_results}

raw_procs = fetch_proceedings(FROM_DATE, TO_DATE)

# Normalize TC (technology center) inside proceedings so it's ready downstream
procs_df = pd.DataFrame(raw_procs.get("results", []))
print("Proceedings fetched:", len(procs_df))

if not procs_df.empty:
    # Coalesce any *TechnologyCenterNumber fields (e.g., appellant/respondent).
    # Back-filling across the columns pulls each row's first non-null value
    # into the left-most column, which is then selected.
    tc_cols = [c for c in procs_df.columns if c.lower().endswith("technologycenternumber")]
    if tc_cols:
        procs_df["technologyCenterRaw"] = procs_df[tc_cols].bfill(axis=1).iloc[:, 0]
    else:
        procs_df["technologyCenterRaw"] = np.nan

    # Extract the first run of 4 digits as the TC code; rows without one become <NA>.
    # (No .str.strip() needed: the capture group can only contain digits.)
    procs_df["technologyCenterNumber"] = (
        procs_df["technologyCenterRaw"]
        .astype("string")
        .str.extract(r"(\d{4})", expand=False)
    )
else:
    # Ensure columns exist even if no rows (keeps downstream happy)
    procs_df["technologyCenterRaw"] = pd.Series(dtype="string")
    procs_df["technologyCenterNumber"] = pd.Series(dtype="string")

# Rebuild the record list with the normalized TC columns included.
# Building the DataFrame pads fields absent from a record with NaN, and the
# "string" column uses pd.NA for rows without a TC code. json.dump raises on
# pd.NA and writes NaN as a bare `NaN` token (not valid JSON), so every missing
# value is mapped to None (-> JSON null). The cast to object comes first so
# that None is kept instead of being coerced back to NaN.
records_df = procs_df.astype(object).where(procs_df.notna(), None)
raw_procs = {"results": records_df.to_dict(orient="records")}

Proceedings fetched: 199271


In [3]:
# Import PTAB decisions

import time
BASE_DECS = "https://developer.uspto.gov/ptab-api/decisions"

def fetch_decisions(from_date, to_date, page_size=200):
    """Download every decision record in a date window, one page at a time.

    Sends GET requests to ``BASE_DECS`` with an increasing
    ``recordStartNumber`` until the service answers with an empty (or missing)
    ``results`` list.

    Parameters
    ----------
    from_date, to_date : str
        Sent verbatim as ``decisionFromDate`` / ``decisionToDate``.
    page_size : int, default 200
        Sent as ``recordTotalQuantity`` (records requested per call).

    Returns
    -------
    dict
        ``{"results": [...]}`` holding the records of all pages, concatenated
        in the order they were received.

    Raises
    ------
    requests.HTTPError
        From ``raise_for_status()`` when a response has an error status.
        Other ``requests`` errors (timeouts, invalid JSON) propagate unchanged.
    """
    all_results, start = [], 0
    while True:
        params = {
            "decisionFromDate": from_date,
            "decisionToDate": to_date,
            "recordStartNumber": start,
            "recordTotalQuantity": page_size,
        }
        r = requests.get(BASE_DECS, params=params, timeout=60)
        r.raise_for_status()
        page = r.json().get("results", [])
        if not page:
            break
        all_results.extend(page)
        # Advance by the number of records actually received, not the number
        # requested: if the service returns a shorter page than asked for,
        # stepping by page_size would silently skip the records in between.
        start += len(page)
        time.sleep(0.02)  # polite throttle
    return {"results": all_results}

# Fetch all decisions for the configured date window (performs network requests);
# the result is a dict of the form {"results": [...]}.
raw_decs = fetch_decisions(FROM_DATE, TO_DATE)
     

In [5]:
# Save the JSON objects from both fetches separately in the PTAB_Project_Data/Raw folder

from pathlib import Path
from datetime import datetime
import json

# 0) Guard: both fetch results must exist in THIS kernel session before saving
for name in ("raw_procs", "raw_decs"):
    if name in globals():
        continue
    raise NameError(f"{name} is not defined. Run the fetch cells first (Cells 2 and 3).")

procs_count, decs_count = (
    len(payload.get("results", [])) for payload in (raw_procs, raw_decs)
)
print(f"Counts → proceedings: {procs_count} | decisions: {decs_count}")

# 1) Report the working directory and the (relative) output directory, creating it if needed
print("CWD:", Path.cwd().resolve())

outdir = Path("../PTAB_Project_Data/Raw")
print("Target Raw dir:", outdir.resolve())
outdir.mkdir(parents=True, exist_ok=True)

# 2) Timestamped file names, so an earlier export is never overwritten silently
stamp = f"{datetime.now():%Y-%m-%d_%H%M%S}"

procs_path = outdir / f"proceedings_{FROM_DATE}_to_{TO_DATE}_{stamp}.json"
decs_path  = outdir / f"decisions_{FROM_DATE}_to_{TO_DATE}_{stamp}.json"

# Proceedings first, then decisions — one pretty-printed JSON file each
for destination, payload in ((procs_path, raw_procs), (decs_path, raw_decs)):
    with destination.open("w") as f:
        json.dump(payload, f, indent=2)

print("✅ Saved to:", outdir.resolve())
for saved in (procs_path, decs_path):
    print("  -", saved.name)

# 3) Show the last (up to ten) JSON file names in the folder, sorted by path
json_names = [p.name for p in sorted(outdir.glob('*.json'))]
print("Files in Raw:", json_names[-10:])

Counts → proceedings: 199271 | decisions: 488
CWD: /Users/rellu/Documents/PTAB_Project/ABB_Projects/AI-Builders-Bootcamp-5/PTAB_Project_Notebooks
Target Raw dir: /Users/rellu/Documents/PTAB_Project/ABB_Projects/AI-Builders-Bootcamp-5/PTAB_Project_Data/Raw
✅ Saved to: /Users/rellu/Documents/PTAB_Project/ABB_Projects/AI-Builders-Bootcamp-5/PTAB_Project_Data/Raw
  - proceedings_2025-08-01_to_2025-08-20_2025-08-21_091356.json
  - decisions_2025-08-01_to_2025-08-20_2025-08-21_091356.json
Files in Raw: ['decisions_2025-08-01_to_2025-08-20_2025-08-21_091356.json', 'proceedings_2025-08-01_to_2025-08-20_2025-08-21_091356.json']
