#### https://eproptax.saccounty.net/servicev2/eproptax.svc/rest/BillSummary?parcel=00100110050000
#### https://eproptax.saccounty.net/servicev2/eproptax.svc/rest/PaymentHistory?parcel=00100110050000

In [7]:
#!/usr/bin/env python3
import csv
import gzip
import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# ——— CONFIG ———
PARCELS_CSV       = r'C:\Users\Dewank Mahajan\Desktop\DKM Business\LowPropTax\AccessorOffice\Sacramento\ParcelCentroids.csv'
OUTPUT_DIR        = Path('scrape_output')
PARSE_OUTPUT_CSV  = 'parse_output.csv'
REQUESTS_PER_SEC  = 2
CURRENT_YEAR      = str(datetime.now().year)
USER_AGENT        = 'Mozilla/5.0 (compatible; CountyDataBot/1.0; +https://example.com/bot)'
TIMEOUT           = 30
# ——————————

# Ensure output dir exists
OUTPUT_DIR.mkdir(exist_ok=True)

# Session with retry/backoff
session = requests.Session()
session.headers.update({'User-Agent': USER_AGENT})
retries = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504, 403],
    allowed_methods=['GET']
)
session.mount('https://', HTTPAdapter(max_retries=retries))

def fetch_and_parse(apn, lon, lat):
    url = f'https://eproptax.saccounty.net/servicev2/eproptax.svc/rest/BillSummary?parcel={apn}'
    out_path = OUTPUT_DIR / f'{apn}.html'

    # 1) Scrape with retries
    try:
        resp = session.get(url, timeout=TIMEOUT)
        resp.raise_for_status()
    except Exception as e:
        print(f"[ERROR] {apn}: {e}")
        return None

    # Save raw
    with gzip.open(out_path, 'wt') as f:
        f.write(resp.text)

    # 2) Parse JSON
    try:
        data = json.loads(resp.text)
    except json.JSONDecodeError:
        print(f"[ERROR] {apn}: invalid JSON")
        return None

    address = data.get('GlobalData', {}).get('Address') or 'UNKNOWN'
    total = 0.0
    for bill in data.get('Bills', []):
        if bill.get('RollDate') == CURRENT_YEAR:
            amt = bill.get('BillAmount', '0').replace(',', '')
            try:
                total += float(amt)
            except ValueError:
                continue

    return {
        'address':  address,
        'apn':      apn,
        'longitude': lon,
        'latitude':  lat,
        'tax':      total if total>0 else -1,
        'county':   'SAC'
    }

def main():
    # Read parcels CSV by header
    with open(PARCELS_CSV, newline='') as f:
        reader = csv.DictReader(f)
        rows = list(reader)

    # Open output CSV
    with open(PARSE_OUTPUT_CSV, 'w', newline='') as fout:
        writer = csv.DictWriter(
            fout,
            fieldnames=['address','apn','longitude','latitude','tax','county']
        )
        writer.writeheader()

        for i, rec in enumerate(rows, 1):
            apn = rec.get('APN_MAIN')
            try:
                lon = float(rec['LONGITUDE'])
                lat = float(rec['LATITUDE'])
            except (KeyError, ValueError):
                # fallback to older column names if needed
                lon = float(rec.get('X_COORD', 0))
                lat = float(rec.get('Y_COORD', 0))

            if not apn:
                continue

            result = fetch_and_parse(apn, lon, lat)
            if result:
                writer.writerow(result)

            # throttle
            if i % REQUESTS_PER_SEC == 0:
                time.sleep(1)

            if i % 1000 == 0:
                print(f"Processed {i}/{len(rows)} parcels")

    print("Finished. Output →", PARSE_OUTPUT_CSV)

if __name__ == '__main__':
    main()


[ERROR] 7236100430000: HTTPSConnectionPool(host='eproptax.saccounty.net', port=443): Max retries exceeded with url: /servicev2/eproptax.svc/rest/BillSummary?parcel=7236100430000 (Caused by ResponseError('too many 500 error responses'))
[ERROR] 7236100410000: HTTPSConnectionPool(host='eproptax.saccounty.net', port=443): Max retries exceeded with url: /servicev2/eproptax.svc/rest/BillSummary?parcel=7236100410000 (Caused by ResponseError('too many 500 error responses'))
[ERROR] 7236100400000: HTTPSConnectionPool(host='eproptax.saccounty.net', port=443): Max retries exceeded with url: /servicev2/eproptax.svc/rest/BillSummary?parcel=7236100400000 (Caused by ResponseError('too many 500 error responses'))
[ERROR] 7236100390000: HTTPSConnectionPool(host='eproptax.saccounty.net', port=443): Max retries exceeded with url: /servicev2/eproptax.svc/rest/BillSummary?parcel=7236100390000 (Caused by ResponseError('too many 500 error responses'))
[ERROR] 7236100380000: HTTPSConnectionPool(host='epropta

KeyboardInterrupt: 

In [8]:
#!/usr/bin/env python3
import gzip
import json
import time
from datetime import datetime
from pathlib import Path

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# ——— CONFIG ———
APN               = '00100110050000'
LONGITUDE, LATITUDE = -121.4944, 38.5816   # example coords for Sacramento
OUTPUT_DIR        = Path('scrape_output_test')
USER_AGENT        = 'Mozilla/5.0 (compatible; CountyDataBot/1.0; +https://example.com/bot)'
TIMEOUT           = 30
CURRENT_YEAR      = str(datetime.now().year)
# ——————————

OUTPUT_DIR.mkdir(exist_ok=True)

# Session with retry/backoff
session = requests.Session()
session.headers.update({'User-Agent': USER_AGENT})
retries = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504, 403],
    allowed_methods=['GET']
)
session.mount('https://', HTTPAdapter(max_retries=retries))

def fetch_and_parse(apn, lon, lat):
    url = f'https://eproptax.saccounty.net/servicev2/eproptax.svc/rest/BillSummary?parcel={apn}'
    out_path = OUTPUT_DIR / f'{apn}.html'

    # Scrape
    resp = session.get(url, timeout=TIMEOUT)
    resp.raise_for_status()

    # Save raw
    with gzip.open(out_path, 'wt') as f:
        f.write(resp.text)

    # Parse JSON
    data = json.loads(resp.text)
    address = data.get('GlobalData', {}).get('Address') or 'UNKNOWN'

    total = 0.0
    for bill in data.get('Bills', []):
        if bill.get('RollDate') == CURRENT_YEAR:
            amt = bill.get('BillAmount', '0').replace(',', '')
            try:
                total += float(amt)
            except ValueError:
                pass

    return {
        'address':  address,
        'apn':      apn,
        'longitude': lon,
        'latitude':  lat,
        'tax':      total if total>0 else -1,
        'county':   'SAC'
    }

if __name__ == '__main__':
    result = fetch_and_parse(APN, LONGITUDE, LATITUDE)
    print(json.dumps(result, indent=2))


{
  "address": "231 JIBBOOM ST",
  "apn": "00100110050000",
  "longitude": -121.4944,
  "latitude": 38.5816,
  "tax": -1,
  "county": "SAC"
}
