In [7]:
#!/usr/bin/env python3
from __future__ import annotations

import csv
import json
import random
import re
import sys
import time
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote, urljoin

import requests
from bs4 import BeautifulSoup

# =========================================================
# Config
# =========================================================

# Site root; site-relative links (detail pages, pagination) are resolved against it.
ZILLOW_BASE = "https://www.zillow.com"

# Desktop browser User-Agent strings; _rand_ua() picks one of them at random.
UA_LIST = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
]

def _rand_ua() -> str:
    """Pick one User-Agent string at random from ``UA_LIST``."""
    chosen = random.choice(UA_LIST)
    return chosen

def build_headers(referer: Optional[str] = None) -> Dict[str, str]:
    """Assemble browser-like request headers with a randomly chosen User-Agent.

    Args:
        referer: Value for the ``Referer`` header; the header is omitted when
            this is ``None`` or empty.

    Returns:
        A new header dict on every call.
    """
    headers: Dict[str, str] = {
        "User-Agent": _rand_ua(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
    if not referer:
        return headers
    headers["Referer"] = referer
    return headers

def make_session(proxies: Optional[Dict[str, str]] = None, referer: Optional[str] = None) -> requests.Session:
    """Create a ``requests.Session`` preloaded with browser-like headers.

    Args:
        proxies: Optional proxy mapping merged into the session's proxies.
        referer: Optional ``Referer`` passed through to ``build_headers``.

    Returns:
        The configured session.
    """
    session = requests.Session()
    session.headers.update(build_headers(referer=referer))
    if not proxies:
        return session
    session.proxies.update(proxies)
    return session

def prewarm_session(session: requests.Session, slug: str) -> None:
    """Visit the site root and the region landing page before crawling.

    Best-effort: any ``requests.RequestException`` ends the warm-up silently,
    and the responses themselves are ignored.
    """
    try:
        # Site root first, then the landing page for the region slug.
        session.get(ZILLOW_BASE, timeout=15)
        region_url = f"{ZILLOW_BASE}/{quote(slug)}/"
        session.get(region_url, timeout=20)
    except requests.RequestException:
        return

# =========================================================
# Utilities
# =========================================================

def slugify_place(place: str) -> str:
    """Turn a place name such as ``"Champaign, IL"`` into a URL slug.

    The text is lower-cased and trimmed, periods and commas are removed, and
    each run of whitespace becomes a single hyphen.
    """
    normalized = place.lower().strip()
    without_punctuation = re.sub(r"[.,]", "", normalized)
    return re.sub(r"\s+", "-", without_punctuation)

def zillow_search_url(slug: str, category_path: str = "") -> str:
    """Build the search-results URL for a region slug.

    Args:
        slug: Region slug (e.g. the output of ``slugify_place``); it is
            percent-quoted before use.
        category_path: Optional sub-path appended after the slug. Leading and
            trailing slashes are ignored.

    Returns:
        ``ZILLOW_BASE`` followed by ``/<slug>/`` or ``/<slug>/<category>/``.
    """
    segments = [quote(slug)]
    # Strip before testing: a category of only slashes (e.g. "/") must not
    # add an empty segment, which would put "//" in the URL.
    category = category_path.strip("/") if category_path else ""
    if category:
        segments.append(category)
    return f"{ZILLOW_BASE}/" + "/".join(segments) + "/"

def _safe_float(x: Any) -> Optional[float]:
    """Coerce *x* to ``float``, returning ``None`` when that is not possible.

    Numbers are converted directly. Strings have commas and every character
    other than digits, ``.`` and ``-`` removed before conversion. Any other
    type (including ``None``) yields ``None``.
    """
    if isinstance(x, (int, float)):
        return float(x)
    if not isinstance(x, str):
        return None
    cleaned = re.sub(r"[^\d.\-]", "", x.replace(",", ""))
    if cleaned in {"", "-", "--"}:
        return None
    try:
        return float(cleaned)
    except ValueError:
        # Leftovers such as "1-2" or "1.2.3" are not valid numbers.
        return None

def _safe_int(x: Any) -> Optional[int]:
    """Coerce *x* to ``int`` via ``_safe_float``; ``None`` when it cannot be parsed.

    The fractional part is truncated by ``int()``.
    """
    as_float = _safe_float(x)
    if as_float is None:
        return None
    return int(as_float)

def _money_to_float(s: Optional[str]) -> Optional[float]:
    """Parse the first monetary amount found in *s*.

    Thousands separators are removed, an optional ``$`` is skipped, and a
    sign placed directly before the amount (``-$5,000``) is honoured.

    Args:
        s: Text such as ``"$350,000"`` or ``"$150/sqft"``; may be ``None``.

    Returns:
        The amount as a float, or ``None`` when *s* is empty or has no digits.
    """
    if not s:
        return None
    cleaned = s.strip().replace(",", "")
    # The sign is captured in its own group so a negative amount stays negative.
    m = re.search(r"([-+]?)\$?([0-9]+(?:\.[0-9]+)?)", cleaned)
    if not m:
        return None
    amount = float(m.group(2))
    return -amount if m.group(1) == "-" else amount

def _norm_text(t: str) -> str:
    """Collapse each run of whitespace in *t* to one space and trim the ends."""
    collapsed = re.sub(r"\s+", " ", t)
    return collapsed.strip()

def request_html(url: str, session: requests.Session, max_retries: int = 4, sleep_base: float = 1.4) -> str:
    """GET *url* and return the response body, retrying with growing delays.

    A response counts as success only when the status is 200 and the body is
    non-empty. On 403/429 the session headers are rebuilt (new random
    User-Agent, ``Referer`` set to *url*) before a longer pause. Other
    statuses and network errors are retried after a shorter pause.

    Args:
        url: Absolute URL to fetch.
        session: Session used for the request; its headers may be replaced.
        max_retries: Maximum number of attempts.
        sleep_base: Per-attempt multiplier for the 403/429 back-off.

    Returns:
        The response text of the first successful attempt.

    Raises:
        RuntimeError: When every attempt fails. If the final attempt ended in
            a ``requests.RequestException``, it is chained as the cause.
    """
    last_status = None
    last_error: Optional[BaseException] = None
    for attempt in range(1, max_retries + 1):
        try:
            resp = session.get(url, timeout=30)
        except requests.RequestException as exc:
            last_error = exc
            pause = 0.9 * attempt + random.uniform(0.1, 0.6)
        else:
            last_error = None
            last_status = resp.status_code
            if resp.status_code == 200 and resp.text:
                return resp.text
            if resp.status_code in {403, 429}:
                # Likely throttled/blocked: present a fresh header set next time.
                session.headers.update(build_headers(referer=url))
                pause = sleep_base * attempt + random.uniform(0.8, 2.2)
            else:
                pause = 0.6 * attempt + random.uniform(0.1, 0.5)
        # No point in waiting after the final attempt; we are about to raise.
        if attempt < max_retries:
            time.sleep(pause)
    raise RuntimeError(
        f"GET {url} failed after {max_retries} attempts (last={last_status})"
    ) from last_error

# =========================================================
# Search page parsing (SRP)
# =========================================================

def _json_from_next_data(html: str) -> Optional[dict]:
    """Decode the JSON held in the page's ``<script id="__NEXT_DATA__">`` tag.

    Returns ``None`` when the tag is absent, empty, or not valid JSON.
    """
    tag = BeautifulSoup(html, "html.parser").find("script", id="__NEXT_DATA__")
    if not (tag and tag.text):
        return None
    try:
        payload = json.loads(tag.text)
    except json.JSONDecodeError:
        return None
    return payload

def _regex_searchPageState(html: str) -> Optional[dict]:
    """Pull the ``searchPageState`` object out of raw HTML with a regex.

    The object is taken to be the shortest ``{...}`` span that sits between
    the ``"searchPageState":`` key and a following ``,"isBot"`` key. Returns
    ``None`` when the pattern is not found or the span is not valid JSON.
    """
    found = re.search(r'"searchPageState"\s*:\s*(\{.*?\})\s*,\s*"isBot"', html, re.DOTALL)
    if found is None:
        return None
    raw_state = found.group(1)
    try:
        state = json.loads(raw_state)
    except Exception:
        return None
    return state

def _extract_cat1(next_data: dict, html: str) -> dict:
    """Locate the ``cat1`` search-state dict for a search results page.

    Lookup order inside ``next_data["props"]["pageProps"]``: ``searchPageData``,
    ``componentProps``, ``__APOLLO_STATE__``, ``pageProps`` itself, then
    ``searchPageState``. The first of these that holds a non-empty dict under
    ``"cat1"`` wins. If none does, the raw HTML is scanned with
    ``_regex_searchPageState``.

    Every level tolerates missing keys, ``null`` values and non-dict values,
    so malformed page data yields ``{}`` rather than an ``AttributeError``.

    Args:
        next_data: Decoded ``__NEXT_DATA__`` payload (anything non-dict is
            treated as empty).
        html: Raw page HTML for the regex fallback.

    Returns:
        The ``cat1`` dict, or ``{}`` when it cannot be found.
    """
    def _as_dict(obj: Any) -> dict:
        return obj if isinstance(obj, dict) else {}

    page_props = _as_dict(_as_dict(_as_dict(next_data).get("props")).get("pageProps"))

    containers = [
        page_props.get("searchPageData"),
        page_props.get("componentProps"),
        page_props.get("__APOLLO_STATE__"),
        page_props,
        page_props.get("searchPageState"),
    ]
    for container in containers:
        cat1 = _as_dict(container).get("cat1")
        if isinstance(cat1, dict) and cat1:
            return cat1

    # Nothing usable in the structured data: fall back to scanning the HTML.
    cat1 = _as_dict(_regex_searchPageState(html)).get("cat1")
    return cat1 if isinstance(cat1, dict) else {}

def _next_url_from_dom(html: str) -> Optional[str]:
    """Find the "next page" link in the rendered HTML.

    Tries ``a[rel="next"]`` first, then ``a[title="Next page"]``. If the
    chosen anchor has no usable ``href``, every anchor is scanned for one
    whose visible text is "next", "next page" or ">" (case-insensitive).

    Returns:
        The ``href`` value as found in the page, or ``None``.
    """
    soup = BeautifulSoup(html, "html.parser")

    link = None
    for selector in ('a[rel="next"][href]', 'a[title="Next page"][href]'):
        link = soup.select_one(selector)
        if link:
            break
    if link and link.get("href"):
        return link["href"]

    next_labels = {"next", "next page", ">"}
    for anchor in soup.select("a[href]"):
        label = (anchor.get_text(strip=True) or "").lower()
        if label in next_labels and anchor.get("href"):
            return anchor["href"]
    return None

def extract_results(html: str) -> Tuple[List[dict], Optional[int], Optional[int], Optional[int], Optional[str]]:
    """Parse one search results page.

    Results come from ``cat1["searchResults"]``: ``listResults`` first, then
    ``mapResults``, de-duplicated by ``zpid`` (entries without a ``zpid`` are
    always kept). Missing, ``null`` or wrongly typed containers are treated
    as empty, and result entries that are not dicts are skipped.

    The next-page URL is looked up, in order, in ``pagination["nextUrl"]``,
    in ``pagination["pages"]`` (an entry with ``rel == "next"``, else one
    whose ``title`` equals the next page number), and finally in the DOM.

    Args:
        html: Raw HTML of the search results page.

    Returns:
        ``(results, current_page, total_pages, total_count, next_url)``;
        any of the last four may be ``None``.
    """
    next_data = _json_from_next_data(html)
    cat1 = _extract_cat1(next_data or {}, html)

    sr = cat1.get("searchResults") if isinstance(cat1, dict) else None
    if not isinstance(sr, dict):
        sr = {}

    # de-dupe across both result sets; list results take precedence
    seen = set()
    combined = []
    for group in (sr.get("listResults"), sr.get("mapResults")):
        if not isinstance(group, list):
            continue
        for r in group:
            if not isinstance(r, dict):
                continue
            zpid = r.get("zpid")
            # id(r) gives entries without a zpid a key that cannot collide.
            key = str(zpid) if zpid is not None else id(r)
            if key in seen:
                continue
            seen.add(key)
            combined.append(r)

    pag = sr.get("pagination")
    if not isinstance(pag, dict):
        pag = {}
    current_page = _safe_int(pag.get("currentPage"))
    total_pages = _safe_int(pag.get("totalPages"))
    total_count = sr.get("totalResultCount") if isinstance(sr.get("totalResultCount"), int) else None

    next_url = pag.get("nextUrl")
    if not next_url:
        pages = pag.get("pages")
        if isinstance(pages, list):
            for p in pages:
                if isinstance(p, dict) and p.get("rel") == "next" and p.get("href"):
                    next_url = p["href"]
                    break
            if not next_url and current_page is not None:
                want = str(current_page + 1)
                for p in pages:
                    if isinstance(p, dict) and p.get("title") == want and p.get("href"):
                        next_url = p["href"]
                        break
    if not next_url or not isinstance(next_url, str):
        next_url = _next_url_from_dom(html)

    return combined, current_page, total_pages, total_count, next_url

# =========================================================
# Data models
# =========================================================

@dataclass
class Listing:
    """One search-result listing, normalized from a raw result dict.

    Every field is optional because result dicts vary in which keys they
    carry. ``price`` is the value as found (stringified); ``price_numeric``
    is its numeric form.
    """

    zpid: Optional[str]
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zipcode: Optional[str]
    price: Optional[str]
    price_numeric: Optional[float]
    beds: Optional[float]
    baths: Optional[float]
    area_sqft: Optional[float]
    latitude: Optional[float]
    longitude: Optional[float]
    status_type: Optional[str]
    status_text: Optional[str]
    home_type: Optional[str]
    year_built: Optional[int]
    lot_area_value: Optional[float]
    lot_area_unit: Optional[str]
    days_on_zillow: Optional[float]
    url: Optional[str]
    img_url: Optional[str]

    @staticmethod
    def _first_present(*values: Any) -> Any:
        """Return the first value that is neither ``None`` nor ``""``.

        Unlike an ``a or b`` chain this keeps legitimate zeros (0 beds,
        0 days listed).
        """
        for value in values:
            if value is not None and value != "":
                return value
        return None

    @staticmethod
    def _dict_or_empty(value: Any) -> Dict[str, Any]:
        """Return *value* when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @classmethod
    def from_result(cls, result: Dict[str, Any]) -> "Listing":
        """Build a ``Listing`` from one raw search-result dict.

        Each field is read from its primary key, then from an alternate key
        (for example ``city`` then ``addressCity``). Home type, year built
        and lot size also fall back to ``hdpData.homeInfo``. Nested
        containers that are missing, ``null`` or not dicts are treated as
        empty.

        Args:
            result: A result entry as returned by ``extract_results``.

        Returns:
            The populated ``Listing``; fields that cannot be read are ``None``.
        """
        pick = cls._first_present
        home_info = cls._dict_or_empty(cls._dict_or_empty(result.get("hdpData")).get("homeInfo"))
        variable_data = cls._dict_or_empty(result.get("variableData"))
        latlong = cls._dict_or_empty(result.get("latLong"))

        detail_url = result.get("detailUrl")
        if isinstance(detail_url, str) and detail_url.startswith("/"):
            # Site-relative link: make it absolute.
            detail_url = f"{ZILLOW_BASE}{detail_url}"

        price_display = result.get("price")
        price_unformatted = result.get("unformattedPrice")
        price_raw = pick(price_display, price_unformatted)
        # Prefer the unformatted value for the numeric field; parsing the
        # display string is lossy if it is abbreviated or decorated.
        price_num = _safe_float(price_unformatted)
        if price_num is None:
            price_num = _safe_float(price_display)

        zpid = result.get("zpid")

        return cls(
            zpid=str(zpid) if zpid is not None else None,
            address=pick(result.get("address"), result.get("addressStreet")),
            city=pick(result.get("city"), result.get("addressCity")),
            state=pick(result.get("state"), result.get("addressState")),
            zipcode=pick(result.get("zipcode"), result.get("addressZipcode")),
            price=str(price_raw) if price_raw is not None else None,
            price_numeric=price_num,
            beds=_safe_float(pick(result.get("beds"), result.get("bedrooms"))),
            baths=_safe_float(pick(result.get("baths"), result.get("bathrooms"))),
            area_sqft=_safe_float(pick(result.get("area"), result.get("livingArea"))),
            latitude=_safe_float(latlong.get("latitude")),
            longitude=_safe_float(latlong.get("longitude")),
            status_type=result.get("statusType"),
            status_text=pick(result.get("statusText"), variable_data.get("text")),
            home_type=pick(result.get("homeType"), home_info.get("homeType")),
            year_built=_safe_int(pick(result.get("yearBuilt"), home_info.get("yearBuilt"))),
            lot_area_value=_safe_float(pick(result.get("lotAreaValue"), home_info.get("lotAreaValue"))),
            lot_area_unit=pick(result.get("lotAreaUnit"), home_info.get("lotAreaUnit")),
            days_on_zillow=_safe_float(pick(result.get("daysOnZillow"), result.get("timeOnZillow"))),
            url=detail_url,
            img_url=result.get("imgSrc"),
        )

# =========================================================
# Detail page parsing (JSON + DOM)
# =========================================================

def _get_next_data_soup(html: str) -> Optional[dict]:
    """Decode the ``__NEXT_DATA__`` JSON payload of a detail page.

    Delegates to ``_json_from_next_data`` so search and detail pages share a
    single implementation of the same extraction.

    Returns:
        The decoded payload, or ``None`` when the script tag is absent,
        empty, or not valid JSON.
    """
    return _json_from_next_data(html)

def _parse_gdp_client_cache(next_data: dict) -> Optional[dict]:
    """Return the ``property`` node from the detail page's client cache.

    Inside __NEXT_DATA__:
      props.pageProps.componentProps.gdpClientCache  (stringified JSON)

    The cache maps opaque keys to entries; the first entry that is a dict
    with a dict under ``"property"`` is returned. A cache that is already a
    dict is accepted as-is.

    Args:
        next_data: Decoded ``__NEXT_DATA__`` payload.

    Returns:
        The ``property`` dict, or ``None`` when any level is missing,
        malformed, or the cache does not decode to a dict.
    """
    def _as_dict(obj: Any) -> dict:
        return obj if isinstance(obj, dict) else {}

    page_props = _as_dict(_as_dict(next_data).get("props")).get("pageProps")
    component_props = _as_dict(page_props).get("componentProps")
    raw_cache = _as_dict(component_props).get("gdpClientCache")
    if not raw_cache:
        return None

    if isinstance(raw_cache, dict):
        cache_obj = raw_cache
    else:
        try:
            cache_obj = json.loads(raw_cache)
        except (TypeError, ValueError, RecursionError):
            return None

    # Valid JSON need not be an object; only a dict can be scanned for entries.
    if not isinstance(cache_obj, dict):
        return None

    for entry in cache_obj.values():
        if isinstance(entry, dict) and isinstance(entry.get("property"), dict):
            return entry["property"]
    return None

def _flatten_reso_facts(reso_facts: dict) -> Dict[str, Any]:
    """Flatten a nested facts dict into one level with dotted keys.

    Nested dict keys are joined with ``.``; lists become a single
    ``"; "``-joined string of their items' ``str()`` forms; all other values
    are kept unchanged. A falsy *reso_facts* gives ``{}``.
    """
    def _leaves(path: str, node: Any):
        # Depth-first, in insertion order, yielding (flat_key, value) pairs.
        if isinstance(node, dict):
            for name, child in node.items():
                child_path = f"{path}.{name}" if path else name
                yield from _leaves(child_path, child)
        elif isinstance(node, list):
            yield path, "; ".join(str(item) for item in node)
        else:
            yield path, node

    return dict(_leaves("", reso_facts or {}))

def _grouped_facts_from_dom(soup: BeautifulSoup) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
    """Read the "facts" sections from the page DOM.

    Each ``[data-testid='category-group']`` element (titled by its ``<h3>``)
    contains ``[data-testid='fact-category']`` elements (titled by their
    ``<h6>``) whose ``ul li`` items are the individual facts.

    Returns:
        ``(groups, flat)``. ``groups`` has one
        ``{"group", "category", "facts"}`` dict per non-empty category.
        ``flat`` maps ``"<group> | <category> | <n>"`` to the n-th fact text
        (1-based), with "Facts" / "Category" standing in for missing titles.
    """
    grouped: List[Dict[str, Any]] = []
    flattened: Dict[str, Any] = {}

    for section in soup.select("[data-testid='category-group']"):
        h3 = section.find("h3")
        section_title = _norm_text(h3.get_text(" ", strip=True)) if h3 else None

        for block in section.select("[data-testid='fact-category']"):
            h6 = block.find("h6")
            block_title = _norm_text(h6.get_text(" ", strip=True)) if h6 else None

            items: List[str] = []
            for li in block.select("ul li"):
                text = _norm_text(li.get_text(" ", strip=True))
                if text:
                    items.append(text)
                    flat_key = f"{section_title or 'Facts'} | {block_title or 'Category'} | {len(items)}"
                    flattened[flat_key] = text

            if items:
                grouped.append({"group": section_title, "category": block_title, "facts": items})

    return grouped, flattened

def _parse_price_history_table(soup: BeautifulSoup) -> List[Dict[str, Any]]:
    """Extract price-history rows from the first qualifying HTML table.

    A table qualifies when it has a ``<thead>`` whose header cells' first
    words include "date", "event" and "price", and a ``<tbody>``. Empty
    header cells are ignored. The first three cells of each body row are
    read as date, event and price. A single-cell row with ``colspan="3"`` is
    treated as the "Source:" line for the preceding entry.

    Returns:
        One dict per row with keys ``date``, ``event``, ``price_text``,
        ``price_numeric``, ``change_pct``, ``price_per_sqft_text`` and
        ``price_per_sqft`` (plus ``source_text`` when a source row follows).
        Empty when no qualifying table yields rows.
    """
    out = []
    required = {"date", "event", "price"}
    for tbl in soup.find_all("table"):
        thead = tbl.find("thead")
        if not thead:
            continue
        headers = [_norm_text(th.get_text(" ", strip=True)).lower() for th in thead.find_all("th")]
        if not headers:
            continue
        # Skip blank header cells: "".split()[0] would raise IndexError.
        first_words = {h.split()[0] for h in headers if h}
        if not required.issubset(first_words):
            continue
        tbody = tbl.find("tbody")
        if not tbody:
            continue
        last_entry = None
        for tr in tbody.find_all("tr", recursive=False):
            tds = tr.find_all("td", recursive=False)
            # Handle the "Source:" rows (colspan=3)
            if len(tds) == 1 and (tds[0].get("colspan") == "3"):
                if last_entry is not None:
                    src = _norm_text(tds[0].get_text(" ", strip=True))
                    src = re.sub(r"^\s*Source:\s*", "", src, flags=re.I)
                    last_entry["source_text"] = src
                continue
            if len(tds) < 3:
                continue
            date_td, event_td, price_td = tds[:3]
            date_span = date_td.select_one('span[data-testid="date-info"]')
            date_text = _norm_text(date_span.get_text(strip=True) if date_span else date_td.get_text(" ", strip=True))
            event_text = _norm_text(event_td.get_text(" ", strip=True))
            price_span = price_td.select_one('span[class*="StyledPriceText"]')
            if price_span:
                price_text = _norm_text(price_span.get_text(strip=True))
            else:
                # No dedicated price element: take the first "$123,456" run
                # from the cell text, else keep the whole cell text.
                price_text = _norm_text(price_td.get_text(" ", strip=True))
                m = re.search(r"\$\s*[\d,]+", price_text)
                price_text = m.group(0) if m else price_text
            pct_span = price_td.select_one('[data-testid="percent-display"]')
            pct_text = _norm_text(pct_span.get_text(strip=True)) if pct_span else None
            psf_span = price_td.select_one('[class*="StyledSqftLine"]')
            psf_text = _norm_text(psf_span.get_text(strip=True)) if psf_span else None
            out.append({
                "date": date_text or None,
                "event": event_text or None,
                "price_text": price_text or None,
                "price_numeric": _money_to_float(price_text),
                "change_pct": pct_text,
                "price_per_sqft_text": psf_text,
                # Only the part before "/" (e.g. "$150" of "$150/sqft") is numeric.
                "price_per_sqft": _money_to_float(psf_text.split("/")[0]) if psf_text else None,
            })
            last_entry = out[-1]
        # Stop at the first table that produced rows.
        if out:
            break
    return out

def parse_detail_page(html: str, *, verbose: bool = False) -> Dict[str, Any]:
    """Parse a listing detail page into a plain dict.

    Data is taken from the ``__NEXT_DATA__`` client cache when available
    (zestimate, photos, flattened ``resoFacts``, ``priceHistory``). Facts and
    price history fall back to the rendered DOM when the JSON gives none.
    Malformed pieces of the JSON (a non-dict ``adTargets``, a non-dict
    history entry, a non-string ``priceString``) are skipped rather than
    aborting the whole parse.

    Args:
        html: Raw HTML of the detail page.
        verbose: When true, print a one-line summary of what was found.

    Returns:
        A dict with keys ``address_full``, ``zestimate``, ``facts_groups``,
        ``facts_flat``, ``price_history`` and ``photos``.
        ``facts_groups`` is only filled by the DOM fallback.
    """
    soup = BeautifulSoup(html, "html.parser")
    next_data = _get_next_data_soup(html)

    out: Dict[str, Any] = {
        "address_full": None,
        "zestimate": None,
        "facts_groups": [],
        "facts_flat": {},
        "price_history": [],
        "photos": [],
    }

    # address headline best-effort: first <h1>/<h2> in document order
    h1 = soup.find(["h1", "h2"])
    if h1:
        out["address_full"] = _norm_text(h1.get_text(" ", strip=True))

    # JSON path
    prop = _parse_gdp_client_cache(next_data) if next_data else None
    if prop:
        # zestimate via adTargets
        adt = prop.get("adTargets")
        zesti = adt.get("zestimate") if isinstance(adt, dict) else None
        try:
            out["zestimate"] = float(zesti) if zesti is not None else None
        except (TypeError, ValueError, OverflowError):
            # Keep the raw value instead of discarding it.
            out["zestimate"] = zesti

        # photos
        photos = prop.get("responsivePhotos")
        if isinstance(photos, list):
            out["photos"] = [p.get("url") for p in photos if isinstance(p, dict) and p.get("url")]

        # facts (resoFacts)
        reso = prop.get("resoFacts")
        if isinstance(reso, dict):
            out["facts_flat"] = _flatten_reso_facts(reso)

        # price history (if present)
        ph = prop.get("priceHistory")
        if isinstance(ph, list) and ph:
            rows = []
            for ev in ph:
                if not isinstance(ev, dict):
                    continue
                price = ev.get("price")
                price_string = ev.get("priceString")
                if isinstance(price, (int, float)):
                    price_numeric = float(price)
                elif isinstance(price_string, str):
                    price_numeric = _money_to_float(price_string)
                else:
                    price_numeric = None
                rows.append({
                    "date": ev.get("dateString") or ev.get("date"),
                    "event": ev.get("event") or ev.get("eventDescription"),
                    "price_text": price_string or price,
                    "price_numeric": price_numeric,
                })
            out["price_history"] = rows

    # DOM fallbacks
    if not out["facts_flat"] and soup.select("[data-testid='category-group']"):
        groups, flat = _grouped_facts_from_dom(soup)
        out["facts_groups"] = groups
        out["facts_flat"] = flat

    if not out["price_history"]:
        out["price_history"] = _parse_price_history_table(soup)

    if verbose:
        print(f"[facts] flat keys: {len(out['facts_flat'])} | price_history rows: {len(out['price_history'])}")

    return out

def scrape_detail(url: str, session: requests.Session, *, verbose: bool = False) -> Dict[str, Any]:
    """Download one detail page and parse it.

    Errors raised by ``request_html`` propagate to the caller.
    """
    page_html = request_html(url, session=session)
    parsed = parse_detail_page(page_html, verbose=verbose)
    return parsed

# =========================================================
# Crawlers
# =========================================================

def scrape_region(
    place: Optional[str] = None,
    slug: Optional[str] = None,
    category_path: str = "houses",
    max_pages: Optional[int] = 100,
    delay_range: Tuple[float, float] = (2.3, 4.6),
    proxies: Optional[Dict[str, str]] = None,
    verbose: bool = True,
    stagnant_limit: int = 3,
) -> List[Listing]:
    """Crawl the search result pages for a region and collect its listings.

    Pages are followed through the next-page URL reported by each page. The
    crawl stops when any of these holds: ``max_pages`` reached, a page has
    no results, ``stagnant_limit`` consecutive pages add no new listings,
    the reported last page is reached, or no next-page URL is found.

    Args:
        place: Human-readable place (e.g. ``"Champaign, IL"``); used to
            derive the slug when *slug* is not given.
        slug: Region slug; takes precedence over *place*.
        category_path: Sub-path appended to the region URL.
        max_pages: Page cap; ``None`` disables it.
        delay_range: ``(min, max)`` seconds of random pause between pages.
        proxies: Optional proxy mapping for the HTTP session.
        verbose: Print progress lines.
        stagnant_limit: Consecutive no-growth pages tolerated before stopping.

    Returns:
        Unique listings in first-seen order, keyed by zpid, then URL.

    Raises:
        ValueError: Neither *place* nor *slug* was given.
        RuntimeError: A page could not be fetched (from ``request_html``).
    """
    if not slug:
        if not place:
            raise ValueError("Provide either place or slug")
        slug = slugify_place(place)

    referer = f"{ZILLOW_BASE}/{quote(slug)}/"
    url = zillow_search_url(slug, category_path=category_path)
    all_listings: Dict[str, Listing] = {}
    page = 1
    reported_total: Optional[int] = None
    last_count = 0
    stagnant_pages = 0

    # The context manager closes the session (and its pooled connections)
    # on every exit path, including a failed fetch.
    with make_session(proxies=proxies, referer=referer) as session:
        prewarm_session(session, slug)

        while True:
            if max_pages is not None and page > max_pages:
                if verbose:
                    print(f"[info] Reached max_pages={max_pages}. Stopping.")
                break

            if verbose:
                print(f"[fetch] {url}")

            html = request_html(url, session=session)
            results, current_page, total_pages, total_count, next_url = extract_results(html)

            if reported_total is None and total_count is not None:
                reported_total = total_count

            if not results:
                if verbose:
                    print("[info] No results on this page. Stopping.")
                break

            for r in results:
                listing = Listing.from_result(r)
                # Synthetic key for entries with neither zpid nor URL.
                key = listing.zpid or listing.url or f"p{page}-{len(all_listings)}"
                if key not in all_listings:
                    all_listings[key] = listing

            current_total = len(all_listings)
            if verbose:
                shown_page = current_page if current_page is not None else page
                print(f"[page] {shown_page} -> collected: {current_total}")

            # Plateau early stop
            if current_total <= last_count:
                stagnant_pages += 1
                if verbose:
                    print(f"[info] No growth on this page ({stagnant_pages}/{stagnant_limit}).")
                if stagnant_pages >= stagnant_limit:
                    if verbose:
                        print(f"[info] Stopping early: no new listings for {stagnant_limit} consecutive pages.")
                    break
            else:
                stagnant_pages = 0
                last_count = current_total

            # last-page guard
            if total_pages is not None and current_page is not None and current_page >= total_pages:
                if verbose:
                    print(f"[info] Hit last page ({total_pages}).")
                break

            if not next_url:
                if verbose:
                    print("[info] No nextUrl (JSON/DOM). Stopping.")
                break

            url = urljoin(ZILLOW_BASE, next_url)
            page += 1
            time.sleep(random.uniform(*delay_range))

    if verbose and reported_total:
        coverage = 100.0 * len(all_listings) / max(reported_total, 1)
        print(f"[summary] Collected {len(all_listings)} listings. Zillow reported total ~{reported_total}. Coverage ~{coverage:.1f}%.")

    return list(all_listings.values())

def scrape_details_for_listings(
    listings: List[Listing],
    delay_range: Tuple[float, float] = (1.6, 3.2),
    proxies: Optional[Dict[str, str]] = None,
    verbose: bool = True,
) -> List[Dict[str, Any]]:
    """Fetch and parse the detail page of every listing that has a URL.

    Each output row is the listing's own fields plus ``address_full``,
    ``zestimate`` and four JSON-encoded columns (``price_history_json``,
    ``facts_flat_json``, ``facts_groups_json``, ``photos_json``). A listing
    whose detail page fails to download or parse is reported (when verbose)
    and skipped; it never aborts the run.

    Args:
        listings: Listings to enrich.
        delay_range: ``(min, max)`` seconds of random pause between pages.
        proxies: Optional proxy mapping for the HTTP session.
        verbose: Print progress and warnings.

    Returns:
        One dict per successfully scraped listing, in input order.
    """
    detailed_rows: List[Dict[str, Any]] = []
    total = len(listings)

    # The context manager closes the session once the loop is done.
    with make_session(proxies=proxies) as session:
        for i, lst in enumerate(listings, 1):
            if not lst.url:
                continue
            if verbose:
                print(f"[detail {i}/{total}] {lst.url}")

            try:
                details = scrape_detail(lst.url, session=session, verbose=verbose)
            except Exception as e:
                # Per-listing boundary: one bad page must not lose the rest.
                if verbose:
                    print(f"[warn] Failed details for {lst.url}: {e}")
                continue

            row = asdict(lst)
            row.update({
                "address_full": details.get("address_full") or lst.address,
                "zestimate": details.get("zestimate"),
                "price_history_json": json.dumps(details.get("price_history", []), ensure_ascii=False),
                "facts_flat_json": json.dumps(details.get("facts_flat", {}), ensure_ascii=False),
                "facts_groups_json": json.dumps(details.get("facts_groups", []), ensure_ascii=False),
                "photos_json": json.dumps(details.get("photos", []), ensure_ascii=False),
            })
            detailed_rows.append(row)

            # Pause only when more listings follow.
            if i < total:
                time.sleep(random.uniform(*delay_range))

    return detailed_rows

# =========================================================
# Output
# =========================================================

def write_csv(rows: List[Dict[str, Any]] | List[Listing], out_path: str) -> None:
    """Write *rows* to *out_path* as UTF-8 CSV.

    ``Listing`` rows are converted with ``asdict`` (decided by the type of
    the first row). The header is the alphabetically sorted union of all row
    keys. An empty *rows* produces an empty file with no header.
    """
    if not rows:
        # Still create (or truncate) the file so the output path exists.
        with open(out_path, "w", newline="", encoding="utf-8") as f:
            f.write("")
        return

    if isinstance(rows[0], Listing):
        rows = [asdict(item) for item in rows]  # type: ignore

    columns = sorted({key for record in rows for key in record})
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)  # type: ignore

def write_jsonl(rows: List[Dict[str, Any]], out_path: str) -> None:
    """Write *rows* to *out_path* as JSON Lines: one JSON object per line.

    Non-ASCII characters are written as-is (UTF-8), not escaped.
    """
    with open(out_path, "w", encoding="utf-8") as f:
        for record in rows:
            line = json.dumps(record, ensure_ascii=False)
            f.write(line + "\n")

# =========================================================
# CLI / Main (ignores unknown args so it works in notebooks)
# =========================================================

def main():
    """Command-line entry point: crawl a region, then scrape listing details.

    Writes three files: a summary CSV of the search results, a details CSV
    and a details JSONL. The summary is written as soon as the search crawl
    finishes, so it survives an interruption of the much longer detail crawl.

    Unknown arguments are ignored so the function also runs inside a notebook
    kernel, which passes its own flags.
    """
    # Simple, notebook-friendly arg parsing
    import argparse
    p = argparse.ArgumentParser(add_help=True)
    p.add_argument("--place", default="Champaign, IL")
    p.add_argument("--category", default="houses")
    p.add_argument("--max-pages", type=int, default=3)
    p.add_argument("--stagnant-limit", type=int, default=3)
    p.add_argument("--out-summary", default="listings_summary.csv")
    p.add_argument("--out-details-csv", default="listings_details.csv")
    p.add_argument("--out-details-jsonl", default="listings_details.jsonl")
    p.add_argument("--quiet", action="store_true")
    args, _unknown = p.parse_known_args()  # <- ignore Jupyter --f=...

    verbose = not args.quiet

    listings = scrape_region(
        place=args.place,
        category_path=args.category,
        max_pages=args.max_pages,
        delay_range=(2.3, 4.6),
        verbose=verbose,
        stagnant_limit=args.stagnant_limit,
    )
    # Persist the search results before starting the detail crawl.
    write_csv(listings, args.out_summary)

    details = scrape_details_for_listings(
        listings,
        delay_range=(1.2, 2.6),
        verbose=verbose,
    )
    write_csv(details, args.out_details_csv)
    write_jsonl(details, args.out_details_jsonl)

    if verbose:
        print(f"[done] Summary rows: {len(listings)}  |  Detailed rows: {len(details)}")
        print(f"[files] {args.out_summary} | {args.out_details_csv} | {args.out_details_jsonl}")

# Run the CLI only when executed as a script (or as a notebook cell), not on import.
if __name__ == "__main__":
    main()


[fetch] https://www.zillow.com/champaign-il/houses/
[page] 1 -> collected: 41
[fetch] https://www.zillow.com/champaign-il/houses/2_p/
[page] 2 -> collected: 82
[fetch] https://www.zillow.com/champaign-il/houses/3_p/
[page] 3 -> collected: 116
[info] Reached max_pages=3. Stopping.
[detail 1/116] https://www.zillow.com/homedetails/6-Regent-Ct-Champaign-IL-61820/3198843_zpid/
[facts] flat keys: 21 | price_history rows: 0
[detail 2/116] https://www.zillow.com/homedetails/1707-W-Old-Church-Rd-Champaign-IL-61822/3215783_zpid/
[facts] flat keys: 21 | price_history rows: 0
[detail 3/116] https://www.zillow.com/homedetails/7-Greencroft-Dr-Champaign-IL-61821/3227991_zpid/
[facts] flat keys: 21 | price_history rows: 0
[detail 4/116] https://www.zillow.com/homedetails/909-W-Union-St-Champaign-IL-61821/3224077_zpid/
[facts] flat keys: 21 | price_history rows: 0
[detail 5/116] https://www.zillow.com/homedetails/3207-Ridgewood-Dr-Champaign-IL-61821/3229626_zpid/
[facts] flat keys: 21 | price_history 