From ec036bb9891e479c1b0a27477a613cc52ac003f6 Mon Sep 17 00:00:00 2001 From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com> Date: Mon, 20 May 2024 12:13:30 -0700 Subject: [PATCH] - optimizations & updated realtor headers --- homeharvest/__init__.py | 4 +--- homeharvest/core/scrapers/__init__.py | 32 +++++++++++++++++---------- pyproject.toml | 2 +- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 117d56c..2bbacf8 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -49,12 +49,10 @@ def scrape_property( site = RealtorScraper(scraper_input) results = site.search() - properties_dfs = [process_result(result) for result in results] + properties_dfs = [df for result in results if not (df := process_result(result)).empty] if not properties_dfs: return pd.DataFrame() - properties_dfs = [df for df in properties_dfs if not df.empty] - with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 859aff9..8888e78 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -6,6 +6,7 @@ import uuid from ...exceptions import AuthenticationError from .models import Property, ListingType, SiteName +import json @dataclass @@ -71,18 +72,25 @@ def handle_location(self): ... @staticmethod def get_access_token(): - url = "https://graph.realtor.com/auth/token" - - payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}' - headers = { - "Host": "graph.realtor.com", - "x-client-version": "24.20.4.149916", - "accept": "*/*", - "content-type": "Application/json", - "user-agent": "Realtor.com/24.20.4.149916 CFNetwork/1410.0.3 Darwin/22.6.0", - "accept-language": "en-US,en;q=0.9", - } - response = requests.post(url, headers=headers, data=payload) + device_id = str(uuid.uuid4()).upper() + + response = requests.post( + "https://graph.realtor.com/auth/token", + headers={ + 'Host': 'graph.realtor.com', + 'Accept': '*/*', + 'Content-Type': 'Application/json', + 'X-Client-ID': 'rdc_mobile_native,iphone', + 'X-Visitor-ID': device_id, + 'X-Client-Version': '24.21.23.679885', + 'Accept-Language': 'en-US,en;q=0.9', + 'User-Agent': 'Realtor.com/24.21.23.679885 CFNetwork/1494.0.7 Darwin/23.4.0', + }, + data=json.dumps({ + "grant_type": "device_mobile", + "device_id": device_id, + "client_app_id": "rdc_mobile_native,24.21.23.679885,iphone" + })) data = response.json() diff --git a/pyproject.toml b/pyproject.toml index 500dfe1..221c02c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.27" +version = "0.3.28" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/HomeHarvest"