Skip to content

Commit

Permalink
- base
Browse files Browse the repository at this point in the history
  • Loading branch information
ZacharyHampton committed Sep 15, 2023
1 parent 325a03e commit ed7e76e
Show file tree
Hide file tree
Showing 9 changed files with 350 additions and 0 deletions.
30 changes: 30 additions & 0 deletions homeharvest/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from .core.scrapers.redfin import RedfinScraper
from .core.scrapers.types import ListingType, Home
from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType


# Registry of supported sites: maps the public ``site_name`` string to the
# scraper class that handles it.
_scrapers = dict(redfin=RedfinScraper)


def scrape_property(
    location: str,
    listing_type: str = "for_sale",
    site_name: str = "redfin",
) -> list[Home]:
    """Run a property search on one of the registered sites.

    Args:
        location: Search location, handed to the scraper unchanged.
        listing_type: Name of a ``ListingType`` member, matched
            case-insensitively: ``for_sale``, ``for_rent`` or ``sold``.
        site_name: Key of the scraper to use in the ``_scrapers`` registry.

    Returns:
        Whatever the selected scraper's ``search()`` returns.
        TODO: eventually, return a pandas dataframe.

    Raises:
        InvalidSite: If ``site_name`` is not a registered site.
        InvalidListingType: If ``listing_type`` does not name a
            ``ListingType`` member.
    """
    # Validate the site first so an unknown site is reported before an
    # unknown listing type.
    if site_name not in _scrapers:
        raise InvalidSite(f"Provided site, '{site_name}', does not exist.")

    member_name = listing_type.upper()
    known_types = ListingType.__members__
    if member_name not in known_types:
        raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")

    request = ScraperInput(
        location=location,
        listing_type=known_types[member_name],
    )
    scraper_cls = _scrapers[site_name]
    return scraper_cls(request).search()
25 changes: 25 additions & 0 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from dataclasses import dataclass
import requests
from .types import Home, ListingType


@dataclass
class ScraperInput:
    """Parameters describing a single scrape request.

    Attributes:
        location: Search location text.
        listing_type: Which kind of listing to search for.
        proxy_url: Optional proxy URL; ``None`` (the default) means no
            proxy is configured.
    """

    location: str
    listing_type: ListingType
    proxy_url: str | None = None


class Scraper:
    """Base class for site scrapers.

    Holds the search parameters and a ``requests.Session`` that is
    optionally configured to go through a proxy.
    """

    def __init__(self, scraper_input: ScraperInput):
        """Store the search parameters and set up the HTTP session.

        Args:
            scraper_input: The request parameters. ``location`` and
                ``listing_type`` are copied onto the instance; a truthy
                ``proxy_url`` is applied to both HTTP and HTTPS traffic.
        """
        self.location = scraper_input.location
        # Keep the requested listing type on the instance; without this the
        # value validated by the caller is lost and a subclass cannot tell
        # which kind of listing was asked for.
        self.listing_type = scraper_input.listing_type
        self.session = requests.Session()

        if scraper_input.proxy_url:
            # Route both schemes through the same proxy.
            self.session.proxies = {
                "http": scraper_input.proxy_url,
                "https": scraper_input.proxy_url,
            }

    def search(self) -> list[Home]:
        """Search the site for homes.

        The base implementation is a placeholder with an empty body (it
        returns ``None``); presumably concrete scrapers override it.
        """
        ...
6 changes: 6 additions & 0 deletions homeharvest/core/scrapers/redfin/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .. import Scraper


class RedfinScraper(Scraper):
    """Scraper for Redfin.

    Adds nothing of its own yet: construction is delegated entirely to the
    base ``Scraper``.
    """

    def __init__(self, scraper_input):
        """Initialise via the base class using ``scraper_input``."""
        super().__init__(scraper_input)
23 changes: 23 additions & 0 deletions homeharvest/core/scrapers/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from dataclasses import dataclass
from enum import Enum


class ListingType(Enum):
    """Kinds of property listing that can be searched for.

    Each member's value is the lower-case form of its name.
    """

    FOR_SALE = "for_sale"
    FOR_RENT = "for_rent"
    SOLD = "sold"


@dataclass
class Address:
address_one: str
city: str
state: str
zip_code: str

address_two: str | None = None


@dataclass
class Home:
    """A single home returned by a scraper.

    Attributes:
        address: Where the home is located.
    """

    address: Address
Empty file.
8 changes: 8 additions & 0 deletions homeharvest/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class InvalidSite(Exception):
    """Raised when a provided site does not exist."""


class InvalidListingType(Exception):
    """Raised when a provided listing type does not exist."""
245 changes: 245 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.4.2"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
9 changes: 9 additions & 0 deletions tests/test_redfin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from homeharvest import scrape_property


def test_redfin():
    """Smoke test: a ZIP-code search with the default arguments yields a result."""
    homes = scrape_property(location="85001")
    assert homes is not None

0 comments on commit ed7e76e

Please sign in to comment.