Skip to content

Commit

Permalink
Merge pull request #12 from Hardeepex/sweep/provide_me_all_css_select…
Browse files Browse the repository at this point in the history
…ors_for_content_1

Sweep: provide me all css selectors for content for selectolax (✓ Sandbox Passed)
  • Loading branch information
Hardeepex committed Jan 3, 2024
2 parents f0a2d7b + de65ed7 commit 9d3b669
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
4 changes: 3 additions & 1 deletion docs/examples/tutorial/redflagdeals_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
"dealer": "str",
"comments_count": "int",
},
extra_preprocessors=[CSS("div.list_item")],
extra_preprocessors=[CSS("div.list_item"), CSS("a.offer_image"), CSS("h2.offer_title"), CSS("p.offer_description")],
)

# Define the SchemaScraper for the single deal pages
deal_scraper = SchemaScraper(
{
"title": "str",
"url": "url",
"image": "url",
"description": "str",
"price": "float",
"regular_price": "float",
"details": "str",
Expand Down
46 changes: 46 additions & 0 deletions tests/live/test_new_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import unittest

from scrapeghost import CSS, SchemaScraper


class TestNewScraper(unittest.TestCase):
    """Live test: scrape a single deal listing item with ``SchemaScraper``.

    NOTE(review): this lives under ``tests/live`` and ``scrape`` presumably
    calls the real model API, so it needs valid credentials and its output
    may vary between runs -- confirm before adding it to a required CI job.
    """

    def setUp(self):
        """Create the scraper under test.

        Only ``div.list_item`` is used as a preprocessor.  scrapeghost runs
        preprocessors one after another, each on the nodes returned by the
        previous one, so chaining ``a.offer_image`` -> ``h2.offer_title`` ->
        ``p.offer_description`` would look for the ``<h2>`` *inside* the
        ``<a>``, match nothing, and fail before the model is ever called.
        It would also discard the dealer and comment-count markup that the
        schema asks for.  The list item container already holds every field.
        """
        self.scraper = SchemaScraper(
            {
                "url": "url",
                "title": "str",
                "image": "str",
                "dealer": "str",
                "comments_count": "int",
            },
            extra_preprocessors=[CSS("div.list_item")],
        )

    def test_scrape(self):
        """Scrape a minimal listing fragment and compare every schema field."""
        # The fixture is passed as raw HTML rather than a URL; it contains
        # exactly one ``div.list_item`` with all fields the schema requests.
        test_webpage = """
<div class="list_item">
<a href="test_url" class="offer_image">
<img src="test_image.jpg">
</a>
<div class="list_item_body">
<p class="offer_dealer">Test Dealer</p>
<h2 class="offer_title">Test Title</h2>
<p class="offer_description">Test Description</p>
</div>
<ul class="list_item_counters">
<li class="offer_comment_counter">5</li>
</ul>
</div>
"""
        result = self.scraper.scrape(test_webpage)
        expected_data = {
            "url": "test_url",
            "title": "Test Title",
            "image": "test_image.jpg",
            "dealer": "Test Dealer",
            "comments_count": 5,
        }
        self.assertEqual(result.data, expected_data)

# Allow running this test module directly as a script, in addition to
# discovery by a test runner.
if __name__ == "__main__":
unittest.main()

0 comments on commit 9d3b669

Please sign in to comment.