Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v3
- name: Set up Python
Expand All @@ -42,12 +42,6 @@ jobs:
- name: Build package
run: >-
make build
- name: Publish package to Test PyPI (always)
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
skip_existing: true
- name: Publish package to PyPI (only if pushing a tag)
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/
'device': 'desktop',
# Use some data extraction rules
'extract_rules': {'title': 'h1'},
# Use AI to extract data from the page
'ai_extract_rules': {'product_name': 'The name of the product', 'price': 'The price in USD'},
# Wrap response in JSON
'json_response': False,
# Interact with the webpage you want to scrape
Expand Down
8 changes: 4 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ certifi==2022.12.7
charset-normalizer==3.1.0
distlib==0.3.6
filelock==3.10.0
flake8==3.9.2
flake8==6.0.0
idna==3.4
iniconfig==2.0.0
mccabe==0.6.1
mccabe==0.7.0
more-itertools==9.1.0
packaging==23.0
platformdirs==3.1.1
pluggy==0.13.1
py==1.11.0
pycodestyle==2.7.0
pyflakes==2.3.1
pycodestyle==2.10.0
pyflakes==3.0.1
pytest==7.2.2
requests==2.28.2
six==1.16.0
Expand Down
2 changes: 1 addition & 1 deletion scrapingbee/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.0.1"
__version__ = "2.0.2"
2 changes: 2 additions & 0 deletions scrapingbee/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def process_params(params: dict) -> dict:
new_params[k] = process_cookies(v)
elif k == 'extract_rules':
new_params[k] = process_json_stringify_param(v, 'extract_rules')
elif k == 'ai_extract_rules':
new_params[k] = process_json_stringify_param(v, 'ai_extract_rules')
elif k == 'js_scenario':
new_params[k] = process_json_stringify_param(v, 'js_scenario')
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@
'Programming Language :: Python :: 3.11',
'Topic :: Software Development :: Libraries :: Python Modules',
],
python_requires='>=3.7',
python_requires='>=3.8',
install_requires=['requests'],
)
21 changes: 21 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,27 @@ def test_get_with_js_scenario(mock_session, client):
)


@mock.patch('scrapingbee.client.Session')
def test_get_with_ai_extract_rules(mock_session, client):
    """ai_extract_rules should be JSON-stringified and URL-encoded into the request URL."""
    rules = {
        "product_name": "The name of the product",
        "price": "The price in USD",
    }
    # The dict is expected to appear as URL-encoded JSON in the query string.
    expected_url = (
        'https://app.scrapingbee.com/api/v1/'
        '?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
        'ai_extract_rules=%7B%22product_name%22%3A+%22The+name+of+the+product%22%2C+%22'
        'price%22%3A+%22The+price+in+USD%22%7D'
    )

    client.get('https://httpbin.org', params={'ai_extract_rules': rules})

    session_request = mock_session.return_value.request
    session_request.assert_called_with(
        'GET',
        expected_url,
        data=None,
        headers=DEFAULT_HEADERS,
    )


@mock.patch('scrapingbee.client.Session')
def test_post(mock_session, client):
'''It should make a POST request with some data'''
Expand Down
9 changes: 8 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_process_headers():
"""It should add a Spb- prefix to header names"""
output = process_headers({"Accept-Language": "En-US"})
assert output == {
"User-Agent": "ScrapingBee-Python/2.0.1",
"User-Agent": "ScrapingBee-Python/2.0.2",
"Spb-Accept-Language": "En-US",
}

Expand Down Expand Up @@ -46,6 +46,13 @@ def test_process_js_scenario():
assert output == '{"instructions": [{"click": "#buttonId"}]}'


def test_process_ai_extract_rules():
    """ai_extract_rules given as a dict should come back as a JSON string."""
    rules = {
        "product_name": "The name of the product",
        "price": "The price in USD",
    }
    expected = '{"product_name": "The name of the product", "price": "The price in USD"}'

    stringified = process_json_stringify_param(rules, "ai_extract_rules")

    assert stringified == expected


def test_process_params():
"""It should keep boolean parameters"""
output = process_params({"render_js": True})
Expand Down