Skip to content

Commit

Permalink
Update tests (100% Coverage) (#284)
Browse files Browse the repository at this point in the history
* Fix python3 exception

* Increase code coverage to 100%

* (Makefile) Add requirements target

* Test latest Node.js and v4.5 on CI

* Add OSX to CI build matrix
  • Loading branch information
Dwayne authored and Anorov committed Aug 24, 2019
1 parent 449bb55 commit b889255
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 18 deletions.
19 changes: 18 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,28 @@ matrix:
- python: '3.7'
dist: xenial
sudo: true
- env: cfscrape_node='4.5'
- env: cfscrape_node='node'
- os: osx
language: node_js
node_js: node
cache:
directories:
- $HOME/Library/Caches/Homebrew
- /usr/local/Homebrew

cache: pip

before_install:
- |
if [ -n "${cfscrape_node}" ]; then
source ~/.nvm/nvm.sh
nvm install "${cfscrape_node}"
nvm use "${cfscrape_node}"
fi
install:
- node -e "console.log(process.versions);"
- node -p process.versions
- make

script: make ci
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ init:
pip install pipenv -U
pipenv install --dev

# Export pinned dependency files from Pipfile.lock for tooling that
# cannot consume a Pipfile directly (e.g. pip, CI caches).
requirements:
	pipenv lock -r > requirements.txt
	pipenv lock --dev -r > requirements-dev.txt

test:
# This runs all of the tests, on both Python 2 and Python 3.
pipenv run tox --parallel auto
Expand Down
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ requests = "*"
pytest = "*"
# more_itertools is added to resolve a CI related issue with Pipenv + pytest
more_itertools = { version = ">=4.0.0", markers = "python_version >= '2.7'" }
# mock is added to resolve a CI related issue with Pipenv + pytest
mock = "*"
pytest-cov = "*"
pytest-xdist = "*"
pytest-forked = "*"
Expand Down
15 changes: 14 additions & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,23 @@ environment:
- PYTHON: 'C:/Python36-x64'
- PYTHON: 'C:/Python37'
- PYTHON: 'C:/Python37-x64'
cfscrape_node: '4.5'
- PYTHON: 'C:/Python37-x64'
cfscrape_node: 'node'

install:
- ps: >-
If ($env:cfscrape_node -ne $null) {
If ($env:cfscrape_node -Match "node") {
Install-Product node ""
}
Else {
Install-Product node $env:cfscrape_node
}
}
- 'set PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%'
- 'python --version'
- 'node -e "console.log(process.versions);"'
- 'node -p process.versions'
- 'pip -V'
- 'pip install pipenv'
- 'pipenv install --dev'
Expand Down
3 changes: 2 additions & 1 deletion cfscrape/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def solve_cf_challenge(self, resp, **original_kwargs):
# please open a GitHub issue so I can update the code accordingly.
raise ValueError(
"Unable to parse Cloudflare anti-bot IUAM page: %s %s"
% (e.message, BUG_REPORT)
% (e, BUG_REPORT)
)

# Solve the Javascript challenge
Expand Down Expand Up @@ -282,6 +282,7 @@ def solve_challenge(self, body, domain):
% challenge
)

stderr = ''
try:
node = subprocess.Popen(
["node", "-e", js], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
Expand Down
19 changes: 8 additions & 11 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class RedirectResponse(responses.CallbackResponse):
The request will error if it doesn't match a defined response.
"""

def __init__(self, callback=lambda request: None, **kwargs):
def __init__(self, callback=lambda request: None, location=None, **kwargs):
defaults = (('method', 'GET'),
('status', 302),
('headers', {'Location': '/'}),
Expand All @@ -62,6 +62,9 @@ def __init__(self, callback=lambda request: None, **kwargs):
for k, v in defaults:
kwargs.setdefault(k, v)

if location:
kwargs['headers']['Location'] = location

args = tuple(kwargs.pop(k) for k in ('status', 'headers', 'body'))
kwargs['callback'] = lambda request: callback(request) or args

Expand Down Expand Up @@ -121,7 +124,7 @@ def fixtures(filename):

# This fancy decorator wraps tests so the responses will be mocked.
# It could be called directly e.g. challenge_responses(*args)(test_func) -> wrapper
def challenge_responses(filename, jschl_answer):
def challenge_responses(filename, jschl_answer, redirect_to='/'):
# This function is called with the test_func and returns a new wrapper.
def challenge_responses_decorator(test):
@responses.activate
Expand All @@ -145,7 +148,9 @@ def on_redirect(request):
# We don't register the last response unless the redirect occurs
responses.add(DefaultResponse(url=url, body=requested_page))

responses.add(RedirectResponse(url=submit_uri, callback=on_redirect))
responses.add(RedirectResponse(
url=submit_uri, callback=on_redirect, location=redirect_to
))

return test(self, **cfscrape_kwargs)
return wrapper
Expand All @@ -165,14 +170,6 @@ def wrapper(self):
return recaptcha_responses_decorator


def server_error_response(test):
@responses.activate
def wrapper(self):
responses.add(DefaultResponse(url=url, status=500))
return test(self, **cfscrape_kwargs)
return wrapper


def cloudflare_cookies():
# Cloudflare cookie that should be set when challenge is presented
cfduid = Morsel()
Expand Down
173 changes: 169 additions & 4 deletions tests/test_cfscrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
import cfscrape
import requests
import re
import os
import ssl
import responses
import subprocess

from sure import expect
from . import challenge_responses, recaptcha_responses, requested_page, url, \
cloudflare_cookies, server_error_response
cloudflare_cookies, DefaultResponse, ChallengeResponse, fixtures, \
cfscrape_kwargs


class TestCloudflareScraper:

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031', redirect_to=url)
def test_js_challenge_10_04_2019(self, **kwargs):
scraper = cfscrape.CloudflareScraper(**kwargs)
expect(scraper.get(url).content).to.equal(requested_page)
Expand Down Expand Up @@ -61,6 +65,150 @@ def test_cf_recaptcha_15_04_2019(self, **kwargs):
finally:
ssl.OPENSSL_VERSION_NUMBER = v

@responses.activate
def test_js_challenge_unable_to_identify(self):
body = fixtures('js_challenge_10_04_2019.html')
body = body.replace(b'setTimeout', b'')

responses.add(ChallengeResponse(url=url, body=body))

scraper = cfscrape.create_scraper(**cfscrape_kwargs)
message = re.compile(r'Unable to identify Cloudflare IUAM Javascript')
scraper.get.when.called_with(url) \
.should.have.raised(ValueError, message)

@responses.activate
def test_js_challenge_unexpected_answer(self):
body = fixtures('js_challenge_10_04_2019.html')
body = body.replace(b'\'; 121\'', b'a.value = "foobar"')

responses.add(ChallengeResponse(url=url, body=body))

scraper = cfscrape.create_scraper(**cfscrape_kwargs)
message = re.compile(r'Cloudflare IUAM challenge returned unexpected answer')
scraper.get.when.called_with(url) \
.should.have.raised(ValueError, message)

@responses.activate
def test_js_challenge_missing_pass(self):
body = fixtures('js_challenge_10_04_2019.html')
body = body.replace(b'name="pass"', b'')

responses.add(ChallengeResponse(url=url, body=body))

scraper = cfscrape.create_scraper(**cfscrape_kwargs)
message = re.compile(r'Unable to parse .* pass is missing from challenge form')
scraper.get.when.called_with(url) \
.should.have.raised(ValueError, message)

def test_js_challenge_subprocess_unknown_error(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily disable this method to generate an exception
subprocess.Popen = None

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
scraper.get.when.called_with(url) \
.should.have.raised(TypeError)
caplog.text.should.match(re.compile(r'Error executing Cloudflare IUAM Javascript'))
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

def test_js_challenge_subprocess_system_error(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily Mock subprocess method to raise an OSError
def mock(*args, **kwargs):
raise OSError('System Error')

subprocess.Popen = mock

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
scraper.get.when.called_with(url) \
.should.have.raised(OSError, re.compile(r'System Error'))
caplog.text.should.equal('')
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

def test_js_challenge_subprocess_non_zero(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily Mock subprocess method to return non-zero exit code
def mock(*args, **kwargs):
def node(): pass
node.communicate = lambda: ('stdout', 'stderr')
node.returncode = 1
return node

subprocess.Popen = mock

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
message = re.compile(r'non-zero exit status')
scraper.get.when.called_with(url) \
.should.have.raised(subprocess.CalledProcessError, message)
caplog.text.should.match(re.compile(r'Error executing Cloudflare IUAM Javascript'))
caplog.text.should_not.match(re.compile(r'Outdated Node.js detected'))
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

def test_js_challenge_outdated_node(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily Mock subprocess method to return non-zero exit code
def mock(*args, **kwargs):
def node(): pass
node.communicate = lambda: ('stdout', 'Outdated Node.js detected')
node.returncode = 1
return node

subprocess.Popen = mock

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
message = re.compile(r'non-zero exit status')
scraper.get.when.called_with(url) \
.should.have.raised(subprocess.CalledProcessError, message)
caplog.text.should_not.match(re.compile(r'Error executing Cloudflare IUAM Javascript'))
caplog.text.should.match(re.compile(r'Outdated Node.js detected'))
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
def test_js_challenge_environment_error(self, **kwargs):
__path = os.environ['PATH']
# Temporarily unset PATH to hide Node.js
os.environ['PATH'] = ''
try:
scraper = cfscrape.CloudflareScraper(**kwargs)
message = re.compile(r'Missing Node.js runtime')
scraper.get.when.called_with(url) \
.should.have.raised(EnvironmentError, message)
finally:
os.environ['PATH'] = __path

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
def test_get_cookie_string(self, **kwargs):
# get_cookie_string doesn't accept the delay kwarg.
Expand Down Expand Up @@ -140,12 +288,16 @@ def __init__(self, *args, **kwargs):
Test.get_tokens.when.called_with(url, **kwargs) \
.should.have.raised(ValueError, message)

@server_error_response
def test_get_tokens_request_error(self, **kwargs):
@responses.activate
def test_get_tokens_request_error(self, caplog):
# get_tokens doesn't accept the delay kwarg.
kwargs = cfscrape_kwargs.copy()
kwargs.pop('delay', None)

responses.add(DefaultResponse(url=url, status=500))
cfscrape.get_tokens.when.called_with(url, **kwargs) \
.should.have.raised(requests.HTTPError)
caplog.text.should.match(re.compile(r'Could not collect tokens'))

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
def test_cloudflare_is_bypassed(self, **kwargs):
Expand All @@ -160,3 +312,16 @@ def __init__(self, *args, **kwargs):

scraper = Test(**kwargs)
scraper.cloudflare_is_bypassed(url).should.be.ok

def test_create_scraper_with_session(self):
session = requests.session()
session.headers = {'foo': 'bar'}
session.data = None

scraper = cfscrape.create_scraper(sess=session)
scraper.headers.should.equal(session.headers)
scraper.should_not.have.property('data')

session.data = {'bar': 'foo'}
scraper = cfscrape.create_scraper(sess=session)
scraper.data.should.equal(session.data)

0 comments on commit b889255

Please sign in to comment.