Skip to content

Commit

Permalink
Update tests (100% Coverage) (#284)
Browse files Browse the repository at this point in the history
* Fix python3 exception

* Increase code coverage to 100%

* (Makefile) Add requirements target

* Test latest Node.js and v4.5 on CI

* Add OSX to CI build matrix
  • Loading branch information
Dwayne authored and Anorov committed Aug 24, 2019
1 parent 449bb55 commit b889255
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 18 deletions.
19 changes: 18 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,28 @@ matrix:
- python: '3.7'
dist: xenial
sudo: true
- env: cfscrape_node='4.5'
- env: cfscrape_node='node'
- os: osx
language: node_js
node_js: node
cache:
directories:
- $HOME/Library/Caches/Homebrew
- /usr/local/Homebrew

cache: pip

before_install:
- |
if [ -n "${cfscrape_node}" ]; then
source ~/.nvm/nvm.sh
nvm install "${cfscrape_node}"
nvm use "${cfscrape_node}"
fi
install:
- node -e "console.log(process.versions);"
- node -p process.versions
- make

script: make ci
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ init:
pip install pipenv -U
pipenv install --dev

# Export pinned dependency files from Pipfile.lock for tooling that
# cannot consume a Pipfile directly (e.g. pip, CI caches).
requirements:
	pipenv lock -r > requirements.txt
	pipenv lock --dev -r > requirements-dev.txt

test:
# This runs all of the tests, on both Python 2 and Python 3.
pipenv run tox --parallel auto
Expand Down
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ requests = "*"
pytest = "*"
# more_itertools is added to resolve a CI related issue with Pipenv + pytest
more_itertools = { version = ">=4.0.0", markers = "python_version >= '2.7'" }
# mock is added to resolve a CI related issue with Pipenv + pytest
mock = "*"
pytest-cov = "*"
pytest-xdist = "*"
pytest-forked = "*"
Expand Down
15 changes: 14 additions & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,23 @@ environment:
- PYTHON: 'C:/Python36-x64'
- PYTHON: 'C:/Python37'
- PYTHON: 'C:/Python37-x64'
cfscrape_node: '4.5'
- PYTHON: 'C:/Python37-x64'
cfscrape_node: 'node'

install:
- ps: >-
If ($env:cfscrape_node -ne $null) {
If ($env:cfscrape_node -Match "node") {
Install-Product node ""
}
Else {
Install-Product node $env:cfscrape_node
}
}
- 'set PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%'
- 'python --version'
- 'node -e "console.log(process.versions);"'
- 'node -p process.versions'
- 'pip -V'
- 'pip install pipenv'
- 'pipenv install --dev'
Expand Down
3 changes: 2 additions & 1 deletion cfscrape/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def solve_cf_challenge(self, resp, **original_kwargs):
# please open a GitHub issue so I can update the code accordingly.
raise ValueError(
"Unable to parse Cloudflare anti-bot IUAM page: %s %s"
% (e.message, BUG_REPORT)
% (e, BUG_REPORT)
)

# Solve the Javascript challenge
Expand Down Expand Up @@ -282,6 +282,7 @@ def solve_challenge(self, body, domain):
% challenge
)

stderr = ''
try:
node = subprocess.Popen(
["node", "-e", js], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
Expand Down
19 changes: 8 additions & 11 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class RedirectResponse(responses.CallbackResponse):
The request will error if it doesn't match a defined response.
"""

def __init__(self, callback=lambda request: None, **kwargs):
def __init__(self, callback=lambda request: None, location=None, **kwargs):
defaults = (('method', 'GET'),
('status', 302),
('headers', {'Location': '/'}),
Expand All @@ -62,6 +62,9 @@ def __init__(self, callback=lambda request: None, **kwargs):
for k, v in defaults:
kwargs.setdefault(k, v)

if location:
kwargs['headers']['Location'] = location

args = tuple(kwargs.pop(k) for k in ('status', 'headers', 'body'))
kwargs['callback'] = lambda request: callback(request) or args

Expand Down Expand Up @@ -121,7 +124,7 @@ def fixtures(filename):

# This fancy decorator wraps tests so the responses will be mocked.
# It could be called directly e.g. challenge_responses(*args)(test_func) -> wrapper
def challenge_responses(filename, jschl_answer):
def challenge_responses(filename, jschl_answer, redirect_to='/'):
# This function is called with the test_func and returns a new wrapper.
def challenge_responses_decorator(test):
@responses.activate
Expand All @@ -145,7 +148,9 @@ def on_redirect(request):
# We don't register the last response unless the redirect occurs
responses.add(DefaultResponse(url=url, body=requested_page))

responses.add(RedirectResponse(url=submit_uri, callback=on_redirect))
responses.add(RedirectResponse(
url=submit_uri, callback=on_redirect, location=redirect_to
))

return test(self, **cfscrape_kwargs)
return wrapper
Expand All @@ -165,14 +170,6 @@ def wrapper(self):
return recaptcha_responses_decorator


def server_error_response(test):
@responses.activate
def wrapper(self):
responses.add(DefaultResponse(url=url, status=500))
return test(self, **cfscrape_kwargs)
return wrapper


def cloudflare_cookies():
# Cloudflare cookie that should be set when challenge is presented
cfduid = Morsel()
Expand Down
173 changes: 169 additions & 4 deletions tests/test_cfscrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
import cfscrape
import requests
import re
import os
import ssl
import responses
import subprocess

from sure import expect
from . import challenge_responses, recaptcha_responses, requested_page, url, \
cloudflare_cookies, server_error_response
cloudflare_cookies, DefaultResponse, ChallengeResponse, fixtures, \
cfscrape_kwargs


class TestCloudflareScraper:

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031', redirect_to=url)
def test_js_challenge_10_04_2019(self, **kwargs):
scraper = cfscrape.CloudflareScraper(**kwargs)
expect(scraper.get(url).content).to.equal(requested_page)
Expand Down Expand Up @@ -61,6 +65,150 @@ def test_cf_recaptcha_15_04_2019(self, **kwargs):
finally:
ssl.OPENSSL_VERSION_NUMBER = v

@responses.activate
def test_js_challenge_unable_to_identify(self):
body = fixtures('js_challenge_10_04_2019.html')
body = body.replace(b'setTimeout', b'')

responses.add(ChallengeResponse(url=url, body=body))

scraper = cfscrape.create_scraper(**cfscrape_kwargs)
message = re.compile(r'Unable to identify Cloudflare IUAM Javascript')
scraper.get.when.called_with(url) \
.should.have.raised(ValueError, message)

@responses.activate
def test_js_challenge_unexpected_answer(self):
body = fixtures('js_challenge_10_04_2019.html')
body = body.replace(b'\'; 121\'', b'a.value = "foobar"')

responses.add(ChallengeResponse(url=url, body=body))

scraper = cfscrape.create_scraper(**cfscrape_kwargs)
message = re.compile(r'Cloudflare IUAM challenge returned unexpected answer')
scraper.get.when.called_with(url) \
.should.have.raised(ValueError, message)

@responses.activate
def test_js_challenge_missing_pass(self):
body = fixtures('js_challenge_10_04_2019.html')
body = body.replace(b'name="pass"', b'')

responses.add(ChallengeResponse(url=url, body=body))

scraper = cfscrape.create_scraper(**cfscrape_kwargs)
message = re.compile(r'Unable to parse .* pass is missing from challenge form')
scraper.get.when.called_with(url) \
.should.have.raised(ValueError, message)

def test_js_challenge_subprocess_unknown_error(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily disable this method to generate an exception
subprocess.Popen = None

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
scraper.get.when.called_with(url) \
.should.have.raised(TypeError)
caplog.text.should.match(re.compile(r'Error executing Cloudflare IUAM Javascript'))
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

def test_js_challenge_subprocess_system_error(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily Mock subprocess method to raise an OSError
def mock(*args, **kwargs):
raise OSError('System Error')

subprocess.Popen = mock

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
scraper.get.when.called_with(url) \
.should.have.raised(OSError, re.compile(r'System Error'))
caplog.text.should.equal('')
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

def test_js_challenge_subprocess_non_zero(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily Mock subprocess method to return non-zero exit code
def mock(*args, **kwargs):
def node(): pass
node.communicate = lambda: ('stdout', 'stderr')
node.returncode = 1
return node

subprocess.Popen = mock

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
message = re.compile(r'non-zero exit status')
scraper.get.when.called_with(url) \
.should.have.raised(subprocess.CalledProcessError, message)
caplog.text.should.match(re.compile(r'Error executing Cloudflare IUAM Javascript'))
caplog.text.should_not.match(re.compile(r'Outdated Node.js detected'))
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

def test_js_challenge_outdated_node(self, caplog):
def test(self, **kwargs):
__Popen = subprocess.Popen

# Temporarily Mock subprocess method to return non-zero exit code
def mock(*args, **kwargs):
def node(): pass
node.communicate = lambda: ('stdout', 'Outdated Node.js detected')
node.returncode = 1
return node

subprocess.Popen = mock

try:
scraper = cfscrape.CloudflareScraper(**kwargs)
message = re.compile(r'non-zero exit status')
scraper.get.when.called_with(url) \
.should.have.raised(subprocess.CalledProcessError, message)
caplog.text.should_not.match(re.compile(r'Error executing Cloudflare IUAM Javascript'))
caplog.text.should.match(re.compile(r'Outdated Node.js detected'))
finally:
subprocess.Popen = __Popen

challenge_responses(
filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031'
)(test)(self)

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
def test_js_challenge_environment_error(self, **kwargs):
__path = os.environ['PATH']
# Temporarily unset PATH to hide Node.js
os.environ['PATH'] = ''
try:
scraper = cfscrape.CloudflareScraper(**kwargs)
message = re.compile(r'Missing Node.js runtime')
scraper.get.when.called_with(url) \
.should.have.raised(EnvironmentError, message)
finally:
os.environ['PATH'] = __path

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
def test_get_cookie_string(self, **kwargs):
# get_cookie_string doesn't accept the delay kwarg.
Expand Down Expand Up @@ -140,12 +288,16 @@ def __init__(self, *args, **kwargs):
Test.get_tokens.when.called_with(url, **kwargs) \
.should.have.raised(ValueError, message)

@server_error_response
def test_get_tokens_request_error(self, **kwargs):
@responses.activate
def test_get_tokens_request_error(self, caplog):
# get_tokens doesn't accept the delay kwarg.
kwargs = cfscrape_kwargs.copy()
kwargs.pop('delay', None)

responses.add(DefaultResponse(url=url, status=500))
cfscrape.get_tokens.when.called_with(url, **kwargs) \
.should.have.raised(requests.HTTPError)
caplog.text.should.match(re.compile(r'Could not collect tokens'))

@challenge_responses(filename='js_challenge_10_04_2019.html', jschl_answer='18.8766915031')
def test_cloudflare_is_bypassed(self, **kwargs):
Expand All @@ -160,3 +312,16 @@ def __init__(self, *args, **kwargs):

scraper = Test(**kwargs)
scraper.cloudflare_is_bypassed(url).should.be.ok

def test_create_scraper_with_session(self):
session = requests.session()
session.headers = {'foo': 'bar'}
session.data = None

scraper = cfscrape.create_scraper(sess=session)
scraper.headers.should.equal(session.headers)
scraper.should_not.have.property('data')

session.data = {'bar': 'foo'}
scraper = cfscrape.create_scraper(sess=session)
scraper.data.should.equal(session.data)

0 comments on commit b889255

Please sign in to comment.