removal of global follow_redirects, set at the request level
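
Since Scraper subclasses requests.Session and, after this change, forwards **kwargs untouched, redirect handling is controlled per request with requests' own allow_redirects flag instead of a scraper-wide setting. A minimal migration sketch (the target URL is a placeholder):

    from scrapelib import Scraper

    s = Scraper(requests_per_minute=0, follow_robots=False)

    # before 0.8, redirect behavior was a scraper-wide setting:
    #     s = Scraper(follow_redirects=False)
    # as of 0.8, pass requests' allow_redirects flag on each call:
    resp = s.request('GET', 'http://example.com/some-redirect',
                     allow_redirects=False)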

commit aded67cc5f2bb14a08a8855f2a6da69f0bbc1ab7 1 parent fb03dfc
@jamesturk authored
Showing with 9 additions and 36 deletions.
  1. +9 −16 scrapelib/__init__.py
  2. +0 −20 scrapelib/tests/test_scraper.py
25 scrapelib/__init__.py
@@ -364,6 +364,9 @@ def __init__(self,
        if prefetch is not None:
            warnings.warn('prefetch is a no-op as of scrapelib 0.8',
                          DeprecationWarning)
+        if follow_redirects is not True:
+            warnings.warn('follow_redirects is a no-op as of scrapelib 0.8',
+                          DeprecationWarning)

        # added by this class
        if timeout:
@@ -377,9 +380,11 @@ def __init__(self,
        self.raise_errors = raise_errors

        # shortcuts to underlying requests config
+        if user_agent != _user_agent:
+            warnings.warn('passing "user_agent" to constructor is deprecated as '
+                          'of scrapelib 0.8', DeprecationWarning)
        if not headers or 'User-Agent' not in headers:
            self.user_agent = user_agent
-        self.follow_redirects = follow_redirects
        self.disable_compression = disable_compression

        # added by CachingSession
@@ -399,14 +404,10 @@ def __init__(self,
    @property
    def user_agent(self):
-        warnings.warn('user_agent attribute is deprecated, use '
-                      'headers["User-Agent"]', DeprecationWarning)
        return self.headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
-        warnings.warn('user_agent attribute is deprecated, use '
-                      'headers["User-Agent"]', DeprecationWarning)
        self.headers['User-Agent'] = value

    @property
@@ -423,8 +424,6 @@ def disable_compression(self, value):
            self.headers['Accept-Encoding'] = 'gzip, deflate, compress'

    def request(self, method, url, **kwargs):
-        # apply global redirect rule
-        allow_redirects = kwargs.pop('allow_redirects', self.follow_redirects)
        # apply global timeout
        timeout = kwargs.pop('timeout', self.timeout)
@@ -436,11 +435,8 @@ def request(self, method, url, **kwargs):
        headers = requests.sessions.merge_kwargs(kwargs.pop('headers', {}),
                                                 headers)

-        return super(Scraper, self).request(method, url,
-                                            allow_redirects=allow_redirects,
-                                            timeout=timeout, headers=headers,
-                                            **kwargs
-                                            )
+        return super(Scraper, self).request(method, url, timeout=timeout,
+                                            headers=headers, **kwargs)

    def urlopen(self, url, method='GET', body=None, retry_on_404=False):
        """
@@ -543,16 +539,13 @@ def scrapeshell(): # pragma: no cover
                        help='user agent to make requests with')
    parser.add_argument('--robots', dest='robots', action='store_true',
                        default=False, help='obey robots.txt')
-    parser.add_argument('--noredirect', dest='redirects', action='store_false',
-                        default=True, help="don't follow redirects")
    parser.add_argument('-p', '--postdata', dest='postdata',
                        default=None,
                        help="POST data (will make a POST instead of GET)")
    args = parser.parse_args(orig_argv)

    scraper = Scraper(user_agent=args.user_agent,
-                      follow_robots=args.robots,
-                      follow_redirects=args.redirects)
+                      follow_robots=args.robots)
    url = args.url
    if args.postdata:
        html = scraper.urlopen(args.url, 'POST', args.postdata)
20 scrapelib/tests/test_scraper.py
@@ -47,7 +47,6 @@ def test_fields():
                timeout=0,
                retry_attempts=-1,  # will be 0
                retry_wait_seconds=100,
-                follow_redirects=False,
                cache_write_only=False)
    assert_equal(s.user_agent, 'secret-agent')
    assert_equal(s.headers['test'], 'ok')
@@ -58,7 +57,6 @@ def test_fields():
    assert s.timeout is None  # 0 becomes None
    assert s.retry_attempts == 0  # -1 becomes 0
    assert s.retry_wait_seconds == 100
-    assert s.follow_redirects == False
    assert s.cache_write_only == False
@@ -171,24 +169,6 @@ def test_500():
    assert_equal(500, resp.response.code)


-def test_follow_redirect():
-    s = Scraper(requests_per_minute=0, follow_robots=False)
-
-    redirect_url = HTTPBIN + 'redirect/1'
-    final_url = HTTPBIN + 'get'
-
-    resp = s.urlopen(redirect_url)
-    assert_equal(final_url, resp.response.url)
-    assert_equal(redirect_url, resp.response.requested_url)
-    assert_equal(200, resp.response.code)
-
-    s.follow_redirects = False
-    resp = s.urlopen(redirect_url)
-    assert_equal(redirect_url, resp.response.url)
-    assert_equal(redirect_url, resp.response.requested_url)
-    assert_equal(302, resp.response.code)
-
-
def test_caching():
    cache_dir = tempfile.mkdtemp()
    #s = Scraper(requests_per_minute=0, follow_robots=False,
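
The deleted test exercised the old global flag; a per-request equivalent (a sketch only, not part of this commit, reusing the module's HTTPBIN constant and the hypothetical name test_request_level_redirects) could look like:

    def test_request_level_redirects():
        s = Scraper(requests_per_minute=0, follow_robots=False)
        redirect_url = HTTPBIN + 'redirect/1'

        # followed by default, exactly as plain requests would
        resp = s.request('GET', redirect_url)
        assert_equal(200, resp.status_code)

        # and disabled for a single call via requests' own flag
        resp = s.request('GET', redirect_url, allow_redirects=False)
        assert_equal(302, resp.status_code)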