Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
Conflicts:
	wpull/version.py
  • Loading branch information
chfoo committed Feb 9, 2015
2 parents 5ab01d8 + e47fac8 commit 5520e76
Show file tree
Hide file tree
Showing 21 changed files with 285 additions and 41 deletions.
10 changes: 10 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@
What's New
==========

0.1006.1 (2015-02-09)
=====================

* Security: Fixed ``Referer`` HTTP header field leaking from HTTPS to HTTP.
* Fixed ``AttributeError`` in proxy when using PhantomJS and ``pre_response`` scripting hook.
* Fixed early program end when server returns error fetching robots.txt.
* Fixed uninteresting errors being output when the program is forcefully closed.
* Fixed ``--referer`` option not applied to subsequent requests.


0.1006 (2015-02-01)
===================

Expand Down
6 changes: 6 additions & 0 deletions wpull/abstract/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ class DictableMixin(object):
def to_dict(self):
'''Convert to a dict suitable for JSON.'''

@classmethod
def call_to_dict_or_none(cls, instance):
'''Call ``to_dict`` or return ``None``.'''
if hasattr(instance, 'to_dict'):
return instance.to_dict()


class SerializableMixin(object):
'''Serialize and unserialize methods.'''
Expand Down
2 changes: 2 additions & 0 deletions wpull/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def graceful_stop_callback():

def forceful_stop_callback():
_logger.info(_('Forcing immediate stop...'))
logging.raiseExceptions = False
self._event_loop.stop()

self._event_loop.add_signal_handler(signal.SIGINT,
Expand Down Expand Up @@ -266,6 +267,7 @@ def _start_servers(self):
Coroutine.
'''
for task in self._server_tasks:
_logger.debug(__('Starting task {}', task))
server = yield From(task)
self._servers.append(server)

Expand Down
45 changes: 43 additions & 2 deletions wpull/app_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ def test_immediate_robots_forbidden(self):
def test_immediate_robots_error(self):
arg_parser = AppArgumentParser()
args = arg_parser.parse_args([
'http://mordor.invalid:1',
'http://127.0.0.1:1',
self.get_url('/'),
'--recursive',
'--tries', '1',
Expand Down Expand Up @@ -1053,6 +1053,42 @@ def test_strip_session_id(self):
self.assertEqual(0, exit_code)
self.assertEqual(1, builder.factory['Statistics'].files)

@wpull.testing.async.async_test(timeout=DEFAULT_TIMEOUT)
def test_referer_option(self):
arg_parser = AppArgumentParser()
args = arg_parser.parse_args([
self.get_url('/referrer/'),
'-r',
'--referer', 'http://left.shark/'
])
builder = Builder(args, unit_test=True)

with cd_tempdir():
app = builder.build()
exit_code = yield From(app.run())

self.assertEqual(0, exit_code)
self.assertEqual(2, builder.factory['Statistics'].files)

@wpull.testing.async.async_test(timeout=DEFAULT_TIMEOUT)
def test_referer_option_negative(self):
arg_parser = AppArgumentParser()
args = arg_parser.parse_args([
self.get_url('/referrer/'),
'-r',
'--referer', 'http://superinformation.highway/',
'--tries', '1',
'--waitretry', '.1'
])
builder = Builder(args, unit_test=True)

with cd_tempdir():
app = builder.build()
exit_code = yield From(app.run())

self.assertEqual(0, exit_code)
self.assertEqual(0, builder.factory['Statistics'].files)

@unittest.skip('not a good idea to test continuously on external servers')
@wpull.testing.async.async_test(timeout=DEFAULT_TIMEOUT)
def test_youtube_dl(self):
Expand Down Expand Up @@ -1241,7 +1277,12 @@ def test_app_phantomjs(self):
self.assertEqual(0, exit_code)
self.assertGreaterEqual(builder.factory['Statistics'].files, 1)

@wpull.testing.async.async_test(timeout=DEFAULT_TIMEOUT)
# FIXME: for some reason, it never makes a connection to the proxy under
# PyPy and Travis CI. eg: https://travis-ci.org/chfoo/wpull/jobs/49829901
@unittest.skipIf(IS_PYPY, 'Broken under Travis CI')
@wpull.testing.async.async_test(
timeout=DEFAULT_TIMEOUT * 3 if IS_PYPY else DEFAULT_TIMEOUT
)
def test_app_phantomjs_scroll(self):
arg_parser = AppArgumentParser()

Expand Down
5 changes: 3 additions & 2 deletions wpull/dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from wpull.cache import FIFOCache
from wpull.errors import DNSNotFound, NetworkError
from wpull.hook import HookableMixin, HookDisconnected
import wpull.util


_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -301,10 +302,10 @@ def _query(self, host, query_type):
# class name instead.
raise DNSNotFound(
'DNS resolution failed: {error}'
.format(error=error.__class__.__name__)
.format(error=wpull.util.get_exception_message(error))
) from error
except dns.exception.DNSException as error:
raise NetworkError(
'DNS resolution error: {error}'
.format(error=error.__class.__.__name__)
.format(error=wpull.util.get_exception_message(error))
) from error
4 changes: 2 additions & 2 deletions wpull/driver/PhantomJS.hx
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,8 @@ class PhantomJS {
var url:String = Reflect.field(config, "url");

trace('Load URL $url.');
page.open(url, function () {
trace("Page loaded!");
page.open(url, function (status) {
trace('Page loaded! $status.');
pageLoaded = true;
});
// For PhantomJS, we need to poll so that the callback isn't in
Expand Down
4 changes: 2 additions & 2 deletions wpull/driver/phantomjs.js
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ PhantomJS.prototype = {
var _g = this;
var url = Reflect.field(this.config,"url");
console.log("Load URL " + url + ".");
this.page.open(url,function() {
console.log("Page loaded!");
this.page.open(url,function(status) {
console.log("Page loaded! " + status + ".");
_g.pageLoaded = true;
});
this.pollPageLoad();
Expand Down
17 changes: 7 additions & 10 deletions wpull/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,50 @@

class ServerError(ValueError):
'''Server issued an error.'''
pass


class ProtocolError(ValueError):
'''A protocol was not followed.'''
pass


class SSLVerificationError(OSError):
'''A problem occurred validating SSL certificates.'''
pass


SSLVerficationError = SSLVerificationError


class NetworkError(OSError):
'''A networking error.'''
pass


class ConnectionRefused(NetworkError):
'''Server was online, but nothing was being served.'''
pass


class DNSNotFound(NetworkError):
'''Server's IP address could not be located.'''
pass


class NetworkTimedOut(NetworkError):
'''Connection read/write timed out.'''
pass


# TODO: use AuthenticationError


class ExitStatus(object):
'''Program exit status codes.
Attributes:
generic_error (1): A serious error occurred.
parser_error (2): A document failed to parse.
generic_error (1): An unclassified serious or fatal error occurred.
parser_error (2): A local document or configuration file could not
be parsed.
file_io_error (3): A problem with reading/writing a file occurred.
network_failure (4): A problem with the network occurred such as a DNS
resolver error or a connection was refused.
ssl_verification_error (5): A server's SSL/TLS certificate was invalid.
authentication_failure (7): A problem with a username or password.
authentication_failure (6): A problem with a username or password.
protocol_error (7): A problem with communicating with a server
occurred.
server_error (8): The server had problems fulfilling our requests.
Expand Down
12 changes: 6 additions & 6 deletions wpull/ftp/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ class Response(DictableMixin, ProtocolResponseMixin):
Attributes:
request (:class:`Request`): The corresponding request.
body (:class:`.body.Body`): The file.
body (:class:`.body.Body` or a file-like): The file.
reply (:class:`Reply`): The latest Reply.
file_transfer_size (int): Size of the file transfer without
considering restart. (REST is issued last.)
Expand All @@ -175,11 +175,11 @@ def protocol(self):
def to_dict(self):
return {
'protocol': 'ftp',
'request': self.request.to_dict(),
'body': self.body.to_dict() if self.body else None,
'reply': self.reply.to_dict(),
'response_code': self.reply.code,
'response_message': self.reply.text,
'request': self.request.to_dict() if self.request else None,
'body': self.call_to_dict_or_none(self.body),
'reply': self.reply.to_dict() if self.reply else None,
'response_code': self.reply.code if self.reply else None,
'response_message': self.reply.text if self.reply else None,
'file_transfer_size': self.file_transfer_size,
'restart_value': self.restart_value,
}
Expand Down
15 changes: 15 additions & 0 deletions wpull/ftp/request_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
from wpull.body import Body
from wpull.ftp.request import Reply, Command, Request, Response


Expand Down Expand Up @@ -93,3 +94,17 @@ def test_to_dict(self):
self.assertEqual(200, response_dict['response_code'])
self.assertEqual('Success', response_dict['reply']['text'])
self.assertEqual('Success', response_dict['response_message'])

def test_to_dict_body(self):
response = Response()
response.body = Body()
response_dict = response.to_dict()

self.assertTrue(response_dict['body'])
response.body.close()

response = Response()
response.body = NotImplemented
response_dict = response.to_dict()

self.assertFalse(response_dict['body'])
8 changes: 4 additions & 4 deletions wpull/http/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class RawRequest(SerializableMixin, DictableMixin):
``HTTP/1.0``.
fields (:class:`.namevalue.NameValueRecord`): The fields in
the HTTP header.
body (:class:`.conversation.Body`): An optional payload.
body (:class:`.body.Body` or file-like): An optional payload.
encoding (str): The encoding of the status line.
'''
def __init__(self, method=None, resource_path=None, version='HTTP/1.1'):
Expand All @@ -40,7 +40,7 @@ def to_dict(self):
'version': self.version,
'resource_path': self.resource_path,
'fields': list(self.fields.get_all()),
'body': self.body.to_dict() if self.body else None,
'body': self.call_to_dict_or_none(self.body),
'encoding': self.encoding,
}

Expand Down Expand Up @@ -168,7 +168,7 @@ class Response(SerializableMixin, DictableMixin, ProtocolResponseMixin):
``HTTP/1.1``.
fields (:class:`.namevalue.NameValueRecord`): The fields in
the HTTP headers (and trailer, if present).
body (:class:`.conversation.Body`): The optional payload (without
body (:class:`.body.Body` or file-like): The optional payload (without
any transfer or content encoding).
request: The corresponding request.
encoding (str): The encoding of the status line.
Expand Down Expand Up @@ -200,7 +200,7 @@ def to_dict(self):
'response_message': self.reason,
'version': self.version,
'fields': list(self.fields.get_all()),
'body': self.body.to_dict() if self.body else None,
'body': self.call_to_dict_or_none(self.body),
'request': self.request.to_dict() if self.request else None,
'encoding': self.encoding,
}
Expand Down
28 changes: 28 additions & 0 deletions wpull/http/request_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import copy
import unittest
from wpull.body import Body

from wpull.errors import ProtocolError
from wpull.http.request import Request, Response
Expand Down Expand Up @@ -182,3 +183,30 @@ def test_to_dict(self):
self.assertEqual(200, response_dict['response_code'])
self.assertEqual('OK', response_dict['reason'])
self.assertEqual('OK', response_dict['response_message'])

def test_to_dict_body(self):
request = Request()
request.body = Body()
request_dict = request.to_dict()

self.assertTrue(request_dict['body'])
request.body.close()

request = Request()
request.body = NotImplemented
request_dict = request.to_dict()

self.assertFalse(request_dict['body'])

response = Response()
response.body = Body()
response_dict = response.to_dict()

self.assertTrue(response_dict['body'])
response.body.close()

response = Response()
response.body = NotImplemented
response_dict = response.to_dict()

self.assertFalse(response_dict['body'])
11 changes: 11 additions & 0 deletions wpull/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,23 @@
import trollius

from wpull.backport.logging import BraceMessage as __
from wpull.errors import ServerError, ProtocolError, SSLVerificationError, \
NetworkError


_logger = logging.getLogger(__name__)
_ = gettext.gettext


REMOTE_ERRORS = (
ServerError,
ProtocolError,
SSLVerificationError,
NetworkError,
)
'''List of error classes that are errors that occur with a server.'''


class BaseProcessor(object, metaclass=abc.ABCMeta):
'''Base class for processors.
Expand Down
5 changes: 3 additions & 2 deletions wpull/processor/ftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
SSLVerificationError
from wpull.ftp.request import Request, ListingResponse, Response
from wpull.hook import Actions
from wpull.processor.base import BaseProcessor, BaseProcessorSession
from wpull.processor.base import BaseProcessor, BaseProcessorSession, \
REMOTE_ERRORS
from wpull.processor.rule import ResultRule, FetchRule
from wpull.scraper.util import urljoin_safe
from wpull.url import parse_url_or_log
Expand Down Expand Up @@ -198,7 +199,7 @@ def _fetch(self, request):
if response:
response.body.close()

except (NetworkError, ProtocolError, ServerError, SSLVerificationError) as error:
except REMOTE_ERRORS as error:
self._log_error(request, error)

action = self._result_rule.handle_error(
Expand Down
Loading

0 comments on commit 5520e76

Please sign in to comment.