Skip to content

Commit

Permalink
Remove:
Browse files Browse the repository at this point in the history
- Non-performant FrozenDict, which doesn't appear necessary based on testing
- HTTP proxy CONNECT verb support, which breaks asyncio's encapsulation in a way that causes RuntimeErrors in some versions, and which could only be used to pass through data that wpull cannot
intercept (e.g. TLS tunneling)
  • Loading branch information
falconkirtaran committed May 15, 2017
1 parent 582fd26 commit 5613807
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 123 deletions.
9 changes: 4 additions & 5 deletions wpull/document/htmlparse/html5lib_.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import io
import os.path

from wpull.collections import FrozenDict, EmptyFrozenDict
from wpull.document.htmlparse.base import BaseParser
from wpull.document.htmlparse.element import Comment, Doctype, Element

Expand Down Expand Up @@ -45,11 +44,11 @@ def parse(self, file, encoding=None):
buffer = None

if tail_buffer:
yield Element(tag, EmptyFrozenDict(), None, tail_buffer.getvalue(), True)
yield Element(tag, dict(), None, tail_buffer.getvalue(), True)
tail_buffer = None

tag = token['name']
attrib = FrozenDict(dict(token['data']))
attrib = dict(token['data'])
buffer = io.StringIO()

if token['name'] == 'script':
Expand All @@ -67,7 +66,7 @@ def parse(self, file, encoding=None):
buffer = None

if tail_buffer:
yield Element(tag, EmptyFrozenDict(), None, tail_buffer.getvalue(), True)
yield Element(tag, dict(), None, tail_buffer.getvalue(), True)
tail_buffer = None

tail_buffer = io.StringIO()
Expand All @@ -88,7 +87,7 @@ def parse(self, file, encoding=None):
buffer = None

if tail_buffer:
yield Element(tag, EmptyFrozenDict(), None, tail_buffer.getvalue(), True)
yield Element(tag, dict(), None, tail_buffer.getvalue(), True)
tail_buffer = None


Expand Down
9 changes: 4 additions & 5 deletions wpull/document/htmlparse/lxml_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import lxml.html

from wpull.collections import EmptyFrozenDict, FrozenDict
from wpull.document.htmlparse.base import BaseParser
from wpull.document.htmlparse.element import Element, Comment
from wpull.document.xml import XMLDetector
Expand Down Expand Up @@ -37,15 +36,15 @@ def start(self, tag, attrib):

if self.tail_buffer:
self.callback(Element(
self.tag, EmptyFrozenDict(),
self.tag, dict(),
None,
self.tail_buffer.getvalue(),
True
))
self.tail_buffer = None

self.tag = tag
self.attrib = FrozenDict(attrib)
self.attrib = attrib
self.buffer = io.StringIO()

def data(self, data):
Expand All @@ -66,7 +65,7 @@ def end(self, tag):

if self.tail_buffer:
self.callback(Element(
self.tag, EmptyFrozenDict(),
self.tag, dict(),
None,
self.tail_buffer.getvalue(),
True
Expand All @@ -90,7 +89,7 @@ def close(self):

if self.tail_buffer:
self.callback(Element(
self.tag, EmptyFrozenDict(),
self.tag, dict(),
None,
self.tail_buffer.getvalue(),
True
Expand Down
114 changes: 1 addition & 113 deletions wpull/proxy/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def __init__(self, http_client: Client, reader: asyncio.StreamReader, writer: as
self._http_client = http_client
self._reader = self._original_reader = reader
self._writer = self._original_writer = writer
self._is_tunnel = False
self._is_ssl_tunnel = False

self._cert_filename = wpull.util.get_package_filename('proxy/proxy.crt')
Expand Down Expand Up @@ -130,7 +129,7 @@ def _process_request(self, request: Request):
_logger.debug(__('Got request {0}', request))

if request.method == 'CONNECT':
yield from self._start_connect_tunnel()
self._reject_request('CONNECT is intentionally not supported')
return

if self._is_ssl_tunnel and request.url.startswith('http://'):
Expand Down Expand Up @@ -203,117 +202,6 @@ def _process_request(self, request: Request):

_logger.debug('Response done.')

@asyncio.coroutine
def _start_connect_tunnel(self):
    '''Switch this session into tunnel mode in response to CONNECT.

    A second CONNECT on a session that is already tunneling is rejected.
    Otherwise the socket is detached from the current stream, the first
    bytes from the client are peeked at, and the socket is re-wrapped
    either through the SSL path or as a plain stream.

    Coroutine.
    '''
    if self._is_tunnel:
        # Only one level of tunneling is allowed per session.
        self._reject_request('Cannot CONNECT within CONNECT')
        return

    self._is_tunnel = True

    raw_socket = yield from self._detach_socket_and_start_tunnel()
    looks_like_ssl = yield from self._is_client_request_ssl(raw_socket)

    if not looks_like_ssl:
        # Plain traffic: just put fresh streams on top of the raw socket.
        yield from self._rewrap_socket(raw_socket)
        return

    _logger.debug('Tunneling as SSL')
    yield from self._start_ssl_tunnel()

@classmethod
@asyncio.coroutine
def _is_client_request_ssl(cls, socket_: socket.socket) -> bool:
    '''Guess whether the client's next bytes are something other than HTTP.

    Up to three bytes are peeked (not consumed) from the socket. While the
    socket has nothing to read yet, the peek is retried after a short
    sleep. The socket's original timeout is restored after every attempt.

    Args:
        socket_: The socket to peek at.

    Returns:
        bool: ``False`` when every peeked byte is an upper-case ASCII
        letter (this includes the case where nothing was peeked),
        ``True`` otherwise.

    Raises:
        OSError: Any receive error other than EWOULDBLOCK/EAGAIN.

    Coroutine.
    '''
    not_ready_codes = (errno.EWOULDBLOCK, errno.EAGAIN)

    while True:
        saved_timeout = socket_.gettimeout()
        socket_.setblocking(False)

        try:
            peeked = socket_.recv(3, socket.MSG_PEEK)
        except OSError as error:
            if error.errno not in not_ready_codes:
                raise
            # Nothing to read yet; wait a little and peek again.
            yield from asyncio.sleep(0.01)
        else:
            break
        finally:
            socket_.settimeout(saved_timeout)

    _logger.debug('peeked data %s', peeked)

    # NOTE(review): presumably upper-case letters indicate the start of a
    # plain HTTP request line (e.g. "GET") -- confirm against callers.
    only_upper_letters = all(
        ord('A') <= byte_value <= ord('Z') for byte_value in peeked
    )
    return not only_upper_letters

@asyncio.coroutine
def _start_ssl_tunnel(self):
    '''Mark the session as an SSL tunnel and move it onto an SSL socket.

    Runs the server-side handshake and then replaces the session's
    reader/writer with streams built on the resulting socket.

    Coroutine.
    '''
    self._is_ssl_tunnel = True

    handshaken_socket = yield from self._start_ssl_handshake()
    yield from self._rewrap_socket(handshaken_socket)

@asyncio.coroutine
def _detach_socket_and_start_tunnel(self) -> socket.socket:
    '''Acknowledge the CONNECT and unhook the socket from the event loop.

    The event loop's read watcher for the socket is removed first, then
    the ``200 Connection established`` response is written and flushed,
    and finally the write watcher is removed.

    Returns:
        socket.socket: The socket obtained from the current writer.

    Raises:
        ConnectionAbortedError: If removing either watcher raises
        ``ValueError``.

    Coroutine.
    '''
    sock = self._writer.get_extra_info('socket')

    try:
        asyncio.get_event_loop().remove_reader(sock.fileno())
    except ValueError as exc:
        raise ConnectionAbortedError() from exc

    # The response must be fully sent before the write side is detached.
    self._writer.write(b'HTTP/1.1 200 Connection established\r\n\r\n')
    yield from self._writer.drain()

    try:
        asyncio.get_event_loop().remove_writer(sock.fileno())
    except ValueError as exc:
        raise ConnectionAbortedError() from exc

    return sock

@asyncio.coroutine
def _start_ssl_handshake(self):
    '''Wrap the current socket for server-side SSL and do the handshake.

    The handshake is attempted up to 1200 times, sleeping 0.05 seconds
    whenever the SSL layer reports that it needs more reading or writing.

    Returns:
        The wrapped SSL socket once the handshake has completed.

    Raises:
        ssl.SSLError: Any handshake error other than want-read/want-write.
        ConnectionAbortedError: If the handshake did not complete within
        the allowed number of attempts.

    Coroutine.
    '''
    plain_socket = self._writer.get_extra_info('socket')

    # NOTE(review): ssl.wrap_socket() is deprecated since Python 3.7 and
    # removed in 3.12; an SSLContext would be needed on newer versions.
    tls_socket = ssl.wrap_socket(
        plain_socket,
        server_side=True,
        certfile=self._cert_filename,
        keyfile=self._key_filename,
        do_handshake_on_connect=False
    )

    # FIXME: this isn't how to START TLS
    for attempt in range(1200):
        try:
            tls_socket.do_handshake()
        except ssl.SSLError as error:
            if error.errno not in (
                    ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE):
                raise
            # The handshake needs more I/O; back off briefly and retry.
            _logger.debug('Do handshake %s', error)
            yield from asyncio.sleep(0.05)
        else:
            break
    else:
        # Every attempt was used up without completing the handshake.
        _logger.error(_('Unable to handshake.'))
        tls_socket.close()
        self._reject_request('Could not start TLS')
        raise ConnectionAbortedError('Could not start TLS')

    return tls_socket

@asyncio.coroutine
def _rewrap_socket(self, new_socket):
    '''Build fresh stream objects on a socket and adopt them.

    A new ``StreamReader``/``StreamWriter`` pair is created on top of
    ``new_socket`` through the event loop's ``create_connection`` and then
    stored as this session's reader and writer.

    Args:
        new_socket: The socket to attach the new streams to.

    Coroutine.
    '''
    event_loop = asyncio.get_event_loop()
    stream_reader = asyncio.StreamReader(loop=event_loop)
    stream_protocol = asyncio.StreamReaderProtocol(
        stream_reader, loop=event_loop)

    transport, unused_protocol = yield from event_loop.create_connection(
        lambda: stream_protocol, sock=new_socket)

    stream_writer = asyncio.StreamWriter(
        transport, stream_protocol, stream_reader, event_loop)

    # Swap the streams only after the connection was set up successfully.
    self._reader = stream_reader
    self._writer = stream_writer

@asyncio.coroutine
def _read_request_header(self) -> Request:
request = Request()
Expand Down

0 comments on commit 5613807

Please sign in to comment.