Skip to content

Commit

Permalink
Almost completed handling of follow redirects in the HTTP client
Browse files Browse the repository at this point in the history
  • Loading branch information
RobertoPrevato committed Dec 24, 2018
1 parent 7cb7299 commit c422604
Show file tree
Hide file tree
Showing 18 changed files with 300 additions and 65 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,5 @@ tests/out/

blacksheep/*.c
blacksheep/*.html
blacksheep/includes/*.html
blacksheep/includes/*.html
out/*
2 changes: 1 addition & 1 deletion blacksheep/client/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .session import HttpClient
from .session import ClientSession
1 change: 0 additions & 1 deletion blacksheep/client/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def on_header(self, name, value):
self.headers.append(HttpHeader(name, value))

def on_headers_complete(self):
# cdef HttpResponse response
status = self.parser.get_status_code()
response = HttpResponse(
status,
Expand Down
4 changes: 2 additions & 2 deletions blacksheep/client/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def _get_connection(self):
connection = self._idle_connections.get_nowait() # type: HttpConnection

if connection.open:
print(f'Reusing connection {id(connection)}')
# print(f'Reusing connection {id(connection)}')
return connection

def try_return_connection(self, connection):
Expand All @@ -52,7 +52,7 @@ async def get_connection(self):
return await self.create_connection()

async def create_connection(self):
print(f'[*] creating connection for: {self.host}:{self.port}')
# print(f'[*] creating connection for: {self.host}:{self.port}')
transport, connection = await self.loop.create_connection(
lambda: HttpConnection(self.loop, self),
self.host,
Expand Down
208 changes: 181 additions & 27 deletions blacksheep/client/session.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,29 @@
import time
import asyncio
from .pool import HttpConnectionPool, HttpConnectionPools
from blacksheep import HttpRequest, HttpResponse, HttpHeaders, URL
from typing import List, Optional, Union, Type, Any
from .pool import HttpConnectionPools
from blacksheep import (HttpRequest,
HttpResponse,
HttpContent,
HttpHeaders,
HttpHeader,
URL)


URLType = Union[str, bytes, URL]


class InvalidResponseException(Exception):
    """Raised when the server returns a response that cannot be handled.

    The offending response is exposed on the `response` attribute so
    callers can inspect it.
    """

    def __init__(self, message, response):
        super().__init__(message)
        self.response = response  # the HttpResponse that triggered this error


class MissingLocationForRedirect(InvalidResponseException):
    """Raised when a redirect response lacks the mandatory "Location" header."""

    def __init__(self, response):
        # NOTE: fixed typo in the message — the original used a backtick ("didn`t")
        super().__init__(f'The server returned a redirect status ({response.status}) '
                         f'but didn\'t send a "Location" header', response)


class HttpRequestException(Exception):
Expand All @@ -11,28 +33,66 @@ def __init__(self, message, allow_retry):
self.can_retry = allow_retry


class HttpClient:
class RedirectsCache:
    """Stores the destinations of permanent redirects, so that later
    requests to the same source URL can go straight to the target."""

    __slots__ = ('_cache',)

    def __init__(self):
        self._cache = {}

    def store_redirect(self, source, destination):
        # equivalent to item assignment; kept for a more explicit call site
        self._cache[source] = destination

    def __setitem__(self, key, value):
        self._cache[key] = value

    def __getitem__(self, item):
        # a missing entry yields None rather than raising KeyError
        return self._cache.get(item)

    def __contains__(self, item):
        return item in self._cache


class ClientSession:

def __init__(self,
             loop=None,
             url=None,
             ssl=None,
             pools=None,
             default_headers: Optional[List[HttpHeader]] = None,
             follow_redirects: bool = True,
             redirects_cache_type: Union[Type[RedirectsCache], Any] = None):
    """Creates a client session.

    :param loop: event loop to use; defaults to the current event loop
    :param url: optional base URL for relative request URLs
    :param ssl: SSL context used for HTTPS connections
    :param pools: connection pools; a new set is created when not given
    :param default_headers: headers added to every outgoing request
    :param follow_redirects: whether redirect responses are followed
    :param redirects_cache_type: type used to cache permanent redirects
    """
    if loop is None:
        loop = asyncio.get_event_loop()

    # a string / bytes url is normalized into a URL instance
    if url and not isinstance(url, URL):
        url = URL(url)

    if not pools:
        pools = HttpConnectionPools(loop)

    if follow_redirects and redirects_cache_type is None:
        redirects_cache_type = RedirectsCache

    self.loop = loop
    self.base_url = url
    self.ssl = ssl
    self.default_headers = HttpHeaders(default_headers)
    self.pools = pools
    self.connection_timeout = 3.0  # seconds
    self.request_timeout = 60.0    # seconds; TODO: put in settings class
    self.follow_redirects = follow_redirects
    self._permanent_redirects_urls = redirects_cache_type() if follow_redirects else None
    # when True, 301/302 switch the request method to GET (browser-like)
    self.non_standard_handling_of_301_302_redirect_method = True

def use_standard_redirect(self):
    """Enables specification-compliant handling of 301 and 302 redirects:
    the original request method is kept instead of switching to GET."""
    self.non_standard_handling_of_301_302_redirect_method = False

def get_headers(self):
if not self.default_headers:
Expand Down Expand Up @@ -63,45 +123,139 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
async def close(self):
pass

@staticmethod
def extract_redirect_location(response: HttpResponse):
    """Returns the value of the "Location" header of a redirect response.

    When the server returned several Location headers, the last one in
    order wins. Raises MissingLocationForRedirect when none is present.
    """
    values = response.headers[b'Location']
    if values:
        return values[-1].value
    raise MissingLocationForRedirect(response)

@staticmethod
def get_redirect_url(request: HttpRequest, location: URL):
    """Returns the URL a redirected request should be sent to.

    An absolute location is used as-is; a relative one is resolved
    against the base URL of the original request.
    """
    if not location.is_absolute:
        location = request.url.base_url().join(location)
    return location

def update_request_for_redirect(self, request: HttpRequest, response: HttpResponse):
    """Mutates the given request so it can be re-sent to the location
    indicated by a redirect response.

    Rewrites the method to GET for 301/302 (browser-like, configurable)
    and for 303 See Other; caches permanent redirects (301/308); finally
    points the request at the redirect URL.
    """
    status = response.status

    if status in (301, 302) and self.non_standard_handling_of_301_302_redirect_method:
        # Browsers historically switch to GET on 301/302, although the
        # specification preserves the original method
        request.method = b'GET'
    elif status == 303:
        # 303 See Other: the new request must use GET
        request.method = b'GET'

    redirect_url = self.get_redirect_url(
        request, URL(self.extract_redirect_location(response)))

    if status in (301, 308):
        # permanent redirects: remember the destination for future requests
        self._permanent_redirects_urls[request.url.value] = redirect_url

    request.url = redirect_url

def check_redirected_url(self, request):
    """Rewrites the request URL when it is known (from a previously seen
    permanent redirect) to have moved, skipping a round-trip."""
    if not self.follow_redirects:
        return
    key = request.url.value
    if key in self._permanent_redirects_urls:
        request.url = self._permanent_redirects_urls[key]

async def send(self, request: HttpRequest):
    """Sends the given request — after adding the session's default
    headers — and returns the final response."""
    # TODO: store request context (such as number of redirects, and to which page it redirected)
    # validate max number of redirects
    request.headers += self.get_headers()
    return await self._send(request)

async def _send(self, request: HttpRequest):
    """Sends a request over a pooled connection and returns the response,
    transparently following redirects when the session is configured to.

    Raises InvalidResponseException when the server redirects more than
    `maximum_redirects` times, guarding against circular redirects
    (the original implementation recursed without bound — its own TODO).
    """
    maximum_redirects = 20  # cap on followed redirects per request
    response = None

    for _ in range(maximum_redirects):
        self.check_redirected_url(request)

        url = request.url
        pool = self.pools.get_pool(url.schema, url.host, url.port, self.ssl)

        connection = await asyncio.wait_for(pool.get_connection(),
                                            self.connection_timeout,
                                            loop=self.loop)

        # TODO: weak reference to get_connection tasks and connection.send tasks
        # TODO: store connections in use and pending operations, to dispose them when the client is closed
        # TODO: test what happens if the connection is closed at this point, while sending the request

        response = await asyncio.wait_for(connection.send(request),
                                          self.request_timeout,
                                          loop=self.loop)

        # TODO: should close the connection? (did the server return Connection: Close?)
        if self.follow_redirects and response.is_redirect():
            # rewrite the request in place and loop towards the new URL
            self.update_request_for_redirect(request, response)
            continue

        return response

    raise InvalidResponseException(
        f'Exceeded the maximum number of redirects ({maximum_redirects})',
        response)

async def get(self,
              url: URLType,
              headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP GET request and returns the response."""
    request = HttpRequest(b'GET', self.get_url(url), HttpHeaders(headers), None)
    return await self.send(request)

async def post(self,
               url: URLType,
               content: HttpContent = None,
               headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP POST request with the given content and returns the response."""
    request = HttpRequest(b'POST', self.get_url(url), HttpHeaders(headers), content)
    return await self.send(request)

async def put(self,
              url: URLType,
              content: HttpContent = None,
              headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP PUT request with the given content and returns the response."""
    request = HttpRequest(b'PUT', self.get_url(url), HttpHeaders(headers), content)
    return await self.send(request)

async def delete(self,
                 url: URLType,
                 content: HttpContent = None,
                 headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP DELETE request (optionally with content) and returns the response."""
    request = HttpRequest(b'DELETE', self.get_url(url), HttpHeaders(headers), content)
    return await self.send(request)

async def trace(self,
                url: URLType,
                headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP TRACE request and returns the response."""
    request = HttpRequest(b'TRACE', self.get_url(url), HttpHeaders(headers), None)
    return await self.send(request)

async def head(self,
               url: URLType,
               headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP HEAD request and returns the response."""
    request = HttpRequest(b'HEAD', self.get_url(url), HttpHeaders(headers), None)
    return await self.send(request)

async def patch(self,
                url: URLType,
                content: HttpContent = None,
                headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP PATCH request with the given content and returns the response."""
    request = HttpRequest(b'PATCH', self.get_url(url), HttpHeaders(headers), content)
    return await self.send(request)

async def options(self,
                  url: URLType,
                  content: HttpContent = None,
                  headers: Optional[List[HttpHeader]] = None):
    """Sends an HTTP OPTIONS request (optionally with content) and returns the response."""
    request = HttpRequest(b'OPTIONS', self.get_url(url), HttpHeaders(headers), content)
    return await self.send(request)
11 changes: 11 additions & 0 deletions blacksheep/connection.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ cdef class ServerConnection:
# ignore: this can happen for example if a client posts a big request to a wrong URL;
# we return 404 immediately; but the client sends more chunks; http-parser.c throws exception
# in this case
# TODO: 1) see below; a possible solution is to reset the connection only when the client is done sending
# a message - this way we don't need to ignore errors in the client HTTP request format
pass

cpdef str get_client_ip(self):
Expand Down Expand Up @@ -124,6 +126,15 @@ cdef class ServerConnection:
self.request = request
self.loop.create_task(self.handle_request(request))

async def reset_when_request_completed(self):
    # Waits until the current request has been fully received before
    # resetting the connection, so that late chunks of an already-answered
    # request are not parsed as the start of a new message.
    # TODO: to avoid the problem at point 1). use this function
    # however, measure if resolving that problem impacts performance in a negative way
    await self.request_complete.wait()

    if not self.parser.should_keep_alive():
        self.close()
    # NOTE(review): indentation lost in this view — reset() placed outside the
    # `if` (always executed), which matches resetting per-request parser state;
    # confirm against the original file
    self.reset()

cpdef void on_url(self, bytes url):
self.url = url
self.method = self.parser.get_method()
Expand Down
5 changes: 0 additions & 5 deletions blacksheep/headers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,3 @@ cdef class HttpHeaders:
return values[-1]
return values[0] if values else None

@classmethod
def from_param(cls, param: Union[None, 'HttpHeaders', List[HttpHeader], Dict[bytes, bytes]]):
if param is None:
return cls()
return cls(param)
5 changes: 4 additions & 1 deletion blacksheep/includes/consts.pxi
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
DEF MAX_RESPONSE_CHUNK_SIZE = 61440 # 64kb
DEF MAX_REQUEST_HEADERS_COUNT = 80
DEF MAX_REQUEST_HEADER_SIZE = 8192 # 8kb
DEF MAX_REQUEST_HEADER_SIZE = 8192 # 8kb

# 61440 # 64kb
# 16384
4 changes: 3 additions & 1 deletion blacksheep/messages.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ cdef class HttpRequest(HttpMessage):
cdef public bint active
cdef public dict route_values
cdef readonly bytes raw_url
cdef readonly URL url
cdef public URL url
cdef public bytes method
cdef public str client_ip
cdef dict __dict__
Expand All @@ -38,3 +38,5 @@ cdef class HttpResponse(HttpMessage):
cdef public int status
cdef public bint active
cdef dict __dict__

cpdef bint is_redirect(self)
3 changes: 3 additions & 0 deletions blacksheep/messages.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,6 @@ cdef class HttpResponse(HttpMessage):

def remove_cookie(self, name):
del self.cookies[name]

cpdef bint is_redirect(self):
    # True for redirect statuses that carry a Location to follow:
    # 301 Moved Permanently, 302 Found, 303 See Other,
    # 307 Temporary Redirect, 308 Permanent Redirect
    return self.status in {301, 302, 303, 307, 308}
4 changes: 2 additions & 2 deletions blacksheep/server/files/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,6 @@ def get_response_for_file(request, resource_path, cache_time):
return HttpResponse(200, headers, None)

return HttpResponse(200,
HttpHeaders.from_param(headers),
HttpHeaders(headers),
HttpContent(get_mime_type(resource_path),
get_file_data(resource_path, file_size)))
get_file_data(resource_path, file_size)))
1 change: 1 addition & 0 deletions blacksheep/url.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ cdef class URL:
cdef readonly bint is_absolute

cpdef URL join(self, URL other)
cpdef URL base_url(self)
Loading

0 comments on commit c422604

Please sign in to comment.