Skip to content

Commit

Permalink
Merge pull request #86 from PlaidWeb/feature/57-redirection-handling
Browse files Browse the repository at this point in the history
Handle IndieAuth profile URL redirection
  • Loading branch information
fluffy-critter committed Aug 19, 2020
2 parents 7aa6ef0 + 7a922a7 commit e5f08af
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 57 deletions.
29 changes: 22 additions & 7 deletions authl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ def add_handler(self, handler: handlers.Handler):
raise ValueError("Already have handler with id " + cb_id)
self._handlers[cb_id] = handler

def _match_url(self, url: str):
for hid, handler in self._handlers.items():
result = handler.handles_url(url)
if result:
LOGGER.debug("%s URL matches %s", url, handler)
return handler, hid, result
return None, None, None

def get_handler_for_url(self, url: str) -> typing.Tuple[typing.Optional[handlers.Handler],
str,
str]:
Expand All @@ -72,18 +80,25 @@ def get_handler_for_url(self, url: str) -> typing.Tuple[typing.Optional[handlers
if not url:
return None, '', ''

for hid, handler in self._handlers.items():
result = handler.handles_url(url)
if result:
LOGGER.debug("%s URL matches %s", url, handler)
return handler, hid, result
by_url = self._match_url(url)
if by_url[0]:
return by_url

request = utils.request_url(url)
if request:
profile = utils.permanent_url(request)
if profile != url:
LOGGER.debug("%s: got permanent redirect to %s", url, profile)
# the profile URL is different than the request URL, so re-run
# the URL matching logic just in case
by_url = self._match_url(profile)
if by_url[0]:
return by_url

soup = BeautifulSoup(request.text, 'html.parser')
for hid, handler in self._handlers.items():
if handler.handles_page(request.url, request.headers, soup, request.links):
LOGGER.debug("%s response matches %s", request.url, handler)
if handler.handles_page(profile, request.headers, soup, request.links):
LOGGER.debug("%s response matches %s", profile, handler)
return handler, hid, request.url

LOGGER.debug("No handler found for URL %s", url)
Expand Down
41 changes: 27 additions & 14 deletions authl/handlers/indieauth.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,19 @@

def find_endpoint(id_url: str,
links: typing.Dict = None,
content: BeautifulSoup = None) -> typing.Optional[str]:
content: BeautifulSoup = None) -> typing.Tuple[typing.Optional[str],
typing.Optional[str]]:
""" Given an identity URL, discover its IndieAuth endpoint
:param str id_url: an identity URL to check
:param links: a request.links object from a requests operation
:param BeautifulSoup content: a BeautifulSoup parse tree of an HTML document
:returns: a tuple of ``(endpoint_url, profile_url)``
"""
def _derive_endpoint(links, content):
profile = id_url

def _derive_endpoint(links, content) -> typing.Optional[str]:
LOGGER.debug('links for %s: %s', id_url, links)
if links and 'authorization_endpoint' in links:
LOGGER.debug("Found link header")
Expand All @@ -65,8 +70,8 @@ def _derive_endpoint(links, content):

# Get the cached endpoint value, but don't immediately use it if we have
# links and/or content, as it might have changed
cached = _ENDPOINT_CACHE.get(id_url)
LOGGER.debug("Cached endpoint for %s: %s", id_url, cached)
cached, profile = _ENDPOINT_CACHE.get(id_url, (None, profile))
LOGGER.debug("Cached endpoint for %s: %s %s", id_url, cached, profile)

found = (links or content) and _derive_endpoint(links, content)
if id_url and not found and not cached:
Expand All @@ -77,17 +82,19 @@ def _derive_endpoint(links, content):
links = request.links
content = BeautifulSoup(request.text, 'html.parser')
found = _derive_endpoint(links, content)
profile = utils.permanent_url(request)

if found and id_url:
# we found a new value so update the cache
LOGGER.debug("Caching %s -> %s", id_url, found)
_ENDPOINT_CACHE[id_url] = found
_ENDPOINT_CACHE[id_url] = (found, profile)
_ENDPOINT_CACHE[profile] = (found, profile)

# Let's also prefill the profile, while we're here
if content:
get_profile(id_url, content)
get_profile(profile, content)

return found or cached
return (found or cached), profile


def _parse_hcard(id_url, card):
Expand Down Expand Up @@ -174,10 +181,16 @@ def verify_id(request_id: str, response_id: str) -> str:
LOGGER.debug("netloc mismatch %s %s", orig.netloc, resp.netloc)
raise ValueError("Domain mismatch")

# If both pages have the same authorization-endpoint we assume they know
# what they are doing
req_endpoint = find_endpoint(request_id)
resp_endpoint = find_endpoint(response_id)
req_endpoint, _ = find_endpoint(request_id)
resp_endpoint, resp_profile = find_endpoint(response_id)

# Need to make sure the domains match with the final profile URLs too
resp = urllib.parse.urlparse(resp_profile) # type:ignore
if orig.netloc != resp.netloc:
LOGGER.debug("redirected netloc mismatch %s %s", orig.netloc, resp.netloc)
raise ValueError("Domain mismatch (profile redirection)")

# Both the original and final profile must have the same endpoint
LOGGER.debug('request endpoint=%s response endpoint=%s', req_endpoint, resp_endpoint)
if req_endpoint != resp_endpoint:
raise ValueError("Authorization endpoint mismatch")
Expand Down Expand Up @@ -242,15 +255,15 @@ def handles_url(self, url):
through to :py:func:`handles_page`.
"""
if url in _ENDPOINT_CACHE:
return url
return _ENDPOINT_CACHE[url][1]
return None

def handles_page(self, url, headers, content, links):
""" :returns: whether an ``authorization_endpoint`` was found on the page. """
return find_endpoint(url, links, content) is not None
return find_endpoint(url, links, content)[0] is not None

def initiate_auth(self, id_url, callback_uri, redir):
endpoint = find_endpoint(id_url)
endpoint, id_url = find_endpoint(id_url)
if not endpoint:
return disposition.Error("Failed to get IndieAuth endpoint", redir)

Expand Down
42 changes: 25 additions & 17 deletions authl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import os.path
import typing

import requests

Expand All @@ -19,26 +20,16 @@ def read_icon(filename):
return read_file(os.path.join(os.path.dirname(__file__), 'icons', filename))


def request_url(url):
def request_url(url: str) -> typing.Optional[requests.Response]:
""" Requests a URL, attempting to canonicize it as it goes """
# pylint:disable=broad-except

try:
return requests.get(url)
except requests.exceptions.MissingSchema:
LOGGER.info("Missing schema on URL %s", url)
except (requests.exceptions.InvalidSchema, requests.exceptions.InvalidURL):
LOGGER.info("Not a valid URL scheme: %s", url)
return None
except Exception as err:
LOGGER.info("%s failed: %s", url, err)

for prefix in ('https://', 'http://'):

for prefix in ('', 'https://', 'http://'):
attempt = prefix + url
try:
attempt = prefix + url
LOGGER.debug("attempting %s", attempt)
return requests.get(attempt)
except Exception as err:
except requests.exceptions.MissingSchema:
LOGGER.info("Missing schema on URL %s", attempt)
except Exception as err: # pylint:disable=broad-except
LOGGER.info("%s failed: %s", attempt, err)

return None
Expand All @@ -49,3 +40,20 @@ def resolve_value(val):
if callable(val):
return val()
return val


def permanent_url(response: requests.Response) -> str:
""" Given a requests.Response object, determine what the permanent URL
for it is from the response history """

for item in response.history:
if item.status_code in (301, 308):
# permanent redirect means we continue on to the next URL in the
# redirection change
continue
# Any other status code is assumed to be a temporary redirect, so this
# is the last permanent URL
return item.url

# Last history item was a permanent redirect, or there was no history
return response.url
78 changes: 59 additions & 19 deletions tests/handlers/test_indieauth.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,60 +28,86 @@ def test_find_endpoint_by_url(requests_mock):
requests_mock.get('http://link.absolute/', text='Nothing to see',
headers={'Link': '<https://endpoint/>; rel="authorization_endpoint"'})

assert find_endpoint('http://link.absolute/') == 'https://endpoint/'
assert find_endpoint('http://link.absolute/')[0] == 'https://endpoint/'

requests_mock.get('http://link.relative/', text='Nothing to see',
headers={'Link': '<invalid>; rel="authorization_endpoint"'})
assert find_endpoint('http://link.relative/') == 'invalid'
assert find_endpoint('http://link.relative/')[0] == 'invalid'

requests_mock.get('http://content.absolute/',
text='<link rel="authorization_endpoint" href="https://endpoint/">')
assert find_endpoint('http://content.absolute/') == 'https://endpoint/'
assert find_endpoint('http://content.absolute/')[0] == 'https://endpoint/'

requests_mock.get('http://content.relative/',
text='<link rel="authorization_endpoint" href="endpoint" >')
assert find_endpoint('http://content.relative/') == 'http://content.relative/endpoint'
assert find_endpoint('http://content.relative/')[0] == 'http://content.relative/endpoint'

requests_mock.get('http://both/',
text='<link rel="authorization_endpoint" href="http://content/endpoint">',
headers={'Link': '<https://header/endpoint/>; rel="authorization_endpoint"'}
)
assert find_endpoint('http://both/') == 'https://header/endpoint/'
assert find_endpoint('http://both/')[0] == 'https://header/endpoint/'

requests_mock.get('http://nothing/', text='nothing')
assert find_endpoint('http://nothing/') is None
assert find_endpoint('http://nothing/')[0] is None

assert find_endpoint('https://undefined.example') is None
assert find_endpoint('https://undefined.example')[0] is None

# test the caching
requests_mock.reset()
assert find_endpoint('http://link.absolute/') == 'https://endpoint/'
assert find_endpoint('http://link.relative/') == 'invalid'
assert find_endpoint('http://content.absolute/') == 'https://endpoint/'
assert find_endpoint('http://content.relative/') == 'http://content.relative/endpoint'
assert find_endpoint('http://link.absolute/')[0] == 'https://endpoint/'
assert find_endpoint('http://link.relative/')[0] == 'invalid'
assert find_endpoint('http://content.absolute/')[0] == 'https://endpoint/'
assert find_endpoint('http://content.relative/')[0] == 'http://content.relative/endpoint'
assert not requests_mock.called

# but a failed lookup shouldn't be cached
assert find_endpoint('http://nothing/') is None
assert find_endpoint('http://nothing/')[0] is None
assert requests_mock.called


def test_find_endpoint_redirections(requests_mock):
from authl.handlers.indieauth import find_endpoint
# test that redirections get handled correctly
requests_mock.get('http://start/', status_code=301,
headers={'Location': 'http://perm-redirect/'})
requests_mock.get('http://perm-redirect/', status_code=302,
headers={'Location': 'http://temp-redirect/'})
requests_mock.get('http://temp-redirect/',
text='<link rel="authorization_endpoint" href="https://foobar/">')

# final URL should be the last permanent redirection
assert find_endpoint('http://start/') == ('https://foobar/', 'http://perm-redirect/')
assert requests_mock.call_count == 3

# endpoint should be cached for both the initial and permanent-redirect URLs
assert find_endpoint('http://start/') == ('https://foobar/', 'http://perm-redirect/')
assert find_endpoint(
'http://perm-redirect/') == ('https://foobar/', 'http://perm-redirect/')
assert requests_mock.call_count == 3


def test_find_endpoint_by_content(requests_mock):
from authl.handlers.indieauth import find_endpoint

links = {'authorization_endpoint': {'url': 'http://link_endpoint'}}
rel_content = BeautifulSoup('<link rel="authorization_endpoint" href="foo">',
'html.parser')
abs_content = BeautifulSoup('<link rel="authorization_endpoint" href="http://foo/">',
'html.parser')

assert indieauth.find_endpoint('http://example', links=links) == 'http://link_endpoint'
assert indieauth.find_endpoint('http://example',
content=rel_content) == 'http://example/foo'
assert indieauth.find_endpoint('http://example', content=abs_content) == 'http://foo/'
assert find_endpoint('http://example', links=links)[0] == 'http://link_endpoint'
assert find_endpoint('http://example',
content=rel_content)[0] == 'http://example/foo'
assert find_endpoint('http://example', content=abs_content)[0] == 'http://foo/'

# link header overrules page content
assert indieauth.find_endpoint('http://example',
links=links,
content=rel_content) == 'http://link_endpoint'
assert find_endpoint('http://example',
links=links,
content=rel_content)[0] == 'http://link_endpoint'

# final result should be cached
assert find_endpoint('http://example')[0] == 'http://link_endpoint'

assert not requests_mock.called

Expand Down Expand Up @@ -117,6 +143,20 @@ def test_verify_id(requests_mock):
requests_mock.get('https://upgrade.example', headers=endpoint_2)
assert indieauth.verify_id('http://upgrade.example', 'https://upgrade.example')

# redirect is fine as long as the domain matches
requests_mock.get('https://redir.example/user', headers=endpoint_1)
requests_mock.get('https://redir.example/me', status_code=301,
headers={'Location': 'https://redir.example/target'})
requests_mock.get('https://redir.example/target', headers=endpoint_1)
assert indieauth.verify_id('https://redir.example/user', 'https://redir.example/me')

# redirect is NOT fine if the domain changes
requests_mock.get('https://redir.example/offsite', status_code=301,
headers={'Location': 'https://redir.target/'})
requests_mock.get('https://redir.target/', headers=endpoint_1)
with pytest.raises(ValueError):
indieauth.verify_id('https://redir.example/user', 'https://redir.example/offsite')


def test_handler_success(requests_mock):
store = {}
Expand Down
11 changes: 11 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,14 @@ def test_from_config(mocker):

mock_test_handler.assert_called_once()
assert mock_test_handler.call_args == (())


def test_redir_url(requests_mock):
""" Ensure that redirected profile pages match URL rules for the redirect """
requests_mock.get('http://foo', status_code=301, headers={'Location': 'http://bar'})
requests_mock.get('http://bar', text='blah')
handler = UrlHandler('http://bar', 'foo')
instance = Authl([handler])

assert instance.get_handler_for_url('http://foo') == \
(handler, 'foo', 'http://bar')
26 changes: 26 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# pylint:disable=missing-docstring


import requests

from authl import utils


Expand All @@ -28,3 +30,27 @@ def moo():
return 5
assert utils.resolve_value(moo) == 5
assert utils.resolve_value(10) == 10


def test_permanent_url(requests_mock):
requests_mock.get('http://make-secure.example', status_code=301,
headers={'Location': 'https://make-secure.example'})
requests_mock.get('https://make-secure.example', status_code=302,
headers={'Location': 'https://make-secure.example/final'})
requests_mock.get('https://make-secure.example/final', text="you made it!")

# this redirects permanent to https, which redirects temporary to /final
req = requests.get('http://make-secure.example')
assert utils.permanent_url(req) == 'https://make-secure.example'

# direct request to /final should remain /final
req = requests.get('https://make-secure.example/final')
assert utils.permanent_url(req) == 'https://make-secure.example/final'

# ensure 308 redirect works too
requests_mock.get('http://perm-308.example', status_code=308,
headers={'Location': 'https://make-secure.example/308'})
requests_mock.get('https://make-secure.example/308', status_code=401)

req = requests.get('http://perm-308.example')
assert utils.permanent_url(req) == 'https://make-secure.example/308'

0 comments on commit e5f08af

Please sign in to comment.