Skip to content

Commit

Permalink
Merge pull request #76 from PlaidWeb/feature/63-profiles
Browse files Browse the repository at this point in the history
Add profile parsing to all the applicable handlers
  • Loading branch information
fluffy-critter committed Jul 30, 2020
2 parents b03c070 + 8874e3c commit e4d7afe
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 23 deletions.
9 changes: 8 additions & 1 deletion authl/disposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,14 @@ class Verified(Disposition):
:param str identity: The verified identity URL
:param str redir: Where to redirect the user to
:param dict profile: The user's profile information
:param dict profile: The user's profile information. Standardized keys:
* ``avatar``: A URL to the user's avatar image
* ``bio``: Brief biographical information
* ``homepage``: The user's personal homepage
* ``location``: The user's stated location
* ``name``: The user's display/familiar name
* ``pronouns``: The user's declared pronouns
"""

Expand Down
4 changes: 2 additions & 2 deletions authl/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ def check_callback(self, url: str, get: dict, data: dict) -> disposition.Disposi
:param dict data: the POST parameters for the verification
:returns: a :py:mod:`authl.disposition` object to be handled by the
frontend. Any errors which get raised internally should be caught and
returned as an appropriate :py:class:`authl.disposition.Error`.
frontend. Any errors which get raised internally should be caught and
returned as an appropriate :py:class:`authl.disposition.Error`.
"""

Expand Down
2 changes: 1 addition & 1 deletion authl/handlers/email_addr.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def check_callback(self, url, get, data):
if time.time() > when + self._lifetime:
return disposition.Error("Login timed out", redir)

return disposition.Verified('mailto:' + email_addr, redir)
return disposition.Verified('mailto:' + email_addr, redir, {'email': email_addr})


def smtplib_connector(hostname, port, username=None, password=None, use_ssl=False):
Expand Down
17 changes: 16 additions & 1 deletion authl/handlers/fediverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,22 @@ def _get_identity(client, auth_headers, redir) -> disposition.Disposition:
client.instance, response['url'], id_url)
return disposition.Error("Domains do not match", redir)

return disposition.Verified(id_url, redir, response)
profile = {
'name': response.get('display_name'),
'bio': response.get('source', {}).get('note'),
'avatar': response.get('avatar_static', response.get('avatar'))
}

# Attempt to parse useful stuff out of the fields source
for field in response.get('source', {}).get('fields'):
name = field.get('name', '')
value = field.get('value', '')
if 'homepage' not in profile and urllib.parse.urlparse(value).scheme:
profile['homepage'] = value
elif 'pronoun' in name.lower():
profile['pronouns'] = value

return disposition.Verified(id_url, redir, {k: v for k, v in profile.items() if v})

def initiate_auth(self, id_url, callback_uri, redir):
try:
Expand Down
72 changes: 68 additions & 4 deletions authl/handlers/indieauth.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import urllib.parse

import expiringdict
import mf2py
import requests
from bs4 import BeautifulSoup

Expand All @@ -35,6 +36,9 @@
# and find_endpoint can both benefit from the same endpoint cache
_ENDPOINT_CACHE = expiringdict.ExpiringDict(max_len=128, max_age_seconds=1800)

# And similar for retrieving user profiles
_PROFILE_CACHE = expiringdict.ExpiringDict(max_len=128, max_age_seconds=1800)


def find_endpoint(id_url: str,
links: typing.Dict = None,
Expand Down Expand Up @@ -69,17 +73,77 @@ def _derive_endpoint(links, content):
# We didn't find a new endpoint, and we didn't have a cached one
LOGGER.debug("Retrieving %s", id_url)
request = utils.request_url(id_url)
found = _derive_endpoint(request.links,
BeautifulSoup(request.text, 'html.parser'))
links = request.links
content = BeautifulSoup(request.text, 'html.parser')
found = _derive_endpoint(links, content)

if found and id_url:
# we found a new value so update the cache
LOGGER.debug("Caching %s -> %s", id_url, found)
_ENDPOINT_CACHE[id_url] = found

# Let's also prefill the profile, while we're here
if content:
get_profile(id_url, content)

return found or cached


def _parse_hcard(id_url, card):
properties = card.get('properties', {})

def get_str(prop) -> typing.Optional[str]:
for item in properties.get(prop, []):
if isinstance(item, str):
return item
if isinstance(item, dict) and 'value' in item:
# got an e-property; use the plaintext version
return item['value']
return None

def get_url(prop, scheme=None) -> typing.Tuple[typing.Optional[str],
urllib.parse.ParseResult]:
for item in properties.get(prop, []):
if isinstance(item, str):
url = urllib.parse.urljoin(id_url, item)
parsed = urllib.parse.urlparse(url)
if not scheme or parsed.scheme == scheme:
return url, parsed
return None, urllib.parse.urlparse('')

return {
'avatar': get_url('photo')[0],
'bio': get_str('note'),
'email': urllib.parse.unquote(get_url('email', 'mailto')[1].path),
'homepage': get_url('url')[0],
'name': get_str('name'),
'pronouns': get_str('pronouns'),
}


def get_profile(id_url: str, content: BeautifulSoup = None) -> dict:
    """ Given an identity URL, try to parse out an Authl profile.

    :param str id_url: the identity URL to retrieve the profile for
    :param BeautifulSoup content: an already-fetched page to parse instead of
        retrieving the URL (also bypasses the profile cache)

    :returns: a dict of standardized profile keys; empty on failure
    """
    # When no pre-fetched content is supplied, a cached profile wins
    if not content and id_url in _PROFILE_CACHE:
        return _PROFILE_CACHE[id_url]

    try:
        parser = mf2py.Parser(doc=content) if content else mf2py.Parser(url=id_url)
        h_cards = parser.to_dict(filter_by_type="h-card")
    except Exception as err:  # pylint:disable=broad-except
        LOGGER.debug("Couldn't retrieve profile at %s: %s", id_url, err)
        h_cards = []

    # Merge all found h-cards; earlier cards take precedence per key,
    # and empty values never overwrite anything
    profile: dict = {}
    for card in h_cards:
        for key, value in _parse_hcard(id_url, card).items():
            if value and key not in profile:
                profile[key] = value

    _PROFILE_CACHE[id_url] = profile
    return profile


def verify_id(request_id: str, response_id: str) -> typing.Optional[str]:
"""
Expand All @@ -94,7 +158,7 @@ def verify_id(request_id: str, response_id: str) -> typing.Optional[str]:
:param str response_id: The authorized response identity
:returns: a normalized version of the response ID, or None if the URL could
not be verified.
not be verified.
"""

Expand Down Expand Up @@ -274,7 +338,7 @@ def check_callback(self, url, get, data):
"Identity URL '%s' does not match request '%s'" % (response['me'], id_url),
redir)

return disposition.Verified(response_id, redir, response)
return disposition.Verified(response_id, redir, get_profile(response_id))
except KeyError as key:
return disposition.Error("Missing " + str(key), redir)

Expand Down
40 changes: 33 additions & 7 deletions authl/handlers/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def __init__(self, client_key: str,
client_secret: str,
timeout: int = None,
storage: dict = None):
# pylint:disable=too-many-arguments
self._client_key = client_key
self._client_secret = client_secret
self._pending = expiringdict.ExpiringDict(
Expand Down Expand Up @@ -150,26 +151,51 @@ def check_callback(self, url, get, data):
resource_owner_key=request.get('oauth_token'),
resource_owner_secret=request.get('oauth_token_secret'))

verify_url = 'https://api.twitter.com/1.1/account/verify_credentials.json?skip_status=1'
user_info = requests.get(
'https://api.twitter.com/1.1/account/verify_credentials.json', auth=auth).json()
verify_url, auth=auth).json()
if 'errors' in user_info:
return disposition.Error(
"Could not retrieve credentials: %r" % user_info.get('errors'),
redir)

user_id = user_info.get('id_str')
username = user_info.get('screen_name')
# We include the user ID after the hash code to prevent folks from
# logging in by taking over a username that someone changed/abandoned.
return disposition.Verified(
f'https://twitter.com/{username}#{user_id}',
# We include the user ID after the hash code to prevent folks from
# logging in by taking over a username that someone changed/abandoned.
f'https://twitter.com/{user_info["screen_name"]}#{user_info["id_str"]}',
redir,
user_info)
self._build_profile(user_info))

@property
def generic_url(self):
    """ The generic Twitter URL presented for this handler. """
    return 'https://twitter.com/'

@staticmethod
def _build_profile(user_info: dict) -> dict:
# Get the basic profile
entities = user_info.get('entities', {})

def expand_entities(name):
text = user_info[name]
for url in entities.get(name, {}).get('urls', []):
tco = url.get('url')
real = url.get('expanded_url')
if tco and real:
text = text.replace(tco, real)
return text

mapping = (('avatar', 'profile_image_url_https'),
('bio', 'description'),
('email', 'email'),
('homepage', 'url'),
('location', 'location'),
('name', 'name'),
)
profile = {p_key: expand_entities(t_key)
for p_key, t_key in mapping if t_key in user_info}

return {k: v for k, v in profile.items() if v}


def from_config(config, storage):
""" Generate a Twitter handler from the given config dictionary.
Expand Down
54 changes: 52 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ itsdangerous = "^1.1.0"
requests = "^2.24.0"
requests_oauthlib = "^1.3.0"
validate_email = "^1.3"
mf2py = "^1.1.2"

[tool.poetry.dev-dependencies]
autopep8 = "^1.5.3"
Expand Down

0 comments on commit e4d7afe

Please sign in to comment.