Skip to content

Commit

Permalink
load logos in parallel for discojson
Browse files (browse the repository at this point in the history)
  • Loading branch information
leifj committed Mar 11, 2019
1 parent 6342439 commit b3a8e65
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 30 deletions.
7 changes: 2 additions & 5 deletions src/pyff/builtins.py
Expand Up @@ -23,14 +23,12 @@
from .utils import total_seconds, dumptree, safe_write, root, with_tree, duration2timedelta, xslt_transform, \
validate_document, hash_id
from .samlmd import sort_entities, iter_entities, annotate_entity, set_entity_attributes, \
discojson, set_pubinfo, set_reginfo, find_in_document, entitiesdescriptor, set_nodecountry
discojson_t, set_pubinfo, set_reginfo, find_in_document, entitiesdescriptor, set_nodecountry
from .fetch import Resource
from six.moves.urllib_parse import urlparse
from .exceptions import MetadataException
from .store import make_store_instance
import six
import ipaddr
import multiprocessing

__author__ = 'leifj'

Expand Down Expand Up @@ -723,8 +721,7 @@ def _discojson(req, *opts):
if req.t is None:
raise PipeException("Your pipeline is missing a select statement.")

pool = multiprocessing.Pool(config.worker_pool_size)
res = pool.map(discojson, iter_entities(req.t))
res = discojson_t(req.t)
res.sort(key=operator.itemgetter('title'))

return json.dumps(res)
Expand Down
79 changes: 54 additions & 25 deletions src/pyff/samlmd.py
Expand Up @@ -16,6 +16,7 @@
from .fetch import ResourceManager
from .parse import add_parser
from xmlsec.crypto import CertDict
from concurrent import futures

log = get_log(__name__)

Expand Down Expand Up @@ -649,7 +650,47 @@ def entity_scopes(e):
return [s.text for s in elt]


def discojson(e, langs=None, fallback_to_favicon=False):
def discojson_load_icon(d, fallback_to_favicon=False):
    """Resolve the icon for one discovery-JSON entry and store it in ``d``.

    Candidate URLs are tried in order: first the ``url`` member of
    ``d['entity_icon_url']`` (when present), then -- only when
    ``fallback_to_favicon`` is true and ``d`` has a ``scope`` entry -- the
    conventional ``/favicon.ico`` location on each comma-separated scope,
    over https and http, with and without a ``www.`` prefix.

    A ``data:`` URL is stored as-is.  Any other candidate containing ``://``
    is fetched with ``url_get``; the first response that is ``ok`` and has
    content is converted with ``img_to_data`` and stored.  If no candidate
    succeeds, ``d['entity_icon']`` is ``None``.

    This is best-effort: any unexpected exception is logged and swallowed so
    that a single bad entry cannot break a batch run.

    :param d: entry dict to update in place
    :param fallback_to_favicon: also probe the favicon locations of the
        entry's scopes
    :return: the same ``d`` object that was passed in
    """
    try:
        icon_info = d.get('entity_icon_url') or {}

        urls = []
        # Skip a missing/empty url so it cannot abort the favicon fallback.
        if 'url' in icon_info and icon_info['url']:
            urls.append(icon_info['url'])

        if 'scope' in d and fallback_to_favicon:
            for scope in d['scope'].split(','):
                if not scope:
                    # An empty scope would yield a host-less URL.
                    continue
                # Browsers look for "/favicon.ico"; the former "favico.ico"
                # spelling pointed at a file that normally does not exist.
                urls.append("https://{}/favicon.ico".format(scope))
                urls.append("https://www.{}/favicon.ico".format(scope))
                urls.append("http://{}/favicon.ico".format(scope))
                urls.append("http://www.{}/favicon.ico".format(scope))

        d['entity_icon'] = None
        for url in urls:
            if url.startswith("data:"):
                # Already inline image data -- nothing to fetch.
                d['entity_icon'] = url
                break

            if '://' in url:
                try:
                    r = url_get(url)
                except IOError:
                    # Unreachable candidate: move on to the next one.
                    continue
                if r.ok and r.content:
                    d['entity_icon'] = img_to_data(r.content, r.headers.get('Content-Type'))
                    break
    except Exception as ex:
        log.debug(traceback.format_exc())
        log.error(ex)

    return d


def discojson(e, langs=None, fallback_to_favicon=False, load_icon=True):
if e is None:
return dict()

Expand All @@ -671,36 +712,18 @@ def discojson(e, langs=None, fallback_to_favicon=False):
d['type'] = 'sp'

scopes = entity_scopes(e)
icon_info = entity_icon_url(e)
urls = []
if icon_info is not None and 'url' in icon_info:
url = icon_info['url']
urls.append(url)
if scopes is not None and len(scopes) == 1 and fallback_to_favicon:
urls.append("https://{}/favico.ico".format(scopes[0]))
urls.append("https://www.{}/favico.ico".format(scopes[0]))

d['entity_icon'] = None
for url in urls:
if url.startswith("data:"):
d['entity_icon'] = url
break

if '://' in url:
try:
r = url_get(url)
except IOError:
continue
if r.ok and r.content:
d['entity_icon'] = img_to_data(r.content, r.headers.get('Content-Type'))
break

if scopes is not None and len(scopes) > 0:
d['scope'] = ",".join(scopes)
if len(scopes) == 1:
d['domain'] = scopes[0]
d['name_tag'] = (scopes[0].split('.'))[0].upper()

icon_info = entity_icon_url(e)
d['entity_icon_url'] = entity_icon_url(e)

if load_icon:
discojson_load_icon(d, fallback_to_favicon=fallback_to_favicon)

keywords = filter_lang(e.iter("{%s}Keywords" % NS['mdui']), langs=langs)
if keywords is not None:
lst = [elt.text for elt in keywords]
Expand All @@ -716,6 +739,12 @@ def discojson(e, langs=None, fallback_to_favicon=False):
return d


def discojson_t(t):
    """Build the discovery-JSON entries for every entity in ``t``.

    Works in two passes: each entity yielded by ``iter_entities(t)`` is first
    turned into an entry by ``discojson`` with icon loading switched off, then
    ``discojson_load_icon`` is mapped over those entries on a thread pool
    sized by ``config.worker_pool_size``.

    :param t: the tree passed to ``iter_entities``
    :return: list of entries, in the order ``iter_entities`` yielded them
    """
    entries = []
    for entity in iter_entities(t):
        entries.append(discojson(entity, load_icon=False))

    with futures.ThreadPoolExecutor(max_workers=config.worker_pool_size) as pool:
        loaded = pool.map(discojson_load_icon, entries)
        return list(loaded)


def sha1_id(e):
return hash_id(e, 'sha1')

Expand Down

0 comments on commit b3a8e65

Please sign in to comment.