In [3]:

# class ScholInfra:
#     """
#     methods for accessing a specific Scholarly Infrastructure API
#     """

#     def __init__ (self, parent=None, name="Generic", api_url=None, cgi_url=None):
#         self.parent = parent
#         self.name = name
#         self.api_url = api_url
#         self.cgi_url = cgi_url

#         self.api_obj = None
#         self.elapsed_time = 0.0


In [2]:
class ScholInfra_search:
    """
    Search settings shared by the helpers for one Scholarly Infrastructure API.

    Attributes set by the constructor:
        api: identifier of the API, stored exactly as passed in
        limit: result limit, stored exactly as passed in
        exact_match: always initialised to False
    """

    def __init__(self, api, limit):
        self.api = api
        self.exact_match = False
        self.limit = limit

    def get_xml_node_value(self, root, name):
        """
        Return the stripped text of the first node called `name` under `root`.

        `root` must expose `find(name)`, and the object it returns must expose
        a `text` attribute (both xml.etree.ElementTree elements and
        BeautifulSoup-style tags fit this).

        Returns None when the node does not exist or carries no text.
        """
        node = root.find(name)

        # Compare against None explicitly: truth-testing an ElementTree element
        # reports whether it has children, so a leaf element that holds text
        # would otherwise be mistaken for a missing node.
        if node is None:
            return None

        text = node.text

        # ElementTree reports the text of an empty element as None, so guard
        # against that before measuring the length.
        if text is None or len(text) < 1:
            return None

        return text.strip()
    
    
class ScholInfra_parse (ScholInfra_search):
    """
    Turn one raw search result into an ordered metadata mapping.

    Each `parse_*` method handles the result layout of one API;
    `parse_result` picks the right one based on `self.api`.
    """

    def parse_oa (self, result):
        """
        parse metadata from XML returned from the openaire full text search API query

        `result` must expose `find` / `find_all` in the BeautifulSoup style
        (attribute filters passed as a dict, attributes read by subscripting).

        Returns an OrderedDict with "title", optionally "journal", "url",
        "authors" (list of creator texts) and "open" (bool), or None when the
        result has no "instancetype" node or its class name is neither
        "Other literature type" nor "Article".
        """
        instance_type = result.find("instancetype")

        # A result without an instance type cannot be classified; treat it
        # like any other unsupported type instead of failing on None[...].
        if instance_type is None:
            return None

        if instance_type["classname"] not in ["Other literature type", "Article"]:
            return None

        meta = OrderedDict()
        meta["title"] = self.get_xml_node_value(result, "title")

        # look the journal up once; the key is only added when a value exists
        journal = self.get_xml_node_value(result, "journal")
        if journal:
            meta["journal"] = journal

        meta["url"] = self.get_xml_node_value(result, "url")
        meta["authors"] = [a.text for a in result.find_all("creator")]
        meta["open"] = len(result.find_all("bestaccessright", {"classid": "OPEN"})) > 0
        return meta


    def parse_dimensions(self, result):
        """
        parse metadata from one subscriptable Dimensions search result

        Returns an OrderedDict with "title" and, when present in the result,
        "journal", "doi" and "authors" (list of "last_name, first_name"
        strings), or None when result["type"] is neither "article" nor
        "preprint".

        Raises KeyError when "type" or "title" is missing (only the optional
        fields are treated as best-effort).
        """
        if result["type"] not in ["article", "preprint"]:
            return None

        meta = OrderedDict()
        meta["title"] = result["title"]

        # Optional fields: a missing key or a None where a mapping was
        # expected simply leaves the field out of the metadata.
        try:
            meta["journal"] = result["journal"]["title"]
        except (LookupError, TypeError):
            pass

        try:
            meta["doi"] = result["doi"]
        except (LookupError, TypeError):
            pass

        try:
            meta["authors"] = [
                b["last_name"] + ", " + b["first_name"] for b in result["authors"]
            ]
        except (LookupError, TypeError):
            pass

        return meta


    def parse_pubmed(self, result):
        """
        parse metadata from one PubMed record given as nested mappings

        Reads result["MedlineCitation"]["Article"] and returns an OrderedDict
        with "title", "journal", "authors" and, when an ELocationID of type
        "doi" exists, "doi".

        "authors" is a list of "LastName, ForeName" strings; a record with a
        single author (a mapping rather than a list) yields a one-element
        list in the same format. When the author data is missing or
        incomplete, "authors" is set to '' as before.

        Raises KeyError when the article, its title or its journal title is
        missing.
        """
        article_meta = result["MedlineCitation"]["Article"]
        meta = OrderedDict()
        meta["title"] = article_meta["ArticleTitle"]
        meta["journal"] = article_meta["Journal"]["Title"]

        try:
            authors = article_meta["AuthorList"]["Author"]

            # a single author arrives as one mapping instead of a list
            if isinstance(authors, dict):
                authors = [authors]

            if isinstance(authors, list):
                meta["authors"] = [
                    a["LastName"] + ", " + a["ForeName"] for a in authors
                ]
        except (LookupError, TypeError):
            meta["authors"] = ''

        try:
            pid_list = article_meta["ELocationID"]

            # a single identifier arrives as one mapping instead of a list
            if isinstance(pid_list, dict):
                pid_list = [pid_list]

            if isinstance(pid_list, list):
                doi = next(
                    (d["#text"] for d in pid_list if d["@EIdType"] == "doi"),
                    None,
                )
                if doi is not None:
                    meta["doi"] = doi
        except (LookupError, TypeError):
            # the DOI is optional; leave it out when the identifiers are
            # missing or malformed
            pass

        return meta


    def parse_result(self,result):
        """
        parse `result` with the parser that matches `self.api`

        Recognised values of `self.api` are "dimensions", "openaire" and
        'pubmed'. Returns the parsed metadata, or None when the API is not
        recognised or the parser produced nothing.
        """
        parsers = {
            "dimensions": self.parse_dimensions,
            "openaire": self.parse_oa,
            "pubmed": self.parse_pubmed,
        }
        parser = parsers.get(self.api)

        if parser is None:
            return None

        meta = parser(result)

        if meta:
            return meta
        else:
            return None


In [None]:
 for api in [schol.openaire, schol.europepmc, schol.dimensions]:
        try:
            meta = api.title_search(title)
        except Exception:
            # debug this as an edge case
            traceback.print_exc()
            print(title)
            print(api.name)
            continue

        if meta and len(meta) > 0:
            title_match = True
            meta = dict(meta)
            pub[api.name] = meta

    # send this publication along into the workflow stream
    return title_match