In [1]:
import bs4
import taxon
import gui_widgets
from wikidataintegrator import wdi_core
import bibtexparser
import requests
import pandas as pd
import json
import ipywidgets as widgets
from IPython.display import IFrame, clear_output,  HTML, Image
from ipywidgets import interact, interactive, fixed, interact_manual

In [2]:
def fetch_missing_wikipedia_articles(url):
    """Collect observations whose taxon has no Wikipedia link on iNaturalist
    and whose name is also confirmed by ``verify_wikidata``.

    Parameters
    ----------
    url: str
        Address that is fetched with ``requests.get``; the response body is
        parsed as JSON and must contain a ``"results"`` list of observations.

    Returns
    -------
    list of dict
        One dict per retained observation with the keys ``inat_obs_id``,
        ``inat_taxon_id`` and ``taxon_name``, in the order of the response.
    """
    payload = json.loads(requests.get(url).text)

    # Keep only observations whose taxon record carries no wikipedia_url.
    candidates = [
        {
            "inat_obs_id": observation["id"],
            "inat_taxon_id": observation["taxon"]["id"],
            "taxon_name": observation["taxon"]["name"],
        }
        for observation in payload["results"]
        if observation["taxon"]["wikipedia_url"] is None
    ]

    # Each distinct name is sent to Wikidata once, in first-seen order.
    unique_names = list(dict.fromkeys(entry["taxon_name"] for entry in candidates))
    confirmed_names = verify_wikidata(unique_names)

    return [entry for entry in candidates if entry["taxon_name"] in confirmed_names]

def verify_wikidata(taxon_names):
    """Return the taxon names for which Wikidata counts zero English Wikipedia articles.

    The names are injected as a SPARQL ``VALUES`` list; the query groups by
    name and counts matching items and articles (directly, and via the
    ``P566`` relation in both directions). A name is returned only when it
    appears in the result bindings with ``article_count`` equal to ``'0'``.

    Parameters
    ----------
    taxon_names: iterable of str
        Taxon names to look up via ``wdt:P225``.

    Returns
    -------
    list of str
        The subset of names reported with an article count of zero. An empty
        input returns ``[]`` without contacting the endpoint.

    Raises
    ------
    requests.HTTPError
        If the query service answers with an error status.
    """
    names = list(taxon_names)
    if not names:
        # An empty VALUES list can never produce bindings; skip the round trip.
        return []

    def _sparql_literal(name):
        # Escape backslashes and double quotes so a name cannot terminate the literal.
        return '"{0}"'.format(str(name).replace("\\", "\\\\").replace('"', '\\"'))

    query = """
         SELECT DISTINCT ?taxon_name (COUNT(?item) AS ?item_count) (COUNT(?article) AS ?article_count)   WHERE {{
                    VALUES ?taxon_name {{{names}}} 
            {{?item wdt:P225 ?taxon_name .}}
           UNION
           {{?item wdt:P225 ?taxon_name .
            ?article schema:about ?item ;
                     schema:isPartOf 	<https://en.wikipedia.org/> .}}
             UNION 
           {{?basionym wdt:P566 ?item ;
                      wdt:P225 ?taxon_name .
           ?article schema:about ?item ;
                    schema:isPartOf 	<https://en.wikipedia.org/> .}}
           UNION
           {{?basionym wdt:P566 ?item .
            ?item wdt:P225 ?taxon_name .
           ?article schema:about ?basionym ;
                    schema:isPartOf 	<https://en.wikipedia.org/> .}}
  }} GROUP BY ?taxon_name  
        """.format(names=" ".join(_sparql_literal(w) for w in names))

    # Pass the query through `params` so requests percent-encodes it; the raw
    # SPARQL text contains characters (&, #, +, quotes) that would otherwise
    # corrupt the URL.
    response = requests.get(
        "https://query.wikidata.org/sparql",
        params={"format": "json", "query": query},
    )
    response.raise_for_status()
    results = response.json()

    return [
        result["taxon_name"]["value"]
        for result in results["results"]["bindings"]
        if result["article_count"]["value"] == '0'
    ]

def render_results(photos):
    """Build a widget table of taxa (with photo galleries) from an observation response.

    Observations are grouped by taxon id. The taxon names are passed to
    ``verify_wikidata`` and only taxa whose name is returned by it get a row.
    Each row holds a "WP stub" button (wired to ``get_data``, which is looked
    up as a global when this function runs), the taxon id, the taxon name and
    an HTML gallery of the collected photo URLs.

    Parameters
    ----------
    photos: dict
        Parsed observation response containing a ``"results"`` list; each
        entry provides ``"taxon"`` (with ``"id"`` and ``"name"``) and
        ``"observation_photos"`` (each with ``["photo"]["url"]``).

    Returns
    -------
    ipywidgets.VBox
        One ``HBox`` row per retained taxon.
    """
    # Group photo URLs per taxon id. The photo list always exists, so a taxon
    # whose observations carry no photos renders an empty gallery instead of
    # raising KeyError.
    taxa = dict()
    for observation in photos["results"]:
        taxon_info = observation.get("taxon")
        if not taxon_info:
            # Nothing to group an observation under when it has no taxon record.
            continue
        entry = taxa.setdefault(taxon_info["id"], {"taxon_name": None, "photos": []})
        entry["taxon_name"] = taxon_info["name"]
        entry["photos"].extend(
            item["photo"]["url"] for item in observation.get("observation_photos", [])
        )

    verified = set(verify_wikidata([entry["taxon_name"] for entry in taxa.values()]))

    result_rows = []
    for taxon_id, entry in taxa.items():
        if entry["taxon_name"] not in verified:
            continue
        stub_button = widgets.Button(
            description='WP stub',
            disabled=False,
            button_style='',  # 'success', 'info', 'warning', 'danger' or ''
            tooltip='Click me',
            icon='check',  # (FontAwesome names without the `fa-` prefix)
        )
        # The click handler reads the taxon id back from the button it receives.
        stub_button.taxon_id = str(taxon_id)
        stub_button.on_click(get_data)
        result_rows.append(widgets.HBox([
            stub_button,
            widgets.Label(value="id: {taxon_id}".format(taxon_id=str(taxon_id))),
            widgets.Label(value="name: {taxon_name}".format(taxon_name=str(entry["taxon_name"]))),
            widgets.HTML(gallery(entry["photos"])),
        ]))
    return widgets.VBox(result_rows)
    
def fetch_by_user(username, license):
    """Fetch a user's research-grade observations and display them as a results table.

    Parameters
    ----------
    username: str
        Value sent as the ``user_id`` query parameter.
    license: str
        Value sent as the ``photo_license`` query parameter
        (e.g. ``"cc0,cc-by,cc-by-sa"``).

    Returns
    -------
    Whatever ``display`` returns for the rendered widget.
    """
    # `username` is free text typed into a widget; let requests encode the
    # query string instead of concatenating it into the URL verbatim.
    response = requests.get(
        "https://api.inaturalist.org/v1/observations",
        params={
            "photo_license": license,
            "quality_grade": "research",
            "per_page": 200,
            "user_id": username,
        },
    )
    return display(render_results(json.loads(response.text)))

def fetch_by_taxon(taxon_id, license):
    """Fetch research-grade observations for one taxon and display the results table.

    ``taxon_id`` is converted with ``str`` and sent as ``taxon_id``;
    ``license`` is sent as ``photo_license``. Returns whatever ``display``
    returns for the rendered widget.
    """
    url = "".join([
        "https://api.inaturalist.org/v1/observations?photo_license=",
        license,
        "&taxon_id=",
        str(taxon_id),
        "&quality_grade=research&per_page=200&subview=grid",
    ])
    response = requests.get(url)
    observations = json.loads(response.text)
    rendered = render_results(observations)
    return display(rendered)

def fetch_by_country(country_code, license):
    """Fetch research-grade observations for one place and display the results table.

    ``country_code`` is converted with ``str`` and sent as the ``place_id``
    query parameter; ``license`` is sent as ``photo_license``. Returns
    whatever ``display`` returns for the rendered widget.
    """
    url = "".join([
        "https://api.inaturalist.org/v1/observations?photo_license=",
        license,
        "&place_id=",
        str(country_code),
        "&quality_grade=research&per_page=200&subview=grid",
    ])
    response = requests.get(url)
    observations = json.loads(response.text)
    rendered = render_results(observations)
    return display(rendered)

def search_by_taxon(taxon_str, rank, license):
    """Resolve a taxon name via the iNaturalist autocomplete API and show its observations.

    The first autocomplete match is used; its id is handed to
    ``fetch_by_taxon``, which displays the results table itself.

    Parameters
    ----------
    taxon_str: str
        Free-text query, sent as ``q``.
    rank: str
        Sent as the ``rank`` query parameter.
    license: str
        Forwarded unchanged to ``fetch_by_taxon``.

    Returns
    -------
    None
    """
    # Free-text input: let requests encode the query string.
    response = requests.get(
        "https://api.inaturalist.org/v1/taxa/autocomplete",
        params={"q": taxon_str, "rank": rank},
    )
    matches = json.loads(response.text).get("results") or []
    if not matches:
        # Previously an IndexError; report the empty result instead.
        print("No iNaturalist taxon found for {!r} with rank {!r}".format(taxon_str, rank))
        return
    # fetch_by_taxon already displays the widget; wrapping its None return
    # value in another display() call only rendered a stray "None".
    fetch_by_taxon(matches[0]["id"], license)
    

def search_species_place(place, license):
    """Resolve a place name via the iNaturalist autocomplete API and show its observations.

    The first autocomplete match is used; its id is handed to
    ``fetch_by_country``, which displays the results table itself.

    Parameters
    ----------
    place: str
        Free-text query, sent as ``q`` (converted with ``str``).
    license: str
        Forwarded unchanged to ``fetch_by_country``.

    Returns
    -------
    None
    """
    # Free-text input: let requests encode the query string.
    response = requests.get(
        "https://api.inaturalist.org/v1/places/autocomplete",
        params={"q": str(place)},
    )
    matches = json.loads(response.text).get("results") or []
    if not matches:
        # Previously an IndexError; report the empty result instead.
        print("No iNaturalist place found for {!r}".format(place))
        return
    # fetch_by_country already displays the widget; wrapping its None return
    # value in another display() call only rendered a stray "None".
    fetch_by_country(matches[0]["id"], license)
                              
                              
def _src_from_data(data):
    """Base64 encodes image bytes for inclusion in an HTML img element"""
    img_obj = Image(data=data)
    for bundle in img_obj._repr_mimebundle_():
        for mimetype, b64value in bundle.items():
            if mimetype.startswith('image/'):
                return f'data:{mimetype};base64,{b64value}'

def gallery(images, row_height='auto'):
    """Shows a set of images in a gallery that flexes with the width of the notebook.

    Parameters
    ----------
    images: list of str or bytes
        URLs or bytes of images to display. Bytes are converted to a
        ``data:`` URI with ``_src_from_data``; anything else is used as the
        ``src`` value directly.

    row_height: str
        CSS height value to assign to all images. Set to 'auto' by default to show images
        with their native dimensions. Set to a value like '250px' to make all rows
        in the gallery equal height.

    Returns
    -------
    str
        HTML markup: a flex container with one ``<figure>`` per image.
    """
    # Imported locally so the block does not depend on a new notebook-level import.
    from html import escape

    # Both values end up inside double-quoted HTML attributes; escape them so a
    # quote or angle bracket in a URL cannot break out of the attribute.
    safe_height = escape(str(row_height), quote=True)

    figures = []
    for image in images:
        src = _src_from_data(image) if isinstance(image, bytes) else image
        safe_src = escape(str(src), quote=True)
        figures.append(f'''
            <figure style="margin: 5px !important;">
              <img src="{safe_src}" style="height: {safe_height}">
            </figure>
        ''')
    joined_figures = ''.join(figures)
    return f'''
        <div style="display: flex; flex-flow: row wrap; text-align: center;">
        {joined_figures}
        </div>
    '''
    

In [3]:
# One Output pane per data source; get_data and paste_commons write into these.
tab1, tab2, tab3, tab4, tab5, tab6 = (widgets.Output() for _ in range(6))
tab = widgets.Tab(children=[tab1, tab2, tab3, tab4, tab5, tab6])

# iNaturalistTab = IFrame(src='https://www.inaturalist.org/home', width=1000, height=600)
# Titles are assigned by position, matching the order of `children` above.
for tab_index, tab_title in enumerate([
    'iNaturalist',
    'GBIF',
    '(cc0, cc-by, cc-by-sa) iNaturalist images',
    'BHL',
    'Commons',
    'Wikipedia',
]):
    tab.set_title(tab_index, tab_title)

# Everything below runs with tab1 (an Output widget) as the active output
# context, so the search UI displayed in this block lands in the first tab.
# A `with` statement does not open a new scope: paste_commons, get_data and
# `data` defined inside it are module-level names, which is how
# render_results can reference get_data.
with tab1:
    # Start from an empty pane before the block adds its own output.
    clear_output()
    def paste_commons(commons_file_name):
        """Print the article URL and a generated stub into the sixth tab.

        Reads the module-level ``data`` object (assigned by ``get_data``), so
        it only works after a taxon has been loaded. The given file name is
        passed to ``data.create_wikipedia_stub`` as ``infobox_image`` and is
        returned unchanged.
        """
        with tab6:
            taxon_name = data.inaturalist_data[0]["name"]
            print("https://en.wikipedia.org/wiki/" + taxon_name.replace(" ", "_"))
            print("=========================")
            stub_text = data.create_wikipedia_stub(infobox_image=commons_file_name)
            print(stub_text)
        return commons_file_name
    def get_data(b):
        """Load external data for a taxon and fill the GBIF, image, BHL and Commons tabs.

        Parameters
        ----------
        b:
            Object carrying a ``taxon_id`` string attribute; render_results
            passes the clicked "WP stub" button here via ``on_click``.

        Returns
        -------
        ipywidgets.HTML
            Summary table (photo, names, rank, parent taxon) for the taxon.
            NOTE(review): when this runs as a button ``on_click`` handler the
            return value is presumably discarded, so this summary is never
            shown — confirm where it is meant to be displayed.

        Side effects
        ------------
        Rebinds the module-level ``data`` to the freshly loaded record (read
        later by ``paste_commons``) and replaces the contents of tab2-tab5.
        """
        global data
        data = taxon.external_data(inaturalist_id=b.taxon_id)
        taxon_record = data.inaturalist_data[0]
        parent_record = data.inaturalist_parent_data[0]
        # Space-free form of the name used in BHL, Commons and Wikipedia URLs.
        page_name = taxon_record["name"].replace(" ", "_")

        # ``str.format`` is used instead of ``+`` so a missing (None) field
        # renders as text rather than raising TypeError.
        default_photo = taxon_record.get('default_photo')
        summary_html = "<table><tr><td>"
        if default_photo:
            summary_html += "<img src='{}'><br>{}".format(
                default_photo['medium_url'], default_photo['attribution'])
        summary_html += "</td>"
        summary_html += "<td>"
        summary_html += "stub-type: {}".format(taxon_record["iconic_taxon_name"])
        summary_html += "<br>iNaturalist taxon id: {}".format(taxon_record["id"])
        summary_html += "<br>name: {}".format(taxon_record["name"])
        # The key was previously misspelled ("preferrd_common_name"), so the
        # common name was never shown.
        if "preferred_common_name" in taxon_record.keys():
            summary_html += "<br>common name: {}".format(taxon_record["preferred_common_name"])
        summary_html += "<br>rank: {}".format(taxon_record["rank"])
        summary_html += "<br>parent id: {}".format(parent_record["id"])
        summary_html += "<br>parent name: {}".format(parent_record["name"])
        summary_html += "<br>parent rank: {}".format(parent_record["rank"])
        summary_html += "</td></tr></table>"

        output_widget = widgets.HTML(value=summary_html)

        # --- GBIF tab: key/value dump of data.gbif_data ---
        with tab2:
            clear_output()
            gbif_html = "<table>"
            for key in data.gbif_data.keys():
                gbif_html += "<tr><td>{}</td><td>{}</td></tr>".format(key, data.gbif_data[key])
            gbif_html += "</table>"
            display(widgets.HTML(value=gbif_html))

        # --- Image tab: reusable-license iNaturalist photos, five per table row ---
        with tab3:
            clear_output()
            url = "https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by,cc-by-sa&quality_grade=research&taxon_id="+b.taxon_id
            photos = json.loads(requests.get(url).text)
            photo_count = 0

            # The heading is now closed; it used to swallow the whole table.
            images_html = "<h1>images in iNaturalist with a license allowing reuse in Wikipedia (cc0, cc-by, cc-by-sa)</h1><table><tr>"
            for result in photos["results"]:
                for photo in result["observation_photos"]:
                    photo_count += 1
                    # Swap the "square" thumbnail path segment for the medium rendition.
                    images_html += "<td><img src='"+photo['photo']['url'].replace("square", "medium")+"'></td>"
                    if photo_count % 5 == 0:
                        images_html += "</tr><tr>"
            images_html += "</tr></table>"

            display(HTML(images_html))

        # --- BHL tab: references as a table whose columns are the union of all keys ---
        with tab4:
            clear_output()
            bhlurl = "https://www.biodiversitylibrary.org/name/"+page_name
            print("source: ", bhlurl)

            references = data.bhl_references
            fields = []
            for entry in references:
                for key in entry.keys():
                    if key not in fields:
                        fields.append(key)
            # Build all rows first and construct the frame once instead of
            # growing it row by row; absent keys become None.
            rows = [
                {key: (entry[key] if key in entry.keys() else None) for key in fields}
                for entry in references
            ]
            df = pd.DataFrame(rows, columns=fields, dtype=object)
            display(df)

        # --- Commons tab: link to (or offer to create) the Commons category ---
        with tab5:
            clear_output()
            commons_query = """
                SELECT * WHERE {{?commons schema:about <{taxon}>  ; 
                schema:isPartOf <https://commons.wikimedia.org/> . 
                }}""".format(taxon = data.wikidata["main_rank"].loc[0]["taxon"])
            commons_query_result = wdi_core.WDItemEngine.execute_sparql_query(commons_query, as_dataframe=True)
            if len(commons_query_result) == 0:
                html5 = "<a href = 'https://commons.wikimedia.org/w/index.php?title=Category:"+page_name+"&action=edit'>create commons category</a><br>"
                html5 += "[[Category:"+taxon_record["name"].replace(" ", "|")+"]]"
            else:
                html5 = "<a href = 'https://commons.wikimedia.org/wiki/Category:"+page_name+"' target='_new'>"+page_name+"</a><br>"
            commons_output = widgets.HTML(value=html5)

            data.selected_commons=gui_widgets.interact_manual(paste_commons, commons_file_name="")
            display(commons_output)
        return output_widget
    
    # Nested tab strip inside the first tab: one pane per search mode.
    tab1tab1, tab1tab2, tab1tab3 = widgets.Output(), widgets.Output(), widgets.Output()
    tab1tab = widgets.Tab(children=[tab1tab1, tab1tab2, tab1tab3])
    for sub_index, sub_title in enumerate(
            ('search by taxon', 'search by user', 'search by country')):
        tab1tab.set_title(sub_index, sub_title)

    # The same license choices are offered by every search form.
    license_choices = ["cc0,cc-by,cc-by-sa", "cc0", "cc-by", "cc-by-sa"]

    with tab1tab1:
        interact_manual(search_by_taxon, taxon_str='', rank=["genus", "family", "order"], license=list(license_choices))
    with tab1tab2:
        interact_manual(fetch_by_user, username='', license=list(license_choices))
    with tab1tab3:
        interact_manual(search_species_place, place='', license=list(license_choices))

    display(tab1tab)
    # No taxon loaded yet; get_data rebinds this module-level name.
    data = None
    #taxon_window = gui_widgets.interact_manual(get_data, taxon_id="")
    
    

# Render the outer Tab container (the six Output panes created earlier in this cell).
display(tab)

Tab(children=(Output(), Output(), Output(), Output(), Output(), Output()), _titles={'0': 'iNaturalist', '1': '…