In [30]:
import pandas as pd
import gzip
import json
import matplotlib.pyplot as plt
import os
import seaborn as sns
import langid

In [31]:
# Read all tables and store them in dfs list
dfs = {}
tables_path = "Dataset/val/tables/"
files = os.listdir(tables_path)
for file in files:
    df = pd.read_csv(tables_path + file)
    dfs[file] = df

In [32]:
new_column_names = ['Table Name', 'Column Index', 'Row Index', 'Retrieved Annotation']
df_output = pd.read_csv("Dataset/output/cea annotation/cea_biodiv_first_experiment.csv", header=None, names=new_column_names)
df_output.head()

Unnamed: 0,Table Name,Column Index,Row Index,Retrieved Annotation
0,008851b16aa04124b3a9195676604f35,0,0,https://www.wikidata.org/wiki/Q7432
1,008851b16aa04124b3a9195676604f35,0,1,https://www.wikidata.org/wiki/Q2093287
2,008851b16aa04124b3a9195676604f35,0,2,https://www.wikidata.org/wiki/Q8261255
3,008851b16aa04124b3a9195676604f35,0,3,https://www.wikidata.org/wiki/Q2942992
4,008851b16aa04124b3a9195676604f35,0,4,https://www.wikidata.org/wiki/Q1105092


In [33]:
# Retrieve all rows with NaN values in a specific column
nan_rows = df_output[df_output["Retrieved Annotation"].isnull()]

# Print the resulting dataframe
print(nan_rows)

                            Table Name  Column Index  Row Index  \
58    008851b16aa04124b3a9195676604f35             2         89   
59    008851b16aa04124b3a9195676604f35             2         91   
75    008851b16aa04124b3a9195676604f35            10          0   
76    008851b16aa04124b3a9195676604f35            11          0   
86    008851b16aa04124b3a9195676604f35            25          1   
...                                ...           ...        ...   
2004  e749786aff714981a5a7da3da0789128             1         95   
2005  e749786aff714981a5a7da3da0789128             1         96   
2006  e749786aff714981a5a7da3da0789128             1         97   
2007  e749786aff714981a5a7da3da0789128             1         98   
2008  e749786aff714981a5a7da3da0789128             1         99   

     Retrieved Annotation  
58                    NaN  
59                    NaN  
75                    NaN  
76                    NaN  
86                    NaN  
...                   ...  

In [34]:
# Mapping with the value
new_column_names = ['Table Name', 'Column Index', 'Row Index', 'Target Annotation']
cea_gt = pd.read_csv(f"Dataset/val/gt/CEA_biodivtab_selected_tables_gt.csv", header=None, names=new_column_names)
values = {}
for index, row in nan_rows.iterrows():
    # Load the dataframe
    df = pd.read_csv(f"Dataset/val/tables/{row[0]}.csv", header=None)
    annotation = cea_gt[(cea_gt.iloc[:,0] == row[0]) & (cea_gt.iloc[:,1] == row[1]) & (cea_gt.iloc[:,2] == row[2])].iloc[:,3].values[0]
    cell_value = df.iloc[row[2], row[1]]
    values[cell_value] = annotation

In [35]:
# join prediction and target dataframes
merged_df = pd.merge(cea_gt, df_output, on=['Table Name', 'Column Index', 'Row Index'])

In [36]:
# Add Cell Content
def retrieve_cell_value(row):
    df = pd.read_csv(f"Dataset/val/tables/{row['Table Name']}.csv", header=None)
    cell_value = df.iloc[row["Row Index"], row["Column Index"]]
    return cell_value
    
merged_df["Cell Value"] = merged_df.apply(lambda row: retrieve_cell_value(row), axis=1)

In [37]:
# Correct retrieval
nan_values = merged_df[(merged_df["Target Annotation"] != merged_df["Retrieved Annotation"]) & 
                       (merged_df["Retrieved Annotation"].isna())]

In [38]:
# Count number of nan_values strings with abbreviations
def detect_abbreviation_in_string(row):
    if row["Cell Value"].count(".") == 1 and row["Cell Value"].count("*") == 0 and row["Cell Value"][0].isupper():
        return True
    return False

# Apply the lambda function to identify True values
mask = nan_values[["Cell Value", "Target Annotation"]].apply(lambda row: detect_abbreviation_in_string(row), axis=1)

# Use the mask to filter and store the values that returned True
abbreviation_values = nan_values.loc[mask, :]
abbreviation_values

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
997,34169c088ee848e4866f42e87b4ccbc2,5,1,https://www.wikidata.org/wiki/Q15340551,,D.glaucifolia
998,34169c088ee848e4866f42e87b4ccbc2,5,5,https://www.wikidata.org/wiki/Q12838227,,C.glauca
999,34169c088ee848e4866f42e87b4ccbc2,5,11,https://www.wikidata.org/wiki/Q15490427,,L.glaber
1000,34169c088ee848e4866f42e87b4ccbc2,5,20,https://www.wikidata.org/wiki/Q847209,,Q.serrata
1001,34169c088ee848e4866f42e87b4ccbc2,5,27,https://www.wikidata.org/wiki/Q15247814,,C.sclerophylla
...,...,...,...,...,...,...
1431,8249f8533f764f6dbd195a872c18fd6d,0,86,https://www.wikidata.org/wiki/Q610177,,C.cognatus
1432,8249f8533f764f6dbd195a872c18fd6d,0,87,https://www.wikidata.org/wiki/Q3766704,,C.spiloptera
1433,8249f8533f764f6dbd195a872c18fd6d,0,88,https://www.wikidata.org/wiki/Q606436,,E.olmstedi
1434,8249f8533f764f6dbd195a872c18fd6d,0,90,https://www.wikidata.org/wiki/Q6406968,,H.roanokense


In [39]:
abbreviations = abbreviation_values["Cell Value"].values.tolist()
abbreviations

['D.glaucifolia',
 'C.glauca',
 'L.glaber',
 'Q.serrata',
 'C.sclerophylla',
 'L.formosana',
 'R.chinensis',
 'C.fargesii',
 'Ca. eyrei',
 'Ch. axillaris',
 'Ac. davidii',
 'Li. formosana',
 'Ci. camphora',
 'Tr. cochinchinensis',
 'Sc. superba',
 'Ca. henryi',
 'Cy. glauca',
 'Di. japonica',
 'Da. oldhamii',
 'Di. myricoides',
 'Pi. massoniana',
 'Ca. fargesii',
 'Ma. thunbergii',
 'It. chinensis',
 'Ko. bipinnata',
 'Cy. myrsinifolia',
 'Ca. carlesii',
 'Qu. serrata',
 'Rh. indica',
 'Sa. saponaria',
 'Ny. sinensis',
 'Qu. fabri',
 'Qu. acutissima',
 'Li. glaber',
 'Rh. chinensis',
 'El. japonicus',
 'Al. fortunei',
 'El. chinensis',
 'Ma. fordiana',
 'Tr. sebifera',
 'Me. flexuosa',
 'Ma. grijsii',
 'Me. azedarach',
 'Qu. phillyreoides',
 'Cu. lanceolata',
 'Ca. sclerophylla',
 'A.rupestris',
 'C.anomalum',
 'C.commersonii',
 'C.erythrogaster',
 'C.funduloides',
 'E.caeruleum',
 'N.buccatus',
 'P.notatus',
 'R.obtusus',
 'S.atromaculatus',
 'E.blennioides',
 'E.flabellare',
 'C.cumb

In [53]:
import requests
import xml.etree.ElementTree as ET


def make_get_request(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an HTTPError for bad responses

        # Parse the XML content
        xml_content = response.content
        root = ET.fromstring(xml_content)

        # Now 'root' contains the XML content, and you can navigate and extract information as needed.

        return root

    except requests.exceptions.RequestException as e:
        print(f"Error during GET request: {e}")
        return None


def retrieve_full_name(abbreviation):
    print(abbreviation)
    abb = abbreviation.split(".")[0]
    word = abbreviation.split(".")[-1].strip()
    
    # Example URL
    url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=taxonomy&term={word}[Name%20Tokens]&retmax=1000"  

    # Make the GET request
    result_xml = make_get_request(url)

    # Example: Extract information from the XML response
    if result_xml is not None:
        # Assume the XML structure, adapt based on your actual XML content
        ids = []
        for child in result_xml:
            if child.tag == "IdList":
                for ids_list_child in child:
                    ids.append(ids_list_child.text)                
                    
    possible_full_strings = []
    for id in ids:
        url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id={id}&version=2.0" 
        while True:
            try:
                result_xml = make_get_request(url)
                string = result_xml.find(".//ScientificName").text
                if (string.split(" ")[0][0] == abb[0] and len(string.split(" ")) == 2):
                    possible_full_strings.append(string) 
                break
            except:
                continue
    return possible_full_strings


In [54]:
full_strings = {}
for abbreviation in abbreviations:
    possible_full_strings = retrieve_full_name(abbreviation)
    full_strings[abbreviation] = possible_full_strings

D.glaucifolia
C.glauca
Error during GET request: 502 Server Error: Bad Gateway for url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=2789716&version=2.0
L.glaber
Q.serrata
C.sclerophylla
L.formosana
Error during GET request: 502 Server Error: Bad Gateway for url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=1559352&version=2.0
Error during GET request: 502 Server Error: Bad Gateway for url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=1008966&version=2.0
Error during GET request: 502 Server Error: Bad Gateway for url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=1008966&version=2.0
Error during GET request: 502 Server Error: Bad Gateway for url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=1008966&version=2.0
Error during GET request: 502 Server Error: Bad Gateway for url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=tax

From cffi callback <function _verify_callback at 0x000001DAAEB136A8>:
Traceback (most recent call last):
  File "C:\Users\lenovo\Anaconda3\lib\site-packages\OpenSSL\SSL.py", line 311, in wrapper
    @wraps(callback)
KeyboardInterrupt


Error during GET request: HTTPSConnectionPool(host='eutils.ncbi.nlm.nih.gov', port=443): Max retries exceeded with url: /entrez/eutils/esummary.fcgi?db=taxonomy&id=584955&version=2.0 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))
Ma. grijsii
Me. azedarach


From cffi callback <function _verify_callback at 0x000001DAAEB13400>:
Traceback (most recent call last):
  File "C:\Users\lenovo\Anaconda3\lib\site-packages\OpenSSL\SSL.py", line 311, in wrapper
    @wraps(callback)
KeyboardInterrupt


Error during GET request: HTTPSConnectionPool(host='eutils.ncbi.nlm.nih.gov', port=443): Max retries exceeded with url: /entrez/eutils/esummary.fcgi?db=taxonomy&id=1848402&version=2.0 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))
Qu. phillyreoides
Cu. lanceolata


From cffi callback <function _verify_callback at 0x000001DAAEB13D08>:
Traceback (most recent call last):
  File "C:\Users\lenovo\Anaconda3\lib\site-packages\OpenSSL\SSL.py", line 311, in wrapper
    @wraps(callback)
KeyboardInterrupt


Error during GET request: HTTPSConnectionPool(host='eutils.ncbi.nlm.nih.gov', port=443): Max retries exceeded with url: /entrez/eutils/esummary.fcgi?db=taxonomy&id=3089904&version=2.0 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))


From cffi callback <function _verify_callback at 0x000001DAAEB1EA60>:
Traceback (most recent call last):
  File "C:\Users\lenovo\Anaconda3\lib\site-packages\OpenSSL\SSL.py", line 311, in wrapper
    @wraps(callback)
KeyboardInterrupt


Error during GET request: HTTPSConnectionPool(host='eutils.ncbi.nlm.nih.gov', port=443): Max retries exceeded with url: /entrez/eutils/esummary.fcgi?db=taxonomy&id=2488027&version=2.0 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))


From cffi callback <function _verify_callback at 0x000001DAAEB13158>:
Traceback (most recent call last):
  File "C:\Users\lenovo\Anaconda3\lib\site-packages\OpenSSL\SSL.py", line 311, in wrapper
    @wraps(callback)
KeyboardInterrupt


Error during GET request: HTTPSConnectionPool(host='eutils.ncbi.nlm.nih.gov', port=443): Max retries exceeded with url: /entrez/eutils/esummary.fcgi?db=taxonomy&id=659965&version=2.0 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))
Ca. sclerophylla
A.rupestris
C.anomalum
C.commersonii
C.erythrogaster
C.funduloides
E.caeruleum
N.buccatus
P.notatus
R.obtusus
S.atromaculatus
E.blennioides
E.flabellare
C.cumberlandensis
E.kennicotti
E.nigrum
L.auritus
L.macrochirus
R.atratulus
M.punctulatus
M.erythrurum
N.leuciodus
N.photogenis
N.miurus
L.aepyptera
L.megalotis
L.chrysocephalus
N.rubellus
P.caprodes
S.vitreus
A.natalis
N.micropogon
E.zonale
I.bubalus
L.cyanellus
C.bairdii
C.carpio
E.niger
R.cataractae
L.cornutus
S.trutta
S.corporalis
C.cognatus
C.spiloptera
E.olmstedi
H.roanokense
S.fontinalis


In [55]:
full_strings

{'D.glaucifolia': ['Diospyros glaucifolia'],
 'C.glauca': ['Causonis glauca',
  'Cyanosporus glauca',
  'Caloncoba glauca',
  'Clematis glauca',
  'Cheilanthes glauca',
  'Caeruleuptychia glauca',
  'Cycas glauca',
  'Chlorociboria glauca',
  'Chusquea glauca',
  'Cassine glauca',
  'Crotalaria glauca',
  'Choleva glauca',
  'Callitris glauca',
  'Carinavalva glauca',
  'Cliffortia glauca',
  'Cyathea glauca',
  'Cardamine glauca',
  'Cleistopholis glauca',
  'Calceolaria glauca',
  'Chrysiptera glauca',
  'Cheniella glauca',
  'Cladonia glauca',
  'Canna glauca',
  'Chrysochlamys glauca',
  'Cyrtostachys glauca',
  'Cissus repens',
  'Craspedia glauca',
  'Citrus glauca',
  'Cyathodes glauca',
  'Centropodia glauca',
  'Cenchrus americanus',
  'Casuarina glauca',
  'Capnoides sempervirens'],
 'L.glaber': ['Leiaster glaber', 'Lotus tenuis', 'Lithocarpus glaber'],
 'Q.serrata': ['Quercus serrata'],
 'C.sclerophylla': ['Calamagrostis sclerophylla',
  'Campanula sclerophylla',
  'Cousinia

In [56]:
# Save the dictionary to a Pickle file
import pickle
with open('abbreviation_mapping.pkl', 'wb') as pickle_file:
    pickle.dump(full_strings, pickle_file)

In [40]:
# Load the dictionary from the Pickle file
import pickle

with open('abbreviation_mapping.pkl', 'rb') as pickle_file:
    loaded_data = pickle.load(pickle_file)
loaded_data

{'D.glaucifolia': ['Diospyros glaucifolia'],
 'C.glauca': ['Causonis glauca',
  'Cyanosporus glauca',
  'Caloncoba glauca',
  'Clematis glauca',
  'Cheilanthes glauca',
  'Caeruleuptychia glauca',
  'Cycas glauca',
  'Chlorociboria glauca',
  'Chusquea glauca',
  'Cassine glauca',
  'Crotalaria glauca',
  'Choleva glauca',
  'Callitris glauca',
  'Carinavalva glauca',
  'Cliffortia glauca',
  'Cyathea glauca',
  'Cardamine glauca',
  'Cleistopholis glauca',
  'Calceolaria glauca',
  'Chrysiptera glauca',
  'Cheniella glauca',
  'Cladonia glauca',
  'Canna glauca',
  'Chrysochlamys glauca',
  'Cyrtostachys glauca',
  'Cissus repens',
  'Craspedia glauca',
  'Citrus glauca',
  'Cyathodes glauca',
  'Centropodia glauca',
  'Cenchrus americanus',
  'Casuarina glauca',
  'Capnoides sempervirens'],
 'L.glaber': ['Leiaster glaber', 'Lotus tenuis', 'Lithocarpus glaber'],
 'Q.serrata': ['Quercus serrata'],
 'C.sclerophylla': ['Calamagrostis sclerophylla',
  'Campanula sclerophylla',
  'Cousinia

In [41]:
from SPARQLWrapper import SPARQLWrapper, JSON
import time

def annotate_cell_with_wikidata(full_name, abbreviation):
    abbreviation = abbreviation.split(".")[0][0] + ". " + abbreviation.split(".")[1].strip()
    
    sparql_endpoint_url = "https://query.wikidata.org/sparql"

    # Step 2: Construct the SPARQL query
    query = """
        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "%s" @en. 
          FILTER (CONTAINS(?propertyValue, "%s"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    """ % (full_name, abbreviation)
    
    print(query)


    claims = None

    while claims == None:
        try:
            sparql = SPARQLWrapper(sparql_endpoint_url, agent='example-UA (https://example.com/; mail@example.com)')
            sparql.setQuery(query)
            sparql.setReturnFormat(JSON)
            claims = sparql.query().convert()
            if 'results' not in claims:
                claims = None
        except:
            time.sleep(10)
            continue
    if (len(claims["results"]["bindings"]) > 0):
        return claims["results"]["bindings"][0]["entity"]["value"].replace("http", "https").replace("entity", "wiki")
    return None

In [42]:
retrieved_annotations = {}
for abbreviation in loaded_data.keys():
    if (abbreviation not in retrieved_annotations.keys()):
        retrieved_annotations[abbreviation] = []
    for full_name in loaded_data[abbreviation]:        
        retrieved_annotation = annotate_cell_with_wikidata(full_name, abbreviation)
        print(retrieved_annotation)
        if (retrieved_annotation != None):
            retrieved_annotations[abbreviation].append(retrieved_annotation)
print(retrieved_annotations)
        


        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Diospyros glaucifolia" @en. 
          FILTER (CONTAINS(?propertyValue, "D. glaucifolia"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15245560

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Causonis glauca" @en. 
          FILTER (CONTAINS(?propertyValue, "C. glauca"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cyanosporus glauc

https://www.wikidata.org/wiki/Q1043658

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cheniella glauca" @en. 
          FILTER (CONTAINS(?propertyValue, "C. glauca"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q42405540

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cladonia glauca" @en. 
          FILTER (CONTAINS(?propertyValue, "C. glauca"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q3678887

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 

https://www.wikidata.org/wiki/Q15247814

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cryptocarya sclerophylla" @en. 
          FILTER (CONTAINS(?propertyValue, "C. sclerophylla"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15607849

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Lichtwardtia formosana" @en. 
          FILTER (CONTAINS(?propertyValue, "L. formosana"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q110987123

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
 

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Reticulitermes chinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "R. chinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Rosa chinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "R. chinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q344900

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Corydalis f

https://www.wikidata.org/wiki/Q15595855

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Crotalaria axillaris" @en. 
          FILTER (CONTAINS(?propertyValue, "C. axillaris"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15445350

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Coptosperma supra-axillare" @en. 
          FILTER (CONTAINS(?propertyValue, "C. axillaris"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyVal

https://www.wikidata.org/wiki/Q24850843

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Lychnuris formosana" @en. 
          FILTER (CONTAINS(?propertyValue, "L. formosana"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Liparis formosana" @en. 
          FILTER (CONTAINS(?propertyValue, "L. formosana"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q10956185

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
      

https://www.wikidata.org/wiki/Q10664695

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Schima superba" @en. 
          FILTER (CONTAINS(?propertyValue, "S. superba"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q11093140

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Clerodendrum henryi" @en. 
          FILTER (CONTAINS(?propertyValue, "C. henryi"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q10907209

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P

https://www.wikidata.org/wiki/Q15610870

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Caeruleuptychia glauca" @en. 
          FILTER (CONTAINS(?propertyValue, "C. glauca"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cycas glauca" @en. 
          FILTER (CONTAINS(?propertyValue, "C. glauca"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q10262621

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
              

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Craspedia glauca" @en. 
          FILTER (CONTAINS(?propertyValue, "C. glauca"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15563327

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Citrus glauca" @en. 
          FILTER (CONTAINS(?propertyValue, "C. glauca"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q50839729

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  r

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Diophrys japonica" @en. 
          FILTER (CONTAINS(?propertyValue, "D. japonica"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q121304941

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Desmarestia japonica" @en. 
          FILTER (CONTAINS(?propertyValue, "D. japonica"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q29533011

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
      

https://www.wikidata.org/wiki/Q11054923

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Clethra fargesii" @en. 
          FILTER (CONTAINS(?propertyValue, "C. fargesii"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q10929711

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cypripedium fargesii" @en. 
          FILTER (CONTAINS(?propertyValue, "C. fargesii"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q6768588

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  

https://www.wikidata.org/wiki/Q2902122

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Castanopsis carlesii" @en. 
          FILTER (CONTAINS(?propertyValue, "C. carlesii"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15245858

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Quercus serrata" @en. 
          FILTER (CONTAINS(?propertyValue, "Q. serrata"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q847209

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Saponaria pumilio" @en. 
          FILTER (CONTAINS(?propertyValue, "S. saponaria"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Saponaria zapaterii" @en. 
          FILTER (CONTAINS(?propertyValue, "S. saponaria"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Saponaria subrosularis" @en. 
          FILTER

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Saponaria atocioides" @en. 
          FILTER (CONTAINS(?propertyValue, "S. saponaria"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Saponaria lutea" @en. 
          FILTER (CONTAINS(?propertyValue, "S. saponaria"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Saponaria glutinosa" @en. 
          FILTER (CO

https://www.wikidata.org/wiki/Q301071

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Neolycaena sinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "N. sinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Nyssa sinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "N. sinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q7071277

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
              

https://www.wikidata.org/wiki/Q163116

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Rhabdothamnopsis sinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "R. chinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Reticulitermes chinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "R. chinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Epiactis japonica" @en. 
          FILTER (CONTAINS(?propertyValue, "E. japonicus"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Elenchus japonicus" @en. 
          FILTER (CONTAINS(?propertyValue, "E. japonicus"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q110532804

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Engraulis j

https://www.wikidata.org/wiki/Q2453984

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Eremurus chinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "E. chinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15224985

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Eothenomys chinensis" @en. 
          FILTER (CONTAINS(?propertyValue, "E. chinensis"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
     

https://www.wikidata.org/wiki/Q1875658

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Mauritia flexuosa" @en. 
          FILTER (CONTAINS(?propertyValue, "M. flexuosa"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q1016320

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Machilus grijsii" @en. 
          FILTER (CONTAINS(?propertyValue, "M. grijsii"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q11178164

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cakile lanceolata" @en. 
          FILTER (CONTAINS(?propertyValue, "C. lanceolata"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15537974

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cliffortia lanceolata" @en. 
          FILTER (CONTAINS(?propertyValue, "C. lanceolata"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q17246207

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
  

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cuphea lanceolata" @en. 
          FILTER (CONTAINS(?propertyValue, "C. lanceolata"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q5194464

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Calamagrostis sclerophylla" @en. 
          FILTER (CONTAINS(?propertyValue, "C. sclerophylla"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15484564

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyVal

https://www.wikidata.org/wiki/Q50987331

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Anthyllis rupestris" @en. 
          FILTER (CONTAINS(?propertyValue, "A. rupestris"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15524619

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Albuca rupestris" @en. 
          FILTER (CONTAINS(?propertyValue, "A. rupestris"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q15517696

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                

https://www.wikidata.org/wiki/Q55839556

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cephalorhynchus commersonii" @en. 
          FILTER (CONTAINS(?propertyValue, "C. commersonii"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Cylindrocline commersonii" @en. 
          FILTER (CONTAINS(?propertyValue, "C. commersonii"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q5199311

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?pr

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Pissodes notatus" @en. 
          FILTER (CONTAINS(?propertyValue, "P. notatus"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q4425163

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Pimephales notatus" @en. 
          FILTER (CONTAINS(?propertyValue, "P. notatus"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Porichthys notatus

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Lethrinops auritus" @en. 
          FILTER (CONTAINS(?propertyValue, "L. auritus"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Limnochromis auritus" @en. 
          FILTER (CONTAINS(?propertyValue, "L. auritus"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Lepomis macrochirus" @en. 
          FILTER (CON

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Percina caprodes" @en. 
          FILTER (CONTAINS(?propertyValue, "P. caprodes"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q947772

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Syrphoctonus vitreus" @en. 
          FILTER (CONTAINS(?propertyValue, "S. vitreus"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q14515879

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
           

https://www.wikidata.org/wiki/Q2000540

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Epyris niger" @en. 
          FILTER (CONTAINS(?propertyValue, "E. niger"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Enoplochiton niger" @en. 
          FILTER (CONTAINS(?propertyValue, "E. niger"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
https://www.wikidata.org/wiki/Q3801503

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs

None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Salmo labrax" @en. 
          FILTER (CONTAINS(?propertyValue, "S. trutta"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Salmo caspius" @en. 
          FILTER (CONTAINS(?propertyValue, "S. trutta"))
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }}
    
None

        SELECT ?entity ?entityLabel ?propertyValue 
        WHERE {{
          ?entity wdt:P31 wd:Q16521;  
                  wdt:P1813 ?propertyValue;
                  rdfs:label "Salmo letnica" @en. 
          FILTER (CONTAINS(?propertyValue,

None
{'D.glaucifolia': ['https://www.wikidata.org/wiki/Q15245560'], 'C.glauca': ['https://www.wikidata.org/wiki/Q15390716', 'https://www.wikidata.org/wiki/Q12841746', 'https://www.wikidata.org/wiki/Q15610870', 'https://www.wikidata.org/wiki/Q10262621', 'https://www.wikidata.org/wiki/Q107565987', 'https://www.wikidata.org/wiki/Q15513414', 'https://www.wikidata.org/wiki/Q15313554', 'https://www.wikidata.org/wiki/Q15447886', 'https://www.wikidata.org/wiki/Q15736717', 'https://www.wikidata.org/wiki/Q87597525', 'https://www.wikidata.org/wiki/Q15538624', 'https://www.wikidata.org/wiki/Q17246272', 'https://www.wikidata.org/wiki/Q3008072', 'https://www.wikidata.org/wiki/Q15538014', 'https://www.wikidata.org/wiki/Q15353329', 'https://www.wikidata.org/wiki/Q13190179', 'https://www.wikidata.org/wiki/Q1043658', 'https://www.wikidata.org/wiki/Q42405540', 'https://www.wikidata.org/wiki/Q3678887', 'https://www.wikidata.org/wiki/Q3318493', 'https://www.wikidata.org/wiki/Q5767804', 'https://www.wikidat

In [43]:
for index, row in abbreviation_values.iterrows():
    abbreviation_values.at[index, "Retrieved Annotation"] = retrieved_annotations[row["Cell Value"]]
abbreviation_values.head()

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
997,34169c088ee848e4866f42e87b4ccbc2,5,1,https://www.wikidata.org/wiki/Q15340551,[https://www.wikidata.org/wiki/Q15245560],D.glaucifolia
998,34169c088ee848e4866f42e87b4ccbc2,5,5,https://www.wikidata.org/wiki/Q12838227,"[https://www.wikidata.org/wiki/Q15390716, http...",C.glauca
999,34169c088ee848e4866f42e87b4ccbc2,5,11,https://www.wikidata.org/wiki/Q15490427,"[https://www.wikidata.org/wiki/Q2540427, https...",L.glaber
1000,34169c088ee848e4866f42e87b4ccbc2,5,20,https://www.wikidata.org/wiki/Q847209,[https://www.wikidata.org/wiki/Q847209],Q.serrata
1001,34169c088ee848e4866f42e87b4ccbc2,5,27,https://www.wikidata.org/wiki/Q15247814,"[https://www.wikidata.org/wiki/Q15484564, http...",C.sclerophylla


In [44]:
total = 0
for index, row in abbreviation_values.iterrows():
    if row["Target Annotation"] in row["Retrieved Annotation"]:
        total = total + 1
print(total)

50


50 out of 92 are filled with this method

In [45]:
def list_to_string(my_list):
    # Use join to concatenate the elements of the list into a string with comma separator
    result_string = ','.join(map(str, my_list))
    
    return result_string

abbreviation_values["Retrieved Annotation"] = abbreviation_values["Retrieved Annotation"].apply(lambda value: list_to_string(value))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [46]:
abbreviation_values_retrieved = abbreviation_values[abbreviation_values["Retrieved Annotation"] != ""]

In [47]:
abbreviation_dict = dict(zip(abbreviation_values_retrieved['Cell Value'], abbreviation_values_retrieved['Retrieved Annotation']))

In [48]:
abbreviation_dict

{'D.glaucifolia': 'https://www.wikidata.org/wiki/Q15245560',
 'C.glauca': 'https://www.wikidata.org/wiki/Q15390716,https://www.wikidata.org/wiki/Q12841746,https://www.wikidata.org/wiki/Q15610870,https://www.wikidata.org/wiki/Q10262621,https://www.wikidata.org/wiki/Q107565987,https://www.wikidata.org/wiki/Q15513414,https://www.wikidata.org/wiki/Q15313554,https://www.wikidata.org/wiki/Q15447886,https://www.wikidata.org/wiki/Q15736717,https://www.wikidata.org/wiki/Q87597525,https://www.wikidata.org/wiki/Q15538624,https://www.wikidata.org/wiki/Q17246272,https://www.wikidata.org/wiki/Q3008072,https://www.wikidata.org/wiki/Q15538014,https://www.wikidata.org/wiki/Q15353329,https://www.wikidata.org/wiki/Q13190179,https://www.wikidata.org/wiki/Q1043658,https://www.wikidata.org/wiki/Q42405540,https://www.wikidata.org/wiki/Q3678887,https://www.wikidata.org/wiki/Q3318493,https://www.wikidata.org/wiki/Q5767804,https://www.wikidata.org/wiki/Q15466155,https://www.wikidata.org/wiki/Q15563327,https://w

In [49]:
# Save the dictionary to a Pickle file
import pickle
with open('abbreviation_annotation_mapping.pkl', 'wb') as pickle_file:
    pickle.dump(abbreviation_dict, pickle_file)

## Added annotations

In [50]:
new_column_names = ['Table Name', 'Column Index', 'Row Index', 'Retrieved Annotation']
df_output = pd.read_csv("Dataset/output/cea annotation/cea_biodiv_third_experiment.csv", header=None, names=new_column_names)
df_output.head()

Unnamed: 0,Table Name,Column Index,Row Index,Retrieved Annotation
0,008851b16aa04124b3a9195676604f35,0,0,https://www.wikidata.org/wiki/Q7432
1,008851b16aa04124b3a9195676604f35,0,1,https://www.wikidata.org/wiki/Q2093287
2,008851b16aa04124b3a9195676604f35,0,2,https://www.wikidata.org/wiki/Q8261255
3,008851b16aa04124b3a9195676604f35,0,3,https://www.wikidata.org/wiki/Q2942992
4,008851b16aa04124b3a9195676604f35,0,4,https://www.wikidata.org/wiki/Q1105092


In [51]:
# join prediction and target dataframes
merged_df = pd.merge(cea_gt, df_output, on=['Table Name', 'Column Index', 'Row Index'])

In [52]:
# Add Cell Content
def retrieve_cell_value(row):
    df = pd.read_csv(f"Dataset/val/tables/{row['Table Name']}.csv", header=None)
    cell_value = df.iloc[row["Row Index"], row["Column Index"]]
    return cell_value
    
merged_df["Cell Value"] = merged_df.apply(lambda row: retrieve_cell_value(row), axis=1)

In [53]:
# Count number of nan_values strings with abbreviations
def detect_abbreviation_in_string(row):
    if row["Cell Value"].count(".") == 1 and row["Cell Value"].count("*") == 0 and row["Cell Value"][0].isupper():
        return True
    return False

# Apply the lambda function to identify True values
mask = merged_df[["Cell Value", "Target Annotation"]].apply(lambda row: detect_abbreviation_in_string(row), axis=1)

# Use the mask to filter and store the values that returned True
abbreviation_values_by_google = merged_df.loc[mask, :]
abbreviation_values_by_google

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
997,34169c088ee848e4866f42e87b4ccbc2,5,1,https://www.wikidata.org/wiki/Q15340551,https://www.wikidata.org/wiki/Q7687549,D.glaucifolia
998,34169c088ee848e4866f42e87b4ccbc2,5,5,https://www.wikidata.org/wiki/Q12838227,https://www.wikidata.org/wiki/Q2941555,C.glauca
999,34169c088ee848e4866f42e87b4ccbc2,5,11,https://www.wikidata.org/wiki/Q15490427,https://m.wikidata.org/wiki/Q149476,L.glaber
1000,34169c088ee848e4866f42e87b4ccbc2,5,20,https://www.wikidata.org/wiki/Q847209,https://www.wikidata.org/wiki/Q847209,Q.serrata
1001,34169c088ee848e4866f42e87b4ccbc2,5,27,https://www.wikidata.org/wiki/Q15247814,https://www.wikidata.org/wiki/Q15247814,C.sclerophylla
...,...,...,...,...,...,...
1431,8249f8533f764f6dbd195a872c18fd6d,0,86,https://www.wikidata.org/wiki/Q610177,https://www.wikidata.org/wiki/Q649073,C.cognatus
1432,8249f8533f764f6dbd195a872c18fd6d,0,87,https://www.wikidata.org/wiki/Q3766704,https://www.wikidata.org/wiki/Q2320738,C.spiloptera
1433,8249f8533f764f6dbd195a872c18fd6d,0,88,https://www.wikidata.org/wiki/Q606436,https://www.wikidata.org/wiki/Q606436,E.olmstedi
1434,8249f8533f764f6dbd195a872c18fd6d,0,90,https://www.wikidata.org/wiki/Q6406968,https://www.wikidata.org/wiki/Q6406968,H.roanokense


In [54]:
abbreviation_values_by_google

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
997,34169c088ee848e4866f42e87b4ccbc2,5,1,https://www.wikidata.org/wiki/Q15340551,https://www.wikidata.org/wiki/Q7687549,D.glaucifolia
998,34169c088ee848e4866f42e87b4ccbc2,5,5,https://www.wikidata.org/wiki/Q12838227,https://www.wikidata.org/wiki/Q2941555,C.glauca
999,34169c088ee848e4866f42e87b4ccbc2,5,11,https://www.wikidata.org/wiki/Q15490427,https://m.wikidata.org/wiki/Q149476,L.glaber
1000,34169c088ee848e4866f42e87b4ccbc2,5,20,https://www.wikidata.org/wiki/Q847209,https://www.wikidata.org/wiki/Q847209,Q.serrata
1001,34169c088ee848e4866f42e87b4ccbc2,5,27,https://www.wikidata.org/wiki/Q15247814,https://www.wikidata.org/wiki/Q15247814,C.sclerophylla
...,...,...,...,...,...,...
1431,8249f8533f764f6dbd195a872c18fd6d,0,86,https://www.wikidata.org/wiki/Q610177,https://www.wikidata.org/wiki/Q649073,C.cognatus
1432,8249f8533f764f6dbd195a872c18fd6d,0,87,https://www.wikidata.org/wiki/Q3766704,https://www.wikidata.org/wiki/Q2320738,C.spiloptera
1433,8249f8533f764f6dbd195a872c18fd6d,0,88,https://www.wikidata.org/wiki/Q606436,https://www.wikidata.org/wiki/Q606436,E.olmstedi
1434,8249f8533f764f6dbd195a872c18fd6d,0,90,https://www.wikidata.org/wiki/Q6406968,https://www.wikidata.org/wiki/Q6406968,H.roanokense


In [55]:
abbreviation_values

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
997,34169c088ee848e4866f42e87b4ccbc2,5,1,https://www.wikidata.org/wiki/Q15340551,https://www.wikidata.org/wiki/Q15245560,D.glaucifolia
998,34169c088ee848e4866f42e87b4ccbc2,5,5,https://www.wikidata.org/wiki/Q12838227,"https://www.wikidata.org/wiki/Q15390716,https:...",C.glauca
999,34169c088ee848e4866f42e87b4ccbc2,5,11,https://www.wikidata.org/wiki/Q15490427,"https://www.wikidata.org/wiki/Q2540427,https:/...",L.glaber
1000,34169c088ee848e4866f42e87b4ccbc2,5,20,https://www.wikidata.org/wiki/Q847209,https://www.wikidata.org/wiki/Q847209,Q.serrata
1001,34169c088ee848e4866f42e87b4ccbc2,5,27,https://www.wikidata.org/wiki/Q15247814,"https://www.wikidata.org/wiki/Q15484564,https:...",C.sclerophylla
...,...,...,...,...,...,...
1431,8249f8533f764f6dbd195a872c18fd6d,0,86,https://www.wikidata.org/wiki/Q610177,"https://www.wikidata.org/wiki/Q2443326,https:/...",C.cognatus
1432,8249f8533f764f6dbd195a872c18fd6d,0,87,https://www.wikidata.org/wiki/Q3766704,https://www.wikidata.org/wiki/Q3766704,C.spiloptera
1433,8249f8533f764f6dbd195a872c18fd6d,0,88,https://www.wikidata.org/wiki/Q606436,https://www.wikidata.org/wiki/Q606436,E.olmstedi
1434,8249f8533f764f6dbd195a872c18fd6d,0,90,https://www.wikidata.org/wiki/Q6406968,https://www.wikidata.org/wiki/Q6406968,H.roanokense


In [60]:
# Fill NaNs with empty string
abbreviation_values_by_google['Retrieved Annotation'] = abbreviation_values_by_google['Retrieved Annotation'].fillna("")
abbreviation_values_by_google[abbreviation_values_by_google["Retrieved Annotation"] == ""]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
1410,8249f8533f764f6dbd195a872c18fd6d,0,29,https://www.wikidata.org/wiki/Q388110,,N.leuciodus
1412,8249f8533f764f6dbd195a872c18fd6d,0,31,https://www.wikidata.org/wiki/Q3754033,,N.miurus
1427,8249f8533f764f6dbd195a872c18fd6d,0,78,https://www.wikidata.org/wiki/Q2178257,,R.cataractae


In [63]:
incorrect_annotations = 511
empty_annotations = 187
correct_annotations = 1311

for (index1, row1), (index2, row2) in zip(abbreviation_values.iterrows(), abbreviation_values_by_google.iterrows()):
    if (row1["Retrieved Annotation"] == ""):
        continue
    if (row1["Target Annotation"] in row1["Retrieved Annotation"]):
        if (row2["Target Annotation"] not in row2["Retrieved Annotation"]):
            if (row2["Retrieved Annotation"] == ""):
                empty_annotations = empty_annotations - 1
            else:
                incorrect_annotations = incorrect_annotations - 1
            correct_annotations = correct_annotations + 1

print(f"Incorrect annotations are {incorrect_annotations}")
print(f"Empty annotations are {empty_annotations}")
print(f"Correct annotations are {correct_annotations}")

Incorrect annotations are 490
Empty annotations are 186
Correct annotations are 1333


In [68]:
precision = correct_annotations / (correct_annotations + incorrect_annotations)

In [69]:
recall = correct_annotations / (correct_annotations + incorrect_annotations + empty_annotations)

In [70]:
f1_score = (2 * precision * recall) / (precision + recall)

In [71]:
f1_score

0.6957202505219208