# Get your token:

Get your token from https://services.d4science.org/group/tagme/

Before making any call to the web service, you need to set the module-level GCUBE_TOKEN variable. You can do so with:

In [2]:
import tagme

# Set the authorization token for subsequent calls.
# Replace the placeholder below with your own token. A token is a personal
# credential, so do not publish a real one in shared notebooks or documents.
tagme.GCUBE_TOKEN = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx-xxxxxxxxx"

# Annotation
### An annotation, i.e. a link of a part of text to an entity.

The annotation service lets you find entities mentioned in a text and link them to Wikipedia. This is the so-called Sa2KB problem. You can annotate a text with:

In [5]:
# Ask the annotation service for the entities mentioned in the sentence.
lunch_annotations = tagme.annotate("My favourite meal is Mexican burritos.")

# Print annotations with a score higher than 0.1
for annotation in lunch_annotations.get_annotations(0.1):
    print(annotation)

meal -> Meal (score: 0.2014230340719223)
Mexican -> Mexican cuisine (score: 0.36614900827407837)
burritos -> Burrito (score: 0.28607892990112305)


# Mention finding
### A mention, i.e. a part of text that may mention an entity.

The mention finding service lets you find what parts of text may be a mention of an entity, without linking them to any entity.

The mentions function accepts an optional language parameter, lang, which defaults to en.

In [7]:
# Find the spans of text that may mention an entity (no linking is done).
tomatoes_mentions = tagme.mentions("I definitely like ice cream better than tomatoes.")

# Print every mention found in the sentence.
for found_mention in tomatoes_mentions.mentions:
    print(found_mention)

ice cream [18,27] lp=0.18749085068702698
tomatoes [40,48] lp=0.004235605709254742


# Entity relatedness

Tagme also gives you the semantic relatedness between pairs of entities. Entities can be specified either as Wikipedia titles (like Barack Obama) or as Wikipedia IDs (like 534366, the ID of the entity Barack Obama). The two methods for obtaining the relatedness between entities are relatedness_title (which accepts titles) and relatedness_wid (which accepts Wikipedia IDs).

In [8]:
# Relatedness of a single pair of entities, given by title.
rels = tagme.relatedness_title(("Barack Obama", "Italy"))
print("Obama and italy have a semantic relation of", rels.relatedness[0].rel)

# Relatedness of a single pair of entities, given by Wikipedia ID.
rels = tagme.relatedness_wid((31717, 534366))
print("IDs 31717 and 534366 have a semantic relation of ", rels.relatedness[0].rel)

# Relatedness of three pairs of entities, given by title.
# The last entity does not exist, hence the value for that pair will be None.
title_pairs = [
    ("Barack_Obama", "Italy"),
    ("Italy", "Germany"),
    ("Italy", "BAD ENTITY NAME"),
]
rels = tagme.relatedness_title(title_pairs)
for pair_relatedness in rels.relatedness:
    print(pair_relatedness)

# The response can also be converted to a dictionary keyed by entity pair.
rels_dict = dict(rels)
print(rels_dict[("Barack Obama", "Italy")])

Obama and italy have a semantic relation of 0.05192309617996216
IDs 31717 and 534366 have a semantic relation of  0.09577333927154541
Barack Obama, Italy rel=0.05192309617996216
Italy, Germany rel=0.6111182570457458
Italy, BAD ENTITY NAME rel=None
0.05192309617996216


# A full example of tagMe

In [24]:
from __future__ import absolute_import, division, print_function, unicode_literals

import sys
import tagme

# Text used by the annotation and mention-finding steps of main().
SAMPLE_TEXT = "Obama visited uk"

# Replace the placeholder below with your own authorization token. A token is
# a personal credential, so do not publish a real one in shared documents.
tagme.GCUBE_TOKEN = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx-xxxxxxxxx"
def main():
    """Run each tagme call once and print the response followed by its items.

    The steps are: annotate SAMPLE_TEXT, find mentions in SAMPLE_TEXT, then
    query entity relatedness by title and by Wikipedia ID, for a single pair
    and for a list of pairs.
    """

    def show_relatedness(response):
        # Print the response itself, then every relatedness entry it holds.
        print(response)
        for entry in response.relatedness:
            print(entry)

    # Annotate a text.
    print("Annotating text: ", SAMPLE_TEXT)
    annotation_resp = tagme.annotate(SAMPLE_TEXT)
    print(annotation_resp)
    for annotation in annotation_resp.annotations:
        print(annotation)

    # Find mentions in a text.
    print("Finding mentions in text: ", SAMPLE_TEXT)
    mention_resp = tagme.mentions(SAMPLE_TEXT)
    print(mention_resp)
    for found_mention in mention_resp.mentions:
        print(found_mention)

    # Relatedness of one pair of entities, by title.
    show_relatedness(tagme.relatedness_title(["Barack_Obama", "Italy"]))

    # Relatedness of several pairs of entities, by title.
    title_pairs = [
        ("Barack_Obama", "Italy"),
        ("Italy", "Germany"),
        ("Italy", "BAD ENTITY NAME"),
    ]
    titles_resp = tagme.relatedness_title(title_pairs)
    show_relatedness(titles_resp)

    # Access the relatedness response as a dictionary.
    relatedness_by_pair = dict(titles_resp)
    print("Relatedness between Italy and Germany: ", relatedness_by_pair[("Italy", "Germany")])

    # Relatedness of one pair of entities, by Wikipedia ID.
    show_relatedness(tagme.relatedness_wid((31717, 534366)))

    # Relatedness of 1010 pairs of entities, by Wikipedia ID: the same first
    # ID paired with each of the 1010 consecutive IDs starting at itself.
    base_wid = 534366
    wid_pairs = [(base_wid, base_wid + offset) for offset in range(1010)]
    show_relatedness(tagme.relatedness_wid(wid_pairs))

if __name__ == "__main__":
    # Sanity-check the title helper functions before running main().
    raw_title = " barack Obama  "
    assert tagme.normalize_title(raw_title) == "Barack_Obama"
    assert tagme.title_to_uri(raw_title) == "https://en.wikipedia.org/wiki/Barack_Obama"
    assert tagme.wiki_title("Barack_Obama") == "Barack Obama"
    main()

Annotating text:  Obama visited uk
14msec, 2 annotations
Obama -> Barack Obama (score: 0.06389198452234268)
uk -> United Kingdom (score: 0.10190602391958237)
Finding mentions in text:  Obama visited uk
0msec, 2 mentions
Obama [0,5] lp=0.032010629773139954
uk [14,16] lp=0.10803870856761932
1 relatedness pairs, 1 calls
Barack Obama, Italy rel=0.05192309617996216
3 relatedness pairs, 1 calls
Barack Obama, Italy rel=0.05192309617996216
Italy, Germany rel=0.6111182570457458
Italy, BAD ENTITY NAME rel=None
Relatedness between Italy and Germany:  0.6111182570457458
1 relatedness pairs, 1 calls
31717, 534366 rel=0.09577333927154541
1010 relatedness pairs, 11 calls
534366, 534366 rel=1.0
534366, 534367 rel=None
534366, 534368 rel=None
534366, 534369 rel=None
534366, 534370 rel=None
534366, 534371 rel=None
534366, 534372 rel=None
534366, 534373 rel=None
534366, 534374 rel=None
534366, 534375 rel=None
534366, 534376 rel=None
534366, 534377 rel=None
534366, 534378 rel=None
534366, 534379 rel=None


# Another example

In [None]:
# -*- coding:utf-8 -*-
# Author:Zhou Yang
# Time:2019/3/30



import tagme
import logging
import sys
import os.path

# "Authorization Token" marked, need to register to have
tagme.GCUBE_TOKEN = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx-xxxxxxxxx"

# Configure the log line format, then create a logger named after the
# running script (the base name of sys.argv[0]).
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
program = os.path.basename(sys.argv[0])
logger = logging.getLogger(program)


def similarity(A, B, flag=0):
    """Return the relatedness value tagme reports for the entity pair (A, B).

    Args:
        A: First entity; passed to relatedness_title when flag is 0,
            otherwise to relatedness_wid.
        B: Second entity, interpreted the same way as A.
        flag: 0 selects tagme.relatedness_title; any other value selects
            tagme.relatedness_wid.

    Returns:
        The ``rel`` attribute of the first entry in the response's
        ``relatedness`` list.
    """
    # Pick the lookup once; both calls take the pair as a single tuple.
    lookup = tagme.relatedness_title if flag == 0 else tagme.relatedness_wid
    response = lookup((A, B))
    return response.relatedness[0].rel



if __name__ == '__main__':
    # Query relatedness by title (default flag).
    A = "Machine learning"
    B = "Artificial neural network"
    print(similarity(A, B))

    # Query relatedness by Wikipedia ID (flag=1).
    A_id = 21523
    B_id = 233488
    print(similarity(A_id, B_id, flag=1))
