In [15]:
import spacy 

# Load the small English spaCy pipeline; the entities it annotates are printed below.
nlp = spacy.load('en_core_web_sm')

# Sample passage to analyse (the variable holds three sentences, not one).
sentence = "An Artificial Neural Network (ANN) is an information processing paradigm that is inspired by the way biological nervous systems, such as the brain, process information. The key element of this paradigm is the novel structure of the information processing system. It is composed of a large number of highly interconnected processing elements (neurons) working in unison to solve specific problems."

# Run the pipeline over the passage.
doc = nlp(sentence)

# One output line per entity: surface text, start/end character offsets, label.
for ent in doc.ents:
    print(*(ent.text, ent.start_char, ent.end_char, ent.label_))

An Artificial Neural Network 0 28 ORG
ANN 30 33 ORG


In [17]:
# -*- coding:utf-8 -*-
# Author:Zhou Yang
# Time:2019/3/30



import tagme
import logging
import sys
import os.path

# TagMe needs a personal authorization token, obtained by registering with the
# service. It is read from the environment rather than written into the
# notebook, because anything committed here is visible to every reader of the
# file. A token that has already been committed should be treated as leaked
# and regenerated.
_gcube_token = os.environ.get("TAGME_GCUBE_TOKEN")
if not _gcube_token:
    raise RuntimeError(
        "Set the TAGME_GCUBE_TOKEN environment variable to your TagMe "
        "authorization token before running this cell."
    )
tagme.GCUBE_TOKEN = _gcube_token

# Module logger named after the running program; used by the annotation helpers
# below to report entries they could not parse.
program = os.path.basename(sys.argv[0])
logger = logging.getLogger(program)
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')



def Annotation_mentions(txt):
    """
    Find the spans of a text that TagMe recognises as mentions of wiki concepts.

    :param txt: the text to analyse, str type
    :return: dict whose keys are the mention texts found in ``txt`` and whose
             values are the "lp" figure reported for each mention, kept as the
             string parsed from the mention's printed form. Ambiguous mentions
             are included as well. An empty dict is returned when the service
             gives no response.
    """
    response = tagme.mentions(txt)
    if response is None:
        # NOTE(review): tagme appears to return None when the request fails --
        # confirm against the library. Without this guard the attribute access
        # below would raise AttributeError.
        logger.error('error annotation_mention: no response from tagme')
        return {}

    dic = {}
    for mention in response.mentions:
        # The fields are recovered from the printed form, which is expected to
        # contain " [" after the mention text and "] lp=" before the value.
        rendered = str(mention)
        try:
            dic[rendered.split(" [")[0]] = rendered.split("] lp=")[1]
        except IndexError:
            # Only a missing "] lp=" marker can fail here. Log with lazy
            # %-formatting: concatenating the mention object to a str raised
            # TypeError inside the handler and aborted the whole loop.
            logger.error('error annotation_mention about %s', rendered)
    return dic


def Annotate(txt, language="en", theta=0.1):
    """
    Map the concept mentions found in a text to Wikipedia concepts via TagMe.

    :param txt: the text to analyse, str type
    :param language: language of the text: "de" is German, "en" is English,
                     "it" is Italian. The default is English "en"
    :param theta: score threshold in [0, 1] passed to ``get_annotations``; the
                  larger the threshold, the more reliable the remaining
                  mappings. The default is 0.1
    :return: dict ``{(A, B): score}`` where A is the mention in the text, B is
             the wiki concept it maps to, and score is the score, kept as the
             string parsed from the annotation's printed form. An empty dict is
             returned when the service gives no response.
    """
    response = tagme.annotate(txt, lang=language)
    if response is None:
        # NOTE(review): tagme appears to return None when the request fails --
        # confirm against the library. Without this guard the method call
        # below would raise AttributeError.
        logger.error('error annotation: no response from tagme')
        return {}

    dic = {}
    for ann in response.get_annotations(theta):
        # Expected printed form: "<A> -> <B> (score: <score>)". Split it once
        # instead of re-splitting the same string for every field.
        rendered = str(ann)
        try:
            pieces = rendered.split(" -> ")
            tail = pieces[1].split(" (score: ")
            dic[(pieces[0], tail[0])] = tail[1].split(")")[0]
        except IndexError:
            # Raised when a separator is missing. Log with lazy %-formatting:
            # concatenating the annotation object to a str raised TypeError
            # inside the handler and aborted the whole loop.
            logger.error('error annotation about %s', rendered)
    return dic


if __name__ == '__main__':
    # Read the whole input text. The context manager closes the file as soon as
    # reading finishes, even on error; the handle was previously never closed.
    with open("text.txt", "r", encoding="utf8") as f:
        txt = f.read()

    # Part 1: every mention found in the text with its "lp" value.
    mention_probs = Annotation_mentions(txt)
    for mention, link_prob in mention_probs.items():
        print(mention + "  " + link_prob)

    print("=" * 30)

    # Part 2: mention -> wiki concept mappings whose score passes theta=0.2.
    annotation_scores = Annotate(txt, theta=0.2)
    for (mention, concept), score in annotation_scores.items():
        print(mention + " ---> " + concept + "  " + score)

AirPower  0.044692736119031906
wireless charging  0.5
Dave Lee  0.31578946113586426
North America  0.30804112553596497
technology  0.023398298770189285
reporter  0.02981657162308693
step  0.004248012788593769
firm  0.00946947280317545
given up  0.006053550634533167
make it work  0.020179372280836105
mat  0.04156818985939026
charge  0.011948450468480587
devices  0.0019434246933087707
plug  0.05445897579193115
engineers  0.017148582264780998
stop  0.004503968637436628
getting  0.04302854835987091
too hot  0.024900399148464203
will  0.0036389119923114777
high standards  0.005692599806934595
standards  0.008279936388134956
cancelled  0.00203994894400239
project  0.0027154358103871346
company  0.008127721026539803
rumours  0.02806372568011284
issues  0.0018386875744909048
circulating  0.0015035081887617707
time  0.01014722604304552
public  0.01035712193697691
solution  0.04418136551976204
last year  0.009448818862438202
most recent  0.0010159160010516644
iPhone  0.6007529497146606
launch  0