In [7]:
import spacy

# Load English tokenizer, tagger, parser, NER and word vectors
nlp = spacy.load("ja_ginza")

# Process whole documents
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        "seriously. “I can tell you very senior CEOs of major American "
        "car companies would shake my hand and turn away because I wasn’t "
        "worth talking to,” said Thrun, in an interview with Recode earlier "
        "this week.")
doc = nlp(text)

# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

Noun phrases: ['When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously', 'I can tell you very senior CEOs of', 'car companies would shake my hand and turn away because I wasn’t worth talking to', 'said Thrun', 'in an interview with Recode earlier this week']
Verbs: []
. Product_Other
I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to Product_Other
said Thrun Product_Other
in an interview with Recode earlier this week. Music


In [8]:
# Find named entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)

. Product_Other
I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to Product_Other
said Thrun Product_Other
in an interview with Recode earlier this week. Music


In [18]:
doc  =  nlp("Jeffrey Hinton is a British-born researcher in computer science and cognitive psychology. \ Famous for researching neural networks. Currently working at the University of Toronto and Google." )
for entity in doc.ents:
    print(entity.text, entity.label_)



Jeffrey Hinton is a Corporation_Other
\ Famous for researching neural networks. Product_Other


In [19]:
from spacy.lang.en import English
from spacy.pipeline import EntityRuler

nlp = English()
ruler = EntityRuler(nlp)
patterns = [{"label": "ORG", "pattern": "MobiControl"},
            {"label": "OPSYS", "pattern": "Android"}]
ruler.add_patterns(patterns)
nlp.add_pipe(ruler)

doc = nlp("Apple 製品 MobiControl v 14 Manual ios overall Android 端末")
print([(ent.text, ent.label_) for ent in doc.ents])

[('MobiControl', 'ORG'), ('Android', 'OPSYS')]


In [12]:
import spacy
nlp = spacy.load('ja_ginza')
doc = nlp('※ｄトラベルでは海外発行のクレジットカードのご利用はできません。あらかじめご了承ください。 国内宿泊では、ドコモ払い、クレジットカード払い（dカード、VISA、MasterCard、JCB、AMEX...※ｄトラベルでは海外発行のクレジットカードのご利用はできません。あらかじめご了承ください。 ※JCB、AMEX、ダイナースはご利用いただけません。 Q.利用可能な支払方法について教えてください。（電..."')
# for sent in doc.sents:
#     for token in sent:
#         print(token.i, token.orth_, token.lemma_, token.pos_, token.tag_, token.dep_, token.head.i)
#     print('EOS')
#     print(sent)
#     break

In [8]:
[x for x in doc.]

[銀座, ランチ, ご, PM]

In [11]:
print([(ent.text, ent.label_) for ent in doc.])

[('トラベル', 'NP'), ('海外発行', 'NP'), ('クレジットカード', 'NP'), ('ご利用', 'NP'), ('ご', 'NP'), ('JCB', 'NP'), ('AMEX', 'NP'), ('ダイナース', 'NP'), ('ご利用', 'NP'), ('利用', 'NP'), ('支払方法', 'NP'), ('電', 'NP')]


In [13]:
print([(ent.text, ent.label_) for ent in doc.ents])

[('ドコモ払い', 'Company'), ('クレジットカード払い', 'Doctrine_Method_Other'), ('VISA', 'Product_Other'), ('MasterCard', 'Product_Other'), ('JCB', 'Product_Other'), ('AMEX...※ｄ', 'Product_Other'), ('JCB', 'Product_Other'), ('AMEX', 'Product_Other'), ('ダイナース', 'Product_Other')]
