In [6]:

import spacy

class SimplySpacy:
	"""Small convenience wrapper around a loaded spaCy pipeline."""

	def __init__(self):
		"""Load the ``en_core_web_sm`` pipeline and keep it on ``self.nlp``."""
		self.nlp = spacy.load("en_core_web_sm")

	def print_word_information(self, text = "This is going to be a fun course!"):
		"""Print one line of attributes for every token of ``text``.

		Each line holds, space-separated and in this order: the token text,
		``pos_``, ``lemma_``, ``tag_``, ``dep_``, ``shape_``, ``is_alpha``
		and ``is_stop``.

		Args:
			text: The string handed to the pipeline.
		"""
		# Attribute names are read off each token in exactly this order.
		columns = ("text", "pos_", "lemma_", "tag_", "dep_", "shape_", "is_alpha", "is_stop")
		parsed = self.nlp(text)
		# NOTE(review): the header says "Sentences" although token rows follow.
		print("Sentences are:")
		for tok in parsed:
			print(*(getattr(tok, column) for column in columns))

# Build the helper (this loads the spaCy pipeline) and print the per-token
# attributes of the method's default sentence.
sp = SimplySpacy()
sp.print_word_information()

Sentences are:
This PRON this DT nsubj Xxxx True True
is AUX be VBZ aux xx True True
going VERB go VBG ROOT xxxx True False
to PART to TO aux xx True True
be AUX be VB xcomp xx True True
a DET a DT det x True True
fun NOUN fun NN amod xxx True False
course NOUN course NN attr xxxx True False
! PUNCT ! . punct ! False False


In [7]:
import spacy

# NOTE(review): this rebinds the name SimplySpacy; a class of the same name
# defined in an earlier cell is replaced by this one.
class SimplySpacy:
	"""Small convenience wrapper around a loaded spaCy pipeline."""

	def __init__(self):
		"""Load the ``en_core_web_sm`` pipeline and keep it on ``self.nlp``."""
		self.nlp = spacy.load("en_core_web_sm")

	def print_sentences(self, text = "This is the first sentence. This is the second sentence."):
		"""Print every sentence the pipeline finds in ``text``, one per line.

		Args:
			text: The string handed to the pipeline.
		"""
		parsed = self.nlp(text)
		print("Sentences are:")
		# Walk the document's sentence spans lazily and emit their raw text.
		for sentence_text in (span.text for span in parsed.sents):
			print(sentence_text)

# Instantiate the class defined in this cell (this loads the spaCy pipeline)
# and split the method's default two-sentence text.
sp = SimplySpacy()

sp.print_sentences()


Sentences are:
This is the first sentence.
This is the second sentence.


In [8]:
# Harder input for the sentence splitter: an ellipsis, an abbreviation ("U.S.")
# and a decimal number ("2.5"). Reuses the `sp` instance created in an earlier cell.
sp.print_sentences("This is the first sentence... mmm. okay? I went to the U.S. 2.5 years ago!")

Sentences are:
This is the first sentence... mmm.
okay?
I went to the U.S. 2.5 years ago!


In [10]:
import spacy

# The similarity demo uses the "md" package rather than the "sm" one loaded
# elsewhere in this notebook (original note: use the larger package).
nlp = spacy.load("en_core_web_md")

doc1, doc2, doc3 = (
    nlp(sentence)
    for sentence in (
        "I spent my weekend studying.",
        "I spent my weekend sleeping.",
        "I lead a fun life.",
    )
)
# Score each "weekend" document against the same reference document (doc3).
for candidate in (doc1, doc2):
    print(candidate, "<->", doc3, candidate.similarity(doc3))

I spent my weekend studying. <-> I lead a fun life. 0.6979041434697087
I spent my weekend sleeping. <-> I lead a fun life. 0.6887326355620008


**spaCy Matcher**

In [19]:
import spacy
from spacy.matcher import PhraseMatcher


class OntologyMatcher:
    """Tag city and dwelling-type terms in a sentence with a spaCy ``PhraseMatcher``.

    The vocabulary comes from two plain-text files, ``cities`` and
    ``dwellings``, opened relative to the current working directory and read
    one term per line. Terms are stored lower-cased, and sentences are
    lower-cased before matching, so matching ignores case.
    """

    def __init__(self):
        """Load the ``en_core_web_sm`` pipeline and register both term lists.

        Prints the terms that were loaded from each file.

        Raises:
            OSError: If ``cities`` or ``dwellings`` cannot be opened.
        """
        self.nlp = spacy.load("en_core_web_sm")

        self.matcher = PhraseMatcher(self.nlp.vocab)
        cities = self._load_terms("cities")
        print("Loaded the following cities: ", cities)
        dwelling_type = self._load_terms("dwellings")
        print("Loaded the following dwellings: ", dwelling_type)
        city_patterns = [self.nlp.make_doc(text) for text in cities]
        dwelling_patterns = [self.nlp.make_doc(text) for text in dwelling_type]
        # The keys given here are the labels printed next to each match.
        self.matcher.add("City", city_patterns)
        self.matcher.add("Dwelling_Type", dwelling_patterns)

    @staticmethod
    def _load_terms(path):
        """Return the stripped, lower-cased, non-blank lines of the file at ``path``.

        The file is opened in a ``with`` block so its handle is closed as soon
        as the terms have been read. Blank lines are skipped so they do not
        become empty terms.
        """
        with open(path) as handle:
            return [term for term in (line.strip().lower() for line in handle) if term]

    def match_concepts(self, sentence = "This is an apartment in Sydney."):
        """Find known terms in ``sentence`` and print them with their labels.

        Args:
            sentence: Text to search. It is lower-cased before matching because
                the registered terms are lower-cased.
        """
        doc = self.nlp(sentence.lower())
        matches = self.matcher(doc)
        # The original sentence is passed for display; spans come from the
        # lower-cased doc, so matched text is printed in lower case.
        self.print_output(sentence, doc, matches)

    def print_output(self, sentence, doc, matches):
        """Print ``sentence`` followed by one ``<matched text> : <label>`` line per match.

        Args:
            sentence: The text to echo on the first output line.
            doc: The document that the match offsets index into.
            matches: Iterable of ``(match_id, start, end)`` triples.
        """
        print("Sentence: ", sentence)

        for match_id, start, end in matches:
            span = doc[start:end]
            # Look the match id up in the matcher's vocab to get its label text.
            print(span.text, ":", self.matcher.vocab[match_id].text)

# Constructing the matcher loads the spaCy pipeline and reads the `cities` and
# `dwellings` files from the current working directory, printing what it loaded.
om = OntologyMatcher()


Loaded the following cities:  ['sydney', 'melbourne', 'perth', 'adelaide', 'darwin', 'hobart', 'canberra', 'brisbane']
Loaded the following dwellings:  ['apartment', 'townhouse', 'villa', 'unit', 'house']


In [20]:
# Run the matcher on the method's default sentence; uses the `om` instance
# created in an earlier cell.
om.match_concepts()

Sentence:  This is an apartment in Sydney.
apartment : Dwelling_Type
sydney : City
