In [4]:
import spacy
from spacy import displacy
import pandas as pd
# Load the small English pipeline once; later cells call `nlp(...)` to parse sentences.
nlp = spacy.load("en_core_web_sm")


In [5]:
# Toy corpus: four active-voice sentences and four passive-voice sentences.
active = [
    "Hens lay eggs.",
    "Birds build nests.",
    "The batter hit the ball.",
    "The computer transmitted a copy of the manual",
]
passive = [
    "Eggs are laid by hens",
    "Nests are built by birds",
    "The ball was hit by the batter",
    "A copy of the manual was transmitted by the computer.",
]

In [7]:
# Parse the first active sentence and print every token next to its dependency label.
doc = nlp(active[0])
for token in doc:
    print(f"{token.text} {token.dep_}")

Hens nsubj
lay ROOT
eggs dobj
. punct


In [8]:
# Draw the dependency parse of the current `doc` inline (black on white).
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"bg": "white", "color": "black"},
)

In [10]:
# Render the dependency parse of every active sentence, one diagram per sentence.
for sentence in active:
    doc = nlp(sentence)
    displacy.render(
        doc,
        style="dep",
        jupyter=True,
        options={"bg": "white", "color": "black"},
    )

In [11]:
# Render the dependency parse of every passive sentence, one diagram per sentence.
for sentence in passive:
    doc = nlp(sentence)
    displacy.render(
        doc,
        style="dep",
        jupyter=True,
        options={"bg": "white", "color": "black"},
    )

In [14]:
# Parse one passive sentence, draw it, then list the syntactic children of token 3.
sent = "Nests are built by birds"
doc = nlp(sent)
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"bg": "white", "color": "black"},
)
[child for child in doc[3].children]

[birds]

In [19]:
# Show text, dependency label and lemma for each token of a progressive-tense sentence.
s = "upGrad is teaching NLP."

doc = nlp(s)
for token in doc:
    print(f"{token.text} {token.dep_} {token.lemma_}")

upGrad nsubj upgrad
is aux be
teaching ROOT teach
NLP dobj NLP
. punct .


In [20]:
# Re-declare the toy corpus (same sentences as before) so the cells below start
# from known values of `active` and `passive`.
active = [
    "Hens lay eggs.",
    "Birds build nests.",
    "The batter hit the ball.",
    "The computer transmitted a copy of the manual",
]
passive = [
    "Eggs are laid by hens",
    "Nests are built by birds",
    "The ball was hit by the batter",
    "A copy of the manual was transmitted by the computer.",
]

In [21]:
from spacy.matcher import Matcher

In [22]:
# Parse the first passive sentence and draw its dependency tree.
doc = nlp(passive[0])
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"bg": "white", "color": "black"},
)

In [23]:
# Warm-up matcher: a one-token pattern that matches any token tagged as a NOUN.
matcher = Matcher(nlp.vocab)
rule = [{"POS": "NOUN"}]
matcher.add("Rule", [rule])

In [24]:
# Run the noun rule on the current `doc`; the result lists (match_id, start, end) triples.
matcher(doc)


[(15740618714089435985, 0, 1), (15740618714089435985, 4, 5)]

In [25]:
# Span covered by the first match (start=0, end=1).
doc[0:1]

Eggs

In [26]:
# Span covered by the second match (start=4, end=5).
doc[4:5]

hens

In [27]:
# Replace the noun matcher with one that fires on any token whose dependency
# label is `nsubjpass`.
matcher = Matcher(nlp.vocab)
passive_rule = [{"DEP": "nsubjpass"}]
matcher.add("Passive", [passive_rule])

In [28]:
# Apply the passive rule to the current `doc`; a non-empty result means a match was found.
matcher(doc)

[(3889985946448656432, 0, 1)]

In [29]:
# Switch to the first active sentence and draw its dependency tree.
doc = nlp(active[0])
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"bg": "white", "color": "black"},
)

In [30]:
# Apply the passive rule to the active sentence parsed in the previous cell.
matcher(doc)

[]

In [31]:
def is_passive(doc, matcher):
    """Report whether `matcher` finds at least one match in `doc`.

    Args:
        doc: The object passed to `matcher`; the notebook passes a parsed spaCy doc.
        matcher: A callable that takes `doc` and returns a sized collection of matches.

    Returns:
        True if the collection returned by `matcher(doc)` is non-empty, else False.
    """
    return len(matcher(doc)) > 0

In [32]:
# Classify each active sentence with the current matcher and print the verdict.
for sentence in active:
    doc = nlp(sentence)
    print(is_passive(doc, matcher))

False
False
False
False


In [33]:
# Classify each passive sentence with the current matcher and print the verdict.
for sentence in passive:
    doc = nlp(sentence)
    print(is_passive(doc, matcher))

True
True
True
True


In [35]:
# Try the rule on a passive sentence that uses a progressive auxiliary ("is being bought").
doc = nlp("A book is being bought by John.")
matcher(doc)

[(3889985946448656432, 1, 2)]

In [36]:
# Read the evaluation sentences from a CSV in the working directory and preview the first rows.
active_passive = pd.read_csv('active_passive.csv')
active_passive.head()

Unnamed: 0,Active,Passive
0,He reads a novel.,A novel is read.
1,He does not cook food.,Food is not cooked by him.
2,Does he purchase books?,Are books being purchased by him?
3,They grow plants.,Plants are grown by them.
4,She teaches me.,I am taught by her.


In [37]:
# (rows, columns) of the loaded dataset.
active_passive.shape

(40, 2)

In [38]:
# Rebind `active` / `passive` from the toy lists to the dataset's columns.
active, passive = active_passive["Active"], active_passive["Passive"]

In [39]:
# Count the active sentences that the matcher does NOT flag as passive.
cnt = 0
for sentence in active:
    doc = nlp(sentence)
    if is_passive(doc, matcher):
        continue
    cnt += 1
print(cnt)

40


In [40]:
# Count the passive sentences that the matcher flags as passive.
cnt = 0
for sentence in passive:
    doc = nlp(sentence)
    cnt += 1 if is_passive(doc, matcher) else 0
print(cnt)

38


In [41]:
# Repeat the passive count, this time collecting the sentences the matcher fails to flag.
cnt = 0
missed = []
for sentence in passive:
    doc = nlp(sentence)
    if not is_passive(doc, matcher):
        missed.append(sentence)
        continue
    cnt += 1

In [45]:
# Show the first missed sentence, then its dependency parse. A bare `missed[0]`
# that is not the cell's last line is evaluated and discarded, so it must be
# printed explicitly to appear in the output.
print(missed[0])
displacy.render(nlp(missed[0]), style='dep', jupyter=True, options={'bg': 'white', 'color': 'black'})


In [46]:
# Show the second missed sentence, then its dependency parse. A bare `missed[1]`
# that is not the cell's last line is evaluated and discarded, so it must be
# printed explicitly to appear in the output.
print(missed[1])
displacy.render(nlp(missed[1]), style='dep', jupyter=True, options={'bg': 'white', 'color': 'black'})

In [47]:
# Look up spaCy's human-readable description of the `auxpass` dependency label.
spacy.explain("auxpass")

'auxiliary (passive)'

In [48]:
# Broaden the rule: match a token labelled either `nsubjpass` or `auxpass`.
matcher = Matcher(nlp.vocab)
passive_rule = [{"DEP": {"IN": ["nsubjpass", "auxpass"]}}]
matcher.add("Passive", [passive_rule])

In [49]:
# Re-check the active sentences with the broadened rule: count those NOT flagged as passive.
cnt = 0
for sentence in active:
    doc = nlp(sentence)
    if is_passive(doc, matcher):
        continue
    cnt += 1
print(cnt)

40


In [50]:
# Re-check the passive sentences with the broadened rule: count those flagged as passive.
cnt = 0
for sentence in passive:
    doc = nlp(sentence)
    cnt += 1 if is_passive(doc, matcher) else 0
print(cnt)

40


In [64]:
# Parse and draw a sentence that contains a relative clause ("which was already late").
doc = nlp("JetAirways cancelled the flight this morning which was already late")
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"bg": "white", "color": "black"},
)

In [65]:
# Parse a two-clause sentence; its tokens are inspected in the following cell.
doc = nlp("It was the best of times and it was the worst of times")

In [69]:
# Syntactic children of token 3 in the current `doc`.
[child for child in doc[3].children]

[the, of]

In [70]:
# Parse and draw one more passive sentence with a "by" phrase.
doc = nlp("Dole was defeated by Clinton")
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"bg": "white", "color": "black"},
)