In [1]:
import spacy
from spacy.matcher import Matcher

In [2]:
nlp = spacy.load('en_core_web_sm')

---
**Current Rules**

---



In [3]:
matcher = Matcher(nlp.vocab)

In [4]:
#pattern1 = [{ "TAG": "NNP" , "TAG": "NNS" , "TAG":"NN"}, {"TAG": "VBD","TAG":"VBZ"}, {"TAG":"RB","OP":'?'}, {"TAG": "JJ"}]
#pattern2 = [{"TAG":"RB","OP":'?'},{"TAG": "JJ"},{"TAG": "NNP","TAG": "NNS","TAG":"NN"}]

pattern1 = [{"TAG": {"IN":["NNP","NNS","NN"]}}, {"TAG": {"IN":["VBD","VBZ"]}}, {"TAG":"RB","OP":'?'}, {"TAG": "JJ","OP":"+"}]
pattern2 = [{"TAG":"RB","OP":'?'},{"TAG": "JJ"},{"TAG": {"IN":["NNP","NNS","NN"]}}]

matcher.add("nva", None, pattern1)
matcher.add("an", None, pattern2)

In [5]:
#doc1 = nlp("pizza was good but rice was very bad , very poor service but place was amazing")
doc1 = nlp("fries were bad but Burger was good , poor place , Service is amazing ")
#doc1 = nlp("place was little clean")

mat = matcher(doc1)
print(mat)

for match_id, start, end in mat :
  string_id = nlp.vocab.strings[match_id]
  span = doc1[start:end]
  if string_id == 'nva' :
    pair = [span[0],span[2:]]
  elif string_id == 'an' :
    pair = [span[-1],span[:-1]]
  print(match_id,string_id,start, end, span.text,pair)

[(2410998387917754427, 0, 3), (2410998387917754427, 4, 7), (15099054000809333061, 8, 10), (2410998387917754427, 11, 14)]
2410998387917754427 nva 0 3 fries were bad [fries, bad]
2410998387917754427 nva 4 7 Burger was good [Burger, good]
15099054000809333061 an 8 10 poor place [place, poor]
2410998387917754427 nva 11 14 Service is amazing [Service, amazing]


In [6]:
for token in doc1 :
  print(token.text,token.tag_)

fries NNS
were VBD
bad JJ
but CC
Burger NNP
was VBD
good JJ
, ,
poor JJ
place NN
, ,
Service NN
is VBZ
amazing JJ


---
**Current function**

---


In [7]:
def form_pair(review):
  matcher = Matcher(nlp.vocab)
  
  pattern1 = [{"TAG": {"IN":["NNP","NNS","NN"]}}, {"TAG": {"IN":["VBD","VBZ"]}}, {"TAG":"RB","OP":'?'}, {"TAG": "JJ","OP":"+"}]
  pattern2 = [{"TAG":"RB","OP":'?'},{"TAG": "JJ"},{"TAG": {"IN":["NNP","NNS","NN"]}}]

  matcher.add("nva", None, pattern1)
  matcher.add("an", None, pattern2)

  doc = nlp(review)
  match = matcher(doc)
  
  all_pairs = []
  extracted_text = []
  for match_id, start, end in match :
    string_id = nlp.vocab.strings[match_id]
    span = doc[start:end]
    pair=()
    if string_id == "nva" :
      pair = (span[0],span[2:])
    elif string_id == "an" :
      pair = (span[-1],span[:-1])
    #print(match_id, string_id, start, end, span.text, pair)
    all_pairs.append(pair)
    extracted_text.append(span)
  return all_pairs,extracted_text

In [8]:
reviews = ["pizza was good but rice were very bad , very poor service but place was amazing","fries were bad but Burger was good , poor place , Service is amazing ","place was little clean"]
for review in reviews :
  all_pairs,extracted_text = form_pair(review)
  print(review ,"\n" ,all_pairs,"\n")

pizza was good but rice were very bad , very poor service but place was amazing 
 [(pizza, good), (rice, very bad), (service, very poor), (service, poor), (place, amazing)] 

fries were bad but Burger was good , poor place , Service is amazing  
 [(fries, bad), (Burger, good), (place, poor), (Service, amazing)] 

place was little clean 
 [(place, little clean)] 

