In [1]:
# “The quick brown fox jumps over the lazy dog”
# POS tags: (’The’, ’DET’) (’quick’, ’ADJ’) (’brown’, ’ADJ’) (’fox’, ’NOUN’) (’jumped’, ’VERB’) (’over’,
# ’ADP’) (’the’, ’DET’) (’lazy’, ’ADJ’) (’dog’, ’NOUN’)

# Create a chunker that detects noun-phrases (NPs) and lists the NPs in the sentence

In [108]:
import spacy

In [107]:
from spacy import displacy

In [109]:
from spacy.matcher import Matcher
from spacy.util import filter_spans

In [247]:
nlp = spacy.load("en_core_web_sm")

In [104]:
sentence = "The quick brown fox jumps over the lazy dog"

In [105]:
doc = nlp(sentence)

In [92]:
noun_phrases = list(doc.noun_chunks)
noun_phrases

[The quick brown fox, the lazy dog]

In [99]:
def noun_and_verb_phrases_chunking(sentence):
    
    verbpattern = [{'POS': 'VERB', 'OP': '?'},
           {'POS': 'ADV', 'OP': '*'},
           {'POS': 'VERB', 'OP': '+'}]
    
    nounpattern = [{'POS': 'NOUN'}]


    nlp = spacy.load('en_core_web_sm') 
    
    # instantiate a Matcher instance
    matcher = Matcher(nlp.vocab)
    matcher.add("Verb phrase", [verbpattern])
    matcher.add("Noun phrase", [nounpattern])
    
    doc = nlp(sentence) 
    # call the matcher to find matches 
    matches = matcher(doc)
    spans = [doc[start:end] for _, start, end in matches]
    
    return filter_spans(spans)

In [100]:
verb_phrases = noun_and_verb_phrases_chunking("The quick brown fox jumps over the lazy dog")
verb_phrases

[fox, jumps, dog]

In [85]:
noun_and_verb_phrases = noun_phrases + verb_phrases
noun_and_verb_phrases

[The quick brown fox, the lazy dog, jumps]

In [110]:
nlp=spacy.load('en_core_web_sm')
displacy.render(doc,jupyter=True)

In [120]:
subjectAndObjects = []
for token in doc:
    if(token.dep_ == "nsubj" or token.dep_ == "dobj" or token.dep_ == "pobj"):
        subjectAndObjects.append(token.text)

In [121]:
print(subjectAndObjects)

['fox', 'dog']


In [122]:
from nltk.corpus import wordnet as wn

In [242]:
from nltk.stem import WordNetLemmatizer

In [159]:
import nltk

In [160]:
def getSynonyms(word):
    return wn.synonyms(word)

In [161]:
getSynonyms("car")

[['auto', 'automobile', 'machine', 'motorcar'],
 ['railcar', 'railroad_car', 'railway_car'],
 ['gondola'],
 ['elevator_car'],
 ['cable_car']]

In [162]:
from nltk.corpus import sentiwordnet as swn

In [165]:
from spacy.tokenizer import Tokenizer

In [167]:
from spacy.lang.en import English

In [184]:
import re

In [335]:
# From https://nlpforhackers.io/sentiment-analysis-intro/
def tag_to_sent_tag(tag):


In [320]:
def clean_text(text):
    text = text.replace("<br />", " ")
    return text

In [384]:
def getSentiment(sentence):
    # Get POS-tags with WordNet
    # Lemmatize to get more reliable score with stem of words
    lemmatizer = WordNetLemmatizer()
    clean_text(sentence)
    score = 0.0

    for word in sentence.split():
        lemmatizer.lemmatize(word)
        if not word:
            continue
        # Get POS-tags from the word in sentence
        postag = nltk.pos_tag([word])
        for word,tag in postag:
            # We convert tag to get used in sentiwordnet functions
            good_tag = ""
            if tag.startswith('J'):
                good_tag = wn.ADJ
            elif tag.startswith('R'):
                good_tag = wn.ADV
            elif tag.startswith('N'):
                good_tag = wn.NOUN
            elif tag.startswith('V'):
                good_tag = wn.VERB
            
            if good_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
                continue
            
            # Look up the word using synsets from wordnet with its POS tag
            synsets = wn.synsets(word, pos=good_tag)
            if not synsets:
                    continue
                
            # Use [0] to get most common synset to be used in sentiment
            synset = synsets[0]
            # Get sentiment synset text name
            swn_synset = swn.senti_synset(synset.name())
            
            # We add score from sentiment
            score += swn_synset.pos_score() - swn_synset.neg_score()
         
        
    # We return 1 if positive score in sentiment or 0 if it's negative score
    print("Score: " + str(score))
    if score > 0:
        return 1
    return 0
    

In [385]:
getSentiment('Well, I don’t hate it, but it’s not the greatest!')

Score: -0.875


0

In [312]:
senttoken = nltk.sent_tokenize('Well, I don’t hate it, but it’s not the greatest!')
senttoken

['Well, I don’t hate it, but it’s not the greatest!']

In [324]:
clean_text('Well, I don’t hate it, but it’s  the greatest!')

'Well, I don’t hate it, but it’s not the greatest!'

In [329]:
nltk.pos_tag(["greatest"])

[('greatest', 'JJS')]

In [332]:
help(WordNetLemmatizer())

Help on WordNetLemmatizer in module nltk.stem.wordnet object:

class WordNetLemmatizer(builtins.object)
 |  WordNet Lemmatizer
 |  
 |  Lemmatize using WordNet's built-in morphy function.
 |  Returns the input word unchanged if it cannot be found in WordNet.
 |  
 |      >>> from nltk.stem import WordNetLemmatizer
 |      >>> wnl = WordNetLemmatizer()
 |      >>> print(wnl.lemmatize('dogs'))
 |      dog
 |      >>> print(wnl.lemmatize('churches'))
 |      church
 |      >>> print(wnl.lemmatize('aardwolves'))
 |      aardwolf
 |      >>> print(wnl.lemmatize('abaci'))
 |      abacus
 |      >>> print(wnl.lemmatize('hardrock'))
 |      hardrock
 |  
 |  Methods defined here:
 |  
 |  __repr__(self)
 |      Return repr(self).
 |  
 |  lemmatize(self, word: str, pos: str = 'n') -> str
 |      Lemmatize `word` using WordNet's built-in morphy function.
 |      Returns the input word unchanged if it cannot be found in WordNet.
 |      
 |      :param word: The input word to lemmatize.
 |      