# AnTeDe Lab 12a: Using Syntax Parsers 

## Session goal
The goal of this session is to help you familiarize yourself with syntax parsing.

The following cells download CoreNLP if needed, unpack it, and start your local CoreNLP server.

In [None]:
import os

def unzip_corenlp(zipped, dest="."):
    """Extract every member of the zip archive *zipped* into *dest*.

    Args:
        zipped: Path to the zip archive to unpack.
        dest: Directory to extract into. Defaults to ".", i.e. the current
            working directory, which is what callers passing a single
            argument get.

    Raises:
        FileNotFoundError: if *zipped* does not exist.
        zipfile.BadZipFile: if *zipped* is not a valid zip archive.
    """
    # Imported locally so the helper stays self-contained within its cell.
    import zipfile

    with zipfile.ZipFile(zipped, "r") as zip_ref:
        zip_ref.extractall(dest)
        
# Remember the directory the notebook started in; later cells build the
# CoreNLP archive and installation paths relative to it.
home = os.getcwd()

In [None]:
import requests

# Port the local CoreNLP server is started on and that clients connect to.
my_port = 9001

# Fetch the distribution over HTTPS: the jars inside the archive are executed
# later, so the download should not travel over an unauthenticated channel.
base_url = 'https://nlp.stanford.edu/software/'
# Name of the release; the archive unpacks into a directory of the same name.
corenlp_filename = 'stanford-corenlp-4.0.0'
corenlp_file = corenlp_filename + '.zip'
corenlp_url = base_url + corenlp_file

def download_from_url(url, save_path, chunk_size=128):
    """Stream the resource at *url* into the file *save_path*.

    The body is first written to ``<save_path>.part`` and only moved to
    *save_path* once the transfer has completed, so an interrupted download
    never leaves a truncated file under the final name.

    Args:
        url: Address of the resource to fetch.
        save_path: Destination file path; overwritten if it exists.
        chunk_size: Number of bytes requested per iteration while streaming.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeouts.
    """
    partial_path = f'{save_path}.part'
    # The context manager releases the connection even if writing fails;
    # the timeout bounds connect/read waits, not the total transfer time.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail loudly instead of saving an HTML error page as the archive.
        r.raise_for_status()
        with open(partial_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)
    os.replace(partial_path, save_path)

import zipfile

# Where the unpacked distribution lives and where its archive is stored.
corenlp_dir = os.path.join(home, corenlp_filename)
path_to_file = os.path.join(home, corenlp_filename+'.'+'zip')

if os.path.isdir(corenlp_dir):
    # Already unpacked: neither download nor extraction is needed (and the
    # archive may legitimately have been deleted to save disk space).
    print ('CoreNLP seems to be there!')
else:
    # Only download when no usable archive is present; is_zipfile() is False
    # for a missing file as well as for a truncated or corrupt one.
    if not zipfile.is_zipfile(path_to_file):
        # The archive is large, so request it in 1 MiB chunks rather than
        # the helper's small default.
        download_from_url(corenlp_url, path_to_file, chunk_size=1 << 20)
    unzip_corenlp(path_to_file)

In [None]:
def start_corenlp_server():
    """Launch the CoreNLP server as a background process.

    The server is started from the CoreNLP installation directory
    (``home``/``corenlp_filename``) so that the ``*`` classpath entry refers
    to that directory, and it is told to use port ``my_port``. The function
    returns immediately without waiting for the server to become ready.

    Raises:
        FileNotFoundError: if the installation directory is missing or the
            ``java`` executable cannot be found.
    """
    # Imported locally so the cell stays self-contained.
    import subprocess

    path = os.path.join(home, corenlp_filename)
    command = [
        'java', '-mx4g', '-cp', '*',
        'edu.stanford.nlp.pipeline.StanfordCoreNLPServer',
        '-annotators', 'tokenize,ssplit,pos,lemma,parse,sentiment',
        '-port', str(my_port),
        '-timeout', '30000',
    ]
    # Popen does not block, so no shell "&" is needed, and cwd= sets the
    # child's working directory without changing the notebook's own.
    subprocess.Popen(command, cwd=path)
    
# Start the server now; the client cells below connect to it on my_port.
start_corenlp_server()

The following cell will enable you to use the CoreNLP tokenizer and PoS tagger, as well as its constituency and dependency parsers.

In [None]:
! pip install stanfordcorenlp
from stanfordcorenlp import StanfordCoreNLP
from nltk.tree import *
import logging

class StanfordNLP:
    """Small convenience wrapper around a ``StanfordCoreNLP`` client.

    Each method forwards a single sentence to the client; the two parsing
    methods additionally print their result before returning it.
    """

    def __init__(self, host='http://localhost', port=my_port):
        """Create the client for the server at *host* and *port*."""
        client_options = dict(
            port=port,
            timeout=30,
            quiet=False,
            logging_level=logging.DEBUG,
        )
        self.nlp = StanfordCoreNLP(host, **client_options)
        # Request properties stored on the instance; the methods of this
        # class do not read them.
        # A longer annotator list that was tried previously:
        # 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation'
        self.props = dict(
            annotators='tokenize,ssplit,pos,lemma,ner,parse,depparse',
            pipelineLanguage='en',
            outputFormat='json',
        )

    def word_tokenize(self, sentence):
        """Return the client's tokenization of *sentence*."""
        tokens = self.nlp.word_tokenize(sentence)
        return tokens

    def pos(self, sentence):
        """Return the client's part-of-speech tagging of *sentence*."""
        tagged = self.nlp.pos_tag(sentence)
        return tagged

    def parse(self, sentence):
        """Parse *sentence*, draw the tree, and return the raw parse string."""
        bracketed = self.nlp.parse(sentence)
        # Build an NLTK tree from the returned string only for display.
        tree = Tree.fromstring(bracketed)
        tree.pretty_print()
        return bracketed

    def dependency_parse(self, sentence):
        """Dependency-parse *sentence*, print the result, and return it."""
        dependencies = self.nlp.dependency_parse(sentence)
        print(dependencies)
        return dependencies

In [None]:
# Create the wrapper with its default host and port.
s = StanfordNLP()
sentence = 'Fed raises interest rates.'
# parse() draws the tree itself; print() then shows the parse string it returns.
print(s.parse(sentence))

In [None]:
# parse() draws the tree as a side effect; the returned parse string is kept in r.
sentence = 'The president eats her sandwich at her desk.'
r = s.parse(sentence)

In [None]:
# A longer sentence with a relative clause and several nested prepositional phrases.
sentence = 'I see a very big dog on the pile of clothes that you left next to the washing machine.'
r = s.parse(sentence)

In [None]:
# This sentence was contributed by course participant Pius von Däniken during the live lecture on May 7, 2020.
# Here 'man' is the verb and 'The old' is the subject; the parser has a really hard time with
# the word 'man' used as a verb.
sentence='The old man the boats.'
r=s.parse(sentence)

In [None]:
# The parser has an easier time if we change 'man' to 'are manning',
# which makes the verb explicit.
sentence = 'The old are manning the boats.'
r = s.parse(sentence)

In [None]:
from nltk.parse.corenlp import CoreNLPDependencyParser
# Point NLTK's dependency-parser client at the local CoreNLP server.
# The port is set again here so that this cell defines everything it needs
# apart from `sentence`.
my_port = 9001
url = f'http://localhost:{my_port}'
parser = CoreNLPDependencyParser(url=url)
# raw_parse returns an iterator of parses; keep the first one for the
# sentence defined in an earlier cell.
parse = next(parser.raw_parse(sentence))
# Last expression of the cell, so the notebook displays the parse.
parse

In [None]:
# A headline with an attachment ambiguity: 'with knife' can be read as
# modifying either 'kill' or 'man'.
sentence = 'San Jose cops kill man with knife.'
# Uses the `parser` client created in the previous cell.
parse = next(parser.raw_parse(sentence))
# Last expression of the cell, so the notebook displays the parse.
parse