In [16]:
# Turn off unnecessary warnings
import warnings
warnings.filterwarnings("ignore")

# Import all the required packages
import nltk
import codecs
import urllib
import os, spacy
import en_core_web_sm
#from spacy import displacy
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from IPython.core.display import display, HTML
from nltk.tokenize import word_tokenize, sent_tokenize 
nltk.download('punkt')
nltk.download('wordnet')
nlp = en_core_web_sm.load()
from nltk.parse import CoreNLPParser
from nltk.parse.corenlp import CoreNLPDependencyParser


# Helper method to append values to duplicate keys in the dictionaries without data loss
def AppendData(mainDictionary, smallDictionary):
    smallKeys = smallDictionary.keys()
    mainKeys = mainDictionary.keys()
    for key in smallKeys:
        if key in mainKeys:
            mainDictionary[key].append(smallDictionary[key])
        else:
            mainDictionary[key] = smallDictionary[key]
    return mainDictionary
            

# Performs Word tokenization on sentences
def Tokenization(sentence):
    tokens = nltk.word_tokenize(sentence)
    return tokens


# Performs Word Lemmatization
def Lemmatization(word_tokens):
    lemmas = []
    for token in word_tokens:
        lemmas.append(wordnet_lemmatizer.lemmatize(token))
    return lemmas


# Performs POS tagging
def POSTagging(word_tokens):
    POStags = nltk.pos_tag(word_tokens)
    return POStags   


# Obtains sentence heads
def getHeads(sentence, word_tokens):
    dependencyParser = CoreNLPDependencyParser(url='http://localhost:9000')
    headList = []
    stripedSen = sentence.strip(" '\"")
    if stripedSen != "":
        depParse = dependencyParser.raw_parse(stripedSen)
        parseTree = list(depParse)[0]
        headWord = ""
        headWord = [k["word"] for k in parseTree.nodes.values() if k["head"] == 0][0]
        
        if headWord != "":
            headList.append([headWord])
        else:
            for i, pp in enumerate(tagged):
                if pp.startswith("VB"):
                    headList.append([word_tokens[i]])
                    break
            if headWord == "":
                for i, pp in enumerate(tagged):
                    if pp.startswith("NN"):
                        headList.append([word_tokens[i]])
                        break
    else:
        headList.append([""])
 
    return headList


# Obtains WordNet Features
def WordNetFeatures(word_tokens):
    # Creates dictionaries for important word senses
    hypernyms_dict = dict()
    hyponyms_dict = dict()
    meronyms_dict = dict()
    holonyms_dict = dict()
    
    # Populates the above dictionaries according to the word senses associated with them
    for token in word_tokens:
        # For each token, we create & populate the considered word senses & add that to their dictionaries
        token_hypernyms = []
        token_hyponyms = []
        token_meronyms = []
        token_holonyms = []
        
        # Obtain Synsets associated with the token
        synsetList = wn.synsets(token)
        synsetLen = len(synsetList)
        
        if synsetLen != 0:
            for each_syn in synsetList:
                # Appends Hypernym Lemmas to the Hypernym list
                for hypernym in each_syn.hypernyms():
                    for lemma in hypernym.lemmas():
                        token_hypernyms.append(lemma.name())
                
                # Appends Hyponym Lemmas to the Hypernym list
                for hyponym in each_syn.hyponyms():
                    for lemma in hyponym.lemmas():
                        token_hyponyms.append(lemma.name())
                
                # Appends Meronym Lemmas to the Hypernym list
                for meronym in each_syn.part_meronyms():
                    for lemma in meronym.lemmas():
                        token_meronyms.append(lemma.name())
                        
                # Appends Holonym Lemmas to the Hypernym list
                for holonym in each_syn.part_holonyms():
                    for lemma in holonym.lemmas():
                        token_holonyms.append(lemma.name())
                        
        hypernyms_dict[token] = token_hypernyms
        hyponyms_dict[token] = token_hyponyms
        meronyms_dict[token] = token_meronyms
        holonyms_dict[token] = token_holonyms
        
    return hypernyms_dict, hyponyms_dict, meronyms_dict, holonyms_dict
   
    
# Performs Dependency Parsing
def DependencyParsing(sentence):
    dependencyParser = CoreNLPDependencyParser(url='http://localhost:9000')
    parse, = dependencyParser.raw_parse(sentence)
    
    # Dependency parsing to parse tree based patters as features
    depParseResult = list(parse.triples())
    
    return depParseResult
    
    """depedencyParse = nlp(sentence)
    for token in depedencyParse:
        print(token.text,"----->",token.dep_,"----->",token.pos_,)
    print('\n\n')
    display(depedencyParse)
    html = displacy.render(depedencyParse, style="dep")
    display(HTML(html))"""
    

# Main method
if __name__ == "__main__":
    # List of all article names in the repository
    articleNames = ["109.txt"] #,"111.txt", "151.txt", "160.txt", "177.txt", 
                    #"179.txt","181.txt", "196.txt", "199.txt", "220.txt", 
                    #"222.txt", "226.txt", "288.txt", "297.txt", "304.txt", 
                    #"342.txt", "347.txt", "360.txt", "390.txt", "400.txt", 
                    #"56.txt", "58.txt", "6.txt"] 
    fileCount = len(articleNames)
    
    content = ""
    folderPath = "https://raw.githubusercontent.com/SaiManasaVedantam/NLP-QA-System-Datasets/main/Articles/"
    for i in range(fileCount):
        fileName = folderPath + articleNames[i]
        response = urllib.request.urlopen(fileName)
        webContents = response.read()
        stringTypeData = webContents.decode("utf-8")
        content += stringTypeData
        count = 0
        corpus_dict = {}
   
    # Use this if you want to use a local file on your machine
    """
    content = None
    try:
        f = open("Articles/6.txt", "r")
        content = f.read()
        
    except UnicodeDecodeError:
        f = open("Articles/6.txt", "r", encoding = "utf8")
        content = f.read()
        
    """
    
    # Obtain wordnet lemmatizer
    wordnet_lemmatizer = WordNetLemmatizer()
    
    # Get tokenized content
    sentences = []
    tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")
    sentences.extend(tokenizer.tokenize(content))
    
    # Sentence count
    print("Total Sentences After splitting the document: ", len(sentences))
    print("Extracting features for each of the sentences and shown below:")
    
    # We maintain all tokens, lemmas etc. in the following lists
    all_word_tokens = []
    all_word_lemmas = []
    all_word_POStags = []
    all_hypernyms = dict()
    all_hyponymns = dict()
    all_meronyms = dict()
    all_holonyms = dict()
    
    
    # Extracting words
    for sen in sentences:
        print("\n------SENTENCE------")
        print(sen)
        
        print("\n----Word Tokenization----")
        word_tokens = Tokenization(sen)
        all_word_tokens += word_tokens
        #print(word_tokens)
        
        print("\n----Word Lemmatization----")
        word_lemmas = Lemmatization(word_tokens)
        all_word_lemmas += word_lemmas
        #print(word_lemmas)
        
        print("\n----POS Tagging----")
        word_POStags = POSTagging(word_tokens)
        all_word_POStags += word_POStags
        #print(word_POStags)
        
        print("\n----WordNet Feature Extraction----")
        hypernyms, hyponyms, meronyms, holonyms = WordNetFeatures(word_tokens)
        all_hypernyms = AppendData(all_hypernyms, hypernyms)
        #all_hypernyms.update(hypernyms)
        #print("===> HYPERNYMS: <===\n", hypernyms, "\n")
        
        all_hyponymns = AppendData(all_hyponymns, hyponyms)
        #print("===> HYPONYMS: <===\n", hyponyms, "\n")
        
        all_meronyms = AppendData(all_meronyms, meronyms)
        #print("===> MERONYMS: <===\n", meronyms, "\n")
        
        all_holonyms = AppendData(all_holonyms, holonyms)
        #print("===> HOLONYMS: <===\n", holonyms)
        
        print("\n----Dependency Parsing----")
        depParse = DependencyParsing(sen)
        print(depParse)
        
        print("\n----Obtaining Heads----")
        headList = getHeads(sen, word_tokens)
        print(headList)
        
        # Process data format to suit the Elastic Search requirements
        count = count + 1
        corpus_dict[count] = {}
        corpus_dict[count]["sentence"] = {}
        corpus_dict[count]["sentence"] = sen
        corpus_dict[count]["tokenized_text"] = {}
        corpus_dict[count]["tokenized_text"] = word_tokens
        corpus_dict[count]["lemma"] = {}
        corpus_dict[count]["lemma"] = word_lemmas
        #corpus_dict[count]["stemmed"] = {}
        #corpus_dict[count]["stemmed"] = stemmed
        corpus_dict[count]["tagged"] = {}
        corpus_dict[count]["tagged"] = word_POStags
        corpus_dict[count]["dependency_parse"] = {}
        corpus_dict[count]["dependency_parse"] = depParse
        #corpus_dict[count]["synonyms"] = {}
        #corpus_dict[count]["synonyms"] = synonyms
        corpus_dict[count]["hypernyms"] = {}
        corpus_dict[count]["hypernyms"] = hypernyms
        corpus_dict[count]["hyponyms"] = {}
        corpus_dict[count]["hyponyms"] = hyponyms
        corpus_dict[count]["meronyms"] = {}
        corpus_dict[count]["meronyms"] = meronyms
        corpus_dict[count]["holonyms"] = {}
        corpus_dict[count]["holonyms"] = holonyms
        #corpus_dict[count]["ner_tag"] = {}
        #corpus_dict[count]["ner_tag"] = str(dict(ner_tag))
        corpus_dict[count]["head_word"] = {}
        corpus_dict[count]["head_word"] = headList[0]
        corpus_dict[count]["file_name"] = {}
        corpus_dict[count]["file_name"] = articleNames[i]

    output_name = 'pipeline/parsed-' + articleNames[i]
    with open(output_name, 'w', encoding='utf8') as output_file:
        json.dump(corpus_dict, output_file,  indent=4, sort_keys=True, separators=(',', ': '), ensure_ascii=False)
          

[nltk_data] Downloading package punkt to /Users/Manasa/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/Manasa/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Total Sentences After splitting the document:  204
Extracting features for each of the sentences and shown below:

------SENTENCE------
Bird migration is the regular seasonal movement, often north and south along a flyway, between breeding and wintering grounds.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('movement', 'NN'), 'nsubj', ('migration', 'NN')), (('migration', 'NN'), 'compound', ('Bird', 'NN')), (('movement', 'NN'), 'cop', ('is', 'VBZ')), (('movement', 'NN'), 'det', ('the', 'DT')), (('movement', 'NN'), 'amod', ('regular', 'JJ')), (('movement', 'NN'), 'amod', ('seasonal', 'JJ')), (('movement', 'NN'), 'punct', (',', ',')), (('movement', 'NN'), 'advmod', ('north', 'RB')), (('north', 'RB'), 'advmod', ('often', 'RB')), (('north', 'RB'), 'conj', ('south', 'RB')), (('south', 'RB'), 'cc', ('and', 'CC')), (('north', 'RB'), 'obl', ('flyway', 'NN')), (('flyway', 'NN'), 'case', ('along', 'IN

[(('species', 'NNS'), 'det', ('Some', 'DT')), (('species', 'NNS'), 'nmod', ('tubenoses', 'NNS')), (('tubenoses', 'NNS'), 'case', ('of', 'IN')), (('tubenoses', 'NNS'), 'punct', ('(', '-LRB-')), (('tubenoses', 'NNS'), 'dep', ('Procellariiformes', 'NNP')), (('tubenoses', 'NNS'), 'punct', (')', '-RRB-')), (('tubenoses', 'NNS'), 'nmod', ('albatrosses', 'NNS')), (('albatrosses', 'NNS'), 'case', ('such', 'JJ')), (('such', 'JJ'), 'fixed', ('as', 'IN')), (('albatrosses', 'NNS'), 'acl', ('circle', 'VBP')), (('circle', 'VBP'), 'obj', ('earth', 'NN')), (('earth', 'NN'), 'det', ('the', 'DT')), (('species', 'NNS'), 'punct', (',', ',')), (('species', 'NNS'), 'acl', ('flying', 'VBG')), (('flying', 'VBG'), 'obl', ('oceans', 'NNS')), (('oceans', 'NNS'), 'case', ('over', 'IN')), (('oceans', 'NNS'), 'det', ('the', 'DT')), (('oceans', 'NNS'), 'amod', ('southern', 'JJ')), (('species', 'NNS'), 'punct', (',', ',')), (('species', 'NNS'), 'dep', ('migrate', 'VBP')), (('migrate', 'VBP'), 'mark', ('while', 'IN'))

----Dependency Parsing----
[(('wrote', 'VBD'), 'nsubj', ('author', 'NN')), (('author', 'NN'), 'det', ('The', 'DT')), (('author', 'NN'), 'nmod', ('Jeremiah', 'NNP')), (('Jeremiah', 'NNP'), 'case', ('of', 'IN')), (('Jeremiah', 'NNP'), 'punct', ('(', '-LRB-')), (('Jeremiah', 'NNP'), 'dep', ('8:7', 'CD')), (('Jeremiah', 'NNP'), 'punct', (')', '-RRB-')), (('wrote', 'VBD'), 'punct', (':', ':')), (('wrote', 'VBD'), 'punct', ('"', '``')), (('wrote', 'VBD'), 'ccomp', ('knows', 'VBZ')), (('knows', 'VBZ'), 'nsubj', ('stork', 'NN')), (('stork', 'NN'), 'advmod', ('Even', 'RB')), (('stork', 'NN'), 'det', ('the', 'DT')), (('stork', 'NN'), 'nmod', ('heavens', 'NNPS')), (('heavens', 'NNPS'), 'case', ('in', 'IN')), (('heavens', 'NNPS'), 'det', ('the', 'DT')), (('knows', 'VBZ'), 'obj', ('seasons', 'NNS')), (('seasons', 'NNS'), 'nmod:poss', ('its', 'PRP$')), (('knows', 'VBZ'), 'punct', (',', ',')), (('knows', 'VBZ'), 'conj', ('keep', 'VB')), (('keep', 'VB'), 'cc', ('and', 'CC')), (('keep', 'VB'), 'nsubj',

[(('mentions', 'VBZ'), 'nsubj', ('History', 'NN')), (('History', 'NN'), 'nmod:poss', ('Bewick', 'NNP')), (('Bewick', 'NNP'), 'compound', ('Thomas', 'NNP')), (('Bewick', 'NNP'), 'case', ("'s", 'POS')), (('History', 'NN'), 'det', ('A', 'DT')), (('History', 'NN'), 'nmod', ('Birds', 'NNS')), (('Birds', 'NNS'), 'case', ('of', 'IN')), (('Birds', 'NNS'), 'amod', ('British', 'JJ')), (('History', 'NN'), 'appos', ('Volume', 'NN')), (('Volume', 'NN'), 'punct', ('(', '-LRB-')), (('Volume', 'NN'), 'nummod', ('1', 'CD')), (('Volume', 'NN'), 'punct', (',', ',')), (('Volume', 'NN'), 'nummod', ('1797', 'CD')), (('Volume', 'NN'), 'punct', (')', '-RRB-')), (('mentions', 'VBZ'), 'obj', ('report', 'NN')), (('report', 'NN'), 'det', ('a', 'DT')), (('report', 'NN'), 'nmod', ('master', 'NN')), (('master', 'NN'), 'case', ('from', 'IN')), (('master', 'NN'), 'punct', ('"', '``')), (('master', 'NN'), 'det', ('a', 'DT')), (('master', 'NN'), 'amod', ('intelligent', 'JJ')), (('intelligent', 'JJ'), 'advmod', ('very', 

[(('said', 'VBN'), 'nsubj:pass', ('birds', 'NNS')), (('birds', 'NNS'), 'amod', ('Non-migratory', 'JJ')), (('said', 'VBN'), 'aux:pass', ('are', 'VBP')), (('said', 'VBN'), 'xcomp', ('resident', 'JJ')), (('resident', 'JJ'), 'mark', ('to', 'TO')), (('resident', 'JJ'), 'cop', ('be', 'VB')), (('resident', 'JJ'), 'conj', ('sedentary', 'JJ')), (('sedentary', 'JJ'), 'cc', ('or', 'CC')), (('said', 'VBN'), 'punct', ('.', '.'))]

----Obtaining Heads----
[['said']]

------SENTENCE------
Approximately 1800 of the world's 10,000 bird species are long-distance migrants.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('migrants', 'NNS'), 'nsubj', ('1800', 'CD')), (('1800', 'CD'), 'advmod', ('Approximately', 'RB')), (('1800', 'CD'), 'nmod', ('species', 'NNS')), (('species', 'NNS'), 'case', ('of', 'IN')), (('species', 'NNS'), 'nmod:poss', ('world', 'NN')), (('world', 'NN'), 'det', ('the', 'DT')), (('world', 'NN

[['helps']]

------SENTENCE------
As the days shorten in autumn, the birds return to warmer regions where the available food supply varies little with the season.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('return', 'VBP'), 'advcl', ('shorten', 'VB')), (('shorten', 'VB'), 'mark', ('As', 'IN')), (('shorten', 'VB'), 'nsubj', ('days', 'NNS')), (('days', 'NNS'), 'det', ('the', 'DT')), (('shorten', 'VB'), 'obl', ('autumn', 'NN')), (('autumn', 'NN'), 'case', ('in', 'IN')), (('return', 'VBP'), 'punct', (',', ',')), (('return', 'VBP'), 'nsubj', ('birds', 'NNS')), (('birds', 'NNS'), 'det', ('the', 'DT')), (('return', 'VBP'), 'obl', ('regions', 'NNS')), (('regions', 'NNS'), 'case', ('to', 'IN')), (('regions', 'NNS'), 'amod', ('warmer', 'JJR')), (('regions', 'NNS'), 'acl:relcl', ('varies', 'VBZ')), (('varies', 'VBZ'), 'advmod', ('where', 'WRB')), (('varies', 'VBZ'), 'nsubj', ('supply', 'NN')), (('s

[['common']]

------SENTENCE------
In some species, the population at higher latitudes tends to be migratory and will often winter at lower latitude.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('tends', 'VBZ'), 'obl', ('species', 'NNS')), (('species', 'NNS'), 'case', ('In', 'IN')), (('species', 'NNS'), 'det', ('some', 'DT')), (('tends', 'VBZ'), 'punct', (',', ',')), (('tends', 'VBZ'), 'nsubj', ('population', 'NN')), (('population', 'NN'), 'det', ('the', 'DT')), (('population', 'NN'), 'nmod', ('latitudes', 'NNS')), (('latitudes', 'NNS'), 'case', ('at', 'IN')), (('latitudes', 'NNS'), 'amod', ('higher', 'JJR')), (('tends', 'VBZ'), 'xcomp', ('migratory', 'JJ')), (('migratory', 'JJ'), 'mark', ('to', 'TO')), (('migratory', 'JJ'), 'cop', ('be', 'VB')), (('tends', 'VBZ'), 'conj', ('winter', 'NN')), (('winter', 'NN'), 'cc', ('and', 'CC')), (('winter', 'NN'), 'aux', ('will', 'MD')), (('winter', 'NN

[['migration']]

------SENTENCE------
Many, if not most, birds migrate in flocks.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('migrate', 'VBP'), 'nsubj', ('Many', 'JJ')), (('migrate', 'VBP'), 'punct', (',', ',')), (('migrate', 'VBP'), 'advcl', ('not', 'RB')), (('not', 'RB'), 'mark', ('if', 'IN')), (('not', 'RB'), 'advmod', ('most', 'RBS')), (('migrate', 'VBP'), 'punct', (',', ',')), (('migrate', 'VBP'), 'nsubj', ('birds', 'NNS')), (('migrate', 'VBP'), 'obl', ('flocks', 'NNS')), (('flocks', 'NNS'), 'case', ('in', 'IN')), (('migrate', 'VBP'), 'punct', ('.', '.'))]

----Obtaining Heads----
[['migrate']]

------SENTENCE------
For larger birds, flying in flocks reduces the energy cost.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('reduces', 'VBZ'), 'obl', ('birds', 'NNS')), (('birds', 'NNS'), 'cas

[['limited']]

------SENTENCE------
Most species of penguin (Spheniscidae) migrate by swimming.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('migrate', 'VBP'), 'nsubj', ('species', 'NNS')), (('species', 'NNS'), 'amod', ('Most', 'JJS')), (('species', 'NNS'), 'nmod', ('penguin', 'NN')), (('penguin', 'NN'), 'case', ('of', 'IN')), (('penguin', 'NN'), 'punct', ('(', '-LRB-')), (('penguin', 'NN'), 'dep', ('Spheniscidae', 'NNP')), (('penguin', 'NN'), 'punct', (')', '-RRB-')), (('migrate', 'VBP'), 'obl', ('swimming', 'NN')), (('swimming', 'NN'), 'case', ('by', 'IN')), (('migrate', 'VBP'), 'punct', ('.', '.'))]

----Obtaining Heads----
[['migrate']]

------SENTENCE------
These routes can cover over 1,000 km (620 mi).

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('cover', 'VB'), 'nsubj', ('routes', 'NNS'

[['consist']]

------SENTENCE------
The same considerations about barriers and detours that apply to long-distance land-bird migration apply to water birds, but in reverse: a large area of land without bodies of water that offer feeding sites may also be a barrier to a bird that feeds in coastal waters.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('barrier', 'NN'), 'nsubj', ('considerations', 'NNS')), (('considerations', 'NNS'), 'det', ('The', 'DT')), (('considerations', 'NNS'), 'amod', ('same', 'JJ')), (('considerations', 'NNS'), 'nmod', ('barriers', 'NNS')), (('barriers', 'NNS'), 'case', ('about', 'IN')), (('barriers', 'NNS'), 'conj', ('detours', 'NNS')), (('detours', 'NNS'), 'cc', ('and', 'CC')), (('considerations', 'NNS'), 'acl:relcl', ('apply', 'VBP')), (('apply', 'VBP'), 'nsubj', ('that', 'WDT')), (('apply', 'VBP'), 'obl', ('migration', 'NN')), (('migration', 'NN'), 'case', ('to', 'I

[['stored']]

------SENTENCE------
Seabird migration is similar in pattern to those of the waders and waterfowl.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('similar', 'JJ'), 'nsubj', ('migration', 'NN')), (('migration', 'NN'), 'compound', ('Seabird', 'NNP')), (('similar', 'JJ'), 'cop', ('is', 'VBZ')), (('similar', 'JJ'), 'obl', ('pattern', 'NN')), (('pattern', 'NN'), 'case', ('in', 'IN')), (('pattern', 'NN'), 'nmod', ('those', 'DT')), (('those', 'DT'), 'case', ('to', 'IN')), (('those', 'DT'), 'nmod', ('waders', 'NNS')), (('waders', 'NNS'), 'case', ('of', 'IN')), (('waders', 'NNS'), 'det', ('the', 'DT')), (('waders', 'NNS'), 'conj', ('waterfowl', 'NN')), (('waterfowl', 'NN'), 'cc', ('and', 'CC')), (('similar', 'JJ'), 'punct', ('.', '.'))]

----Obtaining Heads----
[['similar']]

------SENTENCE------
Some, such as the black guillemot Cepphus grylle and some gulls, are quite sedentary; other

[['wanderers']]

------SENTENCE------
The tubenoses spread widely over large areas of open ocean, but congregate when food becomes available.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('spread', 'VBP'), 'nsubj', ('tubenoses', 'NNS')), (('tubenoses', 'NNS'), 'det', ('The', 'DT')), (('spread', 'VBP'), 'advmod', ('widely', 'RB')), (('spread', 'VBP'), 'obj', ('areas', 'NNS')), (('areas', 'NNS'), 'advmod', ('over', 'RB')), (('areas', 'NNS'), 'amod', ('large', 'JJ')), (('areas', 'NNS'), 'nmod', ('ocean', 'NN')), (('ocean', 'NN'), 'case', ('of', 'IN')), (('ocean', 'NN'), 'amod', ('open', 'JJ')), (('spread', 'VBP'), 'punct', (',', ',')), (('spread', 'VBP'), 'conj', ('congregate', 'VBP')), (('congregate', 'VBP'), 'cc', ('but', 'CC')), (('congregate', 'VBP'), 'advcl', ('becomes', 'VBZ')), (('becomes', 'VBZ'), 'advmod', ('when', 'WRB')), (('becomes', 'VBZ'), 'nsubj', ('food', 'NN')), (('becomes', '

[(('counted', 'VBN'), 'nsubj:pass', ('species', 'NNS')), (('species', 'NNS'), 'amod', ('common', 'JJ')), (('common', 'JJ'), 'advmod', ('More', 'RBR')), (('species', 'NNS'), 'punct', (',', ',')), (('species', 'NNS'), 'nmod', ('buzzard', 'NN')), (('buzzard', 'NN'), 'case', ('such', 'JJ')), (('such', 'JJ'), 'fixed', ('as', 'IN')), (('buzzard', 'NN'), 'det', ('the', 'DT')), (('buzzard', 'NN'), 'amod', ('European', 'JJ')), (('buzzard', 'NN'), 'compound', ('honey', 'NN')), (('buzzard', 'NN'), 'dep', ('apivorus', 'NNP')), (('apivorus', 'NNP'), 'compound', ('Pernis', 'NNP')), (('counted', 'VBN'), 'punct', (',', ',')), (('counted', 'VBN'), 'aux', ('can', 'MD')), (('counted', 'VBN'), 'aux:pass', ('be', 'VB')), (('counted', 'VBN'), 'obl', ('hundreds', 'NNS')), (('hundreds', 'NNS'), 'case', ('in', 'IN')), (('hundreds', 'NNS'), 'nmod', ('thousands', 'NNS')), (('thousands', 'NNS'), 'case', ('of', 'IN')), (('counted', 'VBN'), 'obl', ('autumn', 'NN')), (('autumn', 'NN'), 'case', ('in', 'IN')), (('coun

[(('migrate', 'VBP'), 'nsubj', ('Many', 'JJ')), (('Many', 'JJ'), 'nmod', ('birds', 'NNS')), (('birds', 'NNS'), 'case', ('of', 'IN')), (('birds', 'NNS'), 'det', ('the', 'DT')), (('birds', 'NNS'), 'amod', ('smaller', 'JJR')), (('birds', 'NNS'), 'amod', ('insectivorous', 'JJ')), (('birds', 'NNS'), 'nmod', ('warblers', 'NNS')), (('warblers', 'NNS'), 'case', ('including', 'VBG')), (('warblers', 'NNS'), 'det', ('the', 'DT')), (('warblers', 'NNS'), 'punct', (',', ',')), (('warblers', 'NNS'), 'conj', ('hummingbirds', 'NNS')), (('warblers', 'NNS'), 'conj', ('flycatchers', 'NNS')), (('flycatchers', 'NNS'), 'cc', ('and', 'CC')), (('migrate', 'VBP'), 'obj', ('distances', 'NNS')), (('distances', 'NNS'), 'amod', ('large', 'JJ')), (('distances', 'NNS'), 'punct', (',', ',')), (('distances', 'NNS'), 'nmod', ('night', 'NN')), (('night', 'NN'), 'advmod', ('usually', 'RB')), (('night', 'NN'), 'case', ('at', 'IN')), (('migrate', 'VBP'), 'punct', ('.', '.'))]

----Obtaining Heads----
[['migrate']]

------SE

[(('move', 'VB'), 'advmod', ('Thus', 'RB')), (('move', 'VB'), 'nsubj', ('breeders', 'NNS')), (('breeders', 'NNS'), 'compound', ('mountain', 'NN')), (('mountain', 'NN'), 'conj', ('moorland', 'NN')), (('moorland', 'NN'), 'cc', ('and', 'CC')), (('breeders', 'NNS'), 'punct', (',', ',')), (('breeders', 'NNS'), 'nmod', ('muraria', 'NN')), (('muraria', 'NN'), 'case', ('such', 'JJ')), (('such', 'JJ'), 'fixed', ('as', 'IN')), (('muraria', 'NN'), 'compound', ('wallcreeper', 'NN')), (('muraria', 'NN'), 'compound', ('Tichodroma', 'NNP')), (('muraria', 'NN'), 'conj', ('cinclus', 'NNP')), (('cinclus', 'NNP'), 'cc', ('and', 'CC')), (('cinclus', 'NNP'), 'compound', ('dipper', 'NN')), (('dipper', 'NN'), 'amod', ('throated', 'JJ')), (('throated', 'JJ'), 'amod', ('white', 'JJ')), (('throated', 'JJ'), 'punct', ('-', 'HYPH')), (('cinclus', 'NNP'), 'compound', ('Cinclus', 'NNP')), (('move', 'VB'), 'punct', (',', ',')), (('move', 'VB'), 'aux', ('may', 'MD')), (('move', 'VB'), 'xcomp', ('escape', 'VB')), (('e

[['is']]

------SENTENCE------
Sometimes circumstances such as a good breeding season followed by a food source failure the following year lead to irruptions in which large numbers of a species move far beyond the normal range.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('followed', 'VBN'), 'advmod', ('Sometimes', 'RB')), (('followed', 'VBN'), 'nsubj', ('circumstances', 'NNS')), (('circumstances', 'NNS'), 'nmod', ('season', 'NN')), (('season', 'NN'), 'case', ('such', 'JJ')), (('such', 'JJ'), 'fixed', ('as', 'IN')), (('season', 'NN'), 'det', ('a', 'DT')), (('season', 'NN'), 'amod', ('good', 'JJ')), (('season', 'NN'), 'compound', ('breeding', 'NN')), (('followed', 'VBN'), 'obl', ('failure', 'NN')), (('failure', 'NN'), 'case', ('by', 'IN')), (('failure', 'NN'), 'det', ('a', 'DT')), (('failure', 'NN'), 'compound', ('source', 'NN')), (('source', 'NN'), 'compound', ('food', 'NN')), (('failure',

[(('is', 'VBZ'), 'nsubj', ('This', 'DT')), (('is', 'VBZ'), 'advcl', ('is', 'VBZ')), (('is', 'VBZ'), 'mark', ('because', 'IN')), (('is', 'VBZ'), 'expl', ('there', 'EX')), (('is', 'VBZ'), 'nsubj', ('area', 'NN')), (('area', 'NN'), 'det', ('a', 'DT')), (('area', 'NN'), 'amod', ('large', 'JJ')), (('area', 'NN'), 'nmod', ('ocean', 'NN')), (('ocean', 'NN'), 'case', ('of', 'IN')), (('is', 'VBZ'), 'obl', ('Hemisphere', 'NNP')), (('Hemisphere', 'NNP'), 'case', ('in', 'IN')), (('Hemisphere', 'NNP'), 'det', ('the', 'DT')), (('Hemisphere', 'NNP'), 'compound', ('Southern', 'NNP')), (('is', 'VBZ'), 'punct', (',', ',')), (('is', 'VBZ'), 'dep', ('and', 'CC')), (('and', 'CC'), 'dep', ('islands', 'NNS')), (('islands', 'NNS'), 'amod', ('more', 'JJR')), (('islands', 'NNS'), 'amod', ('suitable', 'JJ')), (('suitable', 'JJ'), 'obl', ('seabirds', 'NNS')), (('seabirds', 'NNS'), 'case', ('for', 'IN')), (('seabirds', 'NNS'), 'nmod', ('nest', 'NN')), (('nest', 'NN'), 'case', ('to', 'IN')), (('is', 'VBZ'), 'punct'

[(('occurrence', 'NN'), 'det', ('The', 'DT')), (('occurrence', 'NN'), 'nmod', ('Zugunruhe', 'NNP')), (('Zugunruhe', 'NNP'), 'case', ('of', 'IN')), (('occurrence', 'NN'), 'nmod', ('birds', 'NNS')), (('birds', 'NNS'), 'advmod', ('even', 'RB')), (('birds', 'NNS'), 'case', ('in', 'IN')), (('birds', 'NNS'), 'amod', ('raised', 'VBN')), (('raised', 'VBN'), 'obl', ('cage', 'NN')), (('raised', 'VBN'), 'punct', ('-', 'HYPH')), (('birds', 'NNS'), 'nmod', ('cues', 'NNS')), (('cues', 'NNS'), 'case', ('with', 'IN')), (('cues', 'NNS'), 'det', ('no', 'DT')), (('cues', 'NNS'), 'amod', ('environmental', 'JJ')), (('cues', 'NNS'), 'punct', ('(', '-LRB-')), (('cues', 'NNS'), 'dep', ('e.g.', 'FW'))]

----Obtaining Heads----
[['occurrence']]

------SENTENCE------
shortening of day and falling temperature) has pointed to the role of circannual endogenous programs in controlling bird migrations.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

---

[['explained']]

------SENTENCE------
The ability to successfully perform long-distance migrations can probably only be fully explained with an accounting for the cognitive ability of the birds to recognize habitats and form mental maps.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('explained', 'VBN'), 'csubj:pass', ('perform', 'VB')), (('perform', 'VB'), 'nsubj', ('ability', 'NN')), (('ability', 'NN'), 'det', ('The', 'DT')), (('perform', 'VB'), 'mark', ('to', 'TO')), (('perform', 'VB'), 'advmod', ('successfully', 'RB')), (('perform', 'VB'), 'obj', ('migrations', 'NNS')), (('migrations', 'NNS'), 'compound', ('distance', 'NN')), (('distance', 'NN'), 'amod', ('long', 'JJ')), (('distance', 'NN'), 'punct', ('-', 'HYPH')), (('explained', 'VBN'), 'aux', ('can', 'MD')), (('explained', 'VBN'), 'advmod', ('probably', 'RB')), (('explained', 'VBN'), 'advmod', ('only', 'RB')), (('explained', 'VBN'), '

[['let']]

------SENTENCE------
There is a neural connection between the eye and "Cluster N", the part of the forebrain that is active during migrational orientation, suggesting that birds may actually be able to see the magnetic field of the earth.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('is', 'VBZ'), 'expl', ('There', 'EX')), (('is', 'VBZ'), 'nsubj', ('connection', 'NN')), (('connection', 'NN'), 'det', ('a', 'DT')), (('connection', 'NN'), 'amod', ('neural', 'JJ')), (('connection', 'NN'), 'nmod', ('eye', 'NN')), (('eye', 'NN'), 'case', ('between', 'IN')), (('eye', 'NN'), 'det', ('the', 'DT')), (('eye', 'NN'), 'conj', ('N', 'NN')), (('N', 'NN'), 'cc', ('and', 'CC')), (('N', 'NN'), 'punct', ('"', '``')), (('N', 'NN'), 'compound', ('Cluster', 'NN')), (('N', 'NN'), 'punct', ('"', "''")), (('is', 'VBZ'), 'punct', (',', ',')), (('is', 'VBZ'), 'nsubj', ('part', 'NN')), (('part', 'NN'), 'det

[['possible']]

------SENTENCE------
After a trial with Canada geese Branta canadensis, microlight aircraft were used in the US to teach safe migration routes to reintroduced whooping cranes Grus americana.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('used', 'VBN'), 'obl', ('trial', 'NN')), (('trial', 'NN'), 'case', ('After', 'IN')), (('trial', 'NN'), 'det', ('a', 'DT')), (('trial', 'NN'), 'nmod', ('canadensis', 'NN')), (('canadensis', 'NN'), 'case', ('with', 'IN')), (('canadensis', 'NN'), 'compound', ('geese', 'NNS')), (('geese', 'NNS'), 'compound', ('Canada', 'NNP')), (('canadensis', 'NN'), 'compound', ('Branta', 'NNP')), (('used', 'VBN'), 'punct', (',', ',')), (('used', 'VBN'), 'nsubj:pass', ('aircraft', 'NN')), (('aircraft', 'NN'), 'compound', ('microlight', 'NN')), (('used', 'VBN'), 'aux:pass', ('were', 'VBD')), (('used', 'VBN'), 'obl', ('US', 'NNP')), (('US', 'NNP'), 'case', ('in', 

[(('explains', 'VBZ'), 'nsubj', ('This', 'DT')), (('explains', 'VBZ'), 'obj', ('appearance', 'NN')), (('appearance', 'NN'), 'det', ('the', 'DT')), (('appearance', 'NN'), 'amod', ('rapid', 'JJ')), (('appearance', 'NN'), 'nmod', ('behavior', 'NN')), (('behavior', 'NN'), 'case', ('of', 'IN')), (('behavior', 'NN'), 'amod', ('migratory', 'JJ')), (('appearance', 'NN'), 'nmod', ('maximum', 'NN')), (('maximum', 'NN'), 'case', ('after', 'IN')), (('maximum', 'NN'), 'det', ('the', 'DT')), (('maximum', 'NN'), 'amod', ('recent', 'JJ')), (('recent', 'JJ'), 'advmod', ('most', 'RBS')), (('maximum', 'NN'), 'amod', ('glacial', 'JJ')), (('explains', 'VBZ'), 'punct', ('.', '.'))]

----Obtaining Heads----
[['explains']]

------SENTENCE------
Theoretical analyses show that detours that increase flight distance by up to 20% will often be adaptive on aerodynamic grounds - a bird that loads itself with food to cross a long barrier flies less efficiently.

----Word Tokenization----

----Word Lemmatization----



[['spread']]

------SENTENCE------
Birds may also have a role in the dispersal of propagules of plants and plankton.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('have', 'VB'), 'nsubj', ('Birds', 'NNS')), (('have', 'VB'), 'aux', ('may', 'MD')), (('have', 'VB'), 'advmod', ('also', 'RB')), (('have', 'VB'), 'obj', ('role', 'NN')), (('role', 'NN'), 'det', ('a', 'DT')), (('role', 'NN'), 'nmod', ('dispersal', 'NN')), (('dispersal', 'NN'), 'case', ('in', 'IN')), (('dispersal', 'NN'), 'det', ('the', 'DT')), (('dispersal', 'NN'), 'nmod', ('propagules', 'NNS')), (('propagules', 'NNS'), 'case', ('of', 'IN')), (('propagules', 'NNS'), 'nmod', ('plants', 'NNS')), (('plants', 'NNS'), 'case', ('of', 'IN')), (('plants', 'NNS'), 'conj', ('plankton', 'NN')), (('plankton', 'NN'), 'cc', ('and', 'CC')), (('have', 'VB'), 'punct', ('.', '.'))]

----Obtaining Heads----
[['have']]

------SENTENCE------
Some predato

[['include']]

------SENTENCE------
For example, in the East Asian–Australasian Flyway, up to 65% of key intertidal habitat at the Yellow Sea migration bottleneck has been destroyed since the 1950s.

----Word Tokenization----

----Word Lemmatization----

----POS Tagging----

----WordNet Feature Extraction----

----Dependency Parsing----
[(('destroyed', 'VBN'), 'obl', ('example', 'NN')), (('example', 'NN'), 'case', ('For', 'IN')), (('destroyed', 'VBN'), 'punct', (',', ',')), (('destroyed', 'VBN'), 'obl', ('Asian', 'NNP')), (('Asian', 'NNP'), 'case', ('in', 'IN')), (('Asian', 'NNP'), 'det', ('the', 'DT')), (('Asian', 'NNP'), 'compound', ('East', 'NNP')), (('Asian', 'NNP'), 'dep', ('–', 'SYM')), (('destroyed', 'VBN'), 'nsubj:pass', ('Flyway', 'NNP')), (('Flyway', 'NNP'), 'compound', ('Australasian', 'NNP')), (('Flyway', 'NNP'), 'punct', (',', ',')), (('Flyway', 'NNP'), 'appos', ('%', 'NN')), (('%', 'NN'), 'nummod', ('65', 'CD')), (('65', 'CD'), 'advmod', ('up', 'RB')), (('65', 'CD'), 'adv

FileNotFoundError: [Errno 2] No such file or directory: 'pipeline/parsed-109.txt'