In [27]:
## Basic description (in progress)

import nltk
from nltk import FreqDist
from nltk.corpus import PlaintextCorpusReader
corpus_root = './'


wordlists = PlaintextCorpusReader(corpus_root, '.*') 
wordlists.fileids()
Narnia = wordlists.words('Narnia')
print(Narnia)

fdist1 = FreqDist(Narnia)
#print(fdist1.most_common(10))
print("most occcuring non-trivial words:")
print()
print(list(set([word for word in Narnia if len(word)>3 and fdist1[word] > 5])))



['There', 'were', 'four', 'children', 'whose', 'names', ...]
most occcuring non-trivial words:

['said', 'There', 'that', 'door', 'with', 'will', 'very', 'Edmund', 'room', 'Lucy', 'house', 'were', 'they']


In [28]:
## Function to check if sentence can be parsed with parser of choice

from nltk import CFG
from nltk.grammar import FeatureGrammar
from nltk.parse import RecursiveDescentParser, FeatureEarleyChartParser, ShiftReduceParser
from nltk.parse.generate import generate

# Function that works for multiple types of parsers (You are free to use something else if you want.)
def check_sentence(parser, sentence):
    print("--------------------------------------------------")
    print("Checking if provided sentence matches the grammar:")
    print(sentence)
    if isinstance(sentence, str):
        sentence = sentence.split()
    tree_found = False
    results = parser.parse(sentence)
    for tree in results:
        tree_found = True
        # print(tree)
    if not tree_found:
        print(sentence, "Does not match the provided grammar.")
    print("--------------------------------------------------")
    return tree_found

In [29]:
## The actual context-free grammar 

cfg = FeatureGrammar.fromstring("""
    
    # sentences
    S -> NP VP
    S -> VP
    S -> ADVP VP
    S -> S WHNP
    S -> S Comma CC S
    S -> S CC S
     
    SBAR -> WHNP S 
     
    # constituents
    NP -> Det N
    NP -> NNP
    NP -> Pos N
    NP -> ADJP N
    NP -> Det ADJP N
    NP -> ADJP NP
    NP -> NP Comma NP
    NP -> Pro
    NP -> NP SBAR
    NP -> NN SBAR
    NP -> NP PP
    NP -> NP CC NP
    NP -> Adjs
    
    VP -> V NP
    VP -> V ADVP PP
    VP -> V ADJP PP
    VP -> V Adv VP
    VP -> V NP PP
    VP -> V NP ADVP
    VP -> V PP
    VP -> V PP ADVP
    VP -> V VP
    
    PP -> TO NP
    PP -> IN NP
    PP -> IN IN NP
    PP -> Prep NP
    PP -> PP CC PP
    PP -> PP PP
    
    ADVP -> Adv
    ADVP -> Adv Adv
    

    ADJP -> Adj N
    ADJP -> Adj Pro
    ADJP -> IterAdj
    ADJP -> Adv Adj
    ADJP -> Adj
    ADJP -> Adj Adj
    
    WHADVP -> WhAdv
    
    WHNP -> Pos N VP
    WHNP -> Det
    WHNP -> WDT
    
    
    # lexicon
    Det ->  'the' | 'an' | 'this' | 'that' | 'no' | 'a'
    V -> 'were' | 'sent' | 'is' | 'happened' | 'lived' | 'had' | 'come' \
         | 'called' | 'was' | 'do' | 'grew' | 'liked' | 'came' \
         | 'meet' | 'wanted' | 'laugh' | 'keep' | 'pretending' \
         | 'blowing' | 'hide' | 'said' | 'gone' | 'talked' \
         | 'fallen' | 'will' | 'let' | 'like' | 'thought' \
         | 'pretending' | 'go' | 'talking' | 'trying' | 'talk' \
         | 'am' | 'heard' | 'would' | 'told' | 'going' | 'mind' \
         | 'did' | 'hear' | 'be' | 'made' | 'are' | 'say' \
         | 'have'
    Adj -> 'four' | 'old' | 'ten' | 'nearest' | 'two' | 'large' \
            | 'three' | 'mrs' | 'shaggy' | 'white' | 'first' \
            | 'front' | 'oddlooking' | 'little' | 'afraid' \
            | 'good' | 'splendid' | 'badtempered' | 'like' \
            | 'any' | 'down' | 'tempered'
    Adv -> 'away' | 'there' | 'very' | 'not' |  'much' | 'almost' \
           | 'immediately' | 'anything' | 'tired' | 'always' \
           | 'better' | 'so' | 'upstairs' | 'perfectly' | 'all' \
           | 'what' | 'anyway' | 'bad'
    IN -> 'from' | 'during' | 'because' | 'of' | 'in' | 'with' | 'into' | 'over' \
           | 'on' | 'at' | 'between'
    TO -> 'to'
    N -> 'children' | 'names' | 'war' | 'airraids' | 'house' | 'professor' \
          | 'story' | 'heart' | 'country' | 'postoffice' | 'railwaystation' \
          | 'miles' | 'wife' | 'housekeeper' | 'servants' | 'man' | 'face' \
          | 'hair' | 'head' | 'evening' | 'door' | 'nose' | 'night' | 'boys' \
          | 'room' | 'feet' | 'chap' | 'dear' | 'bed' | 'row' \
          | 'walk' |  'amount' | 'stairs' | 'passages' 'diningroom' \
          | 'girls' | 'minutes'
    NNP -> 'london' | 'time' | 'mother' | 'here' | 
    NN -> 'something'
    Pos -> 'whose' | 'their' | 'his'
    Pro -> 'peter' | 'susan' | 'edmund' | 'lucy' | 'they' | 'them' | 'he' \
           | 'ivy' | 'margaret' | 'betty' | 'macready' | 'him' \
           | 'it' | 'that' | 'what' | 'who' | 'yourself' | 'nobody' \
           | 'there' | 'we' | 'she' | 'you' | 'i'
    CC -> 'and' | 'but' | 'if' | 'where' | 'us'
    Comma -> ','
    Prep -> 'about' | 'over' | 'off' | 'on'
    WDT -> 'which'
    Adjs -> 'most'
    WhAdv -> 'when'
    PR -> 'out'
    

""")

In [20]:
## generating some senteces in non-random fashion

import random

generated_sentences = []

for sentence in generate(cfg, depth=4):
    generated_sentences.append(' '.join(sentence))

for i in range(10):
    print(random.choice(generated_sentences))

KeyboardInterrupt: 

In [30]:
## check if sentences can be parsed

cfg_parser = FeatureEarleyChartParser(cfg)
#check_sentence(cfg_parser, 'There were four children whose names were Peter , edmund , lucy and susan'.lower())
#check_sentence(cfg_parser, 'this story is about something that happened to them'.lower())
#check_sentence(cfg_parser, 'they were sent away from London during the war because of the air-raids'.lower())
#check_sentence(cfg_parser, 'They were sent to the house of an old Professor'.lower())
#check_sentence(cfg_parser, 'The Professor lived in the heart of the country'.lower())
#check_sentence(cfg_parser, 'he lived ten miles from the nearest railway station and he lived two miles from the nearest post office'.lower())
#check_sentence(cfg_parser, 'Their names were Ivy , Margaret and Betty , but they do not come into the story much'.lower())
#check_sentence(cfg_parser, 'the housekeeper was a very old man'.lower())
check_sentence(cfg_parser, 'they liked him almost immediately'.lower())

--------------------------------------------------
Checking if provided sentence matches the grammar:
they liked him almost immediately
--------------------------------------------------


True

In [31]:
sentence = ''
sentence_list = []

for word in Narnia:
    if not word == '.':
        sentence += word + ' '
    else:
        sentence = sentence[:-1]
        sentence_list.append([sentence])
        sentence = ''
        
print(sentence_list)
        

[['There were four children whose names were Peter , Susan , Edmund and Lucy'], ['this story is about something that happened to them'], ['they were sent away from London during the war because of the airraids'], ['They were sent to the house of an old Professor'], ['The Professor lived in the heart of the country'], ['he lived ten miles from the nearest railwaystation and he lived two miles from the nearest postoffice'], ['He had no wife and he lived in a very large house with a housekeeper and three servants'], ['Their names were Ivy , Margaret and Betty , but they do not come into the story much'], ['The housekeeper was called Mrs Macready'], ['The professor was a very old man with shaggy white hair which grew over most of his face and on his head'], ['they liked him almost immediately'], ['but on the first evening when he came out to meet them at the front door he was so oddlooking that Lucy was a little afraid of him'], ['and Edmund wanted to laugh and had to keep pretending he wa

In [32]:
for [sentence] in sentence_list:
    print(check_sentence(cfg_parser, sentence.lower()))


--------------------------------------------------
Checking if provided sentence matches the grammar:
there were four children whose names were peter , susan , edmund and lucy
--------------------------------------------------
True
--------------------------------------------------
Checking if provided sentence matches the grammar:
this story is about something that happened to them
--------------------------------------------------
True
--------------------------------------------------
Checking if provided sentence matches the grammar:
they were sent away from london during the war because of the airraids
--------------------------------------------------
True
--------------------------------------------------
Checking if provided sentence matches the grammar:
they were sent to the house of an old professor
--------------------------------------------------
True
--------------------------------------------------
Checking if provided sentence matches the grammar:
the professor lived i

--------------------------------------------------
True
--------------------------------------------------
Checking if provided sentence matches the grammar:
he told them this was the house where nobody was going to mind what they did
['he', 'told', 'them', 'this', 'was', 'the', 'house', 'where', 'nobody', 'was', 'going', 'to', 'mind', 'what', 'they', 'did'] Does not match the provided grammar.
--------------------------------------------------
False
--------------------------------------------------
Checking if provided sentence matches the grammar:
they will not hear us anyway
--------------------------------------------------
True
--------------------------------------------------
Checking if provided sentence matches the grammar:
the walk is ten minutes from here down to that diningroom , and there is any amount of stairs and passages in between
['the', 'walk', 'is', 'ten', 'minutes', 'from', 'here', 'down', 'to', 'that', 'diningroom', ',', 'and', 'there', 'is', 'any', 'amount', 'o

ValueError: Grammar does not cover some of the input words: "'suddenly', 'asked', 'noise'".