In [100]:
import os
from random import choice

# load random story
def random_story(folder=None):
    if folder is None:
        folder = choice(os.listdir('ESL-stuff'))
        
    story = choice(os.listdir('ESL-stuff/' + folder))
    print(folder + '/' + story)
    with open('ESL-stuff/' + folder + '/' + story, 'r') as f:
        story = f.readlines()
        # get rid of empty lines
        story = [line for line in story if line != '\n']
    return story

In [101]:
def get_random_sentence(story):
    """Get a random sentence from a story."""
    paragraph = choice(story).split('. ')
    sentence = choice(paragraph)
    if len(sentence) < 4: return get_random_sentence(story)

    if sentence[0].islower(): return get_random_sentence(story)
    
    return sentence + "."

In [102]:
story = random_story("level")
story

level/file-4-54.txt


['Sarah was very excited today. It was the first time she was going to ride a horse. Her uncle took her to the stables where the horses were. Horses have always been her favorite animal. When she saw the horses, her heart was filled with excitement. She picked one of the horses to ride. She got her equipment. All laced up and ready to go, she got on the horse. At first the ride was bumpy. Soon enough, she got the hang of it. She felt like she was flying. When it was time to leave, all she could think was when the next time was going to be. ']

In [103]:
get_random_sentence(story)

'Horses have always been her favorite animal.'

In [104]:
# find verb in sentence
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

def find_verb(sentence):
    verbs = []
    pos_tags = pos_tag(word_tokenize(sentence))
    print(pos_tags)
    for word, tag in pos_tags:
        if tag.startswith('VBD') or tag.startswith('VBP'):
            verbs.append(word)

    if len(verbs) == 0:
        for word, tag in pos_tags:
            if tag.startswith('VBZ') or tag.startswith('VB'):
                verbs.append(word)
    return verbs

In [105]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\akash\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [106]:
# nltk.download('brown')
text = nltk.Text(word.lower() for word in nltk.corpus.brown.words())

from nltk.corpus import wordnet

def create_similar_word_bank(words, num=4):
    bank = set()

    for word in words:
        # similar_words = wordnet.similar_tos('dog')
        for syn in wordnet.synsets(word):
            bank.add(syn.lemmas()[0].name())

        """ for word in similar_words:
            bank.append(word) """
        # bank.append(text.similar(word, num=num))

    return bank


In [107]:
sentence = get_random_sentence(random_story())
verbs = find_verb(sentence)
print("Similar Verbs", create_similar_word_bank(verbs))
new_sentence = [sentence.replace(verb, "_"*len(verb), 1) for verb in verbs]
print(new_sentence)

couple/file-38.txt
[('Mike', 'NNP'), ('and', 'CC'), ('Maria', 'NNP'), ('were', 'VBD'), ('excited', 'VBN'), ('to', 'TO'), ('be', 'VB'), ('celebrating', 'VBG'), ('Halloween', 'NNP'), ('.', '.')]
Similar Verbs {'cost', 'be', 'embody', 'constitute', 'exist', 'equal'}
['Mike and Maria ____ excited to be celebrating Halloween.']


CC - coordinating conjunction

CD - cardinal number

DT - determiner

E - existential there (e.g., "there is")

FW - foreign word

IN - preposition or subordinating conjunction

JJ - adjective

JJR - comparative adjective

JJS - superlative adjective

LS - list item marker

MD - modal verb

NN - noun, singular or mass

NNS - noun, plural

NNP - proper noun, singular

NNPS - proper noun, plural

PDT - predeterminer

POS - possessive pronoun

PRP - personal pronoun

PRP$ - possessive pronoun

RB - adverb

RBR - comparative adverb

RBS - superlative adverb

詞 - Japanese particle

SYM - Chinese character

TO - infinitive marker

UH - interjection

VB - infinitive marker

VBD - past tense verb

VBG - past participle verb

VBN - past participle verb

VBP - present tense, perfect, and past participle verb

VBZ - present tense, present participle verb


In [108]:
from nltk.stem import WordNetLemmatizer

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

def get_infinitive_form(word):
    return "to " + lemmatizer.lemmatize(word, pos='v')


In [117]:
sentences = []
problems = int(input("Enter the number of questions"))
while len(sentences) < problems:
    random_sentence = get_random_sentence(random_story())
    verbs = find_verb(random_sentence)

    if len(verbs) > 1 or len(verbs) == 0: continue

    new = random_sentence.replace(verbs[0], "_"*len(verbs[0]), 1)
    print(verbs[0])

    if new not in sentences:
        sentences.append([new + f" ({get_infinitive_form(verbs[0])})", verbs[0]])

customs/file-75.txt
[('The', 'DT'), ('VA', 'NNP'), ('Health', 'NNP'), ('Administration', 'NNP'), ('is', 'VBZ'), ('responsible', 'JJ'), ('for', 'IN'), ('providing', 'VBG'), ('veterans', 'NNS'), ('with', 'IN'), ('all', 'DT'), ('health', 'NN'), ('care', 'NN'), ('issues', 'NNS'), (',', ','), ('while', 'IN'), ('the', 'DT'), ('Benefits', 'NNP'), ('Administration', 'NNP'), ('is', 'VBZ'), ('designed', 'VBN'), ('to', 'TO'), ('help', 'VB'), ('veterans', 'NNS'), ('in', 'IN'), ('areas', 'NNS'), ('such', 'JJ'), ('as', 'IN'), ('guaranteed', 'JJ'), ('home', 'NN'), ('loans', 'NNS'), (',', ','), ('insurance', 'NN'), ('vocational', 'JJ'), ('rehabilitation', 'NN'), ('and', 'CC'), ('educational', 'JJ'), ('benefits', 'NNS'), ('.', '.')]
level/file-2-44.txt
[('John', 'NNP'), ('sat', 'VBD'), ('on', 'IN'), ('the', 'DT'), ('porch', 'NN'), ('playing', 'VBG'), ('his', 'PRP$'), ('electric', 'JJ'), ('bass', 'NN'), ('.', '.')]
sat
level/file-1-56.txt
[('She', 'PRP'), ('checks', 'VBZ'), ('her', 'PRP'), ('email', 'NN

In [118]:
worksheet = "\tFill in the Verb\n\n"

for index, sentence in enumerate(sentences): 
    worksheet += f"{index + 1}. {sentence[0]}\n"

worksheet += "\n\n\tAnswer Key\n\n"

for index, sentence in enumerate(sentences):
    worksheet += f"{index + 1}. {sentence[1]}\n"

print(worksheet)

	Fill in the Verb

1. John ___ on the porch playing his electric bass. (to sit)
2. She ______ her email on it. (to check)
3. They even ____ beach towels with body outlines. (to sell)
4. The second part of the meal __ the washing of hands. (to be)
5. He ________ the legendary comedian Jonathan Winters for being a big influence on his life. (to credit)
6. Kids ___ next to it. (to sit)
7. The night _____ pleasantly. (to end)
8. It is associated with the beginning of spring and symbolized by bunnies, flowers, and ____ eggs. (to dye)
9. All the team members ____ at Mary's house. (to meet)
10. The double-ended cars ___ a bit larger, and can hold up to 68 passengers. (to be)
11. It ___ going to be dark soon.18. (to be)
12. It ___ a wallet. (to be)
13. There ___ tax credits that people with low income, college students, and parents can get. (to be)
14. It __ a great place for a family vacation with many of its campsites open year round. (to be)
15. The day of the steakhouse dinner _______. (to

In [111]:
from nltk.corpus import wordnet 

In [112]:
word = "fires"

print("Alternate Definitions")
for syn in wordnet.synsets(word):
    print(syn.definition())

Alternate Definitions
the event of something burning (often destructive)
the act of firing weapons or artillery at an enemy
the process of combustion of inflammable materials producing heat and light and (often) smoke
a fireplace in which a relatively small fire is burning
once thought to be one of four elements composing the universe (Empedocles)
feelings of great warmth and intensity
fuel that is burning and is used as a means for cooking
a severe trial
intense adverse criticism
start firing a weapon
cause to go off
bake in a kiln so as to harden
terminate the employment of; discharge from an office or position
go off or discharge
drive out or away by or as if by fire
call forth (emotions, feelings, and responses)
destroy by fire
provide with fuel


In [113]:
# get all complex nouns in sentence
# input: sentence
# output: list of complex nouns
def get_complex_nouns(sentence):
    nouns = []
    for chunk in sentence.noun_chunks:
        if len(chunk.text.split()) > 1:
            nouns.append(chunk.text)
    return nouns

In [114]:
import nltk
from nltk.corpus import wordnet
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize

# Download NLTK resources (if not already downloaded)
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('words')

# Sample sentence
# sentence = "This is a complicated sentence with various intricate words and powerful emotions."
story = random_story()
sentence = ""

for sent in story:
    sentence += sent + " "

print(sentence)

# Tokenize the sentence
words = word_tokenize(sentence)

# Calculate word frequency using NLTK's FreqDist
word_freq = FreqDist(words)

corpus = nltk.corpus.brown.words()  # Replace with your own corpus if available
corpus_freq = FreqDist(corpus)

# Function to calculate lexical score based on word frequency
def calculate_lexical_score(word):
    synsets = wordnet.synsets(word)
    if synsets:
        # Consider the number of synsets (senses) for the word
        return len(synsets) * 10 * len(word) / (corpus_freq[word] + 1)
    else:
        # Assign a low score to words not found in WordNet
        return 0  # You can adjust this as needed

# Calculate lexical scores for each word in the sentence
lexical_scores = {word: calculate_lexical_score(word) for word in words}

# Print the lexical scores
for word, score in lexical_scores.items():
    print(f"{word}: {score:.2f}")

# Print top 5 words with highest lexical scores
print("\nTop 5 words with highest lexical scores:")
for word, score in sorted(lexical_scores.items(), key=lambda x: x[1], reverse=True)[:5]:
    print(f"{word}: {score:.2f}")


customs/file-44.txt
One of the most historic, and significant landmarks in the United States is the San Francisco cable car system. It is the world's only, manually operated cable car system. It is run by the San Francisco Municipal Transportation Agency. The system dates back to 1878, when the California Street line first opened. In all, there are three lines currently operating in the city, and boasts a fleet of 12 cable cars. There used to be 23 lines in use throughout the city connecting the diverse communities that make up San Francisco. The open-air cars are a major tourist attraction, and are still used by San Francisco commuters on a daily basis. The simple reason cable cars were first used in the city was because of its landscape. San Francisco is very hilly, which makes it impossible for municipal buses to scale. The solution for this problem was the cable car. These cars are literally pulled by cable, up and down the steep hills of the city, making it easier for people to ge

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\akash\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\akash\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\akash\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


One: 0.64
of: 0.00
the: 0.00
most: 0.19
historic: 6.67
,: 0.00
and: 0.00
significant: 5.18
landmarks: 45.00
in: 0.01
United: 1.03
States: 1.48
is: 0.03
San: 0.00
Francisco: 0.00
cable: 80.00
car: 0.55
system: 1.37
.: 0.00
It: 0.01
world: 0.66
's: 0.00
only: 0.22
manually: 20.00
operated: 20.00
run: 8.26
by: 0.01
Municipal: 90.00
Transportation: 168.00
Agency: 33.33
The: 0.00
dates: 20.97
back: 1.18
to: 0.00
1878: 0.00
when: 0.00
California: 1.52
Street: 3.03
line: 5.09
first: 0.64
opened: 6.36
In: 0.08
all: 0.03
there: 0.11
are: 0.10
three: 0.27
lines: 9.23
currently: 2.73
operating: 9.88
city: 0.46
boasts: 60.00
a: 0.00
fleet: 26.92
12: 0.41
cars: 1.82
There: 0.23
used: 0.59
be: 0.04
23: 0.83
use: 0.69
throughout: 1.63
connecting: 157.14
diverse: 10.00
communities: 15.71
that: 0.00
make: 2.65
up: 0.15
open-air: 40.00
major: 2.85
tourist: 4.38
attraction: 31.25
still: 1.23
commuters: 180.00
on: 0.02
daily: 2.50
basis: 0.81
simple: 3.46
reason: 2.25
were: 0.16
was: 0.04
because: 0.00
it

In [115]:
import nltk
from nltk.tokenize import SyllableTokenizer

# Initialize the SyllableTokenizer
tokenizer = SyllableTokenizer()

def get_syllables(word):
    # Use the SyllableTokenizer to break the word into syllables
    syllables = tokenizer.tokenize(word)

    # Join the syllables with a dot (·)
    syllables_with_dots = ' · '.join(syllables)

    # Print the word with syllables separated by dots
    return syllables_with_dots


In [116]:
from nltk.corpus import cmudict
nltk.download('cmudict')

def get_sounds(text):
    d = cmudict.dict()

    phonetics = d[text.lower()]

    sounds = [sound[:-1] if sound[-1] in "0123" else sound for sound in phonetics[0]]
    line = " ".join(sounds)
    return line

[nltk_data] Downloading package cmudict to
[nltk_data]     C:\Users\akash\AppData\Roaming\nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


In [123]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

# Download NLTK resources (if not already downloaded)
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Sentence containing verbs with different tenses
story = random_story()
sentence = " ".join(story)
print(sentence)

# Tokenize the sentence
words = word_tokenize(sentence)

# Perform part-of-speech tagging
pos_tags = pos_tag(words)

# Initialize lists to categorize verbs by tense
infinitive_verbs = []
past_tense_verbs = []
present_participle_verbs = []
past_participle_verbs = []
present_simple_verbs = []

# Iterate through the tagged words to categorize verbs by tense
for word, tag in pos_tags:
    if tag == "VB":
        infinitive_verbs.append(word)
    elif tag == "VBD":
        past_tense_verbs.append(word)
    elif tag == "VBG":
        present_participle_verbs.append(word)
    elif tag == "VBN":
        past_participle_verbs.append(word)
    elif tag == "VBP" or tag == "VBZ":
        present_simple_verbs.append(word)

# Print the categorized verbs
print("Infinitive Verbs:", infinitive_verbs)
print("Past Tense Verbs:", past_tense_verbs)
print("Present Participle Verbs (Gerunds):", present_participle_verbs)
print("Past Participle Verbs:", past_participle_verbs)
print("Present Simple Verbs (3rd person singular and present participle):", present_simple_verbs)


essays/file-85.txt
Who says adult parties have to be boring. More and more adults are reliving their childhoods or creating memories they didn't have as children by having theme parties for their birthday or other occasions. Theme parties are based on an idea, a television show, a fictional character, or really anything. Sometimes guests are expected to dress according to the theme as well. For example, the Toga party is a type of theme party where guests are expected to dress in togas, really just white sheets. Toga parties used to be especially popular among college students. In a masquerade party, everyone wears a mask and has to guess who is behind it. The mystery is part of the fun. Speaking of mystery, there are murder mystery parties, where the guests have to solve a fake murder. Some adults throw parties based on seasons. A summer beach party, for example, might feature guests wearing their swimsuits. Another popular type of theme party is the game night party. In this type of 

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\akash\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\akash\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
