## IR Assignment-1 (Boolean Information Retrieval System)
 Harsh Daryani   2018B1A70645H                               
 Rohan Sachan    2018B3A70992H                                
 Aaryan Gupta    2018B1A70775H

In [1]:
import nltk
import collections
import string
import os
import timeit
import math
from binarytree import Node

## Stopwords input from text file

In [74]:
my_file = open(r"stopwords.txt", "r")
data = my_file.read()
stopwords_list = data.split("\n")
print("Number of stopwords: ",len(stopwords_list))
print(stopwords_list)
my_file.close()

Number of stopwords:  431
['A', 'a', 'about', 'above', 'across', 'after', 'again', 'against', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'among', 'an', 'and', 'another', 'any', 'anybody', 'anyone', 'anything', 'anywhere', 'are', 'area', 'areas', 'around', 'as', 'ask', 'asked', 'asking', 'asks', 'at', 'away', 'b', 'back', 'backed', 'backing', 'backs', 'be', 'became', 'because', 'become', 'becomes', 'been', 'before', 'began', 'behind', 'being', 'beings', 'best', 'better', 'between', 'big', 'both', 'but', 'by', 'c', 'came', 'can', 'cannot', 'case', 'cases', 'certain', 'certainly', 'clear', 'clearly', 'come', 'could', 'd', 'did', 'differ', 'different', 'differently', 'do', 'does', 'done', 'down', 'down', 'downed', 'downing', 'downs', 'during', 'e', 'each', 'early', 'either', 'end', 'ended', 'ending', 'ends', 'enough', 'even', 'evenly', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'f', 'face', 'faces', 'fact', 'facts', 'far', 'felt',

## Index for Corpus Documents

In [80]:
path=r"Assignment 1_IR_Final\corpus"
docID_list = {i+1:doc for i, doc in enumerate(os.listdir(path))}
docID_list

{1: 'a-midsummer-nights-dream_TXT_FolgerShakespeare.txt',
 2: 'alls-well-that-ends-well_TXT_FolgerShakespeare.txt',
 3: 'antony-and-cleopatra_TXT_FolgerShakespeare.txt',
 4: 'as-you-like-it_TXT_FolgerShakespeare.txt',
 5: 'coriolanus_TXT_FolgerShakespeare.txt',
 6: 'cymbeline_TXT_FolgerShakespeare.txt',
 7: 'hamlet_TXT_FolgerShakespeare.txt',
 8: 'henry-iv-part-1_TXT_FolgerShakespeare.txt',
 9: 'henry-iv-part-2_TXT_FolgerShakespeare.txt',
 10: 'henry-vi-part-1_TXT_FolgerShakespeare.txt',
 11: 'henry-vi-part-2_TXT_FolgerShakespeare.txt',
 12: 'henry-vi-part-3_TXT_FolgerShakespeare.txt',
 13: 'henry-viii_TXT_FolgerShakespeare.txt',
 14: 'henry-v_TXT_FolgerShakespeare.txt',
 15: 'julius-caesar_TXT_FolgerShakespeare.txt',
 16: 'king-john_TXT_FolgerShakespeare.txt',
 17: 'king-lear_TXT_FolgerShakespeare.txt',
 18: 'loves-labors-lost_TXT_FolgerShakespeare.txt',
 19: 'lucrece_TXT_FolgerShakespeare.txt',
 20: 'macbeth_TXT_FolgerShakespeare.txt',
 21: 'measure-for-measure_TXT_FolgerShakespeare.

## Preprocessing, Creation of Inverted Index and Parsing Query 

In [81]:
#Initializing lists for tokens stemmed and unstemmed words
tokens_words_stemmed=[]
tokens_words_unstemmed=[]

#Initializing dictionary for word:frequency pair for unstemmed words
#To be utilized in spelling checker
unstemmed_dict={}

In [82]:
class IRSystem():

    def __init__(self, docs=None, stop_words=stopwords_list):
        if docs is None:
            raise UserWarning('No Docs')
        self._docs = docs
        self._stemmer = nltk.stem.porter.PorterStemmer()
        self._inverted_index = self._preprocess_corpus(stop_words)
        self._inverted_index1 = self._preprocess_corpus1(stop_words)
        self._print_inverted_index()

    def _preprocess_corpus1(self, stop_words=stopwords_list):
        index = {}
        for i, doc in enumerate(self._docs):
            for word in doc.split():
                    #print(word) #prints all words from 1 to 42 docs, docs in alphanumerical order of name
                    token = word.lower()
                    if ((len(token)<40) and token.isnumeric()==False ):
                        if index.get(token, -244) == -244:
                            index[token] = Node(i + 1)
                        elif isinstance(index[token], Node):
                            index[token].insert(i + 1)
                        else:
                            raise UserWarning('Wrong data type for posting list')
        return index
    
    def _preprocess_corpus(self, stop_words=stopwords_list):
        index = {}
        for i, doc in enumerate(self._docs):
            for word in doc.split():
                    #print(word) #prints all words from 1 to 42 docs, docs in alphanumerical order of name
                    token = self._stemmer.stem(word.lower())
                    if ((token not in stop_words) and (len(token)<40) and token.isnumeric()==False ):
                        if index.get(token, -244) == -244:
                            index[token] = Node(i + 1)
                        elif isinstance(index[token], Node):
                            index[token].insert(i + 1)
                        else:
                            raise UserWarning('Wrong data type for posting list')
        return index
    
    def _print_inverted_index(self):
        print('UNSTEMMED INVERTED INDEX:\n')
        for word, tree in self._inverted_index1.items():
            tokens_words_unstemmed.append(word)
            unstemmed_dict[word]=len([doc_id for doc_id in tree.tree_data() if doc_id != None ])
            print('{}: {}'.format(word, [doc_id for doc_id in tree.tree_data() if doc_id != None ]))
        print()
        
        print('PREPROCESSED INVERTED INDEX:\n')
        for word, tree in self._inverted_index.items():
            tokens_words_stemmed.append(word)
            print('{}: {}'.format(word, [doc_id for doc_id in tree.tree_data() if doc_id != None]))
        print()


    def _get_posting_list(self, word):
        return [doc_id for doc_id in self._inverted_index[word].tree_data() if doc_id != None]

    @staticmethod
    def _parse_query(infix_tokens):
        precedence = {}
        precedence['NOT'] = 3
        precedence['AND'] = 2
        precedence['OR'] = 1
        precedence['('] = 0
        precedence[')'] = 0    

        output = []
        operator_stack = []

        for token in infix_tokens:
            if (token == '('):
                operator_stack.append(token)
            
            # if right bracket, pop all operators from operator stack onto output until we hit left bracket
            elif (token == ')'):
                operator = operator_stack.pop()
                while operator != '(':
                    output.append(operator)
                    operator = operator_stack.pop()
            
            # if operator, pop operators from operator stack to queue if they are of higher precedence
            elif (token in precedence):
                # if operator stack is not empty
                if (operator_stack):
                    current_operator = operator_stack[-1]
                    while (operator_stack and precedence[current_operator] > precedence[token]):
                        output.append(operator_stack.pop())
                        if (operator_stack):
                            current_operator = operator_stack[-1]
                operator_stack.append(token) # add token to stack
            else:
                output.append(token.lower())

        # while there are still operators on the stack, pop them into the queue
        while (operator_stack):
            output.append(operator_stack.pop())

        return output

    def process_query(self, query):
        # prepare query list
        query = query.replace('(', '( ')
        query = query.replace(')', ' )')
        query = query.split(' ')

        indexed_docIDs = list(range(1, len(self._docs) + 1))

        results_stack = []
        postfix_queue = collections.deque(self._parse_query(query)) # get query in postfix notation as a queue

        while postfix_queue:
            token = postfix_queue.popleft()
            result = [] # the evaluated result at each stage
            # if operand, add postings list for term to results stack
            if (token != 'AND' and token != 'OR' and token != 'NOT'):
                token = self._stemmer.stem(token) # stem the token
                # default empty list if not in dictionary
                if (token in self._inverted_index):
                    result = self._get_posting_list(token)
            
            elif (token == 'AND'):
                right_operand = results_stack.pop()
                left_operand = results_stack.pop()
                result = BooleanModel.and_operation(left_operand, right_operand)   # evaluate AND

            elif (token == 'OR'):
                right_operand = results_stack.pop()
                left_operand = results_stack.pop()
                result = BooleanModel.or_operation(left_operand, right_operand)    # evaluate OR

            elif (token == 'NOT'):
                right_operand = results_stack.pop()
                result = BooleanModel.not_operation(right_operand, indexed_docIDs) # evaluate NOT
            
            results_stack.append(result)                        
        if len(results_stack) != 1: 
            print("ERROR: Invalid Query. Please check query syntax.") # check for errors
            return None
        
        return results_stack.pop()

## Boolean Operations Handling

In [83]:
class BooleanModel():
    
    @staticmethod
    def and_operation(left_operand, right_operand):
        # perform 'merge'
        result = []                                 # results list to be returned
        l_index = 0                                 # current index in left_operand
        r_index = 0                                 # current index in right_operand
        l_skip = int(math.sqrt(len(left_operand)))  # skip pointer distance for l_index
        r_skip = int(math.sqrt(len(right_operand))) # skip pointer distance for r_index

        while (l_index < len(left_operand) and r_index < len(right_operand)):
            l_item = left_operand[l_index]  # current item in left_operand
            r_item = right_operand[r_index] # current item in right_operand
            
            # case 1: if match
            if (l_item == r_item):
                result.append(l_item)   # add to results
                l_index += 1            # advance left index
                r_index += 1            # advance right index
            
            # case 2: if left item is more than right item
            elif (l_item > r_item):
                # if r_index can be skipped (if new r_index is still within range and resulting item is <= left item)
                if (r_index + r_skip < len(right_operand)) and right_operand[r_index + r_skip] <= l_item:
                    r_index += r_skip
                # else advance r_index by 1
                else:
                    r_index += 1

            # case 3: if left item is less than right item
            else:
                # if l_index can be skipped (if new l_index is still within range and resulting item is <= right item)
                if (l_index + l_skip < len(left_operand)) and left_operand[l_index + l_skip] <= r_item:
                    l_index += l_skip
                # else advance l_index by 1
                else:
                    l_index += 1

        return result

    @staticmethod
    def or_operation(left_operand, right_operand):
        result = []     # union of left and right operand
        l_index = 0     # current index in left_operand
        r_index = 0     # current index in right_operand

        # while lists have not yet been covered
        while (l_index < len(left_operand) or r_index < len(right_operand)):
            # if both list are not yet exhausted
            if (l_index < len(left_operand) and r_index < len(right_operand)):
                l_item = left_operand[l_index]  # current item in left_operand
                r_item = right_operand[r_index] # current item in right_operand
                
                # case 1: if items are equal, add either one to result and advance both pointers
                if (l_item == r_item):
                    result.append(l_item)
                    l_index += 1
                    r_index += 1

                # case 2: l_item greater than r_item, add r_item and advance r_index
                elif (l_item > r_item):
                    result.append(r_item)
                    r_index += 1

                # case 3: l_item lower than r_item, add l_item and advance l_index
                else:
                    result.append(l_item)
                    l_index += 1

            # if left_operand list is exhausted, append r_item and advance r_index
            elif (l_index >= len(left_operand)):
                r_item = right_operand[r_index]
                result.append(r_item)
                r_index += 1

            # else if right_operand list is exhausted, append l_item and advance l_index 
            else:
                l_item = left_operand[l_index]
                result.append(l_item)
                l_index += 1

        return result

    @staticmethod
    def not_operation(right_operand, indexed_docIDs):
        # complement of an empty list is list of all indexed docIDs
        if (not right_operand):
            return indexed_docIDs
        
        result = []
        r_index = 0 # index for right operand
        for item in indexed_docIDs:
            # if item do not match that in right_operand, it belongs to compliment 
            if (item != right_operand[r_index]):
                result.append(item)
            # else if item matches and r_index still can progress, advance it by 1
            elif (r_index + 1 < len(right_operand)):
                r_index += 1
        return result

## Inverted Index 

In [7]:
path = r"C:\Users\HP\Desktop\yr4\4-2\IR\Assignment -1\IR project\info-retrieval-master\corpus"
docs=[]
for root, dirs, files in sorted(os.walk(path)):
    for file in sorted(files):
        with open(os.path.join(path, file)) as f:
                docs.append(f.read().translate(str.maketrans('', '', string.punctuation)))

#stop_words = ['is', 'a', 'for', 'the', 'of', 'against']

def main():
    ir = IRSystem(docs, stopwords_list)

    while True:
        query = input('Enter boolean query: ')
        query.translate(str.maketrans('', '', string.punctuation))
        start = timeit.default_timer()

        results = ir.process_query(query)
        
        stop = timeit.default_timer()

        if results is not None:
            print ('Processing time: {:.5} secs'.format(stop - start))
            print('\nDoc IDS: ')
            print(results)
        print()

if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt as e:
        print('EXIT')

UNSTEMMED INVERTED INDEX:

a: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
midsummer: [1, 4, 8, 41]
nights: [1, 2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 35, 36, 38, 39, 41, 42]
dream: [1, 2, 3, 4, 6, 7, 9, 10, 12, 13, 14, 15, 17, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 41]
by: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
william: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
shakespeare: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
edited: [1, 2, 3, 4, 5, 6, 7,

dukes: [1, 2, 4, 7, 10, 12, 13, 14, 17, 18, 21, 22, 23, 25, 26, 29, 33, 35, 36, 41]
oak: [1, 4, 5, 6, 13, 21, 22, 23, 31, 34, 36, 37, 38]
hold: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
cut: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41]
bowstrings: [1]
door: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 36, 37, 38, 39, 40, 41, 42]
wander: [1, 3, 4, 6, 10, 11, 14, 15, 19, 25, 29, 33, 36, 37, 40]
over: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
hill: [1, 3, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18, 20, 27, 28, 33, 38, 42]
dale: [1, 9, 19, 37, 42]
thorough: [1, 6, 12, 15, 18, 19, 38]
bush: [1, 4, 6, 12, 13,

sweep: [1, 3, 4, 7, 11, 12, 14, 20, 38]
sky: [1, 2, 3, 4, 6, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 25, 26, 28, 29, 31, 34, 35, 37, 39, 40, 42]
fellows: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 17, 18, 20, 21, 22, 23, 26, 27, 30, 31, 33, 34, 35, 37, 38, 40, 41]
stamp: [1, 5, 6, 7, 8, 11, 13, 14, 16, 17, 19, 20, 23, 26, 28, 30, 33, 36, 38, 39]
murder: [1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 36, 37, 39, 41, 42]
calls: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42]
weak: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22, 23, 25, 26, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 40, 41, 42]
fears: [1, 2, 3, 5, 6, 7, 8, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 33, 35, 36, 37, 38, 40]
senseless: [1, 2, 4, 5, 6, 7, 12, 15, 19, 22, 25, 26, 27, 29, 33, 35, 37, 38, 41, 42]
briers: [1, 2, 4, 34, 36, 37, 39]
a

confound: [1, 2, 3, 5, 7, 8, 9, 16, 18, 19, 20, 24, 25, 26, 28, 30, 36, 38, 39, 40, 42]
pap: [1, 18]
stabs: [1, 3, 13, 15, 20, 22, 23, 26, 27, 34, 37, 39]
ace: [1, 6]
less: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41]
surgeon: [1, 10, 11, 15, 23, 24, 27, 30, 41]
recover: [1, 3, 7, 9, 11, 12, 13, 15, 16, 23, 24, 25, 26, 29, 34, 35, 37, 41]
ends: [1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 17, 18, 20, 21, 22, 25, 26, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42]
balance: [1, 2, 9, 12, 22, 23, 25, 30, 39]
warrant: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41]
spied: [1, 2, 9, 18, 19, 23, 33, 40, 42]
videlicet: [1, 4, 7, 18, 36]
dumb: [1, 2, 5, 6, 7, 9, 11, 12, 15, 16, 19, 22, 24, 26, 28, 30, 34, 35, 36, 39, 40, 42]
lily: [1, 6, 14, 16, 18, 19, 28, 35, 39, 40, 42]
nose: [1, 2, 3, 4, 5, 7, 8, 9, 10, 12, 13

commendation: [2, 4, 6, 8, 22, 30, 35, 41]
employment: [2, 6, 7, 9, 10, 14, 16, 17, 18, 22, 27, 31, 34, 35, 38, 41]
fruitfully: [2, 17]
philosophical: [2]
persons: [2, 3, 4, 7, 12, 14, 21, 22, 26, 27, 36, 37, 41]
modern: [2, 3, 4, 16, 20, 23, 27, 28]
familiar: [2, 3, 5, 6, 7, 9, 10, 11, 12, 15, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28, 31, 40, 41]
supernatural: [2, 20]
causeless: [2, 12, 39, 42]
terrors: [2, 6, 12, 17, 31, 37]
ensconcing: [2]
submit: [2, 5, 6, 10, 11, 16, 24, 25]
rarest: [2, 5, 6, 17, 24, 34, 36, 37]
relinquished: [2]
artists: [2]
galen: [2, 5, 9]
paracelsus: [2]
authentic: [2, 31, 40]
incurable: [2, 9, 16, 40]
helped: [2, 17, 20, 23, 26, 27, 35, 37]
assured: [2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 21, 23, 24, 25, 26, 28, 29, 30, 33, 36, 38]
uncertain: [2, 5, 6, 8, 26, 28, 35, 37, 38]
novelty: [2, 21, 40]
showing: [2, 5, 7, 10, 12, 13, 16, 18, 19, 21, 25, 28, 29, 30, 31, 33, 36, 38, 40]
whatdoyoucall: [2]
lafews: [2]
reads: [2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14

cramp: [2, 4, 34, 36]
saved: [2, 3, 4, 5, 6, 8, 10, 11, 13, 16, 17, 18, 21, 22, 23, 24, 26, 28, 30, 35, 36, 38, 40, 41]
drumming: [2, 19]
supposition: [2, 22, 29, 30]
traitorously: [2, 12]
discovered: [2, 6, 10, 11, 15, 22, 24, 26, 27, 29, 35, 36, 37, 38, 39, 40]
pestiferous: [2, 11]
diecome: [2, 12]
headsman: [2, 29]
removes: [2, 6, 9, 13, 18, 21, 33, 37, 41]
greeting: [2, 3, 4, 7, 9, 10, 12, 15, 16, 20, 21, 23, 25, 27, 33, 38, 40]
sonnet: [2, 10, 18, 22, 35, 41]
captainall: [2]
crushed: [2, 10, 28, 40]
impudent: [2, 8, 9, 13, 23, 40]
burst: [2, 3, 6, 7, 9, 11, 13, 15, 16, 17, 23, 24, 26, 33, 34, 36, 37, 38]
braggart: [2, 5, 10, 17, 18, 20, 27, 30]
rust: [2, 5, 9, 13, 16, 18, 23, 24, 25, 27, 36, 37, 42]
safest: [2, 4, 11, 20, 23, 26]
fooled: [2, 6, 8]
foolry: [2, 6, 18]
alive: [2, 3, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
christian: [2, 4, 7, 8, 9, 10, 11, 12, 14, 16, 23, 25, 26, 27, 29, 30, 31, 33, 

mantles: [3]
philippan: [3]
ram: [3, 4, 19, 23, 37, 40]
killst: [3, 15, 22, 25, 36, 39, 41]
bluest: [3]
veins: [3, 5, 6, 8, 9, 10, 13, 16, 19, 24, 26, 27, 28, 30, 34, 37, 40]
lipped: [3]
trembled: [3, 10, 15, 33, 36]
illuttering: [3]
throat: [3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 17, 18, 20, 21, 23, 24, 25, 26, 33, 34, 36, 38, 39, 40, 41]
faceif: [3]
tart: [3, 33, 36]
formal: [3, 4, 7, 9, 15, 26, 29, 33, 41]
allay: [3, 5, 12, 14, 16, 17, 30, 34, 41]
precedence: [3, 18]
jailer: [3, 6, 11, 21, 29, 30, 36, 37]
malefactor: [3]
pack: [3, 5, 12, 14, 17, 26, 27, 29, 30, 31, 33, 35, 36, 37, 38, 39, 41]
infectious: [3, 6, 23, 27, 29, 37, 38]
pestilence: [3, 5, 7, 14, 22, 23, 25, 27, 35, 40, 41, 42]
horrible: [3, 7, 8, 12, 14, 16, 17, 20, 21, 23, 27, 34, 37, 38, 41]
unhair: [3]
hales: [3, 12]
wire: [3]
stewed: [3, 7, 8, 9, 17, 21, 31, 40]
smarting: [3, 8]
lingring: [3, 6, 11, 12, 19, 21, 30, 34, 37, 39]
pickle: [3, 34, 41]
province: [3, 21, 24, 37]
moving: [3, 5, 7, 8, 11, 13, 20, 21, 22, 23, 24

bridegroom: [3, 8, 17, 20, 27, 33]
scholar: [3, 7, 9, 10, 14, 18, 21, 22, 23, 24, 30, 31, 33, 41]
begun: [3, 4, 5, 7, 10, 11, 13, 15, 16, 19, 20, 21, 25, 27, 33, 34, 40, 41, 42]
anyone: [3, 11, 12, 20, 24, 30, 33, 39, 41]
diomed: [3, 13, 40]
sufficing: [3]
prophesying: [3, 20]
foundyou: [3]
suspect: [3, 6, 8, 12, 13, 16, 22, 23, 24, 25, 26, 28, 29, 30, 31, 37, 38, 42]
purged: [3, 7, 10, 16, 20, 26, 27, 41]
emperors: [3, 6, 10, 23, 35, 39]
bides: [3, 13, 20, 29]
lightly: [3, 10, 12, 18, 26, 27, 29, 35, 39]
aloft: [3, 6, 11, 12, 13, 16, 19, 26, 27, 28, 33, 35, 39]
events: [3, 4, 7, 13, 20, 21, 22, 23, 25, 30, 34, 39, 42]
proportioned: [3, 19, 27]
oerthrown: [3, 7, 11, 18, 34, 37]
triumphed: [3, 8, 19]
importune: [3, 17, 21, 23, 26, 28, 33, 35, 38]
imperious: [3, 6, 7, 9, 11, 12, 14, 23, 35, 39, 40, 42]
fullfortuned: [3]
brooched: [3]
drugs: [3, 6, 7, 23, 27, 28, 29, 38, 39]
sting: [3, 4, 7, 12, 13, 14, 15, 17, 19, 20, 25, 30, 33, 37, 39, 40]
demuring: [3]
womenwe: [3]
lifting: [3, 9, 17]

illinhabited: [4]
thatched: [4, 22, 34]
seconded: [4, 5, 40]
understanding: [4, 7, 9, 10, 13, 14, 17, 18, 21, 34, 36, 37]
reckoning: [4, 6, 8, 10, 18, 22, 27, 28, 40]
poetical: [4, 41]
poetry: [4, 8, 18, 30, 33, 39]
feign: [4, 6, 9, 11, 13, 27, 30, 41]
swearst: [4, 16, 17, 27, 34]
hardfavored: [4, 10, 11, 13, 19, 25, 35, 42]
sugar: [4, 7, 8, 10, 23, 25, 26, 30, 31, 37]
material: [4, 6, 17, 20, 37]
slut: [4]
foulness: [4, 14, 17, 22]
sluttishness: [4]
vicar: [4, 31, 33]
village: [4, 10, 14, 26]
stagger: [4, 14, 21]
assembly: [4, 5, 9, 10, 14, 15, 17, 22, 27, 38]
hornbeasts: [4]
goods: [4, 9, 12, 13, 14, 16, 25, 26, 29, 30, 33, 35, 36]
dowry: [4, 7, 10, 11, 12, 13, 16, 17, 18, 19, 21, 24, 30, 31, 33, 36, 41]
walled: [4, 6, 11, 17, 18]
oliversir: [4]
chapel: [4, 6, 7, 14, 16, 22, 36, 37]
whatyoucallt: [4]
ild: [4, 20]
toy: [4, 7, 11, 18, 19, 23, 26, 27, 33, 36, 41, 42]
covered: [4, 13, 15, 17, 22, 23, 26, 27, 29, 30, 38, 39]
curb: [4, 5, 7, 8, 9, 19, 24, 30, 33, 36, 38, 40, 42]
falcon: [4

kites: [5, 7, 12, 15, 20, 26, 33, 36, 37]
daws: [5, 18, 23, 40, 41]
pratst: [5, 29]
disturbing: [5, 42]
muffler: [5, 10, 31]
unmusical: [5]
appearance: [5, 9, 10, 11, 14, 22, 23, 28]
tackles: [5, 13]
showst: [5, 17, 28]
particularly: [5, 38]
surname: [5]
painful: [5, 10, 18, 19, 21, 22, 24, 28, 33, 34]
thankless: [5, 17, 38]
requited: [5, 11, 20, 22, 40]
permitted: [5, 25]
dastard: [5, 11, 13, 25]
whooped: [5, 36]
notto: [5]
voided: [5]
banishers: [5]
wreak: [5, 27, 39]
maims: [5]
revengeful: [5, 7, 13, 14, 19, 25, 26, 39]
cankered: [5, 8, 9, 16, 27]
fiends: [5, 6, 7, 10, 11, 17, 20, 23, 26, 31, 39, 42]
tuns: [5, 31]
weeded: [5]
allnoble: [5, 36]
twine: [5, 22]
whereagainst: [5]
grained: [5, 7, 29]
ash: [5, 30]
scarred: [5, 26, 39]
splinters: [5]
anvil: [5, 16]
contest: [5]
hotly: [5, 10, 17, 19, 23, 36, 42]
rapt: [5, 20, 34, 38, 40]
bestride: [5, 8, 9, 10, 13, 15, 20, 27]
hew: [5, 11, 12, 13, 15, 20, 38, 39, 42]
brawn: [5, 8, 9]
encounters: [5, 18, 22, 35, 37, 40]
unbuckling: [5]
fist

tenderst: [6]
untender: [6, 17]
winterly: [6]
needst: [6, 8, 26, 27, 28, 35]
countnance: [6, 7, 18, 33]
drugdamned: [6]
outcraftied: [6]
disdained: [6, 8, 9, 17, 19, 22, 24, 25, 40, 42]
testimonies: [6, 21]
surmises: [6, 9, 23, 37]
havenshe: [6]
purposewhere: [6]
pander: [6, 10, 17, 24, 31, 37, 40]
outvenoms: [6]
rides: [6, 8, 9, 20, 24, 29, 37, 40]
enterswhat: [6]
jay: [6, 33, 37]
ripped: [6, 20]
sinons: [6, 19]
leaven: [6, 40]
failcome: [6]
pisanios: [6]
scabbard: [6, 11, 22, 41]
mansion: [6, 9, 19, 20, 27, 28, 30, 35, 38]
riches: [6, 10, 12, 14, 21, 23, 24, 25, 26, 28, 31, 33, 34, 35, 38, 40]
throwing: [6, 7, 11, 15, 17, 19, 21, 23, 25, 31, 38, 42]
selfslaughter: [6, 7]
prohibition: [6]
cravens: [6, 11]
somethings: [6]
afore: [6, 8, 9, 10, 17, 23, 24, 25, 27, 31, 34, 36]
bodice: [6]
scriptures: [6]
corrupters: [6]
stomachers: [6, 37]
teachers: [6, 14]
strain: [6, 8, 9, 10, 15, 16, 17, 19, 22, 23, 24, 27, 31, 34, 38, 40, 41, 42]
rareness: [6, 7, 8]
disedged: [6]
tirest: [6]
panged: [

nerve: [7]
unhand: [7]
awaygo: [7]
sulfrous: [7]
tormenting: [7, 26]
lightest: [7, 30]
knotted: [7, 40]
quills: [7, 19]
porpentine: [7, 12, 29, 40]
blazon: [7, 22, 27, 28, 31, 36, 41]
wharf: [7]
rankly: [7]
prophetic: [7, 16, 20, 23, 28, 40]
adulterate: [7, 19, 26, 28, 29]
seducewon: [7]
seemingvirtuous: [7]
decline: [7, 14, 15, 17, 26, 29, 38, 40]
lewdness: [7, 14]
sate: [7]
scent: [7, 33, 41]
hebona: [7]
vial: [7, 25, 27, 28]
porches: [7]
leprous: [7, 36]
distilment: [7]
quicksilver: [7, 9]
courses: [7, 8, 9, 10, 14, 18, 21, 23, 24, 25, 28, 34, 40]
alleys: [7, 29]
posset: [7, 31]
droppings: [7]
lazarlike: [7]
loathsome: [7, 9, 11, 12, 19, 27, 28, 29, 33, 36, 39, 42]
crust: [7, 17, 26, 38]
blossoms: [7, 12, 14, 18, 36, 37]
unhouseled: [7]
disappointed: [7]
unaneled: [7]
reckning: [7, 10, 19, 21, 23, 27, 33, 36, 38]
imperfections: [7, 10, 17]
luxury: [7, 10, 17, 21, 26, 31, 40]
incest: [7, 19, 21, 24]
howsomever: [7]
glowworm: [7, 24]
matin: [7]
uneffectual: [7]
stiffly: [7]
records: [

yorick: [7]
horatioa: [7]
gorge: [7, 10, 17, 23, 37, 38, 42]
gambols: [7, 30, 36, 37]
flashes: [7, 17, 38]
grinning: [7, 8, 25, 36]
chapfallen: [7]
thatprithee: [7]
pah: [7, 17]
stopping: [7, 9, 37]
bunghole: [7]
returneth: [7, 12]
beer: [7, 9, 12, 23]
barrel: [7]
corpse: [7, 8, 9, 11, 12, 15, 24, 26, 37, 39, 41]
maimed: [7, 14, 23, 33]
betoken: [7]
fordo: [7]
enlarged: [7, 8, 28]
warranty: [7, 23, 30]
oersways: [7, 28]
unsanctified: [7, 17, 20]
flints: [7]
crants: [7]
strewments: [7]
requiem: [7, 32]
peaceparted: [7]
unpolluted: [7]
ministring: [7]
hoped: [7, 10, 22, 37]
decked: [7, 10, 11, 13, 15, 18, 26, 34]
deprived: [7, 8, 11, 17, 19, 27]
ofhold: [7]
leaps: [7, 12, 24, 30, 40, 42]
oertop: [7, 40]
pelion: [7, 31]
skyish: [7]
conjures: [7, 19, 24, 26]
wonderwounded: [7]
prayst: [7]
splenitive: [7]
separated: [7, 13, 20, 26, 27]
eisel: [7, 28]
singeing: [7]
zone: [7]
ossa: [7]
wart: [7, 9, 29, 31, 40]
rant: [7]
couplets: [7]
mew: [7, 8, 16, 17, 33]
strengthen: [7, 13, 14, 15, 16, 24,

draff: [8, 31]
unloaded: [8]
scarecrows: [8]
napkins: [8, 15, 20, 24, 27, 31]
tacked: [8]
heralds: [8, 10, 11, 12, 14, 15, 16, 26, 27]
albans: [8, 9, 12, 13, 26]
rednose: [8]
innkeeper: [8]
daventry: [8]
quilt: [8]
warwickshiremy: [8]
toss: [8, 9, 12, 17, 24]
encamped: [8, 13]
fighter: [8, 31, 37, 41]
wellrespected: [8]
drag: [8, 11, 12, 27, 37, 39]
vernons: [8]
worcesters: [8]
journeybated: [8]
exceedeth: [8]
anointed: [8, 9, 11, 13, 16, 17, 18, 20, 25, 26, 37]
hostility: [8, 11, 13, 16]
confesseth: [8]
suggestion: [8, 9, 14, 16, 17, 20, 26, 34, 42]
sixandtwenty: [8]
unminded: [8]
outlaw: [8, 11, 35]
sneaking: [8, 10, 40]
zeal: [8, 9, 10, 12, 13, 14, 16, 18, 22, 25, 26, 30, 35, 36, 37, 38, 39, 40]
barons: [8, 10, 12, 14]
boroughs: [8]
villages: [8, 10, 17, 25]
bridges: [8, 17]
lanes: [8, 21, 25, 36, 37]
proffered: [8, 11, 16, 25, 26]
furthercut: [8]
favorites: [8, 11, 13, 22, 25, 28, 36]
disgraced: [8, 10, 11, 13, 14, 19, 22, 25, 26, 28, 30, 35, 37, 38, 41]
victories: [8, 11, 26, 28, 

miscreate: [10]
impawn: [10]
baptism: [10, 14, 23]
peers: [10, 11, 12, 13, 14, 16, 19, 20, 24, 25, 26, 40]
pharamond: [10]
terram: [10]
salicam: [10]
mulieres: [10]
ne: [10, 18, 24]
succedant: [10]
gloze: [10, 24, 25, 39]
founder: [10, 14, 36]
authors: [10, 14, 18, 19, 24, 36, 40, 41]
germany: [10, 14, 16, 17, 30]
sala: [10]
elbe: [10]
saxons: [10]
inheritrix: [10]
meissen: [10]
defunction: [10]
redemption: [10, 13, 21, 22, 23, 25, 26, 36]
twentysix: [10]
pepin: [10, 14]
childeric: [10]
blithild: [10]
clothair: [10]
hugh: [10, 13, 22, 27, 31]
capet: [10]
usurped: [10, 13, 16, 17, 23, 25, 26, 41, 42]
lorraine: [10]
lingare: [10]
lewis: [10, 13]
usurper: [10, 12, 14, 16, 19]
grandmother: [10, 18, 34, 35]
ermengare: [10]
reunited: [10]
pepins: [10]
capets: [10]
claiming: [10]
net: [10, 13, 14, 20, 22, 23, 24, 42]
amply: [10, 34, 40]
imbar: [10]
progenitors: [10, 11, 19]
unwind: [10, 35]
greatgrandsires: [10]
invoke: [10]
greatuncles: [10]
forage: [10, 16, 18, 42]
puissant: [10, 12, 13, 15

encore: [10]
quil: [10]
contre: [10]
jurement: [10]
aucun: [10]
prisonnier: [10]
lui: [10]
avez: [10]
promis: [10]
donner: [10]
liberte: [10]
franchisement: [10]
sur: [10]
mes: [10, 17]
genoux: [10]
donne: [10]
mille: [10]
remerciments: [10]
mestime: [10]
heureux: [10]
tombe: [10]
entre: [10]
mains: [10]
dun: [10, 18, 27, 28]
chevalier: [10, 11]
plus: [10]
vaillant: [10]
tres: [10]
distingue: [10]
esteems: [10, 23, 35, 40, 42]
thriceworthy: [10, 18, 40]
suivezvous: [10]
capitaine: [10]
adventurously: [10]
diable: [10, 31]
jour: [10]
perdu: [10, 17]
mechante: [10]
perdurable: [10, 23]
contaminate: [10, 15, 29]
disorder: [10, 12, 16, 20, 25, 36, 40, 42]
thricevaliant: [10, 39]
larding: [10]
yokefellow: [10, 17]
honorowing: [10]
suffolk: [10, 11, 12, 13, 14]
haggled: [10]
insteeped: [10]
abreast: [10, 13, 40]
wellfoughten: [10]
suffolks: [10, 11, 12]
espoused: [10, 12, 19, 39]
nobleending: [10]
reinforced: [10]
poys: [10]
offert: [10]
porn: [10]
pig: [10, 29, 30, 39]
variations: [10]
mace

wingfield: [11]
furnival: [11]
sheffield: [11]
falconbridge: [11, 13, 18, 30]
twoandfifty: [11, 33, 40]
magnifist: [11]
flyblown: [11]
nemesis: [11]
proudest: [11, 12, 13, 14, 22, 26, 28, 31, 33, 39]
upstart: [11, 25]
putrefy: [11]
armagnac: [11]
godly: [11, 31, 40]
stablish: [11]
immanity: [11]
professors: [11, 14, 37]
papal: [11, 14, 16]
coequal: [11]
debated: [11, 19, 22, 39]
inshipped: [11]
holiness: [11, 12, 14, 16, 21, 29, 40]
clothing: [11, 37]
ornaments: [11, 13, 14, 18, 19, 26, 27, 28, 33, 37, 39]
parisians: [11]
accomplices: [11, 12]
repine: [11, 42]
spells: [11, 14, 20, 23, 31]
periapts: [11]
admonish: [11]
helpers: [11, 24, 36]
overlong: [11]
lop: [11, 14, 24, 25]
condescend: [11]
bloodsacrifice: [11]
furtherance: [11, 24]
soulmy: [11]
loftyplumed: [11]
incantations: [11]
droopeth: [11]
capture: [11]
unchain: [11]
spelling: [11]
circe: [11]
plaguing: [11]
banning: [11, 42]
enchantress: [11]
gazes: [11, 18, 24, 37, 42]
reverent: [11, 12, 14, 17, 22]
whosoeer: [11, 13, 26]
al

unwares: [13]
warwicks: [13, 26]
bereaved: [13, 17]
stoutly: [13, 19, 23]
foemans: [13]
butcherly: [13]
erroneous: [13, 26]
ruthful: [13, 39, 40]
misthink: [13]
rued: [13]
bemoaned: [13]
sadhearted: [13]
overgone: [13]
glued: [13, 36, 39, 42]
tough: [13, 17, 18, 36]
commixtures: [13]
impairing: [13]
strengthning: [13]
misproud: [13]
scorched: [13, 16, 20]
luckless: [13]
plaints: [13, 19, 25]
cureless: [13, 19, 30]
effuse: [13]
gust: [13, 19, 28, 37, 38, 41]
argosy: [13, 30, 33]
hewing: [13]
spray: [13]
oershades: [13, 37]
didand: [13]
pitiedst: [13]
issuing: [13, 27, 30, 39]
stifle: [13, 16, 21, 42]
unstaunched: [13]
gamekeepers: [13]
thickgrown: [13]
laund: [13, 36, 42]
culling: [13, 27]
crossbow: [13]
wishful: [13]
brinish: [13, 19, 39]
inferreth: [13]
wentst: [13, 27, 29]
obeying: [13, 14, 40]
repossess: [13]
plies: [13, 23, 30, 39]
cursy: [13, 30]
accords: [13, 29, 35]
bluntest: [13]
incomparable: [13, 14, 19, 33, 38]
concubine: [13]
ghostly: [13, 21, 27]
shrift: [13, 21, 23, 26, 2

brooked: [15, 26, 33]
chew: [15, 21]
villager: [15, 36]
chidden: [15, 23, 35, 40]
calphurnias: [15]
sleekheaded: [15]
fatter: [15]
liable: [15, 16, 18, 24, 36]
observer: [15, 21]
ashouting: [15]
shouted: [15]
puttingby: [15]
hooted: [15, 37]
nightcaps: [15]
swooned: [15, 27, 37]
tagrag: [15]
forgave: [15, 30]
pulling: [15, 31, 35]
obscurely: [15, 19]
glanced: [15, 29, 31]
rived: [15]
knotty: [15, 34]
foam: [15, 19, 38]
incenses: [15]
remained: [15, 19, 29, 37]
unscorched: [15]
glazed: [15, 25, 28]
annoying: [15]
noonday: [15]
shrieking: [15, 32, 34, 40]
prodigies: [15, 16, 39, 42]
conjointly: [15, 16]
strangedisposed: [15]
menace: [15, 26, 27]
submitting: [15]
bared: [15, 21]
gliding: [15]
preformed: [15]
qualitywhy: [15]
airless: [15]
retentive: [15, 38]
rubbish: [15, 25]
illuminate: [15]
fleering: [15]
noblestminded: [15]
honorabledangerous: [15]
friendcinna: [15]
praetors: [15]
alchemy: [15, 28]
conceited: [15, 19, 31, 37, 41]
daylucius: [15]
disjoins: [15]
climberupward: [15]
attai

germens: [17, 20]
rainwater: [17]
pities: [17, 26, 35, 37, 39]
rumble: [17]
subscription: [17]
highengendered: [17]
headpiece: [17, 37]
codpiece: [17, 21, 22, 35, 37]
louse: [17, 40]
gallow: [17]
bursts: [17]
pudder: [17]
undivulged: [17, 20]
unwhipped: [17]
guilts: [17]
concealing: [17, 19, 33, 35]
summoners: [17]
sinning: [17]
hovel: [17]
inreturn: [17]
myselfwhere: [17]
raineth: [17, 41, 42]
boycome: [17]
malt: [17]
tutors: [17, 18, 33, 34, 38]
heretics: [17, 27]
cutpurses: [17, 36]
losesno: [17]
contentious: [17, 34, 36]
invades: [17]
ponder: [17]
firstyou: [17]
houseless: [17]
unfed: [17]
looped: [17]
raggedness: [17]
superflux: [17]
grumble: [17, 33]
ford: [17, 31]
whirlpool: [17]
pew: [17]
trotting: [17, 18, 36]
fourinched: [17]
acold: [17]
whirlwinds: [17, 19, 33]
starblasting: [17]
vexes: [17]
thereand: [17]
pendulous: [17]
pillicock: [17]
alow: [17, 36]
loo: [17, 40]
outparamoured: [17]
greediness: [17, 26, 37, 38]
silks: [17, 29, 30, 40]
brothels: [17]
plackets: [17, 18, 37]

hyperboles: [18, 40]
affectation: [18]
pedanticalthese: [18]
maggot: [18, 20]
glovehow: [18]
yeas: [18]
kersey: [18, 21, 33]
noes: [18]
reject: [18]
itrosaline: [18]
remit: [18, 21, 39]
aforehand: [18]
carrytale: [18, 42]
pleaseman: [18]
zany: [18]
mumblenews: [18]
trencherknight: [18]
squier: [18]
jesting: [18, 22, 34, 40, 41]
tilting: [18, 23, 24, 29]
partst: [18]
vara: [18]
pursents: [18]
whereuntil: [18]
parfect: [18]
manpompion: [18]
shameproof: [18]
guerrai: [18]
couplement: [18, 28]
novum: [18]
leopards: [18, 25]
sirpompey: [18]
targe: [18]
declares: [18]
alisander: [18]
tendersmelling: [18]
dismayedproceed: [18]
polax: [18]
bowler: [18]
alisanderalas: [18]
oerparted: [18]
acoming: [18]
threeheaded: [18]
canus: [18]
manus: [18]
quoniam: [18]
iscariot: [18]
traitorhow: [18]
citternhead: [18]
pommel: [18]
carvedbone: [18]
flask: [18, 27]
halfcheek: [18]
toothdrawer: [18]
jude: [18]
himjudas: [18]
troyan: [18]
cleantimbered: [18]
lemon: [18]
cloves: [18]
ilion: [18, 19]
columbine: 

laborst: [21]
accommodations: [21]
nursed: [21, 24, 27, 28, 39]
fearst: [21, 41]
exists: [21]
strivst: [21]
forgetst: [21, 28, 34]
ingots: [21, 36]
unloads: [21]
serpigo: [21, 40]
afterdinners: [21, 40]
eld: [21, 31]
welcomelook: [21]
leiger: [21]
fetter: [21, 22, 39, 41]
vastidity: [21]
flowery: [21, 36]
conserve: [21]
outwardsainted: [21]
enew: [21]
fowlis: [21]
prenzie: [21]
damnedst: [21]
perdurably: [21]
obstruction: [21, 41]
kneaded: [21]
reside: [21, 23, 38]
thrilling: [21]
thickribbed: [21]
viewless: [21]
howlingtis: [21]
weariest: [21]
dispenses: [21]
fallible: [21]
farewellprovost: [21]
unlawfully: [21, 23, 26]
advisings: [21]
uprighteously: [21]
affianced: [21]
perished: [21, 34, 36]
combinate: [21]
wellseeming: [21, 27]
relents: [21]
rupture: [21, 24]
forenamed: [21]
plausible: [21]
refer: [21, 23, 37]
advantaged: [21]
doubleness: [21]
lukes: [21, 33]
moated: [21]
grange: [21, 23, 37]
resides: [21, 34, 37, 40]
lambskins: [21]
facing: [21]
sirbless: [21]
picklock: [21]
cause

priestly: [24]
caulked: [24]
bitumed: [24]
mariner: [24, 37]
pothecary: [24, 27]
principals: [24]
infusions: [24]
vegetives: [24]
disturbances: [24]
tottering: [24]
belches: [24]
odor: [24, 28, 36, 37, 41]
entreasured: [24]
mundane: [24]
seamake: [24]
boxes: [24, 27]
cloths: [24, 40]
entranced: [24]
fringes: [24]
richlive: [24]
aesculapius: [24, 31]
litigious: [24]
wondringly: [24]
vowstill: [24]
unscissored: [24]
gentlest: [24, 28]
offercome: [24]
hereaftercome: [24]
fastgrowing: [24]
marinas: [24]
philoten: [24]
weaved: [24, 36]
sleided: [24]
hurting: [24]
contends: [24]
paphos: [24, 34, 42]
darks: [24]
cleons: [24]
prest: [24, 30]
yellows: [24, 33]
blues: [24]
carpet: [24, 25, 41]
whirring: [24]
sorrowing: [24]
marge: [24, 34]
pierces: [24, 34]
sharpens: [24, 30, 36, 40]
stomachcome: [24]
youwalk: [24]
westerly: [24]
southwest: [24, 34]
haling: [24]
clasping: [24, 34]
laddertackle: [24]
canvasclimber: [24]
wolt: [24]
boatswain: [24, 34]
trebles: [24, 26, 34]
imply: [24]
foreshow: [2

capels: [27]
thouwhy: [27]
hazel: [27, 33]
addle: [27, 40]
quarreled: [27]
easter: [27]
consortest: [27]
alla: [27]
stoccato: [27]
ratcatcher: [27]
drybeat: [27]
pilcher: [27]
pagego: [27]
ally: [27]
slandertybalt: [27]
aspired: [27, 39]
mercutios: [27]
beginners: [27]
tilts: [27, 35]
retorts: [27, 40]
agile: [27]
ableeding: [27, 30]
amerce: [27]
fieryfooted: [27]
loveperforming: [27]
untalked: [27]
sobersuited: [27]
stainless: [27, 41]
maidenhoods: [27]
bating: [27]
weraday: [27]
vowel: [27]
deathdarting: [27]
bedaubed: [27]
angelical: [27]
dovefeathered: [27]
wolvishravening: [27]
dissemblers: [27]
aqua: [27, 29, 37, 41, 42]
vitae: [27, 29, 37, 39, 41]
threehours: [27]
needly: [27]
maidenwidowed: [27]
cordscome: [27]
mistermed: [27]
cuttst: [27]
smilest: [27]
sharpground: [27]
absolver: [27]
adversitys: [27]
displant: [27]
reverse: [27, 31, 39]
unmade: [27]
mistlike: [27]
knockwhos: [27]
thereromeo: [27]
takenstay: [27]
awhilestand: [27]
studyby: [27]
bygods: [27]
thisi: [27]
blubbri

whorumfie: [31]
lunatics: [31]
understandings: [31]
genders: [31]
declensions: [31]
pronouns: [31]
quae: [31]
quod: [31, 36]
quaes: [31]
quods: [31]
preeches: [31]
sprag: [31]
hughget: [31]
accoutrement: [31]
lunes: [31, 37]
birding: [31]
kilnhole: [31, 37]
johnunless: [31]
brentford: [31]
thrummed: [31]
toorun: [31]
unfool: [31]
againset: [31]
panderly: [31]
gang: [31]
shamedwhat: [31]
bleaching: [31, 37]
fordmistress: [31]
husbandi: [31]
brazenface: [31]
outcome: [31, 35]
reasonablepluck: [31]
fidelity: [31]
tablesport: [31]
walnut: [31, 33]
refill: [31]
fortunetelling: [31]
daubery: [31]
husbandgood: [31]
pratt: [31]
polecat: [31]
fortunetell: [31]
ittis: [31]
peard: [31]
unpitifully: [31]
meansif: [31]
unvirtuous: [31]
peaten: [31]
herne: [31]
wintertime: [31]
milchkine: [31, 33]
idleheaded: [31]
hernes: [31]
urchins: [31, 34, 39]
aufs: [31]
rattles: [31]
sawpit: [31]
amazedness: [31, 37]
encircle: [31]
fairylike: [31]
dishorn: [31]
taber: [31]
etongo: [31]
tricking: [31]
welllande

unrewarded: [34]
barnacles: [34]
foreheads: [34, 35]
convulsions: [34, 36]
pinchspotted: [34]
carriagehows: [34]
weatherfends: [34]
kindlier: [34]
lakes: [34]
printless: [34]
demipuppets: [34]
mushrumps: [34]
bedimmed: [34]
rifted: [34]
strongbased: [34]
spellstopped: [34]
fellowly: [34]
dropsthe: [34]
dissolves: [34, 35, 42]
reasono: [34]
followst: [34]
deedmost: [34]
furtherer: [34]
sebastianflesh: [34]
expelled: [34]
arttheir: [34]
meariel: [34]
ducal: [34]
discase: [34, 37]
milanquickly: [34]
inhabits: [34, 35, 41]
irreparable: [34]
supportable: [34]
oozy: [34]
lieswhen: [34]
breathbut: [34]
justled: [34]
befitting: [34]
cells: [34]
chess: [34]
eldst: [34]
chalked: [34]
strangersay: [34]
andhow: [34]
notall: [34]
jingling: [34, 36]
diversity: [34]
moping: [34]
infest: [34]
marketable: [34]
flyblowing: [34]
handsomely: [34, 37, 39]
thricedouble: [34]
dearbeloved: [34]
gales: [34]
expeditious: [34]
wellplease: [34]
deceiver: [34]
frees: [34]
indulgence: [34, 40]
pantino: [35]
julia: 

latches: [37]
shovels: [37]
himundone: [37]
irremovable: [37]
mistressfrom: [37]
disjunction: [37]
ruinmarry: [37]
discontenting: [37]
unthoughton: [37]
dedication: [37, 38, 41]
unpathed: [37]
undreamed: [37]
prosperitys: [37]
uponcamillo: [37]
pomander: [37]
pettitoes: [37]
placket: [37, 40]
instantlythou: [37]
unbuckle: [37]
mistresslet: [37]
disliken: [37]
overto: [37]
undescried: [37]
florizells: [37]
comefarewell: [37]
requisite: [37]
connive: [37]
fardel: [37]
rustics: [37]
enfoldings: [37]
toze: [37]
advocates: [37]
pheasant: [37]
sirabout: [37]
hangmanwhich: [37]
sheepwhistling: [37]
stoned: [37]
waspsnest: [37]
hottest: [37, 39]
traitorly: [37]
menwhat: [37]
elsesir: [37]
booties: [37]
courted: [37]
redeemedindeed: [37]
heirless: [37]
lookerson: [37]
holier: [37, 38]
royaltys: [37]
mewho: [37]
successor: [37]
soulvexed: [37]
marryif: [37]
bidst: [37]
princessshe: [37]
behelddesires: [37]
themeshe: [37]
equalled: [37]
forgotyour: [37]
proselytes: [37]
paired: [37]
unfurnish: [3

diomedgo: [40]
stints: [40]
deedless: [40]
vindicative: [40]
disposedthere: [40]
cousingerman: [40]
commixtion: [40]
dexter: [40]
multipotent: [40]
feud: [40]
borrowdst: [40]
neoptolemus: [40]
mirable: [40]
oyez: [40]
hecould: [40]
expectance: [40]
embracementajax: [40]
seld: [40]
expecters: [40]
purely: [40]
biasdrawing: [40]
wellfamed: [40]
untraded: [40]
subduments: [40]
understandst: [40]
himwhether: [40]
whereout: [40]
pleasantly: [40]
stithied: [40]
convive: [40]
concur: [40]
crusty: [40]
idiotworshippers: [40]
gutsgriping: [40]
ruptures: [40]
catarrhs: [40]
lethargies: [40]
palsies: [40]
dirtrotten: [40]
whissing: [40]
sciaticas: [40]
limekilns: [40]
rivelled: [40]
indistinguishable: [40]
immaterial: [40]
sleavesilk: [40]
sarsenet: [40]
flap: [40]
tassel: [40]
gaging: [40]
earwax: [40]
bullthe: [40]
primitive: [40]
oblique: [40]
shoeinghorn: [40]
legto: [40]
lizard: [40]
yondertis: [40]
diomeds: [40]
falsehearted: [40]
leers: [40]
astronomers: [40]
rump: [40]
potato: [40]
cognit

provid: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41]
advis: [1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 36, 37, 38, 39, 41, 42]
maid: [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41]
god: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
compos: [1, 2, 3, 7, 10, 20, 22, 24, 28, 34, 35, 36, 40]
beauti: [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
yea: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 37, 38, 39, 40]
whom: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11

despis: [1, 3, 4, 5, 6, 7, 9, 11, 13, 14, 17, 18, 19, 20, 21, 23, 24, 25, 27, 28, 30, 31, 35, 36, 38, 39, 40, 42]
dank: [1, 8, 15, 19, 27, 31]
dirti: [1, 6, 7, 9, 10, 31, 41]
ground: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 41, 42]
durst: [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 28, 29, 33, 34, 36, 37, 39, 40]
lacklov: [1]
killcourtesi: [1]
churl: [1, 6, 12, 27, 28, 29, 37, 38, 39]
owe: [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
forbid: [1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 36, 37, 38, 39, 40, 41]
seat: [1, 3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 19, 20, 21, 24, 25, 26, 28, 30, 36, 38, 39, 40, 41]
charg: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 

brideb: [1, 7]
issu: [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 34, 36, 37, 38, 39, 40, 42]
blot: [1, 8, 10, 11, 12, 13, 16, 17, 18, 19, 22, 23, 25, 26, 28, 29, 30, 33, 35, 36, 38, 39, 42]
mole: [1, 6, 7, 16, 24, 29, 34, 37, 41]
harelip: [1, 17]
scar: [1, 2, 3, 4, 5, 6, 9, 10, 12, 19, 23, 26, 27, 29, 36, 39, 40]
prodigi: [1, 8, 13, 15, 16, 25, 26, 27, 33, 35, 39, 40, 42]
children: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 37, 38, 39, 40]
fielddew: [1]
owner: [1, 2, 4, 7, 8, 12, 16, 19, 22, 24, 28, 29, 30, 31, 36, 38, 39, 40]
blest: [1, 3, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 21, 25, 28, 30, 31, 33, 34, 35, 36, 37, 38, 40]
safeti: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 23, 25, 26, 27, 29, 34, 36, 37, 38, 39, 40, 41]
mend: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 27, 28, 29, 30, 33, 35, 37, 3

unlaw: [2, 3, 11, 21, 23, 26, 29, 37]
guard: [2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 28, 29, 30, 34, 36, 37, 38, 39, 40, 41]
honestest: [2]
antonio: [2, 3, 22, 30, 33, 34, 35, 41]
eldest: [2, 4, 6, 7, 8, 9, 12, 13, 14, 16, 17, 20, 22, 29, 33, 37, 39, 41]
escalu: [2, 21, 27]
frenchman: [2, 6, 7, 10, 11, 12, 22, 30, 31]
plume: [2, 5, 8, 10, 11, 16, 17, 18, 21, 23, 34, 40, 41, 42]
goodlier: [2, 34]
handsom: [2, 3, 7, 9, 14, 22, 23, 24, 26, 27, 31, 34, 36, 37, 39, 40]
yond: [2, 3, 4, 5, 6, 7, 10, 11, 15, 17, 23, 27, 30, 31, 34, 35, 37, 38, 40, 41]
poison: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 36, 37, 38, 39, 41, 42]
rascal: [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 17, 19, 21, 23, 26, 31, 33, 37, 38, 40, 41]
jackanap: [2, 6, 10, 31]
shrewdli: [2, 7, 10, 15, 22, 37, 40, 41]
vex: [2, 3, 5, 6, 8, 11, 12, 13, 14, 15, 16, 17, 22, 25, 27, 28, 33, 34, 35, 38, 39, 41]
ringcarri: [2]
hos

jolli: [3, 4, 6, 16, 17, 26, 33, 36, 41]
quicksand: [3, 13]
inclip: [3]
worldshar: [3]
cabl: [3, 13, 23, 34]
desist: [3, 9, 24]
pall: [3, 7, 20]
ashoreil: [3]
pledg: [3, 7, 9, 11, 12, 13, 14, 15, 17, 20, 23, 24, 25, 33, 36, 38, 39, 40]
hid: [3, 7, 8, 11, 12, 13, 14, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 33, 34, 36, 38, 39, 41, 42]
wheel: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 19, 21, 22, 23, 27, 29, 34, 35, 37, 39, 40]
alexandrian: [3]
ripen: [3, 9, 10, 11, 21, 22, 25, 26, 27, 39]
fouler: [3, 12, 19, 21, 23, 26, 33]
emperor: [3, 6, 7, 10, 11, 14, 16, 21, 23, 27, 34, 35, 37, 39]
celebr: [3, 7, 11, 14, 20, 23, 24, 25, 34, 36, 37, 41]
batteri: [3, 5, 6, 7, 16, 24, 41]
volley: [3, 7, 16, 35, 42]
vine: [3, 6, 10, 11, 14, 17, 19, 26, 29, 34, 36, 38]
plumpi: [3]
bacchu: [3, 18]
pink: [3, 14, 27, 36]
vat: [3, 31]
morepompey: [3]
graver: [3, 5, 6, 14, 37, 42]
levitygentl: [3]
burnt: [3, 5, 7, 8, 9, 12, 14, 16, 19, 25, 27, 31, 33, 34, 37, 39, 42]
enobarb: [3]
weaker: [3, 4, 

seemst: [4, 6, 8, 19, 25, 27]
inland: [4, 9, 10, 30]
nurtur: [4, 34]
tabl: [4, 5, 6, 7, 9, 13, 14, 16, 18, 20, 21, 24, 26, 27, 28, 29, 30, 31, 33, 34, 35, 37, 38, 40]
inaccess: [4, 34]
bough: [4, 6, 7, 10, 20, 25, 28, 36, 37, 38, 39, 42]
knoll: [4, 20, 36]
sheath: [4, 7, 8, 10, 12, 13, 14, 15, 16, 19, 20, 27, 31, 33, 36, 39, 40, 42]
limp: [4, 10, 25, 27, 28, 30, 33, 38]
bit: [4, 5, 8, 10, 14, 17, 18, 21, 27, 33, 38, 40, 42]
theater: [4, 7, 15, 16, 25]
woeful: [4, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 36, 39, 40, 42]
entranc: [4, 5, 7, 8, 11, 14, 16, 19, 20, 24, 25, 27, 29, 33, 37, 39, 40]
infant: [4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 24, 25, 26, 27, 28, 36, 37, 39, 40, 42]
mewl: [4]
puke: [4]
schoolboy: [4, 5, 11, 15, 18, 22, 27, 35, 36, 37]
satchel: [4]
unwillingli: [4, 14, 30, 34, 35, 37]
furnac: [4, 6, 14, 19, 42]
eyebrow: [4, 37]
cannon: [4, 7, 8, 10, 13, 14, 16, 18, 19, 20, 23, 27, 31, 41]
belli: [4, 5, 8, 9, 13, 17, 18, 21, 24, 30, 31, 33, 37, 38,

unclog: [5]
pule: [5, 27, 35, 40]
junolik: [5]
accident: [5, 7, 15, 18, 19, 21, 29]
centurion: [5]
billet: [5, 21, 23]
edific: [5, 30, 31]
unsepar: [5]
bitterest: [5, 39]
fellest: [5, 40]
interjoin: [5]
birthplac: [5]
servingman: [5, 9, 11, 12, 16, 17, 25, 27, 30, 31, 33, 36, 40, 41]
cotu: [5]
sirha: [5]
porter: [5, 9, 10, 11, 14, 17, 18, 20, 27, 29, 31, 38, 40]
companionspray: [5]
troublesom: [5, 6, 9, 14, 16, 31]
hearth: [5, 31]
marvllou: [5]
batten: [5, 7]
notprithe: [5]
dwellst: [5]
canopi: [5, 6, 7, 9, 13, 14, 15, 19, 27, 28, 33, 41]
kite: [5, 7, 10, 12, 15, 17, 20, 26, 33, 36, 37]
daw: [5, 11, 18, 22, 23, 40, 41]
pratst: [5, 29]
muffler: [5, 10, 31]
unmus: [5]
showst: [5, 17, 28]
particularli: [5, 38]
thankless: [5, 17, 38]
dastard: [5, 11, 12, 13, 25]
notto: [5]
void: [5, 10, 12, 13, 14, 15, 21, 30, 36, 38]
wreak: [5, 27, 38, 39, 42]
maim: [5, 7, 8, 12, 14, 23, 25, 33, 36]
fiend: [5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 20, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 39, 41, 42]


dexter: [7, 8, 19, 27, 31, 40]
incestu: [7, 17]
horatioor: [7]
truant: [7, 8, 11, 14, 18, 22, 28, 29, 31, 35, 40]
truster: [7, 38]
elsinor: [7]
student: [7, 18, 31, 41]
bake: [7, 16, 20, 27, 31, 34, 39, 40]
coldli: [7, 16, 22, 27, 29, 33, 35, 36, 37, 40]
fathermethink: [7]
yesternight: [7, 8, 15, 21, 22, 26, 27, 39, 40]
capapi: [7, 37]
fearsurpris: [7]
truncheon: [7, 8, 9, 12, 14, 21, 40]
jelli: [7, 17, 37]
platform: [7, 11, 13, 23]
beaver: [7, 8, 9, 10, 13, 26, 40]
frowningli: [7]
barnardomarcellu: [7]
sabl: [7, 19, 24, 28, 32]
hitherto: [7, 8, 10, 11, 13, 14, 23, 26]
tenabl: [7]
whatsomev: [7]
spiritin: [7]
primi: [7]
perman: [7]
supplianc: [7]
thew: [7, 9, 15]
besmirch: [7, 10]
unvalu: [7, 26]
circumscrib: [7, 39]
credent: [7, 21, 37]
unmast: [7]
chariest: [7]
unmask: [7, 14, 18, 19, 21, 22]
button: [7, 17, 27, 29, 31, 36]
blastment: [7]
immin: [7, 12, 15, 16, 23, 40]
wari: [7, 11, 15, 21, 23, 27, 28, 37]
watchman: [7, 22, 27, 28]
ungraci: [7, 8, 12, 17, 25, 26, 40, 41]
pastor: [7, 

gaunt: [8, 9, 11, 12, 13, 25]
jesu: [8, 9, 10, 12, 13, 25, 26, 27]
caterpillar: [8, 12, 24, 25, 42]
baconf: [8]
gorbelli: [8]
chuff: [8]
grandjuror: [8]
jure: [8]
equiti: [8, 12, 16, 17]
househ: [8]
barn: [8, 9, 19, 22, 33, 34, 39]
unsort: [8]
lackbrain: [8]
constanta: [8]
frostyspirit: [8]
thisan: [8]
infidel: [8, 25, 26, 30]
sitst: [8]
thickey: [8]
palisado: [8]
parapet: [8]
culverin: [8]
headi: [8, 10]
bestir: [8, 17, 34]
latedisturb: [8]
hest: [8, 18, 34]
gilliam: [8]
butler: [8, 34, 37]
roan: [8, 11, 25]
cropear: [8]
esper: [8, 17, 40]
madhead: [8]
toss: [8, 9, 12, 13, 17, 19, 24, 25, 26, 30, 38]
paraquito: [8]
trifler: [8]
mammet: [8, 27]
tilt: [8, 18, 23, 24, 27, 29, 35]
toogod: [8]
ahorseback: [8]
whereabout: [8, 20]
closer: [8, 13]
loggerhead: [8, 18, 27, 33]
hogshead: [8, 9, 18, 34, 37]
bass: [8, 29, 33, 34, 35, 37]
drawer: [8, 9, 27, 31]
flatli: [8, 16, 30, 33, 42]
corinthian: [8]
boybi: [8]
meand: [8]
scarlet: [8, 9, 11, 14, 19, 25, 27, 28, 31, 33, 38]
profici: [8]
nedto: [

net: [10, 13, 14, 20, 22, 23, 24, 27, 39, 42]
ampli: [10, 34, 40]
imbar: [10]
progenitor: [10, 11, 19]
unwind: [10, 35]
invok: [10, 28]
greatuncl: [10, 11, 12]
forag: [10, 16, 18, 40, 42]
puissant: [10, 12, 13, 15, 17, 26]
thricepuiss: [10]
maymorn: [10]
pavilion: [10, 18, 40]
spiritualti: [10]
invad: [10, 17]
pilfer: [10]
snatcher: [10]
unfurnish: [10, 25, 27, 30, 37, 39]
neighborhood: [10, 38]
impound: [10]
sumless: [10]
treasuri: [10, 12, 17, 25, 37, 39]
congre: [10]
honeybe: [10]
pillag: [10, 11, 12, 19, 39]
knead: [10, 21, 40]
sadey: [10]
executor: [10, 25, 28, 34]
tombless: [10]
tongueless: [10, 25, 26, 37]
sparingli: [10, 26]
galliard: [10, 41]
wrangler: [10, 40]
merriest: [10, 11, 21]
dazzl: [10, 11, 13, 18, 35, 39, 42]
gunston: [10]
ungotten: [10]
wellhallow: [10]
conductfar: [10]
furthranc: [10]
conspiraci: [10, 15, 17, 25, 31, 34, 37]
southampton: [10, 19, 42]
playhous: [10, 14]
trothplight: [10, 37]
herehow: [10]
tyke: [10]
callst: [10, 13, 17, 18, 20, 29, 33, 36, 37, 38, 3

talbotpaus: [11]
boldfac: [11, 42]
shortn: [11]
icaru: [11, 13]
tendr: [11, 18, 25, 26, 39]
dizzyey: [11]
clustr: [11, 34]
overmount: [11]
laughst: [11]
lither: [11]
giglot: [11]
inhears: [11, 28]
nurser: [11]
washford: [11]
waterford: [11]
valenc: [11]
goodrich: [11]
urchinfield: [11]
blackmer: [11]
verdon: [11]
alton: [11]
cromwel: [11, 14]
wingfield: [11]
furniv: [11]
sheffield: [11]
falconbridg: [11, 13, 18, 30]
twoandfifti: [11, 33, 40]
magnifist: [11]
flyblown: [11]
nemesi: [11]
proudest: [11, 12, 13, 14, 22, 26, 28, 31, 33, 39]
upstart: [11, 25]
putrefi: [11, 19, 40]
armagnac: [11]
godli: [11, 23, 31, 40, 41]
imman: [11]
professor: [11, 14, 37]
papal: [11, 14, 16]
coequal: [11]
inship: [11]
parisian: [11]
accomplic: [11, 12]
periapt: [11]
overlong: [11]
condescend: [11]
bloodsacrific: [11]
soulmi: [11]
loftyplum: [11]
incant: [11]
droopeth: [11]
captur: [11]
unchain: [11]
circ: [11, 29]
enchantress: [11]
whosoeer: [11, 13, 26]
allot: [11, 17, 19, 33]
cygnet: [11, 16, 40]
servil:

horridyet: [14]
endsinde: [14]
popedom: [14]
meridian: [14]
asher: [14]
maliceknow: [14]
eagerli: [14, 15]
wholl: [14]
juri: [14, 21]
elsemi: [14]
lifeil: [14]
assent: [14]
ego: [14]
meu: [14]
inscrib: [14]
cassado: [14]
ferrara: [14]
innumer: [14]
praemunir: [14]
tenement: [14, 25]
aripen: [14]
highblown: [14]
weakheart: [14]
naturenot: [14]
mention: [14, 15, 36, 37, 39]
shoal: [14, 20]
aimst: [14]
dunstabl: [14]
ampthil: [14]
kymmalton: [14]
marqu: [14]
demicoron: [14]
collar: [14, 27]
ss: [14]
marshalship: [14]
cinqueport: [14]
indi: [14, 29, 30, 31, 41]
circlet: [14]
abbey: [14, 16, 27, 29, 35]
goodliest: [14, 17, 39]
aros: [14, 15, 29]
woven: [14, 30, 42]
choicest: [14]
whitehal: [14]
stokeley: [14]
howev: [14, 15, 16, 17, 22, 23, 25, 39, 40, 41]
ledst: [14]
leicest: [14, 26]
abbot: [14, 16, 25]
simoni: [14]
untruth: [14, 22, 25, 40]
undoubtedli: [14]
fairspoken: [14]
unfinish: [14, 26, 29, 42]
theegood: [14]
kingdompati: [14]
welland: [14, 31]
avow: [14, 40]
liebut: [14]
decent: 

eleg: [18]
cadenc: [18]
poesycaret: [18]
ovidiu: [18]
naso: [18]
jerk: [18]
imitari: [18]
riderbut: [18]
damosella: [18]
overgl: [18]
mesir: [18]
ben: [18, 33]
venuto: [18, 33]
verba: [18, 31]
pitchpitch: [18]
entereth: [18]
birdbolt: [18, 22, 41]
eyebeam: [18]
onehalf: [18, 20]
shinst: [18]
ridest: [18]
makest: [18, 19, 36]
triumviri: [18]
cornercap: [18]
tyburn: [18]
prose: [18, 41]
exhalst: [18]
vaporvow: [18]
thiscompani: [18]
hidan: [18]
demigod: [18, 21, 30]
oerey: [18]
ambercolor: [18]
saucer: [18]
dayalack: [18]
desirst: [18, 30, 41]
compil: [18, 28]
minstrel: [18, 19, 22, 27, 33, 36]
teen: [18, 26, 27, 34, 42]
gig: [18]
pushpin: [18]
timon: [18, 38]
overview: [18]
adramadio: [18]
liegeand: [18]
turtl: [18, 31, 32, 33, 37, 40]
opn: [18]
eaglesight: [18]
seller: [18]
eboni: [18, 41]
doter: [18]
collier: [18, 27, 41]
overhead: [18]
menatarm: [18]
hesperid: [18, 24]
sphinx: [18]
strung: [18, 35]
promethean: [18, 23]
homeward: [18, 20, 29, 37, 42]
allon: [18]
sati: [18]
sufficit: [

covertli: [22]
dishonesti: [22, 31, 37, 41]
disloyalti: [22, 29]
loveand: [22]
cheapen: [22, 24]
kidfox: [22]
blith: [22, 24, 39]
mo: [22]
leavi: [22]
sitsi: [22]
youyou: [22, 37]
afear: [22]
necessarili: [22]
parlor: [22, 23, 33]
overheardst: [22, 27]
hearsay: [22, 28]
pleasantst: [22]
greedili: [22]
haggard: [22, 23, 33, 41]
newtroth: [22]
selfendear: [22]
illhead: [22]
purchaseth: [22]
reportingli: [22]
clapper: [22, 29]
sadder: [22, 33, 35]
marriagesur: [22]
atalk: [22]
untowardli: [22]
compartn: [22]
desartless: [22]
oatcak: [22]
vagrom: [22]
bidden: [22]
subjectsy: [22]
himmarri: [22]
hawel: [22]
goodnightcom: [22]
vigit: [22]
giddili: [22, 41]
fiveandthirti: [22, 34]
bel: [22]
shaven: [22]
oernight: [22, 35, 38]
rebato: [22]
yourscloth: [22]
underborn: [22]
bluish: [22]
tinsel: [22]
carduu: [22]
benedictu: [22]
prickst: [22]
headborough: [22, 33]
decern: [22]
palabra: [22]
seewel: [22]
vergeswel: [22]
aspici: [22]
suffig: [22]
noncom: [22]
herfriar: [22]
interject: [22]
friarfat

herbsgrac: [27]
unstuf: [27]
uprous: [27]
sallow: [27]
badst: [27, 34]
lovesong: [27]
bowboy: [27]
pricksong: [27]
bosomth: [27]
duelist: [27]
punto: [27]
reverso: [27]
tuner: [27]
pardonm: [27]
fishifi: [27]
petrarch: [27]
laura: [27]
dowdi: [27]
purposesignior: [27]
bonjour: [27, 39]
singlesol: [27]
wildgoos: [27]
drivel: [27]
twoa: [27]
roperi: [27]
flirtgil: [27]
skainsmat: [27]
topgal: [27]
thingo: [27]
versal: [27]
theno: [27]
timespet: [27]
nimblepinion: [27]
windswift: [27]
highmost: [27, 28]
comeso: [27]
nurseo: [27]
shamest: [27]
jaunt: [27]
virtuouswher: [27]
oddli: [27, 30, 34, 40]
repliest: [27]
poultic: [27]
lovedevour: [27]
capel: [27]
thouwhi: [27]
hazel: [27, 33]
addl: [27, 40]
easter: [27]
consortest: [27]
alla: [27]
stoccato: [27]
ratcatch: [27]
drybeat: [27]
pilcher: [27, 41]
pagego: [27]
slandertybalt: [27]
beginn: [27]
agil: [27]
ableed: [27, 30]
amerc: [27]
fieryfoot: [27]
loveperform: [27]
untalk: [27]
sobersuit: [27]
stainless: [27, 41]
weraday: [27]
deathdart:

honi: [31]
soit: [31]
mal: [31]
emrald: [31]
embroideri: [31]
trialfir: [31]
fingerend: [31]
pins: [31]
putter: [31]
fritter: [31]
hodgepud: [31]
flannel: [31]
plummet: [31, 34]
whoa: [31]
lubberli: [31]
postmast: [31]
garcon: [31]
undut: [31]
evit: [31]
irreligi: [31, 39]
eschew: [31]
nightdog: [31]
furthermast: [31]
loudest: [32]
precurr: [32]
obsequy: [32]
deathdivin: [32]
trebled: [32]
threne: [32]
cosuprem: [32]
threno: [32]
minola: [33]
gremio: [33]
hortensio: [33]
litio: [33]
cambio: [33]
vincentio: [33]
tranio: [33]
imperson: [33]
biondello: [33]
grumio: [33]
curti: [33]
phillip: [33]
joseph: [33]
feez: [33, 40]
pallabri: [33]
jeronimi: [33]
merriman: [33]
clowder: [33]
merest: [33]
basin: [33, 38, 39]
rosewat: [33]
bestrew: [33, 34]
ewer: [33, 38]
diaper: [33]
soto: [33]
overey: [33]
butteri: [33]
obeis: [33]
overmerri: [33]
christophero: [33]
drank: [33, 39]
overleath: [33]
cardmak: [33]
transmut: [33]
hacket: [33]
wincot: [33]
bestraught: [33]
semirami: [33, 39]
stud: [33, 4

heartwhat: [37]
queenlo: [37]
throwerout: [37]
fatherblossom: [37]
bundl: [37, 39]
fightinghark: [37]
stairwork: [37]
trunkwork: [37]
behinddoor: [37]
nowwhoahoho: [37]
hilloa: [37]
loa: [37]
ailst: [37]
landbut: [37]
mainmast: [37]
yeast: [37]
shoulderbon: [37]
allboth: [37]
untri: [37]
ancientst: [37]
allayor: [37]
sowhich: [37]
missingli: [37]
removed: [37]
daffodil: [37]
doxi: [37]
pug: [37]
tirralirra: [37]
thrush: [37]
sowskin: [37]
snapperup: [37]
leven: [37]
sheepshear: [37]
ricewhat: [37]
shearer: [37]
hornpip: [37]
warden: [37]
raisin: [37]
writh: [37]
footman: [37, 39]
trollmydam: [37]
apebear: [37]
processserv: [37]
bailiff: [37]
prig: [37]
bearbait: [37, 41]
goodfac: [37]
unrol: [37, 39]
stilea: [37]
milea: [37]
lifeno: [37]
flora: [37]
flaunt: [37]
firerob: [37]
hostessship: [37]
dorcasreverend: [37]
gillyvor: [37]
pied: [37]
dibbl: [37]
lavend: [37]
fairst: [37, 40]
proserpina: [37, 40]
diss: [37]
unmarri: [37]
strengtha: [37]
doricl: [37]
lowborn: [37]
greensward: [37]


decepti: [40]
negat: [40]
bifold: [40]
orifex: [40]
ariachn: [40]
woof: [40]
fivefingerti: [40]
oereaten: [40]
shipmen: [40]
constring: [40]
concupi: [40]
princemi: [40]
fairand: [40]
almond: [40]
commodi: [40]
preciousdear: [40]
oergal: [40]
enrapt: [40]
farewellyet: [40]
phthisic: [40]
twhat: [40]
whoremasterli: [40]
sleeveless: [40]
rascalsthat: [40]
mouseeaten: [40]
dogfox: [40]
ulyssesi: [40]
miracleyet: [40]
polydama: [40]
menon: [40]
margareton: [40]
doreu: [40]
colossuswis: [40]
epistrophu: [40]
cediu: [40]
polyxen: [40]
amphimachu: [40]
thoa: [40]
palamed: [40]
patrocluss: [40]
galath: [40]
strawi: [40]
mower: [40]
noseless: [40]
forceless: [40, 42]
boyquel: [40]
correctiontroilu: [40]
frush: [40]
empal: [40]
doublehorn: [40]
sticklerlik: [40]
halfsup: [40]
bragless: [40]
moldeth: [40]
holddoor: [40]
twelfth: [41]
viola: [41]
messalin: [41]
shipwreck: [41]
illyria: [41]
cesario: [41]
olivia: [41]
tobi: [41]
aguecheek: [41]
malvolio: [41]
fest: [41]
fabian: [41]
orsino: [41]
cu

In [8]:
#Stemmed words with preprocessing and stopwords excluded
print(tokens_words_stemmed)

['midsumm', 'night', 'dream', 'william', 'shakespear', 'edit', 'barbara', 'mowat', 'paul', 'werstin', 'michael', 'poston', 'rebecca', 'nile', 'folger', 'librari', 'creat', 'jul', 'fdt', 'version', 'charact', 'play', 'lover', 'hermia', 'lysand', 'helena', 'demetriu', 'theseu', 'duke', 'athen', 'hippolyta', 'queen', 'amazon', 'egeu', 'father', 'philostr', 'master', 'revel', 'nick', 'bottom', 'weaver', 'peter', 'quinc', 'carpent', 'franci', 'flute', 'bellowsmend', 'tom', 'snout', 'tinker', 'snug', 'joiner', 'robin', 'starvel', 'tailor', 'oberon', 'king', 'fairi', 'titania', 'goodfellow', 'puck', 'hobgoblin', 'servic', 'attend', 'peaseblossom', 'cobweb', 'mote', 'mustardse', 'lord', 'train', 'act', 'scene', 'enter', 'fair', 'nuptial', 'hour', 'draw', 'apac', 'happi', 'day', 'bring', 'anoth', 'moon', 'methink', 'slow', 'thi', 'wane', 'linger', 'desir', 'stepdam', 'dowag', 'wither', 'revenu', 'quickli', 'steep', 'themselv', 'time', 'silver', 'bow', 'newbent', 'heaven', 'behold', 'solemn', 's

In [9]:
#Unstemmed words with preprocessing but stopwords included
print(tokens_words_unstemmed)



## Spelling Correction Query Handling

In [10]:
def correct(word):
    "Find the best spelling correction for this word."
    # Prefer edit distance 0, then 1, then 2; otherwise default to word itself.
    candidates = (known(edits0(word)) or 
                  known(edits1(word)) or 
                  known(edits2(word)) or 
                  [word])
    return max(candidates, key=counts.get)

def known(words):
    "Return the subset of words that are actually in the dictionary."
    return {word for word in words 
                if word in counts}

def edits0(word): 
    "Return all strings that are zero edits away from word (i.e., just word itself)."
    return {word}

def edits2(word):
    "Return all strings that are two edits away from this word."
    return {e2 for e1 in edits1(word) 
                for e2 in edits1(e1)}

def edits1(word):
    "Return all strings that are one edit away from this word."
    pairs      = splits(word)
    deletes    = [a+b[1:]           for (a, b) in pairs if b]
    transposes = [a+b[1]+b[0]+b[2:] for (a, b) in pairs if len(b) > 1]
    replaces   = [a+c+b[1:]         for (a, b) in pairs for c in alphabet if b]
    inserts    = [a+c+b             for (a, b) in pairs for c in alphabet]
    return set(deletes + transposes + replaces + inserts)

def splits(word):
    "Return a list of all possible (first, rest) pairs that comprise word."
    return [(word[:i], word[i:]) 
                for i in range(len(word)+1)]


from string import ascii_lowercase as alphabet

assert alphabet == 'abcdefghijklmnopqrstuvwxyz'

import re
def tokens(text):
    "List all the word tokens (consecutive letters) in a text. Normalize to lowercase."
    return re.findall('[a-z]+', text.lower())

words = tokens_words_unstemmed
counts = unstemmed_dict
print(counts)




In [11]:
phrase_uncorrected = input("Enter Query for Spelling Correction: ")
phrase_corrected = map(correct, tokens(phrase_uncorrected))

print("Original Token, ", "Correct Token", end="\n\n")
print(*zip(tokens(phrase_uncorrected), 
           phrase_corrected), sep="\n")

def correct_text(text):
    "Correct all the words within a text, returning the corrected text."
    return re.sub('[a-zA-Z]+', correct_match, text)

def correct_match(match):
    "Spell-correct word in match, and preserve proper upper/lower/title case."
    word = match.group()
    return case_of(word)(correct(word.lower()))

def case_of(text):
    "Return the case-function appropriate for text: upper, lower, title, or just str."
    return (str.upper if text.isupper() else
            str.lower if text.islower() else
            str.title if text.istitle() else
            str)

print(phrase_uncorrected)
print(correct_text(phrase_uncorrected))

Enter Query for Spelling Correction: speling
Original Token,  Correct Token

('speling', 'spelling')
speling
spelling


In [12]:
phrase_uncorrected_2 = 'Audiance sayzs: spealling is difffucult...'
print(phrase_uncorrected_2)
print(correct_text(correct_text(phrase_uncorrected_2)))

Audiance sayzs: spealling is difffucult...
Audience says: spelling is difficult...


## WildCard Query Handling

In [13]:
#Rotate each word to create the permuterm index
def rotate(str, n):
    return str[n:] + str[:n]

# Create Permuterm Index
with open("permutermindex.txt","w") as f:
#     keys = tokens.keys()
    for token in sorted(tokens_words_unstemmed):
        dkey = token + "$"
        for i in range(len(dkey),0,-1):
            out = rotate(dkey,i)
            f.write(out)
            f.write(" ")
            f.write(token)
            f.write("\n")

In [44]:
# Wildcard Query Types
# 1) X* = $X 
# 2) *X = X$*
# 3) X*Y = Y$X*
# 4) X*Y*Z = (Z$X*) and (Y*)
# 5) *X* = can be converted to X* form 


In [70]:
def querying(query):
    final_result=[]
    queryA_list=[]
    queryB_list=[]
    #final_output=[]
    
    # Split query and determine it's type
    parts = query.split("*")

    final_result.append('Query Processed as:-')
    final_result.append(parts)
    
    #These are the different cases formed depending on the number of wild card characters and their position in our query
    if len(parts)==1:
        case =0
    elif len(parts) == 3:
        case = 4
    elif parts[1] == "":
        case = 1
    elif parts[0] == "":
        case = 2
    elif parts[0] != "" and parts[1] != "":
        case = 3

    #Case 4 is dealt sperately as it has 2 sub queries    
    if case == 4:
        if parts[0] == "":
            case = 1

    # Read Permuterm Index
    permuterm = {}
    with open("permutermindex.txt") as f:
        for line in f:
            temp = line.split()
            permuterm[temp[0]] = temp[1]
    
    #This function will match the prefix of the word/wildcard query to the words in index
    
    def common_words(A,B):
        return set(A).intersection(B)
    
    def prefix_match(term, prefix):
        term_list = []
        for tk in term.keys():
            if tk.startswith(prefix):
                 #final_result.append(tk)     # Permuterm Index where wildcard query is matched
                term_list.append(term[tk])
        return term_list

    #This function is used to process query (ie after prefix match, the word and document is extracted where the prefix match has occured)
    def process_query(query):    
        term_list = prefix_match(permuterm,query)
        #print(term_list)
        final_result.append('Words Matching Wildcard Query:-')
        final_result.append(term_list)
        #final_output.append(term_list)

    #Queries are processed on the bases of their cases
    if case == 0:
        pass
    elif case == 1:
        if (parts[0]==''):
            query = parts[1]
            final_result.append('This is how the query will be processed')
            final_result.append(query)
        else:
            query = "$" + parts[0]
            final_result.append('This is how the query will be processed')
            final_result.append(query)
    elif case == 2:
        query = parts[1] + "$"
        final_result.append('This is how the query will be processed')
        final_result.append(query)
    elif case == 3:
        query = parts[1] + "$" + parts[0]
        final_result.append('This is how the query will be processed')
        final_result.append(query)      
    elif case == 4:
        queryA = parts[2] + "$" + parts[0]
        queryB = parts[1]
        final_result.append('This is how the query will be processed')
        final_result.append([queryA, queryB])

    if case != 4:
        process_query(query)
    elif case == 4:
        
    # query A: Z$X*
        queryA_list = prefix_match(permuterm,queryA)
        final_result.append('This is out List contating the terms which match our desired queryA')
        final_result.append(queryA_list)
    # query B: Y*
        queryB_list= prefix_match(permuterm,queryB)
        final_result.append('This is out List contating the terms which match our desired queryB')
        final_result.append(queryB_list)  
        
    # Intersection of Query A and Query B words
        queryA_and_queryB = common_words(queryA_list,queryB_list)
        final_result.append('This is List contating common term documents for queryA and queryB')
        final_result.append(queryA_and_queryB)
        #final_output.append(queryA_and_queryB)
    
    return(final_result)

In [71]:
final_result= querying(input('Enter wildcard query: '))
final_result


Enter wildcard query: f*k*g


['Query Processed as:-',
 ['f', 'k', 'g'],
 'This is how the query will be processed',
 ['g$f', 'k'],
 'This is out List contating the terms which match our desired queryA',
 ['facing',
  'fading',
  'failing',
  'fainting',
  'fairestboding',
  'fairing',
  'falling',
  'falseboding',
  'falsecreeping',
  'falsespeaking',
  'falsing',
  'faltring',
  'famishing',
  'fang',
  'fanning',
  'faring',
  'farthing',
  'fashioning',
  'fashionmonging',
  'fastfalling',
  'fastgrowing',
  'fasting',
  'fathersnug',
  'fatting',
  'favoring',
  'fawning',
  'fearing',
  'feastfinding',
  'feasting',
  'feebling',
  'feeding',
  'feeling',
  'feigning',
  'felllurking',
  'fencing',
  'festring',
  'fetching',
  'fettering',
  'fig',
  'fighting',
  'figuring',
  'filching',
  'filling',
  'finding',
  'fingering',
  'fingring',
  'firing',
  'fisting',
  'fitting',
  'fixing',
  'flag',
  'flagging',
  'flailing',
  'flaming',
  'flaring',
  'flashing',
  'flatlong',
  'flattering',
  'flattr