In [2]:
import nltk
from nltk import pos_tag, word_tokenize
from collections import Counter
import math

def hindle_rooth_analysis(paragraph):
    """Count verbs, nouns and the prepositions that immediately follow them.

    The paragraph is tokenized with ``word_tokenize`` and tagged with
    ``pos_tag``; the token list and the tagged list are both printed.
    A token is counted as a verb when its tag starts with ``'V'``, as a noun
    when its tag starts with ``'N'``, and as a preposition when its tag is
    exactly ``'IN'``. A preposition is attributed to the verb or noun only
    if that word is the token directly before it.

    Args:
        paragraph: Text to analyse.

    Returns:
        A dict with the keys ``'verb_count'``, ``'noun_count'``,
        ``'verb_preposition_count'``, ``'noun_preposition_count'`` and
        ``'lambda_value'``. ``lambda_value`` is the number of
        verb/noun-preposition pairs as a percentage of all verbs and nouns;
        it is ``0.0`` when the text contains no verbs or nouns.
    """
    # Tokenize the paragraph into words and tag each word with its part of speech
    words = word_tokenize(paragraph)
    print(words)
    tagged_words = pos_tag(words)
    print(tagged_words)

    # Only the tags matter for the counts below.
    tags = [tag for _, tag in tagged_words]

    verb_count = sum(1 for tag in tags if tag.startswith('V'))
    noun_count = sum(1 for tag in tags if tag.startswith('N'))

    # Walk adjacent (previous, current) tag pairs; the first token has no
    # predecessor, so a leading preposition is never attributed to anything.
    verb_preposition_count = 0
    noun_preposition_count = 0
    for prev_tag, tag in zip(tags, tags[1:]):
        if tag != 'IN':
            continue
        if prev_tag.startswith('V'):
            verb_preposition_count += 1
        elif prev_tag.startswith('N'):
            noun_preposition_count += 1

    total_pairs = verb_preposition_count + noun_preposition_count
    total_heads = verb_count + noun_count
    # Guard against text with no verbs or nouns (e.g. an empty paragraph),
    # which would otherwise raise ZeroDivisionError.
    lambda_value = (total_pairs / total_heads) * 100 if total_heads else 0.0

    return {
        'verb_count': verb_count,
        'noun_count': noun_count,
        'verb_preposition_count': verb_preposition_count,
        'noun_preposition_count': noun_preposition_count,
        'lambda_value': lambda_value
    }

def determine_result(verb_preposition_count, noun_preposition_count, verb_count, noun_count):
    """Print whether a preposition more likely attaches to the verb or the noun.

    The decision is the sign of ``log2((v_a * (1 - n_a)) / n_a)``, where
    ``v_a = verb_preposition_count / verb_count`` and
    ``n_a = noun_preposition_count / noun_count``. A positive score prints
    the verb-attachment message; anything else prints the noun-attachment
    message.

    Degenerate inputs are handled instead of raising:

    * a zero ``verb_count`` or ``noun_count`` makes the corresponding
      ratio ``0.0``;
    * a numerator of zero (or below) gives a score of ``-inf`` (noun);
    * otherwise a zero ``n_a`` gives a score of ``+inf`` (verb).

    Args:
        verb_preposition_count: Number of prepositions directly after a verb.
        noun_preposition_count: Number of prepositions directly after a noun.
        verb_count: Total number of verbs.
        noun_count: Total number of nouns.

    Returns:
        None. The result is written to standard output.
    """
    # probability of preposition attaching to the verb
    v_a = verb_preposition_count / verb_count if verb_count else 0.0
    # probability of preposition attaching to the noun
    n_a = noun_preposition_count / noun_count if noun_count else 0.0
    numerator = v_a * (1 - n_a)

    if numerator <= 0:
        # log2 is undefined here; with no evidence for verb attachment the
        # score is treated as -inf, which lands in the noun branch below.
        res = -math.inf
    elif n_a == 0:
        # Evidence for the verb and none for the noun: the ratio diverges.
        res = math.inf
    else:
        res = math.log2(numerator / n_a)

    if res > 0:
        print("Result: Preposition attached with Verb")
    else:
        print("Result: Preposition attached with Noun")

# Sample text: every sentence pairs a motion verb with a prepositional phrase.
paragraph = "In the park, the squirrel leaped onto the branch. Behind the trees, the cat waited beside the mailbox. At the corner of the street, the ducks swam towards the shore. Beyond the lamppost, the kite soared above the hill."
result = hindle_rooth_analysis(paragraph)

# Report each statistic in a fixed order, one per line.
for label, key in (
    ("Number of verbs:", 'verb_count'),
    ("Number of nouns:", 'noun_count'),
    ("Number of verb-preposition pairs:", 'verb_preposition_count'),
    ("Number of noun-preposition pairs:", 'noun_preposition_count'),
    ("Lambda value:", 'lambda_value'),
):
    print(label, result[key])

# Decide the attachment from the counts gathered above.
determine_result(
    verb_preposition_count=result['verb_preposition_count'],
    noun_preposition_count=result['noun_preposition_count'],
    verb_count=result['verb_count'],
    noun_count=result['noun_count'],
)


['In', 'the', 'park', ',', 'the', 'squirrel', 'leaped', 'onto', 'the', 'branch', '.', 'Behind', 'the', 'trees', ',', 'the', 'cat', 'waited', 'beside', 'the', 'mailbox', '.', 'At', 'the', 'corner', 'of', 'the', 'street', ',', 'the', 'ducks', 'swam', 'towards', 'the', 'shore', '.', 'Beyond', 'the', 'lamppost', ',', 'the', 'kite', 'soared', 'above', 'the', 'hill', '.']
[('In', 'IN'), ('the', 'DT'), ('park', 'NN'), (',', ','), ('the', 'DT'), ('squirrel', 'NN'), ('leaped', 'VBD'), ('onto', 'IN'), ('the', 'DT'), ('branch', 'NN'), ('.', '.'), ('Behind', 'IN'), ('the', 'DT'), ('trees', 'NNS'), (',', ','), ('the', 'DT'), ('cat', 'NN'), ('waited', 'VBD'), ('beside', 'IN'), ('the', 'DT'), ('mailbox', 'NN'), ('.', '.'), ('At', 'IN'), ('the', 'DT'), ('corner', 'NN'), ('of', 'IN'), ('the', 'DT'), ('street', 'NN'), (',', ','), ('the', 'DT'), ('ducks', 'NNS'), ('swam', 'VBP'), ('towards', 'IN'), ('the', 'DT'), ('shore', 'NN'), ('.', '.'), ('Beyond', 'IN'), ('the', 'DT'), ('lamppost', 'NN'), (',', ',')