In [3]:
def pos_tagger(corpus):
    """Tag every whitespace-separated token of ``corpus`` with a coarse POS label.

    The tagger is purely rule based. Rules are tried in this order and the
    first match wins:

    1. suffix ``-ing`` -> ``VBG``, ``-ed`` -> ``VBD``, ``-ly`` -> ``RB``
    2. closed-class lexicons: determiners (``DT``), pronouns (``PRP``),
       prepositions (``PRE``)
    3. numbers with at most one decimal point -> ``CD``
    4. any word directly after a determiner -> ``NN``
    5. forms of "to be" -> ``VBZ``
    6. everything else -> ``NN``

    Rules are matched against the token with leading/trailing punctuation
    removed, so ``"quickly."`` is still recognised as an adverb and ``"3.5,"``
    as a number. The returned token keeps its original (lower-cased) spelling,
    punctuation included.

    Args:
        corpus: Text to tag. It is lower-cased and split on whitespace.

    Returns:
        A list of ``(token, tag)`` tuples in input order; ``[]`` for empty or
        whitespace-only input.
    """
    import string  # function-scope import keeps this cell runnable on its own

    determiners = {"a", "an", "the"}
    pronouns = {"i", "you", "he", "she", "it", "we", "they"}
    prepositions = {"from", "in", "of"}
    # "being" is deliberately absent: the -ing suffix rule always claims it first.
    be_verbs = {"is", "am", "are", "was", "were", "be", "been"}

    tagged = []
    for token in corpus.lower().split():
        # Match on the bare word; fall back to the raw token when it consists
        # of punctuation only (stripping would leave an empty string).
        word = token.strip(string.punctuation) or token

        if word.endswith("ing"):
            tag = "VBG"
        elif word.endswith("ed"):
            tag = "VBD"
        elif word.endswith("ly"):
            tag = "RB"
        elif word in determiners:
            tag = "DT"
        elif word in pronouns:
            tag = "PRP"
        elif word in prepositions:
            tag = "PRE"
        elif word.replace('.', '', 1).isdigit():
            tag = "CD"
        elif tagged and tagged[-1][1] == "DT":
            # A word right after a determiner is treated as a noun; this rule
            # intentionally takes precedence over the "to be" lexicon below.
            tag = "NN"
        elif word in be_verbs:
            tag = "VBZ"
        else:
            tag = "NN"
        tagged.append((token, tag))
    return tagged

In [16]:
# Demo: run the rule-based tagger on a sample sentence. The bare call on the
# last line makes the notebook display the returned list of (word, tag) tuples.
corpus = "The chirping of birds called forth the sun, rising from the horizon in likeness of a phoenix."
pos_tagger(corpus)

[('the', 'DT'),
 ('chirping', 'VBG'),
 ('of', 'PRE'),
 ('birds', 'NN'),
 ('called', 'VBD'),
 ('forth', 'NN'),
 ('the', 'DT'),
 ('sun,', 'NN'),
 ('rising', 'VBG'),
 ('from', 'PRE'),
 ('the', 'DT'),
 ('horizon', 'NN'),
 ('in', 'PRE'),
 ('likeness', 'NN'),
 ('of', 'PRE'),
 ('a', 'DT'),
 ('phoenix.', 'NN')]

In [31]:
def is_noun(word):
    """Report whether ``word`` appears in the small hard-coded noun lexicon.

    The comparison is exact (case-sensitive); callers are expected to pass
    lower-cased tokens.
    """
    noun_lexicon = frozenset(('fox', 'dog'))
    found = word in noun_lexicon
    return found
def is_verb(word):
    """Report whether ``word`` appears in the small hard-coded verb lexicon.

    The comparison is exact (case-sensitive); callers are expected to pass
    lower-cased tokens.
    """
    verb_lexicon = frozenset(('jumped',))
    found = word in verb_lexicon
    return found
def is_adjective(word):
    """Report whether ``word`` appears in the small hard-coded adjective lexicon.

    The comparison is exact (case-sensitive); callers are expected to pass
    lower-cased tokens.
    """
    adjective_lexicon = frozenset(('quick', 'brown', 'lazy'))
    found = word in adjective_lexicon
    return found
def is_preposition(word):
    """Report whether ``word`` appears in the small hard-coded preposition lexicon.

    The comparison is exact (case-sensitive); callers are expected to pass
    lower-cased tokens.
    """
    preposition_lexicon = frozenset(('over',))
    found = word in preposition_lexicon
    return found

In [35]:
def find_noun_phrases(sentence):
    """Extract simple noun phrases of the form ``determiner (adjective|noun)*``.

    The sentence is lower-cased and split on whitespace. A phrase starts at a
    determiner ("the", "a", "an") and extends over the following words that
    ``is_adjective`` or ``is_noun`` accept.

    Punctuation attached to a token is ignored when matching, so
    ``"the lazy dog."`` yields ``"the lazy dog"``. A token that ends in
    punctuation (e.g. ``"fox,"``) also closes the current phrase, so a phrase
    never runs across a comma or full stop.

    A candidate is only reported when it contains at least one noun; a bare
    determiner or a determiner followed only by adjectives is not a noun
    phrase and is skipped.

    Args:
        sentence: Text to scan.

    Returns:
        A list of phrases (space-joined, lower-cased, punctuation stripped) in
        order of appearance; ``[]`` when nothing matches.
    """
    import string  # function-scope import keeps this cell runnable on its own

    raw_tokens = sentence.lower().split()
    # Bare words used for lexicon matching; raw_tokens is kept so we can tell
    # where punctuation ends a phrase.
    words = [token.strip(string.punctuation) for token in raw_tokens]

    noun_phrases = []
    i = 0
    while i < len(words):
        if words[i] not in ('the', 'a', 'an'):
            i += 1
            continue

        np_words = [words[i]]
        has_noun = False
        # split() never yields empty tokens, so indexing [-1] is safe.
        phrase_closed = raw_tokens[i][-1] in string.punctuation
        i += 1
        while (not phrase_closed and i < len(words)
               and (is_adjective(words[i]) or is_noun(words[i]))):
            np_words.append(words[i])
            has_noun = has_noun or is_noun(words[i])
            phrase_closed = raw_tokens[i][-1] in string.punctuation
            i += 1

        if has_noun:
            noun_phrases.append(' '.join(np_words))
    return noun_phrases

In [37]:
def find_verb_phrases(sentence):
    """Extract simple verb phrases of the form ``verb (preposition|particle)*``.

    The sentence is lower-cased and split on whitespace. A phrase starts at a
    word accepted by ``is_verb`` and extends over the following words that
    ``is_preposition`` accepts or that are one of "over", "to", "for".

    Punctuation attached to a token is ignored when matching, so
    ``"the fox jumped."`` still yields ``"jumped"``. A token that ends in
    punctuation also closes the current phrase, so a phrase never runs across
    a comma or full stop.

    Args:
        sentence: Text to scan.

    Returns:
        A list of phrases (space-joined, lower-cased, punctuation stripped) in
        order of appearance; ``[]`` when nothing matches.
    """
    import string  # function-scope import keeps this cell runnable on its own

    raw_tokens = sentence.lower().split()
    # Bare words used for lexicon matching; raw_tokens is kept so we can tell
    # where punctuation ends a phrase.
    words = [token.strip(string.punctuation) for token in raw_tokens]
    particles = {'over', 'to', 'for'}

    verb_phrases = []
    i = 0
    while i < len(words):
        if not is_verb(words[i]):
            i += 1
            continue

        vp_words = [words[i]]
        # split() never yields empty tokens, so indexing [-1] is safe.
        phrase_closed = raw_tokens[i][-1] in string.punctuation
        i += 1
        while (not phrase_closed and i < len(words)
               and (is_preposition(words[i]) or words[i] in particles)):
            vp_words.append(words[i])
            phrase_closed = raw_tokens[i][-1] in string.punctuation
            i += 1

        verb_phrases.append(' '.join(vp_words))
    return verb_phrases

In [38]:
# Demo: extract and list the noun and verb phrases of a sample sentence.
sentence = "The quick brown fox jumped over the lazy dog"

# The loop variable is called `phrase` rather than `np`/`vp`: notebook loop
# variables stay in the kernel namespace, and `np` would shadow the usual
# numpy alias for every later cell.
print("Noun Phrases:")
noun_phrases = find_noun_phrases(sentence)
for i, phrase in enumerate(noun_phrases, 1):
    print(f"{i}. {phrase}")

print("Verb Phrases:")
verb_phrases = find_verb_phrases(sentence)
for i, phrase in enumerate(verb_phrases, 1):
    print(f"{i}. {phrase}")

Noun Phrases:
1. the quick brown fox
2. the lazy dog
Verb Phrases:
1. jumped over


In [2]:
import nltk
from nltk.tokenize import word_tokenize
# Download NLTK's "punkt" package before tokenizing.
# NOTE(review): recent NLTK releases appear to look up "punkt_tab" for
# word_tokenize instead — confirm against the installed NLTK version.
nltk.download("punkt")

corpus = "The cat is sitting"

# Tokenize the lower-cased text and build a sorted vocabulary of distinct tokens.
tokens = word_tokenize(corpus.lower())
vocabulary = sorted(set(tokens))

# Resolve each vocabulary word to its column once, instead of scanning the
# vocabulary list with .index() for every token.
word_to_index = {vocab_word: column for column, vocab_word in enumerate(vocabulary)}

# Build one vector per token: all zeros except a 1 in the token's vocabulary
# column (i.e. a one-hot encoding of each token, not corpus-level counts).
bow_vectors = []
for word in tokens:
    vector = [0] * len(vocabulary)
    vector[word_to_index[word]] = 1
    bow_vectors.append(vector)

# Print results
print("Vocabulary:", vocabulary)
for i, vector in enumerate(bow_vectors, 1):
    print(f"Token {i} BoW Vector: {vector}")

Vocabulary: ['cat', 'is', 'sitting', 'the']
Token 1 BoW Vector: [0, 0, 0, 1]
Token 2 BoW Vector: [1, 0, 0, 0]
Token 3 BoW Vector: [0, 1, 0, 0]
Token 4 BoW Vector: [0, 0, 1, 0]


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
