In [None]:
import nltk
from nltk import pos_tag, word_tokenize, FreqDist, bigrams
from nltk.chunk import conlltags2tree, tree2conlltags
from nltk.chunk.regexp import RegexpParser
from nltk.tree import Tree
import matplotlib.pyplot as plt

In [None]:
# Fetch the NLTK data packages for this notebook; a package that is already
# installed is reported as up-to-date rather than downloaded again.
# 'punkt' and 'averaged_perceptron_tagger' are presumably the models behind
# word_tokenize() and pos_tag() used below — confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# NOTE(review): no visible cell calls a named-entity chunker, so the next two
# packages look unused here — confirm before removing them.
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [None]:
def get_input_sentences():
    """Interactively collect sentences from the user.

    First asks how many sentences will be entered, then prompts once per
    sentence. A count that is not a non-negative whole number is rejected with
    a short message and asked for again, instead of aborting the cell with an
    unhandled ``ValueError``.

    Returns:
        list[str]: The sentences exactly as typed, in the order entered.
        The list is empty when the user asks for 0 sentences.
    """
    while True:
        raw_count = input("Enter the number of sentences: ")
        try:
            num_sentences = int(raw_count)
        except ValueError:
            print("Please enter a whole number, e.g. 3.")
            continue
        if num_sentences < 0:
            # range() of a negative count would silently yield no prompts at all.
            print("The number of sentences cannot be negative.")
            continue
        break

    return [input("Enter a sentence: ") for _ in range(num_sentences)]

# Prompt the user for the raw sentences; the tokenising cell below builds on this list.
sentences = get_input_sentences()

Enter the number of sentences: 3
Enter a sentence: Hi I am Navya
Enter a sentence: I am pleased to meet you.
Enter a sentence: Hope you are doing well.


In [None]:
# Split every raw sentence into its word and punctuation tokens.
tokenized_sentences = [word_tokenize(sentence) for sentence in sentences]

# Tag all sentences in one call. pos_tag_sents() is NLTK's entry point for
# tagging several sentences efficiently and returns the same structure as
# calling pos_tag() on each one: a list of (token, tag) lists, one per sentence.
pos_tagged_sentences = nltk.pos_tag_sents(tokenized_sentences)



## POS Tagging

In [None]:
# Show the (token, tag) pairs produced for each sentence.
print("POS Tagged Sentences:")
for sentence in pos_tagged_sentences:
    print(sentence)

# Define chunking grammar.
# RegexpParser applies the stages from top to bottom, and by default makes a
# single pass, so a stage can only match chunk labels created by stages listed
# above it. PP is therefore declared before VP, whose pattern refers to <PP>.
# <CLAUSE> inside VP can still only match on a later pass, i.e. when the parser
# is built with loop=2 or more.
# NOTE(review): the VP pattern ends with `$`, so it only matches at the very end
# of the tag sequence; a sentence-final punctuation token presumably keeps it
# from firing — confirm whether the anchor is intended.
grammar = r"""
    NP: {<DT|JJ|NN.*>+}   # Chunk sequences of DT, JJ, NN
    PP: {<IN><NP>}       # Chunk prepositions followed by NP
    VP: {<VB.*><NP|PP|CLAUSE>+$}  # Chunk verbs and their arguments
    CLAUSE: {<NP><VP>}   # Chunk NP, VP
"""

POS Tagged Sentences:
[('Hi', 'NNP'), ('I', 'PRP'), ('am', 'VBP'), ('Navya', 'RB')]
[('I', 'PRP'), ('am', 'VBP'), ('pleased', 'JJ'), ('to', 'TO'), ('meet', 'VB'), ('you', 'PRP'), ('.', '.')]
[('Hope', 'NN'), ('you', 'PRP'), ('are', 'VBP'), ('doing', 'VBG'), ('well', 'RB'), ('.', '.')]


## Chunking

In [None]:
# Build the chunker from the grammar, then run it over each tagged sentence in turn.
chunk_parser = RegexpParser(grammar)
chunked_sentences = list(map(chunk_parser.parse, pos_tagged_sentences))

# Print the chunk tree obtained for every sentence.
print("\nChunked Sentences:")
for chunk_tree in chunked_sentences:
    print(chunk_tree)



Chunked Sentences:
(S (NP Hi/NNP) I/PRP am/VBP Navya/RB)
(S I/PRP am/VBP (NP pleased/JJ) to/TO meet/VB you/PRP ./.)
(S (NP Hope/NN) you/PRP are/VBP doing/VBG well/RB ./.)


## CFG Tree

In [None]:
def _with_string_leaves(tree):
    """Return a copy of ``tree`` whose (word, tag) leaves are 'word/tag' strings.

    The copy is built structurally rather than by re-parsing the tree's
    bracketed text, so tokens that are themselves brackets, such as "(" or ")",
    cannot corrupt the result.
    """
    children = []
    for child in tree:
        if isinstance(child, Tree):
            children.append(_with_string_leaves(child))
        elif isinstance(child, tuple):
            # Same "word/tag" rendering the tree's text form uses for tagged tokens.
            children.append("/".join(child))
        else:
            children.append(str(child))
    return Tree(tree.label(), children)


# One displayable tree per chunked sentence, with 'word/tag' string leaves.
cfg_trees = [_with_string_leaves(chunked_sentence) for chunked_sentence in chunked_sentences]

# Display CFG trees
print("\nCFG Trees:")
for tree in cfg_trees:
    tree.pretty_print()


CFG Trees:
        S                   
   _____|_______________     
  |     |       |       NP  
  |     |       |       |    
I/PRP am/VBP Navya/RB Hi/NNP

                      S                          
   ___________________|____________________       
  |     |      |      |       |     |      NP    
  |     |      |      |       |     |      |      
I/PRP am/VBP to/TO meet/VB you/PRP ./. pleased/JJ

                    S                        
    ________________|____________________     
   |       |        |        |     |     NP  
   |       |        |        |     |     |    
you/PRP are/VBP doing/VBG well/RB ./. Hope/NN



## Term frequency

In [None]:
# Flatten the per-sentence token lists into a single list of tokens.
all_words = []
for tokens in tokenized_sentences:
    all_words.extend(tokens)

# Count how often each token occurs across all sentences.
freq_dist = FreqDist(all_words)

# Report every token with its count, in the distribution's own iteration order.
print("\nTerm Frequency:")
for term in freq_dist:
    print(f"{term}: {freq_dist[term]}")


Term Frequency:
Hi: 1
I: 2
am: 2
Navya: 1
pleased: 1
to: 1
meet: 1
you: 2
.: 2
Hope: 1
are: 1
doing: 1
well: 1


## Highest frequency

In [None]:
# most_common(1) returns an empty list when no tokens were collected (e.g. the
# user entered 0 sentences), so check before indexing into it.
top_terms = freq_dist.most_common(1)
if top_terms:
    most_common_word, top_count = top_terms[0]
    print(f"\nWord with Highest Frequency: {most_common_word} ({top_count})")
else:
    most_common_word = None
    print("\nWord with Highest Frequency: none (no tokens to count)")


Word with Highest Frequency: I (2)


## Highly occurring bigrams

In [None]:
# Build bigrams one sentence at a time so that no pair joins the last token of
# one sentence to the first token of the next.
all_bigrams = [
    pair
    for tokens in tokenized_sentences
    for pair in bigrams(tokens)
]
bigram_freq = FreqDist(all_bigrams)

# List every bigram with its count.
print("\nHighly Occurring Bigrams:")
for bigram, freq in bigram_freq.items():
    print(f"{bigram}: {freq}")

# Find and display the bigram with the highest frequency.
# max() raises ValueError on an empty collection, which happens when every
# sentence has fewer than two tokens, so guard that case explicitly.
if bigram_freq:
    highest_freq_bigram = max(bigram_freq, key=bigram_freq.get)
    print(f"\nBigram with Highest Frequency: {highest_freq_bigram} ({bigram_freq[highest_freq_bigram]})")
else:
    highest_freq_bigram = None
    print("\nBigram with Highest Frequency: none (no bigrams found)")



Highly Occurring Bigrams:
('Hi', 'I'): 1
('I', 'am'): 2
('am', 'Navya'): 1
('Navya', 'I'): 1
('am', 'pleased'): 1
('pleased', 'to'): 1
('to', 'meet'): 1
('meet', 'you'): 1
('you', '.'): 1
('.', 'Hope'): 1
('Hope', 'you'): 1
('you', 'are'): 1
('are', 'doing'): 1
('doing', 'well'): 1
('well', '.'): 1

Bigram with Highest Frequency: ('I', 'am') (2)
