<h3><strong>Design of a PoS tagger using an HMM.</strong></h3>

In [None]:
from collections import defaultdict
import nltk
import numpy as np


class PosTagging:
    """Greedy bigram HMM part-of-speech tagger.

    Training counts tag-to-tag transitions and tag-to-word emissions from
    tagged sentences. Tagging walks a sentence left to right and, for each
    word, picks the tag that maximises

        P(tag | previous tag) * P(word | tag)

    estimated from those counts (no smoothing). Decoding is greedy: each
    decision is final and only looks at the previously chosen tag.

    Attributes:
        transition: ``(prev_tag, tag) -> count``; ``prev_tag`` is ``None``
            for the first word of a training sentence.
        emission: ``(tag, word) -> count``.
        tag_set: every tag seen in training.
        word_set: every word seen in training.
        context_count: ``prev_tag -> total transitions out of prev_tag``
            (normaliser for transition probabilities).
        tag_count: ``tag -> number of training tokens with that tag``
            (normaliser for emission probabilities).
    """

    def __init__(self, train_sent):
        """Build the count tables and train on ``train_sent``.

        Args:
            train_sent: iterable of sentences, each an iterable of
                ``(word, tag)`` pairs.
        """
        self.transition = defaultdict(int)
        self.emission = defaultdict(int)
        self.tag_set = set()
        self.word_set = set()
        # Normalisers kept alongside the raw counts so tagging never has to
        # rescan the whole transition/emission tables.
        self.context_count = defaultdict(int)
        self.tag_count = defaultdict(int)

        self.train(train_sent)

    def train(self, train_sent):
        """Accumulate transition and emission counts from tagged sentences.

        May be called again with more data; counts are added to the
        existing tables.

        Args:
            train_sent: iterable of sentences, each an iterable of
                ``(word, tag)`` pairs.
        """
        for sent in train_sent:
            prev_tag = None  # None marks the start-of-sentence context
            for word, tag in sent:
                self.transition[(prev_tag, tag)] += 1
                self.context_count[prev_tag] += 1
                self.emission[(tag, word)] += 1
                self.tag_count[tag] += 1
                self.tag_set.add(tag)
                self.word_set.add(word)
                prev_tag = tag

    def tag(self, sentence):
        """Tag a tokenised sentence.

        For each word the HMM score is tried first. If no tag gets a
        non-zero score (the context was never seen, or the word never
        followed that context) the tagger backs off to the tag most often
        seen with that word. Words never seen in training get ``None``.

        Args:
            sentence: iterable of word tokens.

        Returns:
            A list of ``(word, tag)`` pairs in input order; ``tag`` is
            ``None`` for words absent from the training data.
        """
        # A fixed candidate order makes tie-breaking reproducible; iterating
        # the raw set would depend on hash ordering.
        candidates = sorted(self.tag_set, key=repr)

        tagged_sentence = []
        # None is the start-of-sentence context, matching how train() keys
        # the transition table. An untagged previous word also leaves the
        # context at None.
        prev_tag = None
        for word in sentence:
            best_tag = self._best_hmm_tag(prev_tag, word, candidates)
            if best_tag is None:
                best_tag = self._most_frequent_tag(word, candidates)
            tagged_sentence.append((word, best_tag))
            prev_tag = best_tag
        return tagged_sentence

    def _best_hmm_tag(self, prev_tag, word, candidates):
        """Return the tag maximising P(tag | prev_tag) * P(word | tag), or None."""
        # .get() everywhere: indexing a defaultdict would insert zero-count
        # keys and grow the tables on every lookup.
        outgoing = self.context_count.get(prev_tag, 0)
        if not outgoing:
            # prev_tag never preceded anything in training (e.g. it only
            # occurs sentence-finally); there is no distribution to use.
            return None

        best_tag = None
        best_score = 0.0
        for candidate in candidates:
            emitted = self.emission.get((candidate, word), 0)
            followed = self.transition.get((prev_tag, candidate), 0)
            if not emitted or not followed:
                continue
            transition_prob = followed / outgoing
            emission_prob = emitted / self.tag_count[candidate]
            score = transition_prob * emission_prob
            if score > best_score:
                best_score = score
                best_tag = candidate
        return best_tag

    def _most_frequent_tag(self, word, candidates):
        """Return the tag seen most often with ``word`` in training, or None."""
        best_tag = None
        best_count = 0
        for candidate in candidates:
            seen = self.emission.get((candidate, word), 0)
            if seen > best_count:
                best_count = seen
                best_tag = candidate
        return best_tag

# Example: train on one hand-tagged sentence, then tag a short sentence whose
# last token ("nautue") does not occur in the training data.
train_sent = [
    [
        ('I', 'PRP'),
        ('love', 'VBP'),
        ('natural', 'JJ'),
        ('language', 'NN'),
        ('processing', 'NN'),
    ],
]
test_sents = "I love nautue".split()

hmm_tagger = PosTagging(train_sent)
tags = hmm_tagger.tag(test_sents)
print(tags)


[('I', 'PRP'), ('love', 'VBP'), ('nautue', None)]
