<a href="https://colab.research.google.com/github/ManasviAtGitHub/Natural-Language-Processing/blob/main/Part_of_speech_tagging_with_HMM_example_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pomegranate

Collecting pomegranate
  Downloading pomegranate-0.14.8.tar.gz (4.3 MB)
[K     |████████████████████████████████| 4.3 MB 5.1 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: pomegranate
  Building wheel for pomegranate (PEP 517) ... [?25l[?25hdone
  Created wheel for pomegranate: filename=pomegranate-0.14.8-cp37-cp37m-linux_x86_64.whl size=15006590 sha256=6910583b98cea931f91fb663f79fb05a5d686cd8cf68bf43be88739db020bb32
  Stored in directory: /root/.cache/pip/wheels/24/68/69/0eaab474ef1d65abedcd47de8a38ab21d221d329954d7edd24
Successfully built pomegranate
Installing collected packages: pomegranate
Successfully installed pomegranate-0.14.8


In [None]:
#Importing the libraries
import nltk
import re
import numpy as np
import matplotlib.pyplot as plt
import pomegranate


In [None]:
# Create an empty hidden Markov model. States and transitions are attached in
# later cells, and the model is finalized with bake() before decoding.
# No `name` is passed here, which is presumably why the start state shows up as
# "None-start" in the Viterbi output -- confirm against the pomegranate docs.
model = pomegranate.HiddenMarkovModel()

In [None]:
### Emission probabilities P(word | tag), one row per part-of-speech tag.
# Columns follow `vocabulary`, i.e. the four distinct words of the example
# sentence. Each row is turned into a DiscreteDistribution and wrapped in a
# State named after its tag.
# NOTE(review): the rows sum to 0.2, 0.7 and 0.65 rather than 1, so they look
# like excerpts of a larger vocabulary table -- confirm before reusing them.
vocabulary = ("the", "fans", "watch", "race")

# Determiner (DT)
emission_determine = pomegranate.DiscreteDistribution(
    dict(zip(vocabulary, (0.2, 0, 0, 0)))
)
state_determine = pomegranate.State(emission_determine, name="DT")

# Noun (NN)
emission_noun = pomegranate.DiscreteDistribution(
    dict(zip(vocabulary, (0, 0.1, 0.3, 0.3)))
)
state_noun = pomegranate.State(emission_noun, name="NN")

# Verb (VB)
emission_verb = pomegranate.DiscreteDistribution(
    dict(zip(vocabulary, (0, 0.2, 0.15, 0.3)))
)
state_verb = pomegranate.State(emission_verb, name="VB")

In [None]:
# Register the three part-of-speech states (DT, NN, VB) with the HMM.
model.add_states(state_determine, state_noun, state_verb)

In [None]:
# Transition probabilities P(next tag | current tag).
# Each entry is (from_state, to_state, probability); model.start plays the role
# of the sentence-start marker "<s>". Zero-probability edges are declared
# explicitly so every row of the table is spelled out.
transitions = [
    # from "<s>" to determiner, noun, verb
    (model.start, state_determine, 0.8),
    (model.start, state_noun, 0.2),
    (model.start, state_verb, 0),
    # from determiner to determiner, noun, verb
    (state_determine, state_determine, 0),
    (state_determine, state_noun, 0.9),
    (state_determine, state_verb, 0.1),
    # from noun to determiner, noun, verb
    (state_noun, state_determine, 0),
    (state_noun, state_noun, 0.5),
    (state_noun, state_verb, 0.5),
    # from verb to determiner, noun, verb
    (state_verb, state_determine, 0.5),
    (state_verb, state_noun, 0.5),
    (state_verb, state_verb, 0),
]

# Add the edges in table order.
for from_state, to_state, probability in transitions:
    model.add_transition(from_state, to_state, probability)
                     
          
                     
                     

In [None]:
# Finalize the model structure so it can be used for decoding.
# NOTE(review): bake() is understood to normalize each state's outgoing
# transition probabilities by default; every row declared in this notebook
# already sums to 1, so that should be a no-op here -- confirm against the
# pomegranate documentation.
model.bake()

In [None]:
# Inspect the model's states; each State is displayed as a JSON description of
# its emission distribution, name and weight.
model.states

[{
     "class" : "State",
     "distribution" : {
         "class" : "Distribution",
         "dtype" : "str",
         "name" : "DiscreteDistribution",
         "parameters" : [
             {
                 "the" : 0.2,
                 "fans" : 0,
                 "watch" : 0,
                 "race" : 0
             }
         ],
         "frozen" : false
     },
     "name" : "DT",
     "weight" : 1.0
 }, {
     "class" : "State",
     "distribution" : {
         "class" : "Distribution",
         "dtype" : "str",
         "name" : "DiscreteDistribution",
         "parameters" : [
             {
                 "the" : 0,
                 "fans" : 0.1,
                 "watch" : 0.3,
                 "race" : 0.3
             }
         ],
         "frozen" : false
     },
     "name" : "NN",
     "weight" : 1.0
 }, {
     "class" : "State",
     "distribution" : {
         "class" : "Distribution",
         "dtype" : "str",
         "name" : "DiscreteDistribution",
         "

In [None]:
# Example sentence whose words will be tagged by the HMM.
text = "The fans watch the race."
print(text)

The fans watch the race.


In [None]:
# Tokenize the sentence: lowercase it, replace every non-alphanumeric character
# with a space, then split on whitespace.
# Note: this rebinds `text` from a str to a list of tokens, so re-running this
# cell without first re-running the cell that defines the sentence fails
# (a list has no .lower()).
text = re.sub(r"[^a-zA-Z0-9]", " ", text.lower()).split()
print("preprocessed text: \n", text)

preprocessed text: 
 ['the', 'fans', 'watch', 'the', 'race']


In [None]:
# Decode the most probable tag sequence for the tokenized sentence.
# viterbi() returns a pair: a score that is exponentiated later to obtain a
# probability (i.e. it is treated as a log-probability), and the best path as a
# list of (state index, State) tuples that begins with the model's start state.
viterbi_likelihood, viterbi_path = model.viterbi(text)

In [None]:
# Show the raw Viterbi path: (state index, State) pairs, including the model's
# start state as the first element.
viterbi_path

[(3, {
      "class" : "State",
      "distribution" : null,
      "name" : "None-start",
      "weight" : 1.0
  }), (0, {
      "class" : "State",
      "distribution" : {
          "class" : "Distribution",
          "dtype" : "str",
          "name" : "DiscreteDistribution",
          "parameters" : [
              {
                  "the" : 0.2,
                  "fans" : 0,
                  "watch" : 0,
                  "race" : 0
              }
          ],
          "frozen" : false
      },
      "name" : "DT",
      "weight" : 1.0
  }), (1, {
      "class" : "State",
      "distribution" : {
          "class" : "Distribution",
          "dtype" : "str",
          "name" : "DiscreteDistribution",
          "parameters" : [
              {
                  "the" : 0,
                  "fans" : 0.1,
                  "watch" : 0.3,
                  "race" : 0.3
              }
          ],
          "frozen" : false
      },
      "name" : "NN",
      "weight" : 1.0
  }), (

In [None]:
# Reduce the Viterbi path to the tag name of each visited state.
# Each path element is an (index, State) tuple; only the State's name is kept.
# The names are bound to a new variable instead of overwriting `viterbi_path`:
# overwriting made this cell fail on re-run (the list would then hold plain
# strings, which have no `.name`) and discarded the raw path.
# The first entry is the model's start state ("None-start"), not a word tag.
viterbi_tags = [state.name for _, state in viterbi_path]
print("the most likely sequence: \n", viterbi_tags, "\n")

the most likely sequence: 
 ['None-start', 'DT', 'NN', 'VB', 'DT', 'NN'] 



In [None]:
# Convert the Viterbi score back from log space and express it as a
# percentage. This is the probability of the single best tag path together
# with the observed words, not of the sentence summed over all taggings.
likelihood_percent = np.exp(viterbi_likelihood) * 100
print("with likelihood of (approximate):\n {:.3f}%".format(likelihood_percent))

with likelihood of (approximate):
 0.003%
