In [1]:
import nltk

In [2]:
simple_sentence = "Paris is the capital of France."
print(simple_sentence)

Paris is the capital of France.


In [3]:
words_in_sentence = nltk.word_tokenize(simple_sentence)
print(words_in_sentence)

['Paris', 'is', 'the', 'capital', 'of', 'France', '.']


In [4]:
parts_of_speech_tags = nltk.pos_tag(words_in_sentence)
print(parts_of_speech_tags)

[('Paris', 'NNP'), ('is', 'VBZ'), ('the', 'DT'), ('capital', 'NN'), ('of', 'IN'), ('France', 'NNP'), ('.', '.')]


*Writing your own tagger*

In [5]:
def Learn_Default_Tagger(simple_sentence):
    words_in_sentence = nltk.word_tokenize(simple_sentence)
    tagger = nltk.DefaultTagger("NN")
    pos_enable_tag = tagger.tag(words_in_sentence)
    print(pos_enable_tag)

In [6]:
def Learn_Re_Tagger(simple_sentence):
    customer_patterns = [
        (r".*ing$", "ADJECTIVE"),
        (r".*ly$", "ADVERB"),
        (r".*ion", "NOUN"),
        (r"(.*ate|.*en|is)$", "VERB"),
        (r"^an$", "INDEFINITE-ARTICLE"),
        (r"^(with|on|at)$", "PREPOSITION"),
        (r"^\-?[0-9]+(\.[0-9]+)$", "NUMBER"),
        (r".*$", None)
    ]
    tagger = nltk.RegexpTagger(customer_patterns)
    words_in_sentence = nltk.word_tokenize(simple_sentence)
    pos_enable_tags = tagger.tag(words_in_sentence)
    print(pos_enable_tags)

In [7]:
def Learn_LookUp_Tagger(simple_sentence):
    mapping = {
        ".": ".",
        "place": "NN",
        "on": "IN",
        "earth": "NN",
        "Mysore": "NNP",
        "is": "VBZ",
        "an": "DT",
        "amazing": "JJ"
    }
    tagger = nltk.UnigramTagger(model= mapping)
    words_in_sentence = nltk.word_tokenize(simple_sentence)
    pos_enable_tag = tagger.tag(words_in_sentence)
    print(pos_enable_tag)

In [8]:
if __name__ == "__main__":
    test_sentence = "Mysore is an amazing place on earth. I have visited Mysore 10 times."
    Learn_Default_Tagger(test_sentence)
    Learn_Re_Tagger(test_sentence)
    Learn_LookUp_Tagger(test_sentence)

[('Mysore', 'NN'), ('is', 'NN'), ('an', 'NN'), ('amazing', 'NN'), ('place', 'NN'), ('on', 'NN'), ('earth', 'NN'), ('.', 'NN'), ('I', 'NN'), ('have', 'NN'), ('visited', 'NN'), ('Mysore', 'NN'), ('10', 'NN'), ('times', 'NN'), ('.', 'NN')]
[('Mysore', None), ('is', 'VERB'), ('an', 'INDEFINITE-ARTICLE'), ('amazing', 'ADJECTIVE'), ('place', None), ('on', 'PREPOSITION'), ('earth', None), ('.', None), ('I', None), ('have', None), ('visited', None), ('Mysore', None), ('10', None), ('times', None), ('.', None)]
[('Mysore', 'NNP'), ('is', 'VBZ'), ('an', 'DT'), ('amazing', 'JJ'), ('place', 'NN'), ('on', 'IN'), ('earth', 'NN'), ('.', '.'), ('I', None), ('have', None), ('visited', None), ('Mysore', 'NNP'), ('10', None), ('times', None), ('.', '.')]
