In [6]:
import nltk
from nltk.corpus import brown
from nltk.tag import hmm
from nltk.probability import LidstoneProbDist
from sklearn.model_selection import train_test_split
from transformers import pipeline

# Ensure necessary NLTK datasets are downloaded
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('brown')
nltk.download('universal_tagset')
nltk.download('maxent_ne_chunker')
nltk.download('words')

# 1. Named Entity Recognition (NER) using Hugging Face Transformers
def perform_ner(sentence):
    ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
    ner_results = ner_pipeline(sentence)
    print("Named Entities (Hugging Face):")
    for entity in ner_results:
        print(f"{entity['word']} -> {entity['entity']}")

# 2. POS tagging using NLTK
def pos_tag_nltk(sentence):
    words = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(words)
    print("POS Tags:")
    print(pos_tags)

# 3. HMM-based POS tagger
def train_hmm_pos_tagger():
    tagged_sentences = brown.tagged_sents(tagset='universal')
    train_data, test_data = train_test_split(tagged_sentences, test_size=0.2, random_state=42)
    
    trainer = hmm.HiddenMarkovModelTrainer()
    model = trainer.train(train_data, estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins))
    
    return model, test_data

def predict_hmm(model, sentence):
    words = nltk.word_tokenize(sentence)
    predicted_tags = model.tag(words)
    print("HMM POS Tags:")
    print(predicted_tags)

if __name__ == "__main__":
    sentence = "Elon Musk founded SpaceX in 2002 and Tesla Motors in 2003."
    
    # Perform NER
    perform_ner(sentence)
    
    # Perform POS tagging using NLTK
    pos_tag_nltk(sentence)
    
    # Train HMM POS Tagger
    hmm_model, test_data = train_hmm_pos_tagger()
    
    # Predict POS tags using HMM POS Tagger
    predict_hmm(hmm_model, sentence)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Hema\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Hema\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\Hema\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     C:\Users\Hema\AppData\Roaming\nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\Hema\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\Hema\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already

Named Entities (Hugging Face):
El -> I-PER
##on -> I-PER
Mu -> I-PER
##sk -> I-PER
Space -> I-ORG
##X -> I-ORG
Te -> I-ORG
##sla -> I-ORG
Motors -> I-ORG
POS Tags:
[('Elon', 'NNP'), ('Musk', 'NNP'), ('founded', 'VBD'), ('SpaceX', 'NNP'), ('in', 'IN'), ('2002', 'CD'), ('and', 'CC'), ('Tesla', 'NNP'), ('Motors', 'NNP'), ('in', 'IN'), ('2003', 'CD'), ('.', '.')]
HMM POS Tags:
[('Elon', 'X'), ('Musk', 'X'), ('founded', 'X'), ('SpaceX', 'X'), ('in', 'ADP'), ('2002', 'NUM'), ('and', 'CONJ'), ('Tesla', 'ADJ'), ('Motors', 'NOUN'), ('in', 'ADP'), ('2003', 'NUM'), ('.', '.')]
