# POS Tagging

In [2]:
import spacy 

# Load English tokenizer, tagger, 
spacy.cli.download("en_core_web_sm")
# parser, NER and word vectors 
nlp = spacy.load("en_core_web_sm") 

# Process whole documents 
text = "The quick brown fox jumps over the lazy dog."

doc = nlp(text) 

# Token and Tag 
for token in doc: 
    print(token, token.pos_) 

# You want list of Verb tokens 
print("Verbs:", [token.text for token in doc if token.pos_ == "VERB"]) 


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m909.4 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m36m0:00:01[0mm
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
The DET
quick ADJ
brown ADJ
fox NOUN
jumps VERB
over ADP
the DET
lazy ADJ
dog NOUN
. PUNCT
Verbs: ['jumps']


In [3]:
import nltk
from nltk import word_tokenize, pos_tag
from nltk.corpus import stopwords
nltk.download('averaged_perceptron_tagger')

stop_words = set(stopwords.words('english'))


sentence = "The quick brown fox jumps over the lazy dog."


tokens = word_tokenize(sentence)

print(tokens)
pos_tags = pos_tag(tokens)


print("POS Tags:")
print(pos_tags)

['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog', '.']
POS Tags:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/subhan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


# Coreference Resolution​

In [4]:
import spacy
import neuralcoref

# Load spaCy model
nlp = spacy.load('en_core_web_sm')

# Add neuralcoref to the pipeline
neuralcoref.add_to_pipe(nlp)

# Example text
text = "John said he would come. He didn't."

# Process the text
doc = nlp(text)

# Print coreferences
if doc._.has_coref:
    for cluster in doc._.coref_clusters:
        print(cluster)


ModuleNotFoundError: No module named 'neuralcoref'

# Data Augmentation

In [None]:
import textattack
from textattack.augmentation import WordNetAugmenter, EmbeddingAugmenter, EasyDataAugmenter, CharSwapAugmenter

# Example text
text = "Text augmentation is essential for improving model performance."

# Using WordNet augmenter (synonym replacement)
augmenter = WordNetAugmenter()
augmented_texts = augmenter.augment(text)
print("WordNet Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)

# Using Embedding augmenter (replaces words with their embeddings)
augmenter = EmbeddingAugmenter()
augmented_texts = augmenter.augment(text)
print("\nEmbedding Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)

# Using Easy Data Augmentation (includes synonym replacement, insertion, deletion)
augmenter = EasyDataAugmenter()
augmented_texts = augmenter.augment(text)
print("\nEasy Data Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)

# Using Character Swap Augmenter (introduces character-level noise)
augmenter = CharSwapAugmenter()
augmented_texts = augmenter.augment(text)
print("\nCharacter Swap Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)


ModuleNotFoundError: No module named 'textattack'

# NER

In [None]:
import spacy

# Load the pre-trained spaCy model
nlp = spacy.load("en_core_web_sm")

# Example text
text = "Apple is looking at buying U.K. startup for $1 billion. Jeff Bezos founded Amazon."

# Process the text with spaCy
doc = nlp(text)

# Print detected entities
print("Entities detected in the text:")
for ent in doc.ents:
    print(f"{ent.text} - {ent.label_}")

Entities detected in the text:
Apple - ORG
U.K. - GPE
$1 billion - MONEY
Jeff Bezos - PERSON
Amazon - ORG
