# POS Tagging

In [1]:
import spacy

# Load spaCy's small English pipeline; calling it on a string yields a Doc.
nlp = spacy.load("en_core_web_sm")

# Sentence to tag
text = "The quick brown fox jumps over the lazy dog."
doc = nlp(text)

# One pass over the Doc: print every token next to its part-of-speech label
# and remember the ones labelled as verbs along the way.
verbs = []
for token in doc:
    print(token.text, token.pos_)
    if token.pos_ == "VERB":
        verbs.append(token.text)

# Summary line listing just the verb tokens
print("Verbs:", verbs)


The DET
quick ADJ
brown ADJ
fox NOUN
jumps VERB
over ADP
the DET
lazy ADJ
dog NOUN
. PUNCT
Verbs: ['jumps']


In [2]:
import nltk
from nltk import word_tokenize, pos_tag
from nltk.corpus import stopwords

# Fetch every NLTK resource this cell relies on, not only the tagger:
# word_tokenize() needs the Punkt models and stopwords.words() needs the
# stopwords corpus, so a fresh environment would otherwise hit a LookupError.
# Both the legacy names and the "_tab" / "_eng" variants are requested because
# newer NLTK releases look up the latter (NOTE(review): confirm against the
# installed NLTK version). Packages that are already up to date are skipped.
for resource in (
    "punkt",
    "punkt_tab",
    "stopwords",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
):
    nltk.download(resource)

# English stop-word set.
# NOTE(review): not used further down in this cell - confirm whether a later
# cell depends on it before removing.
stop_words = set(stopwords.words('english'))


sentence = "The quick brown fox jumps over the lazy dog."


# Split the sentence into word and punctuation tokens
tokens = word_tokenize(sentence)

print(tokens)

# Tag each token; the result is a list of (token, tag) pairs
pos_tags = pos_tag(tokens)


print("POS Tags:")
print(pos_tags)

['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog', '.']
POS Tags:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\PMLS\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


# Coreference Resolution

In [13]:
!pip install neuralcoref
import spacy
import neuralcoref

# Load spaCy model
nlp = spacy.load('en_core_web_sm')

# Add neuralcoref to the pipeline
neuralcoref.add_to_pipe(nlp)

# Example text
text = "John said he would come. He didn't."

# Process the text
doc = nlp(text)

# Print coreferences
if doc._.has_coref:
    for cluster in doc._.coref_clusters:
        print(cluster)


Collecting neuralcoref
  Using cached neuralcoref-4.0.tar.gz (368 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting boto3 (from neuralcoref)
  Obtaining dependency information for boto3 from https://files.pythonhosted.org/packages/cf/39/9244e5e3c7f980ac2136e66fdbe4d5c1e60cf0b98f8f965dc69832f11eb9/boto3-1.34.114-py3-none-any.whl.metadata
  Downloading boto3-1.34.114-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<1.35.0,>=1.34.114 (from boto3->neuralcoref)
  Obtaining dependency information for botocore<1.35.0,>=1.34.114 from https://files.pythonhosted.org/packages/49/20/61533c2b8044441d1d42011b5d9c418b97d96c626cf71497b6769c09e035/botocore-1.34.114-py3-none-any.whl.metadata
  Downloading botocore-1.34.114-py3-none-any.whl.metadata (5.7 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->neuralcoref)
  Obtaining dependency information for s3transfer<0.11.0,>=0.10.0 from https://files.pythonhosted.org/packages/83

  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [35 lines of output]
      C:\Users\PMLS\anaconda3\Lib\site-packages\setuptools\__init__.py:84: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprecated.
      !!
      
              ********************************************************************************
              Requirements should be satisfied by a PEP 517 installer.
              If you are using pip, you can try `pip install --use-pep517`.
              ********************************************************************************
      
      !!
        dist.fetch_build_eggs(dist.setup_requires)
      running bdist_wheel
      running build
      running build_py
      creating build
      creating build\lib.win-amd64-cpython-311
      creating build\lib.win-amd64-cpython-311\neuralcoref
      copying neuralcoref\file_utils.py -> build\lib.win-amd64-cpython-311\neuralcoref

ModuleNotFoundError: No module named 'neuralcoref'

# Data Augmentation

In [14]:
!pip install textattack
import textattack
from textattack.augmentation import WordNetAugmenter, EmbeddingAugmenter, EasyDataAugmenter, CharSwapAugmenter

# Example text
text = "Text augmentation is essential for improving model performance."

# Using WordNet augmenter (synonym replacement)
augmenter = WordNetAugmenter()
augmented_texts = augmenter.augment(text)
print("WordNet Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)

# Using Embedding augmenter (replaces words with their embeddings)
augmenter = EmbeddingAugmenter()
augmented_texts = augmenter.augment(text)
print("\nEmbedding Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)

# Using Easy Data Augmentation (includes synonym replacement, insertion, deletion)
augmenter = EasyDataAugmenter()
augmented_texts = augmenter.augment(text)
print("\nEasy Data Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)

# Using Character Swap Augmenter (introduces character-level noise)
augmenter = CharSwapAugmenter()
augmented_texts = augmenter.augment(text)
print("\nCharacter Swap Augmentation:")
for augmented_text in augmented_texts:
    print(augmented_text)


: 

# NER

In [None]:
import spacy

# Small English spaCy pipeline; its output Doc exposes recognised entities
# through `doc.ents`.
nlp = spacy.load("en_core_web_sm")

# Sample input: two sentences to run entity recognition on
text = "Apple is looking at buying U.K. startup for $1 billion. Jeff Bezos founded Amazon."

# Run the pipeline over the sample text
doc = nlp(text)

# Report one "<entity text> - <entity label>" line per detected entity
print("Entities detected in the text:")
for ent in doc.ents:
    print(ent.text, ent.label_, sep=" - ")

Entities detected in the text:
Apple - ORG
U.K. - GPE
$1 billion - MONEY
Jeff Bezos - PERSON
Amazon - ORG
