# POS tagger that recognizes a new tag "NSS" for plural nouns (the code below spells the tag "NSS"; the Penn Treebank name for plural nouns is "NNS")

In [21]:
from spacy.training.example import Example
import random 
import spacy

In [22]:
# Gold-standard tagging examples: (text, {"tags": [...]}) with exactly one tag
# per token. Every plural noun carries the custom tag "NSS"; singular and mass
# nouns keep "NOUN". Possessives ("my", "their") are tagged "PRON" throughout.
TRAIN_DATA = [
    ("Cars are great. They're fast.", {"tags": ["NSS", "VERB", "ADJ", "PUNCT", "PRON", "VERB", "ADV", "PUNCT"]}),
    ("Cats and dogs are pets.", {"tags": ["NSS", "CCONJ", "NSS", "VERB", "NSS", "PUNCT"]}),
    ("The doors of the house are red.", {"tags": ["DET", "NSS", "ADP", "DET", "NOUN", "VERB", "ADJ", "PUNCT"]}),
    ("I have pillows on my bed.", {"tags": ["PRON", "VERB", "NSS", "ADP", "PRON", "NOUN", "PUNCT"]}),
    ("The kids play with toys.", {"tags": ["DET", "NSS", "VERB", "ADP", "NSS", "PUNCT"]}),
    ("These books belong to the library.", {"tags": ["DET", "NSS", "VERB", "ADP", "DET", "NOUN", "PUNCT"]}),
    ("The girls and boys are playing.", {"tags": ["DET", "NSS", "CCONJ", "NSS", "VERB", "VERB", "PUNCT"]}),
    ("They brought pizzas for us.", {"tags": ["PRON", "VERB", "NSS", "ADP", "PRON", "PUNCT"]}),
    ("The windows in the house are large.", {"tags": ["DET", "NSS", "ADP", "DET", "NOUN", "VERB", "ADJ", "PUNCT"]}),
    ("The trees and flowers look beautiful.", {"tags": ["DET", "NSS", "CCONJ", "NSS", "VERB", "ADJ", "PUNCT"]}),
    ("These chairs are comfortable.", {"tags": ["DET", "NSS", "VERB", "ADJ", "PUNCT"]}),
    ("She bought dresses for the party.", {"tags": ["PRON", "VERB", "NSS", "ADP", "DET", "NOUN", "PUNCT"]}),
    ("The workers are on strike.", {"tags": ["DET", "NSS", "VERB", "ADP", "NOUN", "PUNCT"]}),
    ("The apples and oranges are juicy.", {"tags": ["DET", "NSS", "CCONJ", "NSS", "VERB", "ADJ", "PUNCT"]}),
    ("We need pencils and erasers.", {"tags": ["PRON", "VERB", "NSS", "CCONJ", "NSS", "PUNCT"]}),
    ("These cars have good mileage.", {"tags": ["DET", "NSS", "VERB", "ADJ", "NOUN", "PUNCT"]}),
    ("The hats and scarves are in the closet.", {"tags": ["DET", "NSS", "CCONJ", "NSS", "VERB", "ADP", "DET", "NOUN", "PUNCT"]}),
    ("She collects stamps and coins.", {"tags": ["PRON", "VERB", "NSS", "CCONJ", "NSS", "PUNCT"]}),
    ("The students passed their exams.", {"tags": ["DET", "NSS", "VERB", "PRON", "NSS", "PUNCT"]}),
    ("The doors and windows are locked.", {"tags": ["DET", "NSS", "CCONJ", "NSS", "VERB", "VERB", "PUNCT"]}),
]

In [23]:
# Empty English pipeline with a single, freshly added tagger component.
nlp = spacy.blank("en")
pos_tagger = nlp.add_pipe("tagger")

# Label inventory registered on the tagger; "NSS" is the custom plural-noun tag.
tags = [
    "AUX", "PUNCT", "PROPN", "ADJ", "ADP", "NSS", "VERB",
    "PART", "DET", "NOUN", "ADV", "CCONJ", "PRON", "NUM",
]
for tag in tags:
    pos_tagger.add_label(tag)

# Names of every component except the tagger, so they can be paused while training.
other_pipes = [name for name in nlp.pipe_names if name != "tagger"]

In [24]:
# Train the tagger for 10 epochs, one example per update step.
#
# The Example objects are the same in every epoch, so they are built once
# instead of re-tokenizing each text 10 times. Shuffling this private list
# (rather than TRAIN_DATA itself) leaves the module-level data in its original
# order, so re-running the cell starts from the same state.
train_examples = [
    Example.from_dict(nlp.make_doc(text), annotation)
    for text, annotation in TRAIN_DATA
]

# `select_pipes(disable=...)` is the spaCy v3 replacement for `disable_pipes`.
with nlp.select_pipes(disable=other_pipes):
    # `initialize` is the spaCy v3 name for the deprecated `begin_training`;
    # passing the real examples lets components set themselves up from the data.
    optimizer = nlp.initialize(lambda: train_examples)
    for i in range(10):
        random.shuffle(train_examples)
        # Accumulated loss of the current epoch; inspect after the cell has run.
        tagger_losses = {}
        for example in train_examples:
            nlp.update([example], sgd=optimizer, drop=0.5, losses=tagger_losses)

In [34]:
# Run the pipeline on a short sentence and list each token with its predicted tag.
doc = nlp("I love cats")
for token in doc:
    print(f"{token.text} {token.tag_}")

I PRON
love VERB
cats NSS


# NER model that recognizes a new entity label, "TECH", for mentions of Python

In [68]:
import random
import spacy
from spacy.training import Example

In [None]:
# NER examples: (text, {"entities": [(start_char, end_char, label)]}).
# Offsets are character positions with an exclusive end, so text[start:end]
# must be exactly the mention ("Python" / "python").
TRAIN_DATA3 = [
    ("Python is a great language for data science.", {"entities": [(0, 6, "TECH")]}),
    # Span was (6, 14), which covered "Python f" instead of the mention.
    ("I use Python for machine learning projects.", {"entities": [(6, 12, "TECH")]}),
    ("The Python community is very active.", {"entities": [(4, 10, "TECH")]}),
    ("Python has libraries like Pandas and NumPy.", {"entities": [(0, 6, "TECH")]}),
    ("I am coding in Python today.", {"entities": [(15, 21, "TECH")]}),
    ("My favorite IDE for Python is PyCharm.", {"entities": [(20, 26, "TECH")]}),
    ("We are building an application in python.", {"entities": [(34, 40, "TECH")]}),
    ("Python is used for web development with frameworks like Django.", {"entities": [(0, 6, "TECH")]}),
    ("I have been using Python since 2015.", {"entities": [(18, 24, "TECH")]}),
    ("The Python syntax is easy to learn.", {"entities": [(4, 10, "TECH")]}),
    ("Python is a powerful language for automation.", {"entities": [(0, 6, "TECH")]}),
    ("There are many tutorials available for Python online.", {"entities": [(39, 45, "TECH")]}),
    ("Python supports multiple programming paradigms.", {"entities": [(0, 6, "TECH")]}),
    ("Many data scientists prefer python for their analysis.", {"entities": [(28, 34, "TECH")]}),
    ("Python is one of the most popular programming languages.", {"entities": [(0, 6, "TECH")]}),
    ("I am writing a script in python to automate tasks.", {"entities": [(25, 31, "TECH")]}),
    ("Python is a versatile language that can be used for AI.", {"entities": [(0, 6, "TECH")]}),
    ("I'm learning python to enhance my skills.", {"entities": [(13, 19, "TECH")]}),
]


In [70]:
# Fine-tune the NER component of the pretrained English pipeline so that it
# also predicts the new "TECH" label.
# NOTE(review): the training data contains only "TECH" annotations; check that
# the pretrained entity types still behave as expected after this update.
nlp_ner = spacy.load("en_core_web_sm")
ner_tagger = nlp_ner.get_pipe("ner")

tags=["TECH"]

for tag in tags:
    ner_tagger.add_label(tag)
other_pipes = [pipe for pipe in nlp_ner.pipe_names if pipe != 'ner']

# The Example objects are the same in every epoch, so build them once.
# Shuffling this list instead of TRAIN_DATA3 keeps the module-level data in
# its original order when the cell is re-run.
ner_examples = [
    Example.from_dict(nlp_ner.make_doc(text), annotation)
    for text, annotation in TRAIN_DATA3
]

# `select_pipes(disable=...)` is the spaCy v3 replacement for `disable_pipes`;
# only the "ner" component is updated inside the block.
with nlp_ner.select_pipes(disable=other_pipes):
    optimizer2 = nlp_ner.create_optimizer()
    for i in range(10):
        random.shuffle(ner_examples)
        # Accumulated loss of the current epoch; inspect after the cell has run.
        ner_losses = {}
        for example in ner_examples:
            nlp_ner.update([example], sgd=optimizer2, losses=ner_losses)


I'm learning python
('python', 'TECH')
