# Learning how I can utilize spacy

- Spacy, a python lib, to be used in this project to train a model 

# Installation
- Installing spacy using pip
- Installing the pretrained EN NLP model for testing

In [None]:
pip install spacy

python -m spacy download en_core_web_sm

## Importing Spacy and doing some trials

- The bottom cell imports the `spacy` library and loads the English NLP model. It processes a sample text "Apple is looking at buying a UK startup for $1 billion." to create a `doc` object. The code then iterates over the named entities in the `doc` and prints each entity's text and label.

In [None]:
import spacy

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# Process a text
doc = nlp("ANGA Hub is looking at buying a Western startup for $1 billion.")

# Extract entities
for ent in doc.ents:
    print(ent.text, ent.label_)

## Training with Spacy
### Training data

In [1]:
import spacy
from spacy.tokens import DocBin
from spacy.training.example import Example
from spacy.util import filter_spans

In [2]:
TRAIN_DATA = [
    ("Pick up the red box from table A and place it on table B at 4 PM.", 
     {"entities": [(12, 19, "OBJECT"), (25, 32, "LOCATION"), (46, 53, "LOCATION"), (57, 61, "TIME")]}),

    ("Move the blue box to table 5 before 10 AM.", 
     {"entities": [(9, 17, "OBJECT"), (21, 28, "LOCATION"), (36, 40, "TIME")]}),

    ("Transport the yellow box to table C at 2:30 PM.", 
     {"entities": [(13, 24, "OBJECT"), (28, 35, "LOCATION"), (39, 45, "TIME")]}),

    ("Shift the small box from table 3 to table 7 by 5:15 PM.", 
     {"entities": [(10, 20, "OBJECT"), (26, 33, "LOCATION"), (37, 44, "LOCATION"), (48, 54, "TIME")]}),

    ("Retrieve the green box from table X and set it up on table Y at noon.", 
     {"entities": [(13, 23, "OBJECT"), (29, 36, "LOCATION"), (54, 61, "LOCATION"), (65, 69, "TIME")]}),

    ("Deliver the big box to table 12 by 3 PM.", 
     {"entities": [(12, 20, "OBJECT"), (24, 31, "LOCATION"), (35, 38, "TIME")]}),

    ("Move the brown box to table B before 8 AM.", 
     {"entities": [(9, 18, "OBJECT"), (22, 29, "LOCATION"), (37, 40, "TIME")]}),

    ("Send the package box to table A at 6:45 AM.", 
     {"entities": [(9, 21, "OBJECT"), (25, 32, "LOCATION"), (36, 42, "TIME")]}),

    ("Take the cardboard box to table D by 11 AM.", 
     {"entities": [(9, 24, "OBJECT"), (28, 35, "LOCATION"), (39, 44, "TIME")]}),

    ("Bring the heavy box from table 6 and place it on table 9 at 7 PM.", 
     {"entities": [(11, 21, "OBJECT"), (27, 34, "LOCATION"), (52, 59, "LOCATION"), (63, 67, "TIME")]}),

    # New Variations
    ("Lift the silver box from shelf 2 and transfer it to rack 5 at 1 PM.",
     {"entities": [(9, 20, "OBJECT"), (26, 33, "LOCATION"), (52, 59, "LOCATION"), (63, 67, "TIME")]}),

    ("Shift the white container from station X to station Y by 4:45 PM.",
     {"entities": [(10, 26, "OBJECT"), (32, 41, "LOCATION"), (45, 54, "LOCATION"), (58, 64, "TIME")]}),

    ("Move the orange crate to bin 3 before noon.",
     {"entities": [(9, 21, "OBJECT"), (25, 30, "LOCATION"), (38, 42, "TIME")]}),

    ("Retrieve the black package from section 7 and drop it at section 12 at 9:30 AM.",
     {"entities": [(13, 27, "OBJECT"), (33, 42, "LOCATION"), (59, 68, "LOCATION"), (72, 79, "TIME")]}),

    ("Deliver the large parcel to counter B before 6 PM.",
     {"entities": [(12, 24, "OBJECT"), (28, 36, "LOCATION"), (44, 48, "TIME")]}),

    ("Pick up the metallic box from desk 4 and set it on shelf 8 at 10:15 AM.",
     {"entities": [(12, 25, "OBJECT"), (31, 37, "LOCATION"), (51, 58, "LOCATION"), (62, 70, "TIME")]}),

    ("Transport the heavy crate to loading bay C at 7:30 PM.",
     {"entities": [(13, 25, "OBJECT"), (29, 42, "LOCATION"), (46, 53, "TIME")]}),

    ("Move the white package from locker 11 and store it in locker 15 by 5 PM.",
     {"entities": [(9, 23, "OBJECT"), (29, 37, "LOCATION"), (56, 64, "LOCATION"), (68, 71, "TIME")]}),

    ("Take the wooden chest to zone A before 8:30 AM.",
     {"entities": [(9, 23, "OBJECT"), (27, 33, "LOCATION"), (41, 48, "TIME")]}),

    ("Send the lightweight box to panel 2 at 11:45 AM.",
     {"entities": [(9, 26, "OBJECT"), (30, 37, "LOCATION"), (41, 49, "TIME")]}),

    ("Retrieve the gold box from section F and move it to table 3 at 2 PM.",
     {"entities": [(13, 21, "OBJECT"), (27, 36, "LOCATION"), (54, 61, "LOCATION"), (65, 68, "TIME")]}),

    ("Transfer the fragile box from shelf 9 and place it on counter 7 at 3:30 PM.",
     {"entities": [(13, 25, "OBJECT"), (31, 38, "LOCATION"), (56, 65, "LOCATION"), (69, 76, "TIME")]}),

    ("Bring the steel package to cabinet 4 before midnight.",
     {"entities": [(11, 26, "OBJECT"), (30, 39, "LOCATION"), (47, 55, "TIME")]}),

    ("Shift the red suitcase from floor 1 to floor 3 at 9 PM.",
     {"entities": [(10, 23, "OBJECT"), (29, 36, "LOCATION"), (40, 47, "LOCATION"), (51, 55, "TIME")]}),

    ("Pick up the gray container from dock A and load it onto truck B at 5:45 PM.",
     {"entities": [(12, 27, "OBJECT"), (33, 39, "LOCATION"), (57, 64, "LOCATION"), (68, 74, "TIME")]}),

    ("Move the lightweight box to conveyor 10 by 8:15 AM.",
     {"entities": [(9, 26, "OBJECT"), (30, 41, "LOCATION"), (45, 51, "TIME")]}),

    ("Transport the sealed package to area Z before 6:30 AM.",
     {"entities": [(13, 27, "OBJECT"), (31, 37, "LOCATION"), (45, 52, "TIME")]}),

    ("Lift the plastic container from bay 5 and place it in bay 9 by 4 PM.",
     {"entities": [(9, 27, "OBJECT"), (33, 38, "LOCATION"), (57, 62, "LOCATION"), (66, 69, "TIME")]}),
]

In [3]:
# Load a blank English model
nlp = spacy.blank("en")

# Add the NER component if it's not already present
if "ner" not in nlp.pipe_names:
    ner = nlp.add_pipe("ner", last=True)
else:
    ner = nlp.get_pipe("ner")

In [4]:
# Add labels to the NER component
for _, annotations in TRAIN_DATA:
    for ent in annotations["entities"]:
        ner.add_label(ent[2])

In [5]:
# Prepare training examples
doc_bin = DocBin()
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    ents = []
    for start, end, label in annotations["entities"]:
        span = doc.char_span(start, end, label=label)
        if span:
            ents.append(span)
    doc.ents = filter_spans(ents)
    doc_bin.add(doc)

In [6]:
# Training loop
nlp.begin_training()
optimizer = nlp.resume_training()

for i in range(30):  # Training for 30 iterations
    losses = {}
    for text, annotations in TRAIN_DATA:
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example], drop=0.3, losses=losses)
    print(f"Iteration {i+1}: Losses - {losses}")



Iteration 1: Losses - {'ner': 101.67150656813548}
Iteration 2: Losses - {'ner': 37.60334010249971}
Iteration 3: Losses - {'ner': 32.16715796887578}
Iteration 4: Losses - {'ner': 39.83000926843897}
Iteration 5: Losses - {'ner': 37.16629142423336}
Iteration 6: Losses - {'ner': 17.3953892072243}
Iteration 7: Losses - {'ner': 13.633921551028623}
Iteration 8: Losses - {'ner': 12.374973254719581}
Iteration 9: Losses - {'ner': 11.464808898951325}
Iteration 10: Losses - {'ner': 16.02827886002787}
Iteration 11: Losses - {'ner': 14.738978374450632}
Iteration 12: Losses - {'ner': 13.269475577507045}
Iteration 13: Losses - {'ner': 9.196134885464357}
Iteration 14: Losses - {'ner': 6.996138207939732}
Iteration 15: Losses - {'ner': 7.794066351309302}
Iteration 16: Losses - {'ner': 2.618262086647651}
Iteration 17: Losses - {'ner': 1.8945431576479757}
Iteration 18: Losses - {'ner': 6.58648641994138}
Iteration 19: Losses - {'ner': 1.8793278869173617}
Iteration 20: Losses - {'ner': 3.9072475739671173}
It

In [7]:
# Save the trained model
nlp.to_disk("ner_GC_model")
print("Model saved successfully!")

Model saved successfully!


## Using Trained Model

In [3]:
import spacy

# Load the trained model
nlp = spacy.load("ner_GC_model")

# Test with a new sentence
sentence = "Pick the ketchup box from Shelf A to the table 3E at 12 PM"
doc = nlp(sentence)

# Print the recognized entities
for ent in doc.ents:
    print(f"{ent.text} - {ent.label_}")

ketchup box - OBJECT
Shelf A - LOCATION
table 3E - LOCATION
