In [1]:
!pip install spacy



## Basic NER using Pre-trained SpaCy Models

In [2]:
# Import SpaCy and load a small pre-trained English model
import spacy

# Load the English NER model
nlp = spacy.load("en_core_web_sm")

# Sample text
text = "Apple Inc. is planning to open a new office in Berlin by 2025."

# Process the text to perform NER
doc = nlp(text)

# Extract and print entities with their labels
for ent in doc.ents:
    print(f"Entity: {ent.text}, Label: {ent.label_}")

# Explanation of labels
spacy.explain("ORG")  # Organization
spacy.explain("GPE")  # Geopolitical entity (like countries, cities)


Entity: Apple Inc., Label: ORG
Entity: Berlin, Label: GPE
Entity: 2025, Label: DATE


'Countries, cities, states'

## Training a Custom NER Model in SpaCy

In [3]:
TRAINING_DATA = [
    ("Apple is releasing the iPhone 15 next month.", {"entities": [(21, 30, "PRODUCT")]}),
    ("Samsung introduced Galaxy Z Fold5 in August.", {"entities": [(18, 34, "PRODUCT")]}),
]

## Training the Custom NER Model

In [4]:
# Step 1: Import necessary libraries
import spacy
from spacy.util import minibatch, compounding
from spacy.training import Example

# Step 2: Load the small English model and enable training
nlp = spacy.load("en_core_web_sm")
ner = nlp.get_pipe("ner")

# Step 3: Add the new entity label (PRODUCT) to the NER pipeline
ner.add_label("PRODUCT")

# Disable other components during training to focus on NER
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]

# Start the training process
with nlp.disable_pipes(*unaffected_pipes):
    optimizer = nlp.resume_training()
    for epoch in range(30):  # 30 training iterations
        losses = {}
        batches = minibatch(TRAINING_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            for text, annotations in batch:
                example = Example.from_dict(nlp.make_doc(text), annotations)
                nlp.update([example], drop=0.5, losses=losses)
        print(f"Losses at iteration {epoch}: {losses}")

# Test the trained model
doc = nlp("I recently bought an iPhone 15.")
for ent in doc.ents:
    print(f"Entity: {ent.text}, Label: {ent.label_}")



Losses at iteration 0: {'ner': 4.943846055886369}
Losses at iteration 1: {'ner': 4.435759466957448}
Losses at iteration 2: {'ner': 1.9514614882537085}
Losses at iteration 3: {'ner': 0.8867795169133991}
Losses at iteration 4: {'ner': 0.061301185036473686}
Losses at iteration 5: {'ner': 1.6376555680341738}
Losses at iteration 6: {'ner': 0.04281547645305468}
Losses at iteration 7: {'ner': 0.032576980769488395}
Losses at iteration 8: {'ner': 0.47473649628747466}
Losses at iteration 9: {'ner': 0.15194939416552278}
Losses at iteration 10: {'ner': 4.9973367688475875e-05}
Losses at iteration 11: {'ner': 0.010227832632493014}
Losses at iteration 12: {'ner': 1.886193052287633e-06}
Losses at iteration 13: {'ner': 9.351136902705714e-07}
Losses at iteration 14: {'ner': 2.394752272808079e-06}
Losses at iteration 15: {'ner': 8.671697458970379e-08}
Losses at iteration 16: {'ner': 1.9660556271711235e-05}
Losses at iteration 17: {'ner': 3.0362805797438977e-07}
Losses at iteration 18: {'ner': 0.000178843

# Saving and Loading the Trained Model

In [5]:
# Save the model to disk
nlp.to_disk("custom_ner_model")

# Load the saved model
nlp_custom = spacy.load("custom_ner_model")

# Test the loaded model
doc = nlp_custom("Samsung Galaxy Z Fold5 was just released.")
for ent in doc.ents:
    print(f"Entity: {ent.text}, Label: {ent.label_}")

## Visualizing NER with SpaCy's displacy

In [6]:
from spacy import displacy

# Visualize the entities in the text
doc = nlp("Apple will release iPhone 15 in September.")
displacy.render(doc, style="ent", jupyter=True)  # Use jupyter=True in Jupyter notebooks

