In [1]:
import json
import random
import shutil
import zipfile

import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding
# Fine-tune the NER component of a pretrained spaCy pipeline on custom
# legal-domain labels, save the resulting pipeline to disk and zip it.

# --- Configuration -----------------------------------------------------------
BASE_MODEL = "en_core_web_sm"
TRAIN_DATA_PATH = "extracted_data.json"
SEED = 0          # fixed so shuffling/dropout are reproducible across re-runs
DROPOUT = 0.5     # dropout rate used for every update
n_iter = 10       # number of passes over the training data
output_dir = "ner_pretrained_model"
# shutil.make_archive appends ".zip" to its base name, so derive the path from
# output_dir instead of hard-coding it a second time.
zipfile_path = output_dir + ".zip"

CUSTOM_LABELS = [
    'CASE_NUMBER',
    'STATUTE',
    'GPE',
    'COURT',
    'OTHER_PERSON',
    'PRECEDENT',
    'ORG',
    'DATE',
    'PETITIONER',
    'JUDGE',
    'WITNESS',
    'RESPONDENT',
    'PROVISION',
]  # Add more labels as needed


def _entity_spans(labels):
    """Convert one record's ``labels`` value into a list of (start, end, label).

    NOTE(review): the schema of extracted_data.json is not visible here. This
    assumes each record's "labels" is either a list of [start, end, label]
    character-offset triples or a dict holding such a list under "entities" --
    TODO confirm against the actual file. Any other shape fails loudly with an
    unpacking error rather than training on garbage.
    """
    spans = labels.get("entities", []) if isinstance(labels, dict) else labels
    return [(start, end, label) for start, end, label in spans]


# Load the pre-trained spaCy model
nlp = spacy.load(BASE_MODEL)

# Fetch the existing (already trained) NER component of that pipeline
ner = nlp.get_pipe("ner")

# Add your own labels to the NER pipeline (labels it already knows are ignored)
for label in CUSTOM_LABELS:
    ner.add_label(label)

# Load your training data from the JSON file
with open(TRAIN_DATA_PATH, encoding="utf-8") as f:
    training_data = json.load(f)

if not training_data:
    # Without this guard an untouched base model would be saved as "trained".
    raise ValueError(f"No training records found in {TRAIN_DATA_PATH!r}")

# Seed Python/NumPy (and the GPU backend, if any) before any stochastic step.
spacy.util.fix_random_seed(SEED)

# Per-iteration NER loss, kept for inspection after training.
loss_history = []

# Start the training process. Only the NER component is updated; the other
# components stay disabled inside this block and are restored afterwards, so
# the saved pipeline still contains them unchanged.
with nlp.select_pipes(enable=["ner"]):
    # resume_training() returns an optimizer suited to continuing training of
    # an already-trained pipeline.
    optimizer = nlp.resume_training()

    for _ in range(n_iter):
        random.shuffle(training_data)
        losses = {}

        # Create mini-batches whose size grows from 4 towards 32
        batches = minibatch(training_data, size=compounding(4.0, 32.0, 1.001))

        # Iterate over the mini-batches
        for batch in batches:
            # The original passed an always-empty list to nlp.update(), so no
            # learning happened. Build one Example per record instead.
            examples = [
                Example.from_dict(
                    nlp.make_doc(record["data"]),
                    {"entities": _entity_spans(record["labels"])},
                )
                for record in batch
            ]
            nlp.update(
                examples,
                drop=DROPOUT,  # Dropout rate
                sgd=optimizer,
                losses=losses,
            )

        loss_history.append(losses.get("ner"))

# Save the trained model
nlp.to_disk(output_dir)
print("Model saved successfully")

# Create a zip file of the model directory (written next to it as zipfile_path)
shutil.make_archive(output_dir, 'zip', output_dir)

# Provide a download link for the zip file
print("Download the trained model:")
print(zipfile_path)

Model saved successfully
Download the trained model:
ner_pretrained_model.zip
