![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/entity-ruler/EntityRuler_LightPipeline.ipynb)

In [3]:
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash

This notebook shows how to train an `EntityRulerApproach`, save the fitted `EntityRulerModel` to disk and load it back, and annotate a raw string with a `LightPipeline`.

In [4]:
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.sql import SparkSession

In [None]:
# Create the Spark session through Spark NLP's `start()` helper.
# The resulting `spark` object is used below to build the DataFrame.
spark = sparknlp.start()

In [6]:
# One row holding an empty string in a "text" column. It only serves as the
# DataFrame handed to the `fit` calls below; the patterns themselves come from
# the JSON resource file, not from this data.
placeholder_rows = [[""]]
data = spark.createDataFrame(placeholder_rows).toDF("text")

In [7]:
import json

# Each entry maps an entity label to the literal strings that should be tagged
# with it. Despite its name, `person` also carries the LOCATION entry.
person = [
    {"label": "PERSON", "patterns": ["Jon", "John", "John Snow", "Jon Snow"]},
    {"label": "PERSON", "patterns": ["Eddard", "Eddard Stark"]},
    {"label": "LOCATION", "patterns": ["Winterfell"]},
]

# Serialize once, then write the text out; this is the file that
# `setPatternsResource` points at later in the notebook.
serialized_patterns = json.dumps(person)
with open('./keywords.json', 'w') as patterns_file:
    patterns_file.write(serialized_patterns)

In [8]:
# Configure the rule-based matcher: it reads the "document" column, writes its
# matches to "entity", and takes its patterns from the JSON file written above.
entity_ruler = (
    EntityRulerApproach()
    .setInputCols(["document"])
    .setOutputCol("entity")
    .setPatternsResource("./keywords.json")
)

# Fit the approach into a model, then persist it; `overwrite()` replaces any
# model left at the same path by an earlier run, so the cell can be re-run.
entity_ruler_model = entity_ruler.fit(data)
ruler_writer = entity_ruler_model.write().overwrite()
ruler_writer.save("tmp_entity_ruler_model")

In [9]:
# Read the fitted model back from the "tmp_entity_ruler_model" directory.
# `load` is invoked on the class itself, so no throwaway EntityRulerModel
# instance is constructed just to reach the reader.
entity_ruler_loaded = EntityRulerModel.load("tmp_entity_ruler_model")

In [10]:
# Turn the raw "text" column into the "document" annotations the entity ruler consumes.
document_assembler = DocumentAssembler().setInputCol("text").setOutputCol("document")

# Use the model that was reloaded from disk (not the untrained approach), so the
# save/load round-trip is what actually runs inside the LightPipeline.
pipeline = Pipeline(stages=[document_assembler, entity_ruler_loaded])

# Fitting is still required to obtain a PipelineModel for LightPipeline to wrap.
pipeline_model = pipeline.fit(data)
light_pipeline = LightPipeline(pipeline_model)

In [11]:
# Annotate a plain Python string directly, with no DataFrame involved. The printed
# result is a dict with one list of strings per output column.
sample_text = "Lord Eddard Stark was the head of House Stark. John Snow lives in Winterfell."
result = light_pipeline.annotate(sample_text)
print(result)

{'document': ['Lord Eddard Stark was the head of House Stark. John Snow lives in Winterfell.'], 'entity': ['Eddard Stark', 'John Snow', 'Winterfell']}
