![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

# Use pretrained `ner_glove_dl` Pipeline

### Spark `2.4` and Spark NLP `1.8.0`

 * DocumentAssembler
 * SentenceDetector
 * Tokenizer
 * Glove
 * NerDL


In [1]:
import sys
# Add the directory two levels up to the import path.
# NOTE(review): presumably so the repo-local `sparknlp` Python package is importable — confirm.
sys.path.append('../../')

#Spark ML and SQL
from pyspark.ml import Pipeline, PipelineModel
from pyspark.sql.functions import array_contains
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
#Spark NLP
from sparknlp.annotator import *
from sparknlp.common import RegexRule
from sparknlp.base import DocumentAssembler, Finisher

### Let's create a Spark Session for our app

In [2]:
# Build (or reuse) the session for this notebook. The same Spark NLP jar is
# registered via spark.jars and on both the driver and executor classpaths.
spark = (
    SparkSession.builder
    .appName("Ner_Glove")
    .master("local[*]")
    .config("spark.driver.memory", "8G")
    .config("spark.driver.maxResultSize", "2G")
    .config("spark.jars", "/tmp/sparknlp.jar")
    .config("spark.driver.extraClassPath", "/tmp/sparknlp.jar")
    .config("spark.executor.extraClassPath", "/tmp/sparknlp.jar")
    .config("spark.kryoserializer.buffer.max", "500m")
    .getOrCreate()
)

In [3]:
# Display the version string of the Spark session created above.
spark.version

'2.4.0'

In [4]:
# Sample sentences fed to the NER pipeline; each one exercises different
# entity types in the tagged output.
testSents = [
    # locations
    "Germany is a country in Europe.",
    # organisation / miscellaneous
    "The European Comission should take action.",
    # person name following an abbreviation
    "All the views turned to Mr. John Doe.",
]

In [5]:
# Load the previously saved pipeline model from the local demo_pipelines folder.
pipeline = PipelineModel.load(
    "../demo_pipelines/entity_recognizer_dl_en_1.8.0_2.4_1552766050145"
)

In [8]:
from sparknlp.base import LightPipeline

# Wrap the loaded pipeline in a LightPipeline and annotate the sample sentences.
lp = LightPipeline(pipeline)
result = lp.annotate(testSents)

# For every annotated sentence, pair each token with its NER tag.
[list(zip(annotated['token'], annotated['ner'])) for annotated in result]

[[('Germany', 'I-LOC'),
  ('is', 'O'),
  ('a', 'O'),
  ('country', 'O'),
  ('in', 'O'),
  ('Europe', 'I-LOC'),
  ('.', 'O')],
 [('The', 'O'),
  ('European', 'I-MISC'),
  ('Comission', 'I-ORG'),
  ('should', 'O'),
  ('take', 'O'),
  ('action', 'O'),
  ('.', 'O')],
 [('All', 'O'),
  ('the', 'O'),
  ('views', 'O'),
  ('turned', 'O'),
  ('to', 'O'),
  ('Mr', 'O'),
  ('.', 'O'),
  ('John', 'I-PER'),
  ('Doe', 'I-PER'),
  ('.', 'O')]]