Show how to use pretrained Spark NLP annotators (lemmatizer, spell checker, and deep-learning NER) in a pipeline

In [None]:
import sys
sys.path.append('../../')

from pyspark.sql import SparkSession
from pyspark.ml import PipelineModel

from sparknlp.annotator import *
from sparknlp.common import *
from sparknlp.base import *
from sparknlp.pretrained import ResourceDownloader

from pathlib import Path

if sys.version_info[0] < 3:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

In [None]:
# Start (or reuse) a local Spark session named "assertion-status".
# The Spark NLP package is requested through `spark.jars.packages`, and the
# driver memory / max result size are set explicitly for this session.
spark = (
    SparkSession.builder
    .appName("assertion-status")
    .master("local[*]")
    .config("spark.driver.memory", "6G")
    .config("spark.driver.maxResultSize", "2G")
    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.0")
    .getOrCreate()
)

Create some data for testing purposes

In [None]:
from pyspark.sql import Row

# Row factory fixing the column names of the test DataFrame.
R = Row('sentence', 'start', 'end')

# A single example sentence; `start`/`end` are filled with 0 and 1.
sample_rows = [
    R('Peter is a good person, and he was working at IBM', 0, 1),
]
test_data = spark.createDataFrame(sample_rows)

Create a pipeline from pretrained annotators

In [None]:
import time

# Wrap the raw `sentence` text column into a `document` annotation column.
documentAssembler = DocumentAssembler() \
    .setInputCol("sentence") \
    .setOutputCol("document")

# Split each document into tokens.
tokenizer = Tokenizer() \
    .setInputCols(["document"]) \
    .setOutputCol("token")

# Pretrained lemmatizer applied to the tokens.
lemmatizer = LemmatizerModel.pretrained() \
    .setInputCols(["token"]) \
    .setOutputCol("lemma")

# Pretrained Norvig spell checker applied to the tokens.
spell = NorvigSweetingModel.pretrained() \
    .setInputCols(["token"]) \
    .setOutputCol("spell")

# Pretrained deep-learning NER model. `pretrained` is called on the class
# (like the other annotators above) rather than on a throwaway instance.
# NOTE(review): depending on the Spark NLP version, NerDLModel may also
# require a word-embeddings input column -- confirm against the 2.0.0 API.
ner_dl = NerDLModel.pretrained() \
    .setInputCols(["document", "token"]) \
    .setOutputCol("ner_dl")

# Finish the NER, lemma and spell annotation columns, keeping metadata.
finisher = Finisher() \
    .setInputCols(["ner_dl", "lemma", "spell"]) \
    .setIncludeMetadata(True)

# Every stage is already a fitted/pretrained transformer, so the stages are
# assembled directly into a PipelineModel without a fit step.
pipeline_fast_dl = PipelineModel(stages = [documentAssembler, tokenizer, lemmatizer, spell, ner_dl, finisher])

Now let's run the pipeline on the test data and look at the results

In [None]:
# Apply the pipeline to the test data and print the resulting columns
# without truncating long cell values.
annotated = pipeline_fast_dl.transform(test_data)
annotated.show(truncate=False)