This notebook shows how to use a pretrained assertion status model.

In [None]:
import sys
sys.path.append('../../')

from pyspark.sql import SparkSession
from pyspark.ml import PipelineModel

from sparknlp.annotator import *
from sparknlp.common import *
from sparknlp.base import *
from sparknlp.pretrained import ResourceDownloader

from pathlib import Path

if sys.version_info[0] < 3:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

In [None]:
# Start (or reuse) a Spark session running locally with a single worker thread.
# Driver memory and the maximum collected-result size are set explicitly, and
# lib/sparknlp.jar is added to the driver classpath.
spark = (
    SparkSession.builder
    .appName("assertion-status")
    .master("local[1]")
    .config("spark.driver.memory", "4G")
    .config("spark.driver.maxResultSize", "2G")
    .config("spark.driver.extraClassPath", "lib/sparknlp.jar")
    .getOrCreate()
)

Create some data for testing purposes

In [None]:
from pyspark.sql import Row

# Row factory with three named fields: the sentence text plus two integers.
# NOTE(review): 'start' and 'end' look like token indices delimiting the target
# span inside the sentence — confirm against the consumer of these columns.
R = Row('sentence', 'start', 'end')

# The last two rows share the same sentence but carry different start/end values.
sample_rows = [
    R('Sister with stomach cancer .', 2, 3),
    R('A thallium stress test showed tachycardia and severe dyspnea', 5, 5),
    R('Positive for shortness of breath, no cough', 2, 4),
    R('Positive for shortness of breath, no cough', 7, 7),
]

test_data = spark.createDataFrame(sample_rows)

Create some pipelines

In [None]:
import time  # not referenced in this cell; kept in case later cells rely on it

# Stage 1: read the raw 'sentence' column and emit a 'document' column.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("sentence")
    .setOutputCol("document")
)

# Stage 2: consume 'document' and emit a 'token' column.
tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: pretrained NER model with default name/language arguments.
# pretrained() is invoked on the class itself: there is no need to construct a
# throwaway NerDLModel() instance just to reach the factory method.
ner_dl = (
    NerDLModel.pretrained()
    .setInputCols(["document", "token"])
    .setOutputCol("ner_dl")
)

# Stage 4: post-process the 'ner_dl' annotations for display.
# NOTE(review): setIncludeKeys(True) presumably keeps annotation keys/metadata
# in the finished output — confirm against the installed Spark NLP version.
finisher = (
    Finisher()
    .setInputCols(["ner_dl"])
    .setIncludeKeys(True)
)

# The stages are assembled directly into a PipelineModel; no fit() call is made.
pipeline_fast_dl = PipelineModel(stages=[documentAssembler, tokenizer, ner_dl, finisher])

Now let's use these pipelines and see the results

In [None]:
# Run every pipeline stage over the sample rows, then print the resulting rows.
ner_results = pipeline_fast_dl.transform(test_data)
ner_results.show()