<img src="https://nlp.johnsnowlabs.com/assets/images/logo.png" width="180" height="50" style="float: left;">

In [67]:
from pyspark.sql import SparkSession
from pyspark.ml import Pipeline

from sparknlp.annotator import *
from sparknlp.common import RegexRule
from sparknlp.base import DocumentAssembler, Finisher

In [68]:
import sparknlp
from sparknlp.pretrained import PretrainedPipeline

# Create the Spark session through Spark NLP's helper. `spark` is used in a
# later cell to build the placeholder DataFrame the pipeline is fitted on.
spark = sparknlp.start()

In [77]:
# Example sentences fed to the spell checker; the first one contains the
# misspelling "unikorn". A later cell passes this list to LightPipeline.annotate.
data = ["Yesterday I lost my blue unikorn .", "Through a note of introduction from Bettina."]

In [70]:
  # Stage 1: read the raw string column "region" and emit document
  # annotations in a column named "text" (consumed by the tokenizer below).
  documentAssembler = (
      DocumentAssembler()
      .setInputCol("region")
      .setOutputCol("text")
  )

  # Stage 2: split each document in "text" into tokens.
  tokenizer = (
      Tokenizer()
      .setInputCols(["text"])
      .setOutputCol("token")
  )

  # Stage 3: pretrained context-aware spell checker.
  # `pretrained()` is a static loader, so it is called on the class directly
  # instead of on a throwaway `ContextSpellCheckerModel()` instance.
  # Tradeoff is kept at 10.0 as in the original; per the Spark NLP docs it
  # balances word-error cost against language-model transition cost.
  ocrspellModel = (
      ContextSpellCheckerModel.pretrained()
      .setInputCols(["token"])
      .setOutputCol("spell_checked")
      .setTradeoff(10.0)
  )

  # Stage 4: turn the "spell_checked" annotations into plain values, using "@"
  # as the value split symbol. No output column is set explicitly; a later
  # cell reads the result from "finished_spell_checked".
  finisher = (
      Finisher()
      .setInputCols(["spell_checked"])
      .setValueSplitSymbol("@")
  )


In [None]:
# Chain the stages in dependency order: document -> token -> spell check -> finisher.
pipeline = Pipeline(stages=[
    documentAssembler,
    tokenizer,
    ocrspellModel,
    finisher
])

# Single-row placeholder frame with the "region" input column, used only to
# fit the pipeline.
empty = spark.sparkContext.parallelize([['empty']]).toDF().toDF("region").cache()

pipeline_model = pipeline.fit(empty)

# Run the fitted pipeline over the example sentences. The original cell
# referenced `checked_data` without ever defining it (NameError); build it
# here from `data`, one sentence per row in the "region" column.
data_df = spark.createDataFrame([(sentence,) for sentence in data], ["region"])
checked_data = pipeline_model.transform(data_df)
checked_data.select("finished_spell_checked").show(truncate=False)


In [79]:
from sparknlp.base import LightPipeline
# Wrap the fitted pipeline model so it can be applied directly to Python
# strings instead of a Spark DataFrame.
lp = LightPipeline(pipeline_model)
# Annotate the example sentences; the displayed result is a list with one
# dict per input string, keyed by "spell_checked".
lp.annotate(data)

[{'spell_checked': ['Yesterday', 'I', 'lost', 'my', 'blue', 'uniform', '.']},
 {'spell_checked': ['Through',
   'a',
   'note',
   'of',
   'introduction',
   'from',
   'Bettina',
   '.']}]