![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

<H1> 6. Context Spell Checker - Medical v2.7.0 </H1>

In [0]:
import os
import json
import string
import numpy as np
import pandas as pd

import sparknlp
import sparknlp_jsl
from sparknlp.util import *
from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.pretrained import ResourceDownloader

from pyspark.sql import functions as F
from pyspark.ml import Pipeline, PipelineModel


# Pandas display settings, each spelled with its full "display." name.
# pandas resolves abbreviated names such as 'max_colwidth' by pattern matching,
# which can stop working if a later release adds a similarly named option.
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

# Report the installed sparknlp_jsl version.
print('sparknlp_jsl.version : ',sparknlp_jsl.version())

# NOTE(review): `spark` is not created in any visible cell; presumably the
# notebook runtime provides the session -- confirm before running elsewhere.
spark

In [0]:
# Stage 1: reads the "text" column and writes the "document" column.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: tokenizes "document" into "token", configured with the prefix and
# suffix strings listed below.
tokenizer = (
    RecursiveTokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
    .setPrefixes(["\"", "(", "[", "\n"])
    .setSuffixes([".", ",", "?", ")","!", "'s"])
)

# Stage 3: pretrained 'spellcheck_clinical' model; reads "token" and writes
# its output to "checked".
spellModel = (
    ContextSpellCheckerModel
    .pretrained('spellcheck_clinical', 'en', 'clinical/models')
    .setInputCols("token")
    .setOutputCol("checked")
)

In [0]:
# Chain the three stages: document assembly -> tokenization -> spell checking.
pipeline = Pipeline(stages=[documentAssembler, tokenizer, spellModel])

# One-row DataFrame holding an empty string in a "text" column; it is only
# used as the argument to fit().
empty_ds = spark.createDataFrame([[""]]).toDF("text")

# Fit on the placeholder frame, then wrap the fitted model in a LightPipeline.
fitted_pipeline = pipeline.fit(empty_ds)
lp = LightPipeline(fitted_pipeline)

OK! At this point we have our spell-checking pipeline ready. Let's see what it can do with the following sentences, in which the errors are marked in bold:

___Witth__ the __hell__ of __phisical__ __terapy__ the patient was __imbulated__ and on posoperative, the __impatient__ tolerating a post __curgical__ soft diet._

_With __paint__ __wel__ controlled on __orall__ pain medications, she was discharged __too__ __reihabilitation__ __facilitay__._

_She is to also call the __ofice__ if she has any __ever__ greater than 101, or __leeding__ __form__ the surgical wounds._

_Abdomen is __sort__, nontender, and __nonintended__._

_Patient not showing pain or any __wealth__ problems._
            
_No __cute__ distress_

Note that some of the errors are valid English words; the right correction can only be chosen by considering the context.

In [0]:
# Sentences with spelling errors, several of which are valid English words
# that are wrong only in context.
example = [
    "Witth the hell of phisical terapy the patient was imbulated and on posoperative, the impatient tolerating a post curgical soft diet.",
    "With paint wel controlled on orall pain medications, she was discharged too reihabilitation facilitay.",
    "She is to also call the ofice if she has any ever greater than 101, or leeding form the surgical wounds.",
    "Abdomen is sort, nontender, and nonintended.",
    "Patient not showing pain or any wealth problems.",
    "No cute distress",
]

# For each sentence, print every token next to its entry in the "checked" output.
for annotation in lp.annotate(example):
    token_and_checked = list(zip(annotation['token'], annotation['checked']))
    print(token_and_checked)

End of Notebook # 6