![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/19.Financial_Contract_NER.ipynb)

## 19. Financial Contract NER with Chunk Merger

In [2]:
import os

import sparknlp
import sparknlp_jsl

# License secret for the licensed sparknlp_jsl library, read from the
# environment so it is never hard-coded in the notebook.
jsl_secret = os.getenv('SECRET')

# Library versions, kept for reference.
sparknlp_version = sparknlp.version()
jsl_version = sparknlp_jsl.version()

# Report only whether the secret is present. Printing the value itself would
# persist the credential in the saved notebook output.
print('SECRET environment variable is set' if jsl_secret
      else 'SECRET environment variable is missing')

In [3]:
import json
import os
from pyspark.ml import Pipeline,PipelineModel
from pyspark.sql import SparkSession

from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
from sparknlp.util import *
import sparknlp_jsl
import sparknlp
from pyspark.sql import functions as F

# Spark settings passed to the session: driver memory, Kryo serializer buffer
# cap and the maximum size of results collected on the driver.
params = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M",
}

# Start the session through the licensed library using the secret read earlier.
spark = sparknlp_jsl.start(jsl_secret, params=params)

# Show both library versions, one per line.
print(sparknlp.version(), sparknlp_jsl.version(), sep="\n")

3.1.2
3.1.2


## Prediction Pipeline

In [4]:
# Entry stage: reads the "text" column and emits "document" annotations.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Sentence Detector annotator: works on "document" and emits "sentence".
sentenceDetector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Tokenizer: works on "sentence" and emits "token".
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# --- Financial-contract NER branch -------------------------------------------
# Embeddings consumed by the financial NER model (case-insensitive lookup).
word_embeddings = (
    WordEmbeddingsModel.pretrained("glove_6B_300", 'xx')
    .setInputCols(["sentence", 'token'])
    .setOutputCol("word_embeddings")
    .setCaseSensitive(False)
)

# Pretrained financial-contract tagger; writes token-level tags to "ner".
financial_ner_model = (
    MedicalNerModel.pretrained('ner_financial_contract', 'en', 'clinical/models')
    .setInputCols(["sentence", "token", "word_embeddings"])
    .setOutputCol("ner")
)

# Converts the "ner" tags into chunk annotations in "ner_chunk_fin".
ner_converter_1 = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk_fin")
)

# --- General-purpose (onto_100) NER branch -----------------------------------
# Separate embeddings stage for the second tagger (case-insensitive lookup).
onto_embeddings = (
    WordEmbeddingsModel.pretrained("glove_100d", 'en')
    .setInputCols(["sentence", 'token'])
    .setOutputCol("onto_embeddings")
    .setCaseSensitive(False)
)

# Pretrained onto_100 tagger; writes token-level tags to "ner_onto".
ner_onto = (
    NerDLModel.pretrained(name='onto_100', lang='en')
    .setInputCols(["sentence", "token", "onto_embeddings"])
    .setOutputCol("ner_onto")
)

# Converts the "ner_onto" tags into chunk annotations in "ner_chunk_onto".
ner_converter_2 = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner_onto"])
    .setOutputCol("ner_chunk_onto")
)


# Combines the chunk columns of both NER branches into a single "ner_chunk"
# column, which the later cells read.
chunk_merger = (
    ChunkMergeApproach()
    .setInputCols('ner_chunk_onto', "ner_chunk_fin")
    .setOutputCol('ner_chunk')
)

# Assemble every stage in dependency order: text preparation, both embedding
# stages, both taggers, both converters and finally the merger.
text_pipeline = Pipeline(stages=[
    documentAssembler, sentenceDetector, tokenizer,
    word_embeddings, onto_embeddings,
    financial_ner_model, ner_onto,
    ner_converter_1, ner_converter_2,
    chunk_merger,
])

# One-row placeholder frame (a single empty string in column "text") used only
# to turn the Pipeline into a fitted PipelineModel.
# NOTE(review): presumably no stage needs real training data - confirm.
empty_df = spark.createDataFrame([['']]).toDF("text")

model_for_text = text_pipeline.fit(empty_df)

glove_6B_300 download started this may take some time.
Approximate size to download 426.2 MB
[OK!]
ner_financial_contract download started this may take some time.
Approximate size to download 14.2 MB
[OK!]
glove_100d download started this may take some time.
Approximate size to download 145.3 MB
[OK!]
onto_100 download started this may take some time.
Approximate size to download 13.5 MB
[OK!]


In [5]:
# Show the storage reference recorded on the onto_100 NER model; presumably it
# names the embeddings the model expects, so it should match the
# "glove_100d" embeddings stage that feeds it in the pipeline.
ner_onto.getStorageRef()

'glove_100d'

In [6]:
# Same check for the financial NER model; presumably this should match the
# "glove_6B_300" embeddings stage that feeds it in the pipeline.
financial_ner_model.getStorageRef()

'glove_6B_300'

In [7]:
# List the tag labels reported by the financial NER model.
financial_ner_model.getClasses()

['O', 'I-ORG', 'I-MISC', 'I-PER', 'I-LOC']

In [8]:
text = '''6 AFFIRMATIVE COVENANTS                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
Borrower has good title to the Collateral , free from liens  on 29 November 2018 in Michogan.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
6 . 6 FURTHER ASSURANCES .                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
During the additional time , the failure to cure the default is not an Event of Default ( but no Credit Extensions will be made during the cure period );                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
( d ) Apply to the Obligations any ( i ) balances and deposits of Borrower it holds , or ( ii ) any amount held by Bank owing to or for the credit or the account of Borrower ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
Bank ' s appointment as Borrower ' s attorney in fact , and all of Bank of Michigan ' s rights and powers , coupled                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
If Bank complies with reasonable banking practices it is not liable for ( a ) the safekeeping of the Collateral ; ( b ) any loss or damage to the Collateral ; ( c ) any diminution in the value of the Collateral ; or ( d ) any act or default of any carrier , warehouseman , bailee , or other person .                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
If there is a default in any agreement between Borrower and a third party that gives the third party the right to accelerate any Indebtedness exceeding $ 100,000 or that could cause a Material Adverse Change ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
13 CONTRACT CLAIMS , TORT CLAIMS , BREACH OF DUTY CLAIMS , AND ALL OTHER COMMON LAW OR STATUTORY CLAIMS .                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
Borrower waives demand, notice of default or dishonor, notice of payment and nonpayment, notice of any default, nonpayment at maturity, release, compromise, settlement, extension, or renewal of accounts, documents, instruments, chattel paper, and guarantees held by Bank on , which Borrower is liable .'''

In [9]:
# Wrap the fitted pipeline in a LightPipeline so it can be applied directly to
# the Python string `text` below instead of a Spark DataFrame.
light_model = LightPipeline(model_for_text)

# NOTE(review): `ann_text` is not read by any later cell visible here (a later
# cell calls fullAnnotate again and stores it in `result`) - confirm before removing.
ann_text = light_model.fullAnnotate(text)


In [10]:
# annotate() output is indexed by output column name below ('token', 'ner').
result = light_model.annotate(text)

# Pair each token with the tag predicted by the financial NER model.
[(token, tag) for token, tag in zip(result['token'], result['ner'])]

[('6', 'O'),
 ('AFFIRMATIVE', 'O'),
 ('COVENANTS', 'O'),
 ('|', 'O'),
 ('Borrower', 'I-PER'),
 ('has', 'O'),
 ('good', 'O'),
 ('title', 'O'),
 ('to', 'O'),
 ('the', 'O'),
 ('Collateral', 'O'),
 (',', 'O'),
 ('free', 'O'),
 ('from', 'O'),
 ('liens', 'O'),
 ('on', 'O'),
 ('29', 'O'),
 ('November', 'O'),
 ('2018', 'O'),
 ('in', 'O'),
 ('Michogan', 'O'),
 ('.', 'O'),
 ('|', 'O'),
 ('6', 'O'),
 ('.', 'O'),
 ('6', 'O'),
 ('FURTHER', 'O'),
 ('ASSURANCES', 'O'),
 ('.', 'O'),
 ('|', 'O'),
 ('During', 'O'),
 ('the', 'O'),
 ('additional', 'O'),
 ('time', 'O'),
 (',', 'O'),
 ('the', 'O'),
 ('failure', 'O'),
 ('to', 'O'),
 ('cure', 'O'),
 ('the', 'O'),
 ('default', 'O'),
 ('is', 'O'),
 ('not', 'O'),
 ('an', 'O'),
 ('Event', 'O'),
 ('of', 'O'),
 ('Default', 'O'),
 ('(', 'O'),
 ('but', 'O'),
 ('no', 'O'),
 ('Credit', 'O'),
 ('Extensions', 'O'),
 ('will', 'O'),
 ('be', 'O'),
 ('made', 'O'),
 ('during', 'O'),
 ('the', 'O'),
 ('cure', 'O'),
 ('period', 'O'),
 (');', 'O'),
 ('|', 'O'),
 ('(', 'O'),
 ('d'

In [11]:
import pandas as pd

result = light_model.fullAnnotate(text)

# Only one text was passed in, so its annotations are the first list entry.
annotations = result[0]

# One row per token: sentence id, token text, character span and predicted tag.
token_rows = [
    (int(tok.metadata['sentence']), tok.result, tok.begin, tok.end, tag.result)
    for tok, tag in zip(annotations["token"], annotations["ner"])
]
ner_df = pd.DataFrame(token_rows, columns=['sent_id', 'token', 'start', 'end', 'ner'])

# Token-level count: every token whose tag is not 'O' is counted once.
tagged_token_count = ner_df.loc[ner_df.ner != 'O', 'ner'].count()
print('Number of Detected NERs in the given Text is :', tagged_token_count)


ner_df

Number of Detected NERs in the given Text is : 12


Unnamed: 0,sent_id,token,start,end,ner
0,0,6,0,0,O
1,0,AFFIRMATIVE,2,12,O
2,0,COVENANTS,14,22,O
3,0,|,1749,1749,O
4,0,Borrower,1751,1758,I-PER
...,...,...,...,...,...
305,11,which,16065,16069,O
306,11,Borrower,16071,16078,I-PER
307,11,is,16080,16081,O
308,11,liable,16083,16088,O


In [12]:
# Merged chunk annotations for the single input text (`result` comes from the
# fullAnnotate call in the previous cell).
merged_chunks = result[0]['ner_chunk']

# Chunk text and entity label, kept as two parallel lists.
chunks = [chunk.result for chunk in merged_chunks]
entities = [chunk.metadata['entity'] for chunk in merged_chunks]

df = pd.DataFrame({'ner_chunk': chunks, 'entities': entities})

# Show up to 15 random rows. Clamping n avoids the ValueError that sample()
# raises when fewer than 15 chunks were detected, and the fixed random_state
# makes the displayed sample reproducible across runs.
df.sample(n=min(15, len(df)), random_state=42)

Unnamed: 0,ner_chunk,entities
17,13,CARDINAL
15,third,ORDINAL
16,100000,MONEY
18,Borrower,PER
5,6,CARDINAL
7,Bank,ORG
4,6,CARDINAL
10,Borrower,PER
9,Bank,ORG
1,Borrower,PER


## Highlighting NERs in the Text

In [13]:
# NerVisualizer is provided by the separate sparknlp_display package.
from sparknlp_display import NerVisualizer

visualiser = NerVisualizer()

# Render the annotations of the single input text, taking entity labels from
# the merged 'ner_chunk' column and the text from the 'document' column.
# `result` must be the fullAnnotate() output (indexed by position).
visualiser.display(result[0], label_col='ner_chunk', document_col='document')