From 3c572303035aaec3bcc0609890dfb4ab306d1ac8 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Thu, 3 Aug 2023 21:02:40 +0700
Subject: [PATCH 01/15] Add model 2023-08-03-finner_bert_subpoenas_sm_en (#493)

Co-authored-by: gadde5300 <gadde5300@gmail.com>
---
 .../2023-08-03-finner_bert_subpoenas_sm_en.md | 165 ++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md
diff --git a/docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md b/docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md
new file mode 100644
index 0000000000..58f3acd0dd
--- /dev/null
+++ b/docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md
@@ -0,0 +1,165 @@
+---
+layout: model
+title: Financial NER on Subpoenas (Small)
+author: John Snow Labs
+name: finner_bert_subpoenas_sm
+date: 2023-08-03
+tags: [en, bert, finance, subpoena, licensed, tensorflow]
+task: Named Entity Recognition
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+engine: tensorflow
+annotator: FinanceBertForTokenClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a financial NER model aimed to extract 19 entities from subpoenas. This is called a small version because it has been trained on more generic labels. The larger versions of this model will be available on models hub.
+
+## Predicted Entities
+
+`COURT`, `APPOINTMENT_DATE`, `DEADLINE_DATE`, `DOCUMENT_DATE_FROM`, `ADDRESS`, `APPOINTMENT_HOUR`, `DOCUMENT_DATE_TO`, `DOCUMENT_PERSON`, `DOCUMENT_DATE_YEAR`, `STATE`, `MATTER_VS`, `CASE`, `COUNTY`, `DOCUMENT_TOPIC`, `MATTER`, `SUBPOENA_DATE`, `SIGNER`, `RECEIVER`, `DOCUMENT_TYPE`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finner_bert_subpoenas_sm_en_1.0.0_3.0_1691055550911.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finner_bert_subpoenas_sm_en_1.0.0_3.0_1691055550911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from pyspark.sql import functions as F
+
+document_assembler = nlp.DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")\
+
+sentence_detector = nlp.SentenceDetector()\
+    .setInputCols(["document"])\
+    .setOutputCol("sentence")\
+
+tokenizer = nlp.Tokenizer() \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("token")
+
+ner_model = finance.BertForTokenClassification.pretrained("finner_bert_subpoenas_sm", "en", "finance/models")\
+    .setInputCols(["sentence", "token"])\
+    .setOutputCol("ner")\
+    .setCaseSensitive(True)\
+    .setMaxSentenceLength(512)
+
+ner_converter = nlp.NerConverter()\
+    .setInputCols(["sentence", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+pipeline =  nlp.Pipeline(stages=[
+    document_assembler,
+    sentence_detector,
+    tokenizer,
+    ner_model,
+    ner_converter
+])
+
+
+empty_data = spark.createDataFrame([[""]]).toDF("text")
+
+model = pipeline.fit(empty_data)
+
+text = """In addition , in an earlier motion for summary disposition in which all Respondents joined , and which this Court denied in its Order of April30 , 2013 , Respondent Deloitte Touche Tohmatsu Certified Public Accountants Ltd ."""
+data = spark.createDataFrame([[text]]).toDF("text")
+
+result = model.transform(data)
+
+result.select(F.explode(F.arrays_zip('ner_chunk.result', 'ner_chunk.metadata')).alias("cols")) \
+          .select(F.expr("cols['0']").alias("chunk"),
+                       F.expr("cols['1']['entity']").alias("label")).show(50, truncate = False)
+```
+
+</div>
+
+## Results
+
+```bash
++------------------------+---------------+
+|chunk                   |label          |
++------------------------+---------------+
+|summary disposition     |DOCUMENT_TYPE  |
+|Deloitte Touche Tohmatsu|DOCUMENT_PERSON|
++------------------------+---------------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finner_bert_subpoenas_sm|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[ner]|
+|Language:|en|
+|Size:|401.1 MB|
+|Case sensitive:|true|
+|Max sentence length:|128|
+
+## References
+
+In House annotated dataset
+
+## Benchmarking
+
+```bash
+label                      precision    recall  f1-score   support
+             B-COURT       1.00      0.60      0.75        30
+  I-APPOINTMENT_DATE       0.57      0.65      0.60        20
+             I-COURT       0.93      0.89      0.91       166
+  B-APPOINTMENT_DATE       0.67      0.44      0.53         9
+     I-DEADLINE_DATE       0.83      0.26      0.40        19
+B-DOCUMENT_DATE_FROM       0.80      1.00      0.89        16
+           I-ADDRESS       0.87      0.94      0.90      1046
+  B-APPOINTMENT_HOUR       0.43      0.92      0.59        13
+  B-DOCUMENT_DATE_TO       0.88      1.00      0.93         7
+  I-APPOINTMENT_HOUR       1.00      0.15      0.26        20
+   B-DOCUMENT_PERSON       0.79      0.84      0.82      2919
+B-DOCUMENT_DATE_YEAR       0.00      0.00      0.00         5
+             B-STATE       0.59      0.79      0.68        24
+         I-MATTER_VS       0.65      0.79      0.71       150
+              I-CASE       0.00      0.00      0.00        11
+            I-COUNTY       0.00      0.00      0.00         0
+    B-DOCUMENT_TOPIC       0.64      0.77      0.70       208
+            B-COUNTY       0.00      0.00      0.00         0
+            B-MATTER       0.85      0.86      0.86       328
+I-DOCUMENT_DATE_FROM       0.87      1.00      0.93        48
+     I-SUBPOENA_DATE       0.56      0.28      0.38        53
+            I-SIGNER       0.56      0.46      0.50        59
+  I-DOCUMENT_DATE_TO       0.83      1.00      0.91        25
+          I-RECEIVER       0.71      0.52      0.60        98
+            B-SIGNER       0.76      0.49      0.59        39
+    I-DOCUMENT_TOPIC       0.83      0.80      0.81       725
+             I-STATE       0.67      0.29      0.40        14
+         B-MATTER_VS       0.78      0.82      0.80       136
+     I-DOCUMENT_TYPE       0.83      0.87      0.85       621
+     B-DEADLINE_DATE       0.00      0.00      0.00         6
+            I-MATTER       0.88      0.82      0.85       479
+     B-DOCUMENT_TYPE       0.87      0.90      0.88      1714
+           B-ADDRESS       0.81      0.83      0.82       101
+     B-SUBPOENA_DATE       0.42      0.28      0.33        18
+              B-CASE       0.91      0.97      0.94       312
+   I-DOCUMENT_PERSON       0.80      0.83      0.81      3672
+          B-RECEIVER       0.76      0.63      0.69        46
+           micro-avg       0.82      0.84      0.83     13157
+           macro-avg       0.66      0.61      0.61     13157
+        weighted-avg       0.82      0.84      0.83     13157
+```
\ No newline at end of file

From 1a7486b6b5ad42dd90403b414318f05a680c606b Mon Sep 17 00:00:00 2001
From: GADDE SAI SHAILESH <gadde5300@gmail.com>
Date: Mon, 7 Aug 2023 23:11:12 +0530
Subject: [PATCH 02/15] Delete subpoenas ner finance

---
 .../2023-08-03-finner_bert_subpoenas_sm_en.md | 165 ------------------
 1 file changed, 165 deletions(-)
 delete mode 100644 docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md

diff --git a/docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md b/docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md
deleted file mode 100644
index 58f3acd0dd..0000000000
--- a/docs/_posts/gadde5300/2023-08-03-finner_bert_subpoenas_sm_en.md
+++ /dev/null
@@ -1,165 +0,0 @@
----
-layout: model
-title: Financial NER on Subpoenas (Small)
-author: John Snow Labs
-name: finner_bert_subpoenas_sm
-date: 2023-08-03
-tags: [en, bert, finance, subpoena, licensed, tensorflow]
-task: Named Entity Recognition
-language: en
-edition: Finance NLP 1.0.0
-spark_version: 3.0
-supported: true
-engine: tensorflow
-annotator: FinanceBertForTokenClassification
-article_header:
-  type: cover
-use_language_switcher: "Python-Scala-Java"
----
-
-## Description
-
-This is a financial NER model aimed to extract 19 entities from subpoenas. This is called a small version because it has been trained on more generic labels. The larger versions of this model will be available on models hub.
-
-## Predicted Entities
-
-`COURT`, `APPOINTMENT_DATE`, `DEADLINE_DATE`, `DOCUMENT_DATE_FROM`, `ADDRESS`, `APPOINTMENT_HOUR`, `DOCUMENT_DATE_TO`, `DOCUMENT_PERSON`, `DOCUMENT_DATE_YEAR`, `STATE`, `MATTER_VS`, `CASE`, `COUNTY`, `DOCUMENT_TOPIC`, `MATTER`, `SUBPOENA_DATE`, `SIGNER`, `RECEIVER`, `DOCUMENT_TYPE`
-
-{:.btn-box}
-<button class="button button-orange" disabled>Live Demo</button>
-<button class="button button-orange" disabled>Open in Colab</button>
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finner_bert_subpoenas_sm_en_1.0.0_3.0_1691055550911.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
-[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finner_bert_subpoenas_sm_en_1.0.0_3.0_1691055550911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
-
-## How to use
-
-
-
-<div class="tabs-box" markdown="1">
-{% include programmingLanguageSelectScalaPythonNLU.html %}
-```python
-from pyspark.sql import functions as F
-
-document_assembler = nlp.DocumentAssembler()\
-    .setInputCol("text")\
-    .setOutputCol("document")\
-
-sentence_detector = nlp.SentenceDetector()\
-    .setInputCols(["document"])\
-    .setOutputCol("sentence")\
-
-tokenizer = nlp.Tokenizer() \
-    .setInputCols(["sentence"]) \
-    .setOutputCol("token")
-
-ner_model = finance.BertForTokenClassification.pretrained("finner_bert_subpoenas_sm", "en", "finance/models")\
-    .setInputCols(["sentence", "token"])\
-    .setOutputCol("ner")\
-    .setCaseSensitive(True)\
-    .setMaxSentenceLength(512)
-
-ner_converter = nlp.NerConverter()\
-    .setInputCols(["sentence", "token", "ner"])\
-    .setOutputCol("ner_chunk")
-
-pipeline =  nlp.Pipeline(stages=[
-    document_assembler,
-    sentence_detector,
-    tokenizer,
-    ner_model,
-    ner_converter
-])
-
-
-empty_data = spark.createDataFrame([[""]]).toDF("text")
-
-model = pipeline.fit(empty_data)
-
-text = """In addition , in an earlier motion for summary disposition in which all Respondents joined , and which this Court denied in its Order of April30 , 2013 , Respondent Deloitte Touche Tohmatsu Certified Public Accountants Ltd ."""
-data = spark.createDataFrame([[text]]).toDF("text")
-
-result = model.transform(data)
-
-result.select(F.explode(F.arrays_zip('ner_chunk.result', 'ner_chunk.metadata')).alias("cols")) \
-          .select(F.expr("cols['0']").alias("chunk"),
-                       F.expr("cols['1']['entity']").alias("label")).show(50, truncate = False)
-```
-
-</div>
-
-## Results
-
-```bash
-+------------------------+---------------+
-|chunk                   |label          |
-+------------------------+---------------+
-|summary disposition     |DOCUMENT_TYPE  |
-|Deloitte Touche Tohmatsu|DOCUMENT_PERSON|
-+------------------------+---------------+
-```
-
-{:.model-param}
-## Model Information
-
-{:.table-model}
-|---|---|
-|Model Name:|finner_bert_subpoenas_sm|
-|Compatibility:|Finance NLP 1.0.0+|
-|License:|Licensed|
-|Edition:|Official|
-|Input Labels:|[document, token]|
-|Output Labels:|[ner]|
-|Language:|en|
-|Size:|401.1 MB|
-|Case sensitive:|true|
-|Max sentence length:|128|
-
-## References
-
-In House annotated dataset
-
-## Benchmarking
-
-```bash
-label                      precision    recall  f1-score   support
-             B-COURT       1.00      0.60      0.75        30
-  I-APPOINTMENT_DATE       0.57      0.65      0.60        20
-             I-COURT       0.93      0.89      0.91       166
-  B-APPOINTMENT_DATE       0.67      0.44      0.53         9
-     I-DEADLINE_DATE       0.83      0.26      0.40        19
-B-DOCUMENT_DATE_FROM       0.80      1.00      0.89        16
-           I-ADDRESS       0.87      0.94      0.90      1046
-  B-APPOINTMENT_HOUR       0.43      0.92      0.59        13
-  B-DOCUMENT_DATE_TO       0.88      1.00      0.93         7
-  I-APPOINTMENT_HOUR       1.00      0.15      0.26        20
-   B-DOCUMENT_PERSON       0.79      0.84      0.82      2919
-B-DOCUMENT_DATE_YEAR       0.00      0.00      0.00         5
-             B-STATE       0.59      0.79      0.68        24
-         I-MATTER_VS       0.65      0.79      0.71       150
-              I-CASE       0.00      0.00      0.00        11
-            I-COUNTY       0.00      0.00      0.00         0
-    B-DOCUMENT_TOPIC       0.64      0.77      0.70       208
-            B-COUNTY       0.00      0.00      0.00         0
-            B-MATTER       0.85      0.86      0.86       328
-I-DOCUMENT_DATE_FROM       0.87      1.00      0.93        48
-     I-SUBPOENA_DATE       0.56      0.28      0.38        53
-            I-SIGNER       0.56      0.46      0.50        59
-  I-DOCUMENT_DATE_TO       0.83      1.00      0.91        25
-          I-RECEIVER       0.71      0.52      0.60        98
-            B-SIGNER       0.76      0.49      0.59        39
-    I-DOCUMENT_TOPIC       0.83      0.80      0.81       725
-             I-STATE       0.67      0.29      0.40        14
-         B-MATTER_VS       0.78      0.82      0.80       136
-     I-DOCUMENT_TYPE       0.83      0.87      0.85       621
-     B-DEADLINE_DATE       0.00      0.00      0.00         6
-            I-MATTER       0.88      0.82      0.85       479
-     B-DOCUMENT_TYPE       0.87      0.90      0.88      1714
-           B-ADDRESS       0.81      0.83      0.82       101
-     B-SUBPOENA_DATE       0.42      0.28      0.33        18
-              B-CASE       0.91      0.97      0.94       312
-   I-DOCUMENT_PERSON       0.80      0.83      0.81      3672
-          B-RECEIVER       0.76      0.63      0.69        46
-           micro-avg       0.82      0.84      0.83     13157
-           macro-avg       0.66      0.61      0.61     13157
-        weighted-avg       0.82      0.84      0.83     13157
-```
\ No newline at end of file

From 1170489ec22e5d0e2807763ae77b70f511ea5f55 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Wed, 30 Aug 2023 20:51:32 +0700
Subject: [PATCH 03/15] Add model 2023-08-30-finpipe_deid_en (#566)

Co-authored-by: Meryem1425 <vildansarikaya25@gmail.com>
---
 .../Meryem1425/2023-08-30-finpipe_deid_en.md  | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 docs/_posts/Meryem1425/2023-08-30-finpipe_deid_en.md

diff --git a/docs/_posts/Meryem1425/2023-08-30-finpipe_deid_en.md b/docs/_posts/Meryem1425/2023-08-30-finpipe_deid_en.md
new file mode 100644
index 0000000000..5be19f30ec
--- /dev/null
+++ b/docs/_posts/Meryem1425/2023-08-30-finpipe_deid_en.md
@@ -0,0 +1,156 @@
+---
+layout: model
+title: Financial Deidentification Pipeline
+author: John Snow Labs
+name: finpipe_deid
+date: 2023-08-30
+tags: [licensed, en, finance, deid, deidentification, anonymization]
+task: Pipeline Finance
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a Pretrained Pipeline for deidentifying legal and financial documents so that they comply with data privacy regulations such as GDPR and CCPA. Because the models used in this pipeline are statistical, make sure you use it within a human-in-the-loop process if you need to guarantee 100% accuracy.
+
+You can carry out both masking and obfuscation with this pipeline, on the following entities:
+`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.0_1693402722551.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.0_1693402722551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
+
+result = deid_pipeline.annotate("""CARGILL, INCORPORATED
+
+By:     Pirkko Suominen
+
+
+
+Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
+
+BIOAMBER, SAS
+
+By:     Jean-François Huc
+
+
+
+Name: Jean-François Huc  Title: President Date:   October 15, 2011
+
+email : jeanfran@gmail.com
+phone : 18087339090 """)
+
+```
+
+</div>
+
+## Results
+
+```bash
+Masked with entity labels
+------------------------------
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
+
+email : <EMAIL>
+phone : <PHONE>
+
+Masked with chars
+------------------------------
+[*****], [**********]
+By:     [*************]
+Name: [*******************]: [**********************************]  Center,  Date:   [********]
+[******], [*]
+By:     [***************]
+Name: [**********************]: [*******]Date:   [**************]
+
+email : [****************]
+phone : [********]
+
+Masked with fixed length chars
+------------------------------
+****, ****
+By:     ****
+Name: ****: ****,  Date:   ****
+****, ****
+By:     ****
+Name: ****: ****Date:   ****
+
+email : ****
+phone : ****
+
+Obfuscated
+------------------------------
+MGT Trust Company, LLC., Clarus llc.
+By:     Benjamin Dean
+Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
+Clarus llc., SESA CO.
+By:     JAMES TURNER
+Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
+
+email : Tyrus@google.com
+phone : 78 834 854
+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finpipe_deid|
+|Type:|pipeline|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Language:|en|
+|Size:|475.2 MB|
+
+## Included Models
+
+- DocumentAssembler
+- SentenceDetector
+- TokenizerModel
+- BertEmbeddings
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ChunkMergeModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
\ No newline at end of file

From 3f22dc2c8c379f2aaefa7205b378dcfcf5d88543 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Wed, 30 Aug 2023 21:01:06 +0700
Subject: [PATCH 04/15] Add model 2023-08-30-finpipe_deid_en (#570)

Co-authored-by: SKocer <samedkocer22@gmail.com>
---
 .../SKocer/2023-08-30-finpipe_deid_en.md      | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md

diff --git a/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md b/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
new file mode 100644
index 0000000000..e4653a749d
--- /dev/null
+++ b/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
@@ -0,0 +1,156 @@
+---
+layout: model
+title: Financial Deidentification Pipeline
+author: John Snow Labs
+name: finpipe_deid
+date: 2023-08-30
+tags: [licensed, en, finance, deid, deidentification, anonymization]
+task: Pipeline Finance
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.2
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a Pretrained Pipeline aimed to deidentify legal and financial documents to be compliant with data privacy regulations as GDPR and CCPA. Since the models used in this pipeline are statistical, make sure you use this model in a human-in-the-loop process to guarantee a 100% accuracy.
+
+You can carry out both masking and obfuscation with this pipeline, on the following entities:
+`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693403994104.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693403994104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
+
+result = deid_pipeline.annotate("""CARGILL, INCORPORATED
+
+By:     Pirkko Suominen
+
+
+
+Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
+
+BIOAMBER, SAS
+
+By:     Jean-François Huc
+
+
+
+Name: Jean-François Huc  Title: President Date:   October 15, 2011
+
+email : jeanfran@gmail.com
+phone : 18087339090 """)
+
+```
+
+</div>
+
+## Results
+
+```bash
+Masked with entity labels
+------------------------------
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
+
+email : <EMAIL>
+phone : <PHONE>
+
+Masked with chars
+------------------------------
+[*****], [**********]
+By:     [*************]
+Name: [*******************]: [**********************************]  Center,  Date:   [********]
+[******], [*]
+By:     [***************]
+Name: [**********************]: [*******]Date:   [**************]
+
+email : [****************]
+phone : [********]
+
+Masked with fixed length chars
+------------------------------
+****, ****
+By:     ****
+Name: ****: ****,  Date:   ****
+****, ****
+By:     ****
+Name: ****: ****Date:   ****
+
+email : ****
+phone : ****
+
+Obfuscated
+------------------------------
+MGT Trust Company, LLC., Clarus llc.
+By:     Benjamin Dean
+Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
+Clarus llc., SESA CO.
+By:     JAMES TURNER
+Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
+
+email : Tyrus@google.com
+phone : 78 834 854
+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finpipe_deid|
+|Type:|pipeline|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Language:|en|
+|Size:|474.8 MB|
+
+## Included Models
+
+- DocumentAssembler
+- SentenceDetector
+- TokenizerModel
+- BertEmbeddings
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ChunkMergeModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
\ No newline at end of file

From 616ba4ad2af8c610bb959b8e6d35fb80112f9643 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Wed, 30 Aug 2023 21:25:39 +0700
Subject: [PATCH 05/15] Add model 2023-08-30-finpipe_deid_en (#571)

Co-authored-by: SKocer <samedkocer22@gmail.com>
---
 docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md b/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
index e4653a749d..c6e7709dfe 100644
--- a/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
+++ b/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
@@ -8,7 +8,7 @@ tags: [licensed, en, finance, deid, deidentification, anonymization]
 task: Pipeline Finance
 language: en
 edition: Finance NLP 1.0.0
-spark_version: 3.2
+spark_version: 3.4
 supported: true
 annotator: PipelineModel
 article_header:
@@ -26,8 +26,8 @@ You can carry out both masking and obfuscation with this pipeline, on the follow
 {:.btn-box}
 <button class="button button-orange" disabled>Live Demo</button>
 <button class="button button-orange" disabled>Open in Colab</button>
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693403994104.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
-[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693403994104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693405407355.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693405407355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
 
 ## How to use
 
@@ -128,7 +128,7 @@ phone : 78 834 854
 |License:|Licensed|
 |Edition:|Official|
 |Language:|en|
-|Size:|474.8 MB|
+|Size:|475.2 MB|
 
 ## Included Models
 

From b2fe6348f8baf88f297ea3708e5f47a6fe32161a Mon Sep 17 00:00:00 2001
From: Merve Ertas Uslu <67653613+Mary-Sci@users.noreply.github.com>
Date: Wed, 30 Aug 2023 16:52:45 +0200
Subject: [PATCH 06/15] Delete 2023-08-30-finpipe_deid_en.md

---
 .../SKocer/2023-08-30-finpipe_deid_en.md      | 156 ------------------
 1 file changed, 156 deletions(-)
 delete mode 100644 docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md

diff --git a/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md b/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
deleted file mode 100644
index c6e7709dfe..0000000000
--- a/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
+++ /dev/null
@@ -1,156 +0,0 @@
----
-layout: model
-title: Financial Deidentification Pipeline
-author: John Snow Labs
-name: finpipe_deid
-date: 2023-08-30
-tags: [licensed, en, finance, deid, deidentification, anonymization]
-task: Pipeline Finance
-language: en
-edition: Finance NLP 1.0.0
-spark_version: 3.4
-supported: true
-annotator: PipelineModel
-article_header:
-  type: cover
-use_language_switcher: "Python-Scala-Java"
----
-
-## Description
-
-This is a Pretrained Pipeline aimed to deidentify legal and financial documents to be compliant with data privacy regulations as GDPR and CCPA. Since the models used in this pipeline are statistical, make sure you use this model in a human-in-the-loop process to guarantee a 100% accuracy.
-
-You can carry out both masking and obfuscation with this pipeline, on the following entities:
-`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
-
-{:.btn-box}
-<button class="button button-orange" disabled>Live Demo</button>
-<button class="button button-orange" disabled>Open in Colab</button>
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693405407355.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
-[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693405407355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
-
-## How to use
-
-
-
-<div class="tabs-box" markdown="1">
-{% include programmingLanguageSelectScalaPythonNLU.html %}
-```python
-
-from sparknlp.pretrained import PretrainedPipeline
-
-deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
-
-result = deid_pipeline.annotate("""CARGILL, INCORPORATED
-
-By:     Pirkko Suominen
-
-
-
-Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
-
-BIOAMBER, SAS
-
-By:     Jean-François Huc
-
-
-
-Name: Jean-François Huc  Title: President Date:   October 15, 2011
-
-email : jeanfran@gmail.com
-phone : 18087339090 """)
-
-```
-
-</div>
-
-## Results
-
-```bash
-Masked with entity labels
-------------------------------
-<PARTY>, <PARTY>
-By:     <SIGNING_PERSON>
-Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
-<PARTY>, <PARTY>
-By:     <SIGNING_PERSON>
-Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
-
-email : <EMAIL>
-phone : <PHONE>
-
-Masked with chars
-------------------------------
-[*****], [**********]
-By:     [*************]
-Name: [*******************]: [**********************************]  Center,  Date:   [********]
-[******], [*]
-By:     [***************]
-Name: [**********************]: [*******]Date:   [**************]
-
-email : [****************]
-phone : [********]
-
-Masked with fixed length chars
-------------------------------
-****, ****
-By:     ****
-Name: ****: ****,  Date:   ****
-****, ****
-By:     ****
-Name: ****: ****Date:   ****
-
-email : ****
-phone : ****
-
-Obfuscated
-------------------------------
-MGT Trust Company, LLC., Clarus llc.
-By:     Benjamin Dean
-Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
-Clarus llc., SESA CO.
-By:     JAMES TURNER
-Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
-
-email : Tyrus@google.com
-phone : 78 834 854
-
-```
-
-{:.model-param}
-## Model Information
-
-{:.table-model}
-|---|---|
-|Model Name:|finpipe_deid|
-|Type:|pipeline|
-|Compatibility:|Finance NLP 1.0.0+|
-|License:|Licensed|
-|Edition:|Official|
-|Language:|en|
-|Size:|475.2 MB|
-
-## Included Models
-
-- DocumentAssembler
-- SentenceDetector
-- TokenizerModel
-- BertEmbeddings
-- FinanceNerModel
-- NerConverterInternalModel
-- FinanceNerModel
-- NerConverterInternalModel
-- FinanceNerModel
-- NerConverterInternalModel
-- FinanceNerModel
-- NerConverterInternalModel
-- ContextualParserModel
-- ContextualParserModel
-- ContextualParserModel
-- ContextualParserModel
-- ContextualParserModel
-- ChunkMergeModel
-- DeIdentificationModel
-- DeIdentificationModel
-- DeIdentificationModel
-- DeIdentificationModel
\ No newline at end of file

From 134aeb45c0feaf5100b97e6d25002a449c7b0757 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Wed, 30 Aug 2023 21:59:29 +0700
Subject: [PATCH 07/15] Add model 2023-08-30-finpipe_deid_en (#572)

Co-authored-by: gokhanturer <mgturer@gmail.com>
---
 .../gokhanturer/2023-08-30-finpipe_deid_en.md | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 docs/_posts/gokhanturer/2023-08-30-finpipe_deid_en.md

diff --git a/docs/_posts/gokhanturer/2023-08-30-finpipe_deid_en.md b/docs/_posts/gokhanturer/2023-08-30-finpipe_deid_en.md
new file mode 100644
index 0000000000..74260483ea
--- /dev/null
+++ b/docs/_posts/gokhanturer/2023-08-30-finpipe_deid_en.md
@@ -0,0 +1,156 @@
+---
+layout: model
+title: Financial Deidentification Pipeline
+author: John Snow Labs
+name: finpipe_deid
+date: 2023-08-30
+tags: [licensed, en, finance, deid, deidentification, anonymization]
+task: Pipeline Finance
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.4
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a Pretrained Pipeline designed to deidentify legal and financial documents so that they comply with data privacy regulations such as GDPR and CCPA. Since the models used in this pipeline are statistical, make sure you use this pipeline in a human-in-the-loop process to guarantee 100% accuracy.
+
+You can carry out both masking and obfuscation with this pipeline, on the following entities:
+`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693407345452.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693407345452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
+
+result = deid_pipeline.annotate("""CARGILL, INCORPORATED
+
+By:     Pirkko Suominen
+
+
+
+Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
+
+BIOAMBER, SAS
+
+By:     Jean-François Huc
+
+
+
+Name: Jean-François Huc  Title: President Date:   October 15, 2011
+
+email : jeanfran@gmail.com
+phone : 18087339090 """)
+
+```
+
+</div>
+
+## Results
+
+```bash
+Masked with entity labels
+------------------------------
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
+
+email : <EMAIL>
+phone : <PHONE>
+
+Masked with chars
+------------------------------
+[*****], [**********]
+By:     [*************]
+Name: [*******************]: [**********************************]  Center,  Date:   [********]
+[******], [*]
+By:     [***************]
+Name: [**********************]: [*******]Date:   [**************]
+
+email : [****************]
+phone : [********]
+
+Masked with fixed length chars
+------------------------------
+****, ****
+By:     ****
+Name: ****: ****,  Date:   ****
+****, ****
+By:     ****
+Name: ****: ****Date:   ****
+
+email : ****
+phone : ****
+
+Obfuscated
+------------------------------
+MGT Trust Company, LLC., Clarus llc.
+By:     Benjamin Dean
+Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
+Clarus llc., SESA CO.
+By:     JAMES TURNER
+Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
+
+email : Tyrus@google.com
+phone : 78 834 854
+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finpipe_deid|
+|Type:|pipeline|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Language:|en|
+|Size:|475.2 MB|
+
+## Included Models
+
+- DocumentAssembler
+- SentenceDetector
+- TokenizerModel
+- BertEmbeddings
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ChunkMergeModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
\ No newline at end of file

From 8f8cb724872a0110ef7ab19ee764931e7dcd2358 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Wed, 30 Aug 2023 22:03:44 +0700
Subject: [PATCH 08/15] Add model 2023-08-30-finpipe_deid_en (#574)

Co-authored-by: SKocer <samedkocer22@gmail.com>
---
 .../SKocer/2023-08-30-finpipe_deid_en.md      | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md

diff --git a/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md b/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
new file mode 100644
index 0000000000..aec4f50987
--- /dev/null
+++ b/docs/_posts/SKocer/2023-08-30-finpipe_deid_en.md
@@ -0,0 +1,156 @@
+---
+layout: model
+title: Financial Deidentification Pipeline
+author: John Snow Labs
+name: finpipe_deid
+date: 2023-08-30
+tags: [licensed, en, finance, deid, deidentification, anonymization]
+task: Pipeline Finance
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.2
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a Pretrained Pipeline designed to deidentify legal and financial documents so that they comply with data privacy regulations such as GDPR and CCPA. Since the models used in this pipeline are statistical, make sure you use this pipeline in a human-in-the-loop process to guarantee 100% accuracy.
+
+You can carry out both masking and obfuscation with this pipeline, on the following entities:
+`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693407757918.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693407757918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
+
+result = deid_pipeline.annotate("""CARGILL, INCORPORATED
+
+By:     Pirkko Suominen
+
+
+
+Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
+
+BIOAMBER, SAS
+
+By:     Jean-François Huc
+
+
+
+Name: Jean-François Huc  Title: President Date:   October 15, 2011
+
+email : jeanfran@gmail.com
+phone : 18087339090 """)
+
+```
+
+</div>
+
+## Results
+
+```bash
+Masked with entity labels
+------------------------------
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
+
+email : <EMAIL>
+phone : <PHONE>
+
+Masked with chars
+------------------------------
+[*****], [**********]
+By:     [*************]
+Name: [*******************]: [**********************************]  Center,  Date:   [********]
+[******], [*]
+By:     [***************]
+Name: [**********************]: [*******]Date:   [**************]
+
+email : [****************]
+phone : [********]
+
+Masked with fixed length chars
+------------------------------
+****, ****
+By:     ****
+Name: ****: ****,  Date:   ****
+****, ****
+By:     ****
+Name: ****: ****Date:   ****
+
+email : ****
+phone : ****
+
+Obfuscated
+------------------------------
+MGT Trust Company, LLC., Clarus llc.
+By:     Benjamin Dean
+Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
+Clarus llc., SESA CO.
+By:     JAMES TURNER
+Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
+
+email : Tyrus@google.com
+phone : 78 834 854
+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finpipe_deid|
+|Type:|pipeline|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Language:|en|
+|Size:|474.8 MB|
+
+## Included Models
+
+- DocumentAssembler
+- SentenceDetector
+- TokenizerModel
+- BertEmbeddings
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ChunkMergeModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
\ No newline at end of file

From d335c79f5457ae66f43502a86e24f6dc03323e55 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Sat, 2 Sep 2023 03:22:25 +0700
Subject: [PATCH 09/15] Add model 2023-09-01-finpipe_deid_en (#586)

Co-authored-by: Meryem1425 <vildansarikaya25@gmail.com>
---
 .../Meryem1425/2023-09-01-finpipe_deid_en.md  | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 docs/_posts/Meryem1425/2023-09-01-finpipe_deid_en.md

diff --git a/docs/_posts/Meryem1425/2023-09-01-finpipe_deid_en.md b/docs/_posts/Meryem1425/2023-09-01-finpipe_deid_en.md
new file mode 100644
index 0000000000..d0db2e9098
--- /dev/null
+++ b/docs/_posts/Meryem1425/2023-09-01-finpipe_deid_en.md
@@ -0,0 +1,156 @@
+---
+layout: model
+title: Financial Deidentification Pipeline
+author: John Snow Labs
+name: finpipe_deid
+date: 2023-09-01
+tags: [licensed, en, finance, deid, deidentification, anonymization]
+task: Pipeline Finance
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a Pretrained Pipeline designed to deidentify legal and financial documents so that they comply with data privacy regulations such as GDPR and CCPA. Since the models used in this pipeline are statistical, make sure you use this pipeline in a human-in-the-loop process to guarantee 100% accuracy.
+
+You can carry out both masking and obfuscation with this pipeline, on the following entities:
+`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.0_1693599372226.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.0_1693599372226.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
+
+result = deid_pipeline.annotate("""CARGILL, INCORPORATED
+
+By:     Pirkko Suominen
+
+
+
+Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
+
+BIOAMBER, SAS
+
+By:     Jean-François Huc
+
+
+
+Name: Jean-François Huc  Title: President Date:   October 15, 2011
+
+email : jeanfran@gmail.com
+phone : 18087339090 """)
+
+```
+
+</div>
+
+## Results
+
+```bash
+Masked with entity labels
+------------------------------
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
+
+email : <EMAIL>
+phone : <PHONE>
+
+Masked with chars
+------------------------------
+[*****], [**********]
+By:     [*************]
+Name: [*******************]: [**********************************]  Center,  Date:   [********]
+[******], [*]
+By:     [***************]
+Name: [**********************]: [*******]Date:   [**************]
+
+email : [****************]
+phone : [********]
+
+Masked with fixed length chars
+------------------------------
+****, ****
+By:     ****
+Name: ****: ****,  Date:   ****
+****, ****
+By:     ****
+Name: ****: ****Date:   ****
+
+email : ****
+phone : ****
+
+Obfuscated
+------------------------------
+MGT Trust Company, LLC., Clarus llc.
+By:     Benjamin Dean
+Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
+Clarus llc., SESA CO.
+By:     JAMES TURNER
+Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
+
+email : Tyrus@google.com
+phone : 78 834 854
+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finpipe_deid|
+|Type:|pipeline|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Language:|en|
+|Size:|471.8 MB|
+
+## Included Models
+
+- DocumentAssembler
+- SentenceDetector
+- TokenizerModel
+- BertEmbeddings
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ChunkMergeModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
\ No newline at end of file

From f034ee846b7a6ccc84178633c890f6d626192c1b Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Sat, 2 Sep 2023 04:03:10 +0700
Subject: [PATCH 10/15] Add model 2023-09-01-finpipe_deid_en (#589)

Co-authored-by: SKocer <samedkocer22@gmail.com>
---
 .../SKocer/2023-09-01-finpipe_deid_en.md      | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 docs/_posts/SKocer/2023-09-01-finpipe_deid_en.md

diff --git a/docs/_posts/SKocer/2023-09-01-finpipe_deid_en.md b/docs/_posts/SKocer/2023-09-01-finpipe_deid_en.md
new file mode 100644
index 0000000000..f81826229b
--- /dev/null
+++ b/docs/_posts/SKocer/2023-09-01-finpipe_deid_en.md
@@ -0,0 +1,156 @@
+---
+layout: model
+title: Financial Deidentification Pipeline
+author: John Snow Labs
+name: finpipe_deid
+date: 2023-09-01
+tags: [licensed, en, finance, deid, deidentification, anonymization]
+task: Pipeline Finance
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.2
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a Pretrained Pipeline designed to deidentify legal and financial documents so that they comply with data privacy regulations such as GDPR and CCPA. Since the models used in this pipeline are statistical, make sure you use this pipeline in a human-in-the-loop process to guarantee 100% accuracy.
+
+You can carry out both masking and obfuscation with this pipeline, on the following entities:
+`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693602013381.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.2_1693602013381.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
+
+result = deid_pipeline.annotate("""CARGILL, INCORPORATED
+
+By:     Pirkko Suominen
+
+
+
+Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
+
+BIOAMBER, SAS
+
+By:     Jean-François Huc
+
+
+
+Name: Jean-François Huc  Title: President Date:   October 15, 2011
+
+email : jeanfran@gmail.com
+phone : 18087339090 """)
+
+```
+
+</div>
+
+## Results
+
+```bash
+Masked with entity labels
+------------------------------
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
+
+email : <EMAIL>
+phone : <PHONE>
+
+Masked with chars
+------------------------------
+[*****], [**********]
+By:     [*************]
+Name: [*******************]: [**********************************]  Center,  Date:   [********]
+[******], [*]
+By:     [***************]
+Name: [**********************]: [*******]Date:   [**************]
+
+email : [****************]
+phone : [********]
+
+Masked with fixed length chars
+------------------------------
+****, ****
+By:     ****
+Name: ****: ****,  Date:   ****
+****, ****
+By:     ****
+Name: ****: ****Date:   ****
+
+email : ****
+phone : ****
+
+Obfuscated
+------------------------------
+MGT Trust Company, LLC., Clarus llc.
+By:     Benjamin Dean
+Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
+Clarus llc., SESA CO.
+By:     JAMES TURNER
+Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
+
+email : Tyrus@google.com
+phone : 78 834 854
+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finpipe_deid|
+|Type:|pipeline|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Language:|en|
+|Size:|472.3 MB|
+
+## Included Models
+
+- DocumentAssembler
+- SentenceDetector
+- TokenizerModel
+- BertEmbeddings
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ChunkMergeModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
\ No newline at end of file

From 28a4676fc9249bf8cc61dea36f94ebe3e8451004 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Sat, 2 Sep 2023 04:10:34 +0700
Subject: [PATCH 11/15] Add model 2023-09-01-finpipe_deid_en (#593)

Co-authored-by: gokhanturer <mgturer@gmail.com>
---
 .../gokhanturer/2023-09-01-finpipe_deid_en.md | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 docs/_posts/gokhanturer/2023-09-01-finpipe_deid_en.md

diff --git a/docs/_posts/gokhanturer/2023-09-01-finpipe_deid_en.md b/docs/_posts/gokhanturer/2023-09-01-finpipe_deid_en.md
new file mode 100644
index 0000000000..6d2e41062d
--- /dev/null
+++ b/docs/_posts/gokhanturer/2023-09-01-finpipe_deid_en.md
@@ -0,0 +1,156 @@
+---
+layout: model
+title: Financial Deidentification Pipeline
+author: John Snow Labs
+name: finpipe_deid
+date: 2023-09-01
+tags: [licensed, en, finance, deid, deidentification, anonymization]
+task: Pipeline Finance
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.4
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This is a Pretrained Pipeline designed to deidentify legal and financial documents so that they comply with data privacy regulations such as GDPR and CCPA. Since the models used in this pipeline are statistical, make sure you use this pipeline in a human-in-the-loop process to guarantee 100% accuracy.
+
+You can carry out both masking and obfuscation with this pipeline, on the following entities:
+`ALIAS`, `EMAIL`, `PHONE`, `PROFESSION`, `ORG`, `DATE`, `PERSON`, `ADDRESS`, `STREET`, `CITY`, `STATE`, `ZIP`, `COUNTRY`, `TITLE_CLASS`, `TICKER`, `STOCK_EXCHANGE`, `CFN`, `IRS`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693602582270.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finpipe_deid_en_1.0.0_3.4_1693602582270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+deid_pipeline = PretrainedPipeline("finpipe_deid", "en", "finance/models")
+
+result = deid_pipeline.annotate("""CARGILL, INCORPORATED
+
+By:     Pirkko Suominen
+
+
+
+Name: Pirkko Suominen Title: Director, Bio Technology Development  Center,  Date:   10/19/2011
+
+BIOAMBER, SAS
+
+By:     Jean-François Huc
+
+
+
+Name: Jean-François Huc  Title: President Date:   October 15, 2011
+
+email : jeanfran@gmail.com
+phone : 18087339090 """)
+
+```
+
+</div>
+
+## Results
+
+```bash
+Masked with entity labels
+------------------------------
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>,  Date:   <EFFDATE>
+<PARTY>, <PARTY>
+By:     <SIGNING_PERSON>
+Name: <PARTY>: <SIGNING_TITLE>Date:   <EFFDATE>
+
+email : <EMAIL>
+phone : <PHONE>
+
+Masked with chars
+------------------------------
+[*****], [**********]
+By:     [*************]
+Name: [*******************]: [**********************************]  Center,  Date:   [********]
+[******], [*]
+By:     [***************]
+Name: [**********************]: [*******]Date:   [**************]
+
+email : [****************]
+phone : [********]
+
+Masked with fixed length chars
+------------------------------
+****, ****
+By:     ****
+Name: ****: ****,  Date:   ****
+****, ****
+By:     ****
+Name: ****: ****Date:   ****
+
+email : ****
+phone : ****
+
+Obfuscated
+------------------------------
+MGT Trust Company, LLC., Clarus llc.
+By:     Benjamin Dean
+Name: John Snow Labs Inc: Sales Manager,  Date:   03/08/2025
+Clarus llc., SESA CO.
+By:     JAMES TURNER
+Name: MGT Trust Company, LLC.: Business ManagerDate:   11/7/2016
+
+email : Tyrus@google.com
+phone : 78 834 854
+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finpipe_deid|
+|Type:|pipeline|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Language:|en|
+|Size:|475.2 MB|
+
+## Included Models
+
+- DocumentAssembler
+- SentenceDetector
+- TokenizerModel
+- BertEmbeddings
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- FinanceNerModel
+- NerConverterInternalModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ContextualParserModel
+- ChunkMergeModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
+- DeIdentificationModel
\ No newline at end of file

From 6b8d6fd4aa60545230ff8a926d2ece8fbbc38434 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Fri, 6 Oct 2023 22:50:20 +0700
Subject: [PATCH 12/15] 2023-10-06-finembedding_e5_base_en (#685)

* Add model 2023-10-06-finembedding_e5_base_en

* Add model 2023-10-06-finner_absa_sm_en

* Add model 2023-10-06-finassertion_absa_sm_en

---------

Co-authored-by: dcecchini <dadachini@hotmail.com>
---
 .../2023-10-06-finassertion_absa_sm_en.md     | 151 ++++++++++++++++++
 .../2023-10-06-finembedding_e5_base_en.md     |  93 +++++++++++
 .../dcecchini/2023-10-06-finner_absa_sm_en.md | 147 +++++++++++++++++
 3 files changed, 391 insertions(+)
 create mode 100644 docs/_posts/dcecchini/2023-10-06-finassertion_absa_sm_en.md
 create mode 100644 docs/_posts/dcecchini/2023-10-06-finembedding_e5_base_en.md
 create mode 100644 docs/_posts/dcecchini/2023-10-06-finner_absa_sm_en.md

diff --git a/docs/_posts/dcecchini/2023-10-06-finassertion_absa_sm_en.md b/docs/_posts/dcecchini/2023-10-06-finassertion_absa_sm_en.md
new file mode 100644
index 0000000000..241b371fbd
--- /dev/null
+++ b/docs/_posts/dcecchini/2023-10-06-finassertion_absa_sm_en.md
@@ -0,0 +1,151 @@
+---
+layout: model
+title: Financial Assertion of Sentiment (sm, Small)
+author: John Snow Labs
+name: finassertion_absa_sm
+date: 2023-10-06
+tags: [finance, assertion, en, sentiment_analysis, licensed]
+task: Assertion Status
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+annotator: AssertionDLModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This assertion model classifies financial entities into a sentiment. It is designed to be used together with the associated NER model.
+
+## Predicted Entities
+
+`POSITIVE`, `NEGATIVE`, `NEUTRAL`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finassertion_absa_sm_en_1.0.0_3.0_1696606845902.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finassertion_absa_sm_en_1.0.0_3.0_1696606845902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = (
+    nlp.DocumentAssembler().setInputCol("text").setOutputCol("document")
+)
+
+# Sentence Detector annotator, processes various sentences per line
+sentenceDetector = (
+    nlp.SentenceDetector()
+    .setInputCols(["document"])
+    .setOutputCol("sentence")
+)
+
+# Tokenizer splits words in a relevant format for NLP
+tokenizer = (
+    nlp.Tokenizer().setInputCols(["sentence"]).setOutputCol("token")
+)
+
+bert_embeddings = (
+    nlp.BertEmbeddings.pretrained("bert_embeddings_sec_bert_base", "en")
+    .setInputCols("document", "token")
+    .setOutputCol("embeddings")
+    .setMaxSentenceLength(512)
+)
+
+clinical_ner = (
+    finance.NerModel.pretrained("finner_absa_sm", "en", "finance/models")
+    .setInputCols(["sentence", "token", "embeddings"])
+    .setOutputCol("ner")
+)
+
+ner_converter = (
+    finance.NerConverterInternal()
+    .setInputCols(["sentence", "token", "ner"])
+    .setOutputCol("ner_chunk")
+)
+
+assertion_model = (
+    finance.AssertionDLModel.pretrained("finassertion_absa_sm", "en", "finance/models")
+    .setInputCols(["sentence", "ner_chunk", "embeddings"])
+    .setOutputCol("assertion")
+)
+
+nlpPipeline = nlp.Pipeline(
+    stages=[
+        documentAssembler,
+        sentenceDetector,
+        tokenizer,
+        bert_embeddings,
+        clinical_ner,
+        ner_converter,
+        assertion_model,
+    ]
+)
+
+
+text = "Equity and earnings of affiliates in Latin America increased to $4.8 million in the quarter from $2.2 million in the prior year as the commodity markets in Latin America remain strong through the end of the quarter."
+
+spark_df = spark.createDataFrame([[text]]).toDF("text")
+
+result = nlpPipeline.fit(spark_df).transform(spark_df)
+
+result.select(
+    F.explode(
+        F.arrays_zip("ner_chunk.result", "ner_chunk.metadata")
+    ).alias("cols")
+).select(
+    F.expr("cols['0']").alias("entity"),
+    F.expr("cols['1']['entity']").alias("label"),
+).show(
+    50, truncate=False
+)
+```
+
+</div>
+
+## Results
+
+```bash
++--------+---------+
+|entity  |label    |
++--------+---------+
+|Equity  |LIABILITY|
+|earnings|PROFIT   |
++--------+---------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finassertion_absa_sm|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[document, chunk, embeddings]|
+|Output Labels:|[assertion]|
+|Language:|en|
+|Size:|2.7 MB|
+
+## References
+
+In-house annotations of earning call transcripts.
+
+## Benchmarking
+
+```bash
+     label    precision    recall  f1-score   support
+
+    NEGATIVE       0.57      0.42      0.48        74
+     NEUTRAL       0.51      0.70      0.59       184
+    POSITIVE       0.75      0.64      0.69       324
+```
\ No newline at end of file
diff --git a/docs/_posts/dcecchini/2023-10-06-finembedding_e5_base_en.md b/docs/_posts/dcecchini/2023-10-06-finembedding_e5_base_en.md
new file mode 100644
index 0000000000..cefedf209d
--- /dev/null
+++ b/docs/_posts/dcecchini/2023-10-06-finembedding_e5_base_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: Finance E5 Embedding Base
+author: John Snow Labs
+name: finembedding_e5_base
+date: 2023-10-06
+tags: [finance, en, licensed, e5, sentence_embedding, onnx]
+task: Embeddings
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: E5Embeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This model is a financial version of the E5 base model fine-tuned on earning call transcripts and finance question-answering datasets. Reference: Wang, Liang, et al. "Text embeddings by weakly-supervised contrastive pre-training." arXiv preprint arXiv:2212.03533 (2022).
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finembedding_e5_base_en_1.0.0_3.0_1696603847700.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finembedding_e5_base_en_1.0.0_3.0_1696603847700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = (
+    nlp.DocumentAssembler().setInputCol("text").setOutputCol("document")
+)
+
+E5_embedding = (
+    nlp.E5Embeddings.pretrained(
+        "finembedding_e5_base", "en", "finance/models"
+    )
+    .setInputCols(["document"])
+    .setOutputCol("E5")
+)
+pipeline = nlp.Pipeline(stages=[document_assembler, E5_embedding])
+
+data = spark.createDataFrame(
+    [["What is the best way to invest in the stock market?"]]
+).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+result.select("E5.result").show()
+```
+
+</div>
+
+## Results
+
+```bash
++----------------------------------------------------------------------------------------------------+
+|                                                                                          embeddings|
++----------------------------------------------------------------------------------------------------+
+|[0.45521045, -0.16874692, -0.06179046, -0.37956607, 1.152633, 0.6849592, -0.9676384, 0.4624033, ...|
++----------------------------------------------------------------------------------------------------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finembedding_e5_base|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[document]|
+|Output Labels:|[E5]|
+|Language:|en|
+|Size:|398.5 MB|
+
+## References
+
+For our Finance models, we will use publicly available datasets to fine-tune the model:
+
+- [FiQA](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/)
+- In-house annotated Earning Calls Transcripts
\ No newline at end of file
diff --git a/docs/_posts/dcecchini/2023-10-06-finner_absa_sm_en.md b/docs/_posts/dcecchini/2023-10-06-finner_absa_sm_en.md
new file mode 100644
index 0000000000..c4c7b98ae3
--- /dev/null
+++ b/docs/_posts/dcecchini/2023-10-06-finner_absa_sm_en.md
@@ -0,0 +1,147 @@
+---
+layout: model
+title: Financial NER for Aspect-based Sentiment Analysis (sm, Small)
+author: John Snow Labs
+name: finner_absa_sm
+date: 2023-10-06
+tags: [finance, en, ner, licensed]
+task: Named Entity Recognition
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+annotator: FinanceNerModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This NER model identifies entities that can be associated with a financial sentiment. The model is designed to be used with the associated Assertion Status model that classifies the entities into a sentiment category.
+
+## Predicted Entities
+
+`REVENUE`, `EXPENSE`, `PROFIT`, `KPI`, `GAINS`, `ASSET`, `LIABILITY`, `CASHFLOW`, `LOSSES`, `FREE_CASH_FLOW`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finner_absa_sm_en_1.0.0_3.0_1696605316183.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finner_absa_sm_en_1.0.0_3.0_1696605316183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = nlp.DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+sentence_detector = nlp.SentenceDetector() \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence") \
+    .setCustomBounds(["\n\n"])
+
+tokenizer = nlp.Tokenizer()\
+    .setInputCols(["sentence"])\
+    .setOutputCol("token")
+
+embeddings = nlp.BertEmbeddings.pretrained("bert_embeddings_sec_bert_base","en")\
+    .setInputCols(["sentence", "token"])\
+    .setOutputCol("embeddings")\
+    .setCaseSensitive(True)\
+    .setMaxSentenceLength(512)
+
+ner_model = finance.NerModel.pretrained("finner_absa_sm", "en", "finance/models")\
+    .setInputCols(["sentence", "token", "embeddings"])\
+    .setOutputCol("ner")
+
+ner_converter = finance.NerConverterInternal()\
+    .setInputCols(["sentence", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+pipeline = nlp.Pipeline(stages=[
+    document_assembler,
+    sentence_detector,
+    tokenizer,
+    embeddings,
+    ner_model,
+    ner_converter   
+    ])
+
+model = pipeline.fit(spark.createDataFrame([[""]]).toDF("text"))
+
+
+text = "Equity and earnings of affiliates in Latin America increased to $4.8 million in the quarter from $2.2 million in the prior year as the commodity markets in Latin America remain strong through the end of the quarter."
+
+spark_df = spark.createDataFrame([[text]]).toDF("text")
+
+result = model.transform(spark_df)
+result.select(F.explode(F.arrays_zip('ner_chunk.result', 'ner_chunk.metadata')).alias("cols")) \
+               .select(F.expr("cols['0']").alias("entity"),
+                       F.expr("cols['1']['entity']").alias("label")).show(50, truncate = False)
+
+```
+
+</div>
+
+## Results
+
+```bash
++--------+---------+
+|entity  |label    |
++--------+---------+
+|Equity  |LIABILITY|
+|earnings|PROFIT   |
++--------+---------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finner_absa_sm|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[sentence, token, embeddings]|
+|Output Labels:|[ner]|
+|Language:|en|
+|Size:|16.3 MB|
+
+## References
+
+In-house annotations of earnings call transcripts.
+
+## Benchmarking
+
+```bash
+         label    precision    recall  f1-score   support
+
+         B-ASSET     0.6000    0.2400    0.3429        25
+      B-CASHFLOW     0.7000    0.5833    0.6364        12
+       B-EXPENSE     0.7222    0.6500    0.6842        60
+B-FREE_CASH_FLOW     1.0000    1.0000    1.0000         8
+         B-GAINS     0.7333    0.5946    0.6567        37
+           B-KPI     0.7143    0.5556    0.6250        36
+     B-LIABILITY     0.5000    0.2778    0.3571        18
+        B-LOSSES     0.7143    0.7143    0.7143         7
+        B-PROFIT     0.8462    0.8919    0.8684        37
+       B-REVENUE     0.7385    0.8000    0.7680        60
+         I-ASSET     0.8000    0.3636    0.5000        11
+      I-CASHFLOW     0.9091    0.9091    0.9091        11
+       I-EXPENSE     0.7451    0.6230    0.6786        61
+I-FREE_CASH_FLOW     1.0000    1.0000    1.0000        17
+         I-GAINS     0.8333    0.6667    0.7407        30
+           I-KPI     0.8500    0.5000    0.6296        34
+     I-LIABILITY     0.5000    0.5000    0.5000         6
+        I-LOSSES     0.7143    0.6250    0.6667         8
+        I-PROFIT     0.8621    0.9615    0.9091        26
+       I-REVENUE     0.7600    0.7308    0.7451        26
+               O     0.9839    0.9923    0.9880      8660
+```
\ No newline at end of file

From fdca733b4d5843da420e39c44a827720e06db345 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Thu, 9 Nov 2023 21:38:16 +0700
Subject: [PATCH 13/15] Add model 2023-11-09-finembedding_e5_large_en (#745)

Co-authored-by: dcecchini <dadachini@hotmail.com>
---
 .../2023-11-09-finembedding_e5_large_en.md    | 90 +++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 docs/_posts/dcecchini/2023-11-09-finembedding_e5_large_en.md

diff --git a/docs/_posts/dcecchini/2023-11-09-finembedding_e5_large_en.md b/docs/_posts/dcecchini/2023-11-09-finembedding_e5_large_en.md
new file mode 100644
index 0000000000..d0641108b7
--- /dev/null
+++ b/docs/_posts/dcecchini/2023-11-09-finembedding_e5_large_en.md
@@ -0,0 +1,90 @@
+---
+layout: model
+title: Finance E5 Embedding Large
+author: John Snow Labs
+name: finembedding_e5_large
+date: 2023-11-09
+tags: [finance, en, licensed, e5, sentence_embedding, onnx]
+task: Embeddings
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: E5Embeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This model is a financial version of the E5 large model fine-tuned on in-house curated financial datasets. Reference: Wang, Liang, et al. “Text embeddings by weakly-supervised contrastive pre-training.” arXiv preprint arXiv:2212.03533 (2022).
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finembedding_e5_large_en_1.0.0_3.0_1699530885080.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finembedding_e5_large_en_1.0.0_3.0_1699530885080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = (
+    nlp.DocumentAssembler().setInputCol("text").setOutputCol("document")
+)
+
+E5_embedding = (
+    nlp.E5Embeddings.pretrained(
+        "finembedding_e5_large", "en", "finance/models"
+    )
+    .setInputCols(["document"])
+    .setOutputCol("E5")
+)
+pipeline = nlp.Pipeline(stages=[document_assembler, E5_embedding])
+
+data = spark.createDataFrame(
+    [["What is the best way to invest in the stock market?"]]
+).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+result.select("E5.result").show()
+```
+
+</div>
+
+## Results
+
+```bash
++----------------------------------------------------------------------------------------------------+
+|                                                                                          embeddings|
++----------------------------------------------------------------------------------------------------+
+|[0.8358813, -1.30341, -0.576791, 0.25893408, 0.26888973, 0.028243342, 0.47971666, 0.47653574, 0.4...|
++----------------------------------------------------------------------------------------------------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finembedding_e5_large|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[document]|
+|Output Labels:|[E5]|
+|Language:|en|
+|Size:|1.2 GB|
+
+## References
+
+In-house annotated financial datasets.
\ No newline at end of file

From 7cc190d90b00fcd68420988c789ccff373559ca2 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Sat, 11 Nov 2023 20:02:24 +0700
Subject: [PATCH 14/15] 2023-11-11-finner_aspect_based_sentiment_md_en (#754)

* Add model 2023-11-11-finner_aspect_based_sentiment_md_en

* Add model 2023-11-11-finassertion_aspect_based_sentiment_md_en

* Update 2023-11-11-finner_aspect_based_sentiment_md_en.md

* Update 2023-11-11-finassertion_aspect_based_sentiment_md_en.md

---------

Co-authored-by: Mary-Sci <meryemyildiz366@gmail.com>
Co-authored-by: Merve Ertas Uslu <67653613+Mary-Sci@users.noreply.github.com>
---
 ...nassertion_aspect_based_sentiment_md_en.md | 131 +++++++++++++++++
 ...-11-finner_aspect_based_sentiment_md_en.md | 136 ++++++++++++++++++
 2 files changed, 267 insertions(+)
 create mode 100644 docs/_posts/Mary-Sci/2023-11-11-finassertion_aspect_based_sentiment_md_en.md
 create mode 100644 docs/_posts/Mary-Sci/2023-11-11-finner_aspect_based_sentiment_md_en.md

diff --git a/docs/_posts/Mary-Sci/2023-11-11-finassertion_aspect_based_sentiment_md_en.md b/docs/_posts/Mary-Sci/2023-11-11-finassertion_aspect_based_sentiment_md_en.md
new file mode 100644
index 0000000000..12ca101255
--- /dev/null
+++ b/docs/_posts/Mary-Sci/2023-11-11-finassertion_aspect_based_sentiment_md_en.md
@@ -0,0 +1,131 @@
+---
+layout: model
+title: Financial Assertion of Aspect-Based Sentiment (md, Medium)
+author: John Snow Labs
+name: finassertion_aspect_based_sentiment_md
+date: 2023-11-11
+tags: [assertion, licensed, en, finance]
+task: Assertion Status
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+annotator: AssertionDLModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This assertion model classifies financial entities into an aspect-based sentiment. It is designed to be used together with the associated NER model.
+
+## Predicted Entities
+
+`POSITIVE`, `NEGATIVE`, `NEUTRAL`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finassertion_aspect_based_sentiment_md_en_1.0.0_3.0_1699705705778.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finassertion_aspect_based_sentiment_md_en_1.0.0_3.0_1699705705778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = nlp.DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+# Sentence Detector annotator, processes various sentences per line
+sentenceDetector = nlp.SentenceDetector()\
+    .setInputCols(["document"])\
+    .setOutputCol("sentence")
+
+# Tokenizer splits words in a relevant format for NLP
+tokenizer = nlp.Tokenizer()\
+    .setInputCols(["sentence"])\
+    .setOutputCol("token")
+
+bert_embeddings = nlp.BertEmbeddings.pretrained("bert_embeddings_sec_bert_base", "en")\
+    .setInputCols("sentence", "token")\
+    .setOutputCol("embeddings")\
+    .setMaxSentenceLength(512)
+
+finance_ner = finance.NerModel.pretrained("finner_aspect_based_sentiment_md", "en", "finance/models")\
+    .setInputCols(["sentence", "token", "embeddings"])\
+    .setOutputCol("ner")
+
+ner_converter = finance.NerConverterInternal()\
+    .setInputCols(["sentence", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+assertion_model = finance.AssertionDLModel.pretrained("finassertion_aspect_based_sentiment_md", "en", "finance/models")\
+    .setInputCols(["sentence", "ner_chunk", "embeddings"])\
+    .setOutputCol("assertion")
+
+
+nlpPipeline = nlp.Pipeline(
+    stages=[documentAssembler,
+            sentenceDetector,
+            tokenizer,
+            bert_embeddings,
+            finance_ner,
+            ner_converter,
+            assertion_model])
+
+text = "Equity and earnings of affiliates in Latin America increased to $4.8 million in the quarter from $2.2 million in the prior year as the commodity markets in Latin America remain strong through the end of the quarter."
+
+spark_df = spark.createDataFrame([[text]]).toDF("text")
+
+result = nlpPipeline.fit(spark_df ).transform(spark_df)
+
+result.select(F.explode(F.arrays_zip("ner_chunk.result", "ner_chunk.metadata", "assertion.result", "assertion.metadata")).alias("cols"))\
+      .select(F.expr("cols['0']").alias("entity"),
+              F.expr("cols['1']['entity']").alias("label"),
+              F.expr("cols['2']").alias("assertion"),
+              F.expr("cols['3']['confidence']").alias("confidence")).show(50, truncate=False)
+```
+
+</div>
+
+## Results
+
+```bash
++--------+---------+---------+----------+
+|entity  |label    |assertion|confidence|
++--------+---------+---------+----------+
+|Equity  |LIABILITY|POSITIVE |0.9895    |
+|earnings|PROFIT   |POSITIVE |0.995     |
++--------+---------+---------+----------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finassertion_aspect_based_sentiment_md|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[document, chunk, embeddings]|
+|Output Labels:|[assertion]|
+|Language:|en|
+|Size:|2.7 MB|
+
+## Benchmarking
+
+```bash
+ label         precision  recall  f1-score  support 
+ NEGATIVE      0.68       0.43    0.53      232     
+ NEUTRAL       0.44       0.65    0.53      441     
+ POSITIVE      0.79       0.69    0.74      947     
+ accuracy      -          -       0.64      1620    
+ macro-avg     0.64       0.59    0.60      1620    
+ weighted-avg  0.68       0.64    0.65      1620    
+```
diff --git a/docs/_posts/Mary-Sci/2023-11-11-finner_aspect_based_sentiment_md_en.md b/docs/_posts/Mary-Sci/2023-11-11-finner_aspect_based_sentiment_md_en.md
new file mode 100644
index 0000000000..fb1df22a2a
--- /dev/null
+++ b/docs/_posts/Mary-Sci/2023-11-11-finner_aspect_based_sentiment_md_en.md
@@ -0,0 +1,136 @@
+---
+layout: model
+title: Financial NER on Aspect-Based Sentiment Analysis
+author: John Snow Labs
+name: finner_aspect_based_sentiment_md
+date: 2023-11-11
+tags: [ner, licensed, finance, en]
+task: Named Entity Recognition
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+annotator: FinanceNerModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This NER model identifies entities that can be associated with a financial sentiment. The model is designed to be used with the associated Assertion Status model that classifies the entities into a sentiment category.
+
+## Predicted Entities
+
+`ASSET`, `CASHFLOW`, `EXPENSE`, `FREE_CASH_FLOW`, `GAINS`, `KPI`, `LIABILITY`, `LOSSES`, `PROFIT`, `REVENUE`
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finner_aspect_based_sentiment_md_en_1.0.0_3.0_1699704469251.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finner_aspect_based_sentiment_md_en_1.0.0_3.0_1699704469251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = nlp.DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+# Sentence Detector annotator, processes various sentences per line
+sentenceDetector = nlp.SentenceDetector()\
+    .setInputCols(["document"])\
+    .setOutputCol("sentence")
+
+# Tokenizer splits words in a relevant format for NLP
+tokenizer = nlp.Tokenizer()\
+    .setInputCols(["sentence"])\
+    .setOutputCol("token")
+
+bert_embeddings = nlp.BertEmbeddings.pretrained("bert_embeddings_sec_bert_base", "en")\
+    .setInputCols("sentence", "token")\
+    .setOutputCol("embeddings")\
+    .setMaxSentenceLength(512)
+
+
+ner_model = finance.NerModel().pretrained("finner_aspect_based_sentiment_md", "en", "finance/models")\
+    .setInputCols(["sentence", "token", "embeddings"])\
+    .setOutputCol("ner")
+
+ner_converter = nlp.NerConverter()\
+    .setInputCols(["sentence","token","ner"])\
+    .setOutputCol("ner_chunk")
+
+nlpPipeline = nlp.Pipeline(stages=[
+        documentAssembler,
+        sentenceDetector,
+        tokenizer,
+        bert_embeddings,
+        ner_model,
+        ner_converter])
+
+empty_data = spark.createDataFrame([[""]]).toDF("text")
+model = nlpPipeline.fit(empty_data)
+
+text = ["""Equity and earnings of affiliates in Latin America increased to $4.8 million in the quarter from $2.2 million in the prior year as the commodity markets in Latin America remain strong through the end of the quarter."""]
+result = model.transform(spark.createDataFrame([text]).toDF("text"))
+
+from pyspark.sql import functions as F
+
+result.select(F.explode(F.arrays_zip(result.ner_chunk.result, result.ner_chunk.begin, result.ner_chunk.end, result.ner_chunk.metadata)).alias("cols")) \
+               .select(F.expr("cols['0']").alias("chunk"),
+                       F.expr("cols['1']").alias("begin"),
+                       F.expr("cols['2']").alias("end"),
+                       F.expr("cols['3']['entity']").alias("ner_label")
+                       ).show(100, truncate=False)
+```
+
+</div>
+
+## Results
+
+```bash
++--------+-----+---+---------+
+|chunk   |begin|end|ner_label|
++--------+-----+---+---------+
+|Equity  |1    |6  |LIABILITY|
+|earnings|12   |19 |PROFIT   |
++--------+-----+---+---------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finner_aspect_based_sentiment_md|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[sentence, token, embeddings]|
+|Output Labels:|[ner]|
+|Language:|en|
+|Size:|16.5 MB|
+
+## Benchmarking
+
+```bash
+ label           precision  recall  f1-score  support 
+ ASSET           0.50       0.72    0.59      53      
+ CASHFLOW        0.78       0.60    0.68      30      
+ EXPENSE         0.71       0.68    0.70      151     
+ FREE_CASH_FLOW  1.00       1.00    1.00      19      
+ GAINS           0.80       0.78    0.79      55      
+ KPI             0.72       0.58    0.64      106     
+ LIABILITY       0.65       0.51    0.57      39      
+ LOSSES          0.77       0.59    0.67      29      
+ PROFIT          0.77       0.74    0.75      101     
+ REVENUE         0.74       0.78    0.76      231     
+ micro-avg       0.72       0.71    0.71      814     
+ macro-avg       0.74       0.70    0.71      814     
+ weighted-avg    0.73       0.71    0.71      814  
+```

From c3d98fab81c4f4fa3d950d699e518cfb832b59ba Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Tue, 19 Dec 2023 17:32:27 +0700
Subject: [PATCH 15/15] Add model 2023-12-07-finembeddings_bge_base_en (#812)

Co-authored-by: dcecchini <dadachini@hotmail.com>
---
 .../2023-12-07-finembeddings_bge_base_en.md   | 108 ++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 docs/_posts/dcecchini/2023-12-07-finembeddings_bge_base_en.md

diff --git a/docs/_posts/dcecchini/2023-12-07-finembeddings_bge_base_en.md b/docs/_posts/dcecchini/2023-12-07-finembeddings_bge_base_en.md
new file mode 100644
index 0000000000..206b7cb108
--- /dev/null
+++ b/docs/_posts/dcecchini/2023-12-07-finembeddings_bge_base_en.md
@@ -0,0 +1,108 @@
+---
+layout: model
+title: Finance Embeddings BGE Base
+author: John Snow Labs
+name: finembeddings_bge_base
+date: 2023-12-07
+tags: [finance, en, licensed, bge, embeddings, onnx]
+task: Embeddings
+language: en
+edition: Finance NLP 1.0.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This model is a financial version of the BGE base model fine-tuned on in-house curated datasets. Reference: Xiao, S., Liu, Z., Zhang, P., & Muennighoff, N. (2023). C-Pack: Packaged resources to advance general Chinese embedding. arXiv preprint arXiv:2309.07597.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/finance/models/finembeddings_bge_base_en_1.0.0_3.0_1701948521741.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/finance/models/finembeddings_bge_base_en_1.0.0_3.0_1701948521741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = nlp.DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = nlp.Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+bge = nlp.BertEmbeddings.pretrained("finembeddings_bge_base", "en", "finance/models")\
+    .setInputCols(["document", "token"])\
+    .setOutputCol("bge")
+
+pipeline = nlp.Pipeline(
+    stages = [
+        documentAssembler,
+        tokenizer,
+        bge
+  ])
+
+data = spark.createDataFrame([[
+    '''What is the best way to invest in the stock market?'''
+]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(bge.embeddings) as bge_embeddings").show(truncate=100)
+```
+
+</div>
+
+## Results
+
+```bash
++----------------------------------------------------------------------------------------------------+
+|                                                                                      bge_embeddings|
++----------------------------------------------------------------------------------------------------+
+|[0.70071065, 0.8154926, 0.3667199, 0.49541458, 0.5675478, 0.47981235, 0.09903594, 1.0118086, -0.3...|
+|[0.5844246, 0.897823, 0.36319774, 0.33672202, 0.6926622, 0.62645215, 0.21583402, 0.99781555, -0.0...|
+|[0.5678047, 0.9290247, 0.19549623, 0.29991657, 0.6558282, 0.60267514, 0.2365676, 0.87947553, -0.1...|
+|[0.31799358, 0.60279167, 0.7648379, 0.2832115, 0.45711696, 0.12192034, -0.10309678, 1.1410849, -0...|
+|[1.0170714, 1.1024956, 0.59346, 0.4784618, 0.81034416, 0.2503267, -0.02142908, 0.6190611, -0.1401...|
+|[0.8248961, 1.1220868, 0.27929437, 0.20173876, 0.6809691, 0.6311508, 0.15206291, 0.8089775, 0.317...|
+|[0.76785743, 0.9963818, 0.21050292, 0.2416854, 1.0152707, 0.18767616, 0.27576423, 0.85077125, 0.3...|
+|[0.654324, 1.1681782, 0.17568657, 0.23243408, 0.76372075, 0.6539263, 0.2841307, 1.224574, 0.21359...|
+|[0.5922923, 1.2471354, 0.090304464, 0.48645073, 0.59852546, 0.8716394, 0.34509993, 0.9442089, 0.1...|
+|[0.72195786, 0.9363174, 0.06630206, 0.27642763, 0.7145356, 0.23325293, 0.12738094, 1.0298125, -0....|
+|[0.45599157, 0.9871535, 0.15671916, 0.17181304, 0.93662477, 0.27518728, -0.18060194, 0.93082047, ...|
+|[0.6865296, 1.052128, 0.2681757, 0.32934788, 0.47195143, 0.81678694, 0.012849957, 1.0271766, -0.0...|
++----------------------------------------------------------------------------------------------------+
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finembeddings_bge_base|
+|Compatibility:|Finance NLP 1.0.0+|
+|License:|Licensed|
+|Edition:|Official|
+|Input Labels:|[sentence, token]|
+|Output Labels:|[bge_embeddings]|
+|Language:|en|
+|Size:|397.2 MB|
+|Case sensitive:|false|
+
+## References
+
+In-house curated financial datasets.
\ No newline at end of file