From edca9323e381eb91b3693ea2d6c542d8da262ece Mon Sep 17 00:00:00 2001
From: Akar <67700732+akrztrk@users.noreply.github.com>
Date: Fri, 3 May 2024 19:45:27 +0200
Subject: [PATCH] updated md cards (#1171)
---
...obertresolve_umls_clinical_drugs_en_3_0.md | 57 ++++--
...-sbiobertresolve_umls_drug_substance_en.md | 170 +++++++++---------
...2021-12-23-sbiobert_jsl_rxnorm_cased_en.md | 15 +-
3 files changed, 138 insertions(+), 104 deletions(-)
diff --git a/docs/_posts/Ahmetemintek/2022-07-05-sbiobertresolve_umls_clinical_drugs_en_3_0.md b/docs/_posts/Ahmetemintek/2022-07-05-sbiobertresolve_umls_clinical_drugs_en_3_0.md
index e731d312d0..da24f24545 100644
--- a/docs/_posts/Ahmetemintek/2022-07-05-sbiobertresolve_umls_clinical_drugs_en_3_0.md
+++ b/docs/_posts/Ahmetemintek/2022-07-05-sbiobertresolve_umls_clinical_drugs_en_3_0.md
@@ -37,6 +37,7 @@ This model maps clinical entities to UMLS CUI codes. It is trained on 2022AA UML
{% include programmingLanguageSelectScalaPythonNLU.html %}
+
```python
...
document_assembler = DocumentAssembler()\
@@ -55,16 +56,18 @@ word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "c
.setInputCols(["sentence", "token"])\
.setOutputCol("embeddings")
-ner_model = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")\
- .setInputCols(["sentence", "token", "embeddings"])\
- .setOutputCol("clinical_ner")
+ner_model = MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")\
+ .setInputCols(["sentence", "token", "embeddings"])\
+ .setOutputCol("posology_ner")
ner_model_converter = NerConverterInternal()\
- .setInputCols(["sentence", "token", "clinical_ner"])\
- .setOutputCol("ner_chunk")
-
+ .setInputCols(["sentence", "token", "posology_ner"])\
+ .setOutputCol("posology_ner_chunk")\
+ .setWhiteList(["DRUG"])
-chunk2doc = Chunk2Doc().setInputCols("ner_chunk").setOutputCol("ner_chunk_doc")
+chunk2doc = Chunk2Doc()\
+ .setInputCols("posology_ner_chunk")\
+ .setOutputCol("ner_chunk_doc")
sbert_embedder = BertSentenceEmbeddings\
.pretrained("sbiobert_base_cased_mli","en","clinical/models")\
@@ -77,7 +80,17 @@ resolver = SentenceEntityResolverModel\
.setOutputCol("resolution")\
.setDistanceFunction("EUCLIDEAN")
-pipeline = Pipeline(stages = [document_assembler, sentence_detector, tokenizer, word_embeddings, ner_model, ner_model_converter, chunk2doc, sbert_embedder, resolver])
+pipeline = Pipeline(stages = [
+ document_assembler,
+ sentence_detector,
+ tokenizer,
+ word_embeddings,
+ ner_model,
+ ner_model_converter,
+ chunk2doc,
+ sbert_embedder,
+ resolver
+])
data = spark.createDataFrame([["""She was immediately given hydrogen peroxide 30 mg to treat the infection on her leg, and has been advised Neosporin Cream for 5 days. She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg."""]]).toDF("text")
@@ -102,16 +115,18 @@ val word_embeddings = WordEmbeddingsModel
.setInputCols(Array("sentence", "token"))
.setOutputCol("embeddings")
-val ner_model = MedicalNerModel
- .pretrained("ner_clinical", "en", "clinical/models")
- .setInputCols(Array("sentence", "token", "embeddings"))
- .setOutputCol("clinical_ner")
+val ner_model = MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
+ .setInputCols(["sentence", "token", "embeddings"])
+ .setOutputCol("posology_ner")
val ner_model_converter = new NerConverterInternal()
- .setInputCols(Array("sentence", "token", "clinical_ner"))
- .setOutputCol("ner_chunk")
+ .setInputCols(["sentence", "token", "posology_ner"])
+ .setOutputCol("posology_ner_chunk")
+ .setWhiteList(["DRUG"])
-val chunk2doc = Chunk2Doc().setInputCols("ner_chunk").setOutputCol("ner_chunk_doc")
+chunk2doc = new Chunk2Doc()
+ .setInputCols("posology_ner_chunk")
+ .setOutputCol("ner_chunk_doc")
val sbert_embedder = BertSentenceEmbeddings
.pretrained("sbiobert_base_cased_mli", "en","clinical/models")
@@ -125,7 +140,17 @@ val resolver = SentenceEntityResolverModel
.setOutputCol("resolution")
.setDistanceFunction("EUCLIDEAN")
-val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, tokenizer, word_embeddings, ner_model, ner_model_converter, chunk2doc, sbert_embedder, resolver))
+val pipeline = new Pipeline().setStages(Array(
+ document_assembler,
+ sentence_detector,
+ tokenizer,
+ word_embeddings,
+ ner_model,
+ ner_model_converter,
+ chunk2doc,
+ sbert_embedder,
+ resolver
+))
val data = Seq("She was immediately given hydrogen peroxide 30 mg to treat the infection on her leg, and has been advised Neosporin Cream for 5 days. She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg.").toDF("text")
diff --git a/docs/_posts/HashamUlHaq/2021-12-06-sbiobertresolve_umls_drug_substance_en.md b/docs/_posts/HashamUlHaq/2021-12-06-sbiobertresolve_umls_drug_substance_en.md
index 63596dc82f..a136e6ff22 100644
--- a/docs/_posts/HashamUlHaq/2021-12-06-sbiobertresolve_umls_drug_substance_en.md
+++ b/docs/_posts/HashamUlHaq/2021-12-06-sbiobertresolve_umls_drug_substance_en.md
@@ -38,103 +38,114 @@ This model maps clinical entities to UMLS CUI codes. It is trained on `2021AB` U
```python
documentAssembler = DocumentAssembler()\
- .setInputCol('text')\
- .setOutputCol('document')
+ .setInputCol("text")\
+ .setOutputCol("document")
-sentenceDetector = SentenceDetector() \
- .setInputCols(["document"]) \
+sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\
+ .setInputCols(["document"])\
.setOutputCol("sentence")
-tokenizer = Tokenizer() \
- .setInputCols(["sentence"]) \
+tokenizer = Tokenizer()\
+ .setInputCols(["sentence"])\
.setOutputCol("token")
-stopwords = StopWordsCleaner.pretrained()\
- .setInputCols("token")\
- .setOutputCol("cleanTokens")\
- .setCaseSensitive(False)
-
-word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
- .setInputCols(["sentence", "cleanTokens"])\
+word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models")\
+ .setInputCols(["sentence","token"])\
.setOutputCol("embeddings")
+ner_model = MedicalNerModel.pretrained("ner_posology_greedy","en","clinical/models")\
+ .setInputCols(["sentence","token","embeddings"])\
+ .setOutputCol("posology_ner")
-clinical_ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models") \
- .setInputCols(["sentence", "token", "embeddings"]) \
- .setOutputCol("ner")
-
-ner_converter = NerConverter() \
- .setInputCols(["sentence", "cleanTokens", "ner"]) \
- .setOutputCol("ner_chunk")
+ner_model_converter = NerConverterInternal()\
+ .setInputCols(["sentence","token","posology_ner"])\
+ .setOutputCol("posology_ner_chunk")\
+ .setWhiteList(["DRUG"])
chunk2doc = Chunk2Doc()\
- .setInputCols("ner_chunk")\
+ .setInputCols("posology_ner_chunk")\
.setOutputCol("ner_chunk_doc")
-sbert_embedder = BertSentenceEmbeddings\
- .pretrained("sbiobert_base_cased_mli",'en','clinical/models')\
+sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli",'en','clinical/models')\
.setInputCols(["ner_chunk_doc"])\
- .setOutputCol("sbert_embeddings").setCaseSensitive(False)
+ .setOutputCol("sbert_embeddings")\
+ .setCaseSensitive(False)
resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_umls_drug_substance","en", "clinical/models") \
- .setInputCols(["sbert_embeddings"]) \
- .setOutputCol("resolution")\
- .setDistanceFunction("EUCLIDEAN")
-
-pipeline = Pipeline(stages = [documentAssembler, sentenceDetector, tokenizer, stopwords, word_embeddings, clinical_ner, ner_converter, chunk2doc, sbert_embedder, resolver])
-
-data = spark.createDataFrame([[""]]).toDF("text")
-
-model = LightPipeline(pipeline.fit(data))
-
-results = model.fullAnnotate(['Dilaudid', 'Hydromorphone', 'Exalgo', 'Palladone', 'Hydrogen peroxide 30 mg', 'Neosporin Cream', 'Magnesium hydroxide 100mg/1ml', 'Metformin 1000 mg'])
+ .setInputCols(["sbert_embeddings"]) \
+ .setOutputCol("resolution")\
+ .setDistanceFunction("EUCLIDEAN")
+
+pipeline = Pipeline(stages=[
+ documentAssembler,
+ sentenceDetector,
+ tokenizer,
+ word_embeddings,
+ ner_model,
+ ner_model_converter,
+ chunk2doc,
+ sbert_embedder,
+ resolver
+])
+
+
+data = spark.createDataFrame([["She was immediately given hydrogen peroxide 30 mg to treat the infection on her leg, and has been advised Neosporin Cream for 5 days. She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg."]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
```
```scala
-val documentAssembler = new DocumentAssembler()
- .setInputCol("text")
- .setOutputCol("document")
+val document_assembler = new DocumentAssembler()
+ .setInputCol("text")
+ .setOutputCol("document")
-val sentenceDetector = new SentenceDetector()
- .setInputCols("document")
- .setOutputCol("sentence")
+val sentence_detector = new SentenceDetector()
+ .setInputCols(Array("document"))
+ .setOutputCol("sentence")
val tokenizer = new Tokenizer()
- .setInputCols("sentence")
- .setOutputCol("token")
+ .setInputCols("sentence")
+ .setOutputCol("token")
-val stopwords = StopWordsCleaner.pretrained()
- .setInputCols("token")
- .setOutputCol("cleanTokens")
- .setCaseSensitive(False)
-
-val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
- .setInputCols(Array("sentence", "cleanTokens"))
- .setOutputCol("embeddings")
+val word_embeddings = WordEmbeddingsModel
+ .pretrained("embeddings_clinical", "en", "clinical/models")
+ .setInputCols(Array("sentence", "token"))
+ .setOutputCol("embeddings")
-val clinical_ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
- .setInputCols(Array("sentence", "token", "embeddings"))
- .setOutputCol("ner")
+val ner_model = MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
+ .setInputCols(Array("sentence", "token", "embeddings"))
+ .setOutputCol("posology_ner")
-val ner_converter = new NerConverter()
- .setInputCols(Array("sentence", "cleanTokens", "ner"))
- .setOutputCol("ner_chunk")
+val ner_model_converter = new NerConverterInternal()
+ .setInputCols(Array("sentence", "token", "posology_ner"))
+ .setOutputCol("posology_ner_chunk")
+ .setWhiteList(["DRUG"])
val chunk2doc = new Chunk2Doc()
- .setInputCols("ner_chunk")
- .setOutputCol("ner_chunk_doc")
+ .setInputCols("posology_ner_chunk")
+ .setOutputCol("ner_chunk_doc")
val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en","clinical/models")
- .setInputCols("ner_chunk_doc")
- .setOutputCol("sbert_embeddings")
-
+ .setInputCols(Array("ner_chunk_doc"))
+ .setOutputCol("sbert_embeddings")
+ .setCaseSensitive(False)
+
val resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_umls_drug_substance", "en", "clinical/models")
- .setInputCols(Array("sbert_embeddings"))
- .setOutputCol("resolution")
- .setDistanceFunction("EUCLIDEAN")
-
-val p_model = new PipelineModel().setStages(Array(documentAssembler, sentenceDetector, tokenizer, stopwords, word_embeddings, clinical_ner, ner_converter, chunk2doc, sbert_embedder, resolver))
-
-val data = Seq("""'Dilaudid', 'Hydromorphone', 'Exalgo', 'Palladone', 'Hydrogen peroxide 30 mg', 'Neosporin Cream', 'Magnesium hydroxide 100mg/1ml', 'Metformin 1000 mg'""").toDS().toDF("text")
+ .setInputCols(Array("sbert_embeddings"))
+ .setOutputCol("resolution")
+ .setDistanceFunction("EUCLIDEAN")
+
+val p_model = new Pipeline().setStages(Array(
+ document_assembler,
+ sentence_detector,
+ tokenizer,
+ word_embeddings,
+ ner_model,
+ ner_model_converter,
+ chunk2doc,
+ sbert_embedder,
+ resolver))
+
+val data = Seq("She was immediately given hydrogen peroxide 30 mg to treat the infection on her leg, and has been advised Neosporin Cream for 5 days. She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg.").toDF("text")
val res = p_model.fit(data).transform(data)
```
@@ -143,7 +154,7 @@ val res = p_model.fit(data).transform(data)
{:.nlu-block}
```python
import nlu
-nlu.load("en.resolve.umls_drug_substance").predict("""Magnesium hydroxide 100mg/1ml""")
+nlu.load("en.resolve.umls_drug_substance").predict("She was immediately given hydrogen peroxide 30 mg to treat the infection on her leg, and has been advised Neosporin Cream for 5 days. She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg.")
```
@@ -151,17 +162,14 @@ nlu.load("en.resolve.umls_drug_substance").predict("""Magnesium hydroxide 100mg/
## Results
```bash
-| | chunk | code | code_description | all_k_code_desc | all_k_codes |
-|---:|:------------------------------|:---------|:---------------------------|:-------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 0 | Dilaudid | C0728755 | dilaudid | ['C0728755', 'C0719907', 'C1448344', 'C0305924', 'C1569295'] | ['dilaudid', 'Dilaudid HP', 'Disthelm', 'Dilaudid Injection', 'Distaph'] |
-| 1 | Hydromorphone | C0012306 | HYDROMORPHONE | ['C0012306', 'C0700533', 'C1646274', 'C1170495', 'C0498841'] | ['HYDROMORPHONE', 'Hydromorphone HCl', 'Phl-HYDROmorphone', 'PMS HYDROmorphone', 'Hydromorphone injection'] |
-| 2 | Exalgo | C2746500 | Exalgo | ['C2746500', 'C0604734', 'C1707065', 'C0070591', 'C3660437'] | ['Exalgo', 'exaltolide', 'Exelgyn', 'Extacol', 'exserohilone'] |
-| 3 | Palladone | C0730726 | palladone | ['C0730726', 'C0594402', 'C1655349', 'C0069952', 'C2742475'] | ['palladone', 'Palladone-SR', 'Palladone IR', 'palladiazo', 'palladia'] |
-| 4 | Hydrogen peroxide 30 mg | C1126248 | hydrogen peroxide 30 MG/ML | ['C1126248', 'C0304655', 'C1605252', 'C0304656', 'C1154260'] | ['hydrogen peroxide 30 MG/ML', 'Hydrogen peroxide solution 30%', 'hydrogen peroxide 30 MG/ML [Proxacol]', 'Hydrogen peroxide 30 mg/mL cutaneous solution', 'benzoyl peroxide 30 MG/ML'] |
-| 5 | Neosporin Cream | C0132149 | Neosporin Cream | ['C0132149', 'C0306959', 'C4722788', 'C0704071', 'C0698988'] | ['Neosporin Cream', 'Neosporin Ointment', 'Neomycin Sulfate Cream', 'Neosporin Topical Ointment', 'Naseptin cream'] |
-| 6 | Magnesium hydroxide 100mg/1ml | C1134402 | magnesium hydroxide 100 MG | ['C1134402', 'C1126785', 'C4317023', 'C4051486', 'C4047137'] | ['magnesium hydroxide 100 MG', 'magnesium hydroxide 100 MG/ML', 'Magnesium sulphate 100mg/mL injection', 'magnesium sulfate 100 MG', 'magnesium sulfate 100 MG/ML'] |
-| 7 | Metformin 1000 mg | C0987664 | metformin 1000 MG | ['C0987664', 'C2719784', 'C0978482', 'C2719786', 'C4282269'] | ['metformin 1000 MG', 'metFORMIN hydrochloride 1000 MG', 'METFORMIN HCL 1000MG TAB', 'metFORMIN hydrochloride 1000 MG [Fortamet]', 'METFORMIN HCL 1000MG SA TAB'] |
-
++-----------------------------+-----+---+------+---------+--------------------------+------------------------------------------------------------+------------------------------------------------------------+
+| ner_chunk|begin|end|entity|umls_code| description| all_k_results| all_k_resolutions|
++-----------------------------+-----+---+------+---------+--------------------------+------------------------------------------------------------+------------------------------------------------------------+
+| hydrogen peroxide 30 mg| 26| 48| DRUG| C1126248|hydrogen peroxide 30 MG/ML|C1126248:::C0304655:::C1605252:::C0304656:::C1154260:::C2...|hydrogen peroxide 30 MG/ML:::Hydrogen peroxide solution 3...|
+| Neosporin Cream| 106|120| DRUG| C0132149| Neosporin Cream|C0132149:::C0306959:::C4722788:::C0704071:::C0698988:::C1...|Neosporin Cream:::Neosporin Ointment:::Neomycin Sulfate C...|
+|magnesium hydroxide 100mg/1ml| 162|190| DRUG| C1134402|magnesium hydroxide 100 MG|C1134402:::C1126785:::C4317023:::C4051486:::C4047137:::C1...|magnesium hydroxide 100 MG:::magnesium hydroxide 100 MG/M...|
+| metformin 1000 mg| 196|212| DRUG| C0987664| metformin 1000 MG|C0987664:::C2719784:::C0978482:::C2719786:::C4282269:::C2...|metformin 1000 MG:::metFORMIN hydrochloride 1000 MG:::MET...|
++-----------------------------+-----+---+------+---------+--------------------------+------------------------------------------------------------+------------------------------------------------------------+
```
{:.model-param}
diff --git a/docs/_posts/galiph/2021-12-23-sbiobert_jsl_rxnorm_cased_en.md b/docs/_posts/galiph/2021-12-23-sbiobert_jsl_rxnorm_cased_en.md
index 7e5648f6b4..3982ac0d6d 100644
--- a/docs/_posts/galiph/2021-12-23-sbiobert_jsl_rxnorm_cased_en.md
+++ b/docs/_posts/galiph/2021-12-23-sbiobert_jsl_rxnorm_cased_en.md
@@ -5,7 +5,7 @@ author: John Snow Labs
name: sbiobert_jsl_rxnorm_cased
date: 2021-12-23
tags: [licensed, embeddings, clinical, en]
-task: Entity Resolution
+task: Embeddings
language: en
nav_key: models
edition: Healthcare NLP 3.3.4
@@ -37,15 +37,16 @@ This model maps sentences & documents to a 768 dimensional dense vector space by
{% include programmingLanguageSelectScalaPythonNLU.html %}
+
```python
sentence_embeddings = BertSentenceEmbeddings.pretrained("sbiobert_jsl_rxnorm_cased", "en", "clinical/models")\
-.setInputCols(["sentence"])\
-.setOutputCol("sbioert_embeddings")
+ .setInputCols(["sentence"])\
+ .setOutputCol("sbioert_embeddings")
```
```scala
-val sentence_embeddings = BertSentenceEmbeddings.pretrained('sbiobert_jsl_rxnorm_cased', 'en','clinical/models')
-.setInputCols("sentence")
-.setOutputCol("sbioert_embeddings")
+val sentence_embeddings = BertSentenceEmbeddings.pretrained("sbiobert_jsl_rxnorm_cased", "en","clinical/models")
+ .setInputCols("sentence")
+ .setOutputCol("sbioert_embeddings")
```
@@ -73,4 +74,4 @@ Gives a 768-dimensional vector representation of the sentence.
|License:|Licensed|
|Edition:|Official|
|Language:|en|
-|Size:|402.0 MB|
\ No newline at end of file
+|Size:|402.0 MB|