### Install & Import Packages

In [2]:
%%capture
!pip install "flair" -q

In [3]:
from pathlib import Path

import flair
from flair.data import Sentence
from flair.datasets import ColumnCorpus
from flair.embeddings import (
    WordEmbeddings, TransformerWordEmbeddings, StackedEmbeddings
)
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from google.colab import drive

drive.mount('/content/drive')
flair.__version__

'0.12.2'

### Import Data

In [4]:
# Shared-drive folder of the project; the data and model folders both live under it.
PROJECT_ROOT = "/content/drive/Shareddrives/CIS522-Project"

# Folder holding the column-formatted NER train/test files.
DATA_PATH = f"{PROJECT_ROOT}/data"
# Folder under which trained taggers are saved and loaded.
MODEL_PATH = f"{PROJECT_ROOT}/models"

In [5]:
# Column layout of the data files: column 0 holds the token text,
# column 1 holds its NER tag.
column_format = {0: "text", 1: "ner"}

# Read the train and test files from DATA_PATH into a flair corpus.
# No dev file is passed here.
corpus = ColumnCorpus(
    DATA_PATH,
    column_format,
    train_file="flair_ner_train.txt",
    test_file="flair_ner_test.txt",
)

# Dictionary of "ner" label values found in the corpus (without an <unk>
# entry); it is required to initialize the tagger.
tag_dictionary = corpus.make_label_dictionary(label_type="ner", add_unk=False)
print(tag_dictionary.get_items())

2023-04-18 23:16:52,013 Reading data from /content/drive/Shareddrives/CIS522-Project/data
2023-04-18 23:16:52,018 Train: /content/drive/Shareddrives/CIS522-Project/data/flair_ner_train.txt
2023-04-18 23:16:52,019 Dev: None
2023-04-18 23:16:52,021 Test: /content/drive/Shareddrives/CIS522-Project/data/flair_ner_test.txt
2023-04-18 23:17:47,830 Computing label dictionary. Progress:


35399it [00:01, 26153.32it/s]

2023-04-18 23:17:49,228 Dictionary created for label 'ner' with 9 values: Drug (seen 84101 times), Strength (seen 59318 times), Form (seen 56550 times), Frequency (seen 48929 times), Route (seen 40228 times), Dosage (seen 32789 times), Reason (seen 13189 times), Duration (seen 3240 times), ADE (seen 2021 times)
['Drug', 'Strength', 'Form', 'Frequency', 'Route', 'Dosage', 'Reason', 'Duration', 'ADE']





### Initialize Weight Dictionary
This is the weight dictionary used by the loss function. The weight for a given entity is set to the ratio between the frequency of the most represented entity and the frequency of the given entity.

In [None]:
# Number of training mentions per entity type, copied from the
# "Dictionary created for label 'ner'" log line printed when the label
# dictionary is built. Update these if the training data changes.
LABEL_COUNTS = {
    'Drug': 84101,
    'Strength': 59318,
    'Form': 56550,
    'Frequency': 48929,
    'Route': 40228,
    'Dosage': 32789,
    'Reason': 13189,
    'Duration': 3240,
    'ADE': 2021,
}


def build_weight_dict(label_counts, tag_prefixes=("S-", "B-", "E-", "I-")):
    """Build inverse-frequency loss weights from per-entity counts.

    The weight of an entity is (count of the most frequent entity) divided by
    (count of that entity), so the most frequent entity gets 1.0 and rarer
    entities get proportionally larger weights.

    Args:
        label_counts: mapping of entity name -> number of occurrences.
        tag_prefixes: prefixes for which an additional "<prefix><entity>" key
            is emitted with the same weight as the entity itself.

    Returns:
        dict mapping each entity name, and each prefixed variant of it, to its
        weight. An empty mapping yields an empty dict.

    Raises:
        ValueError: if any count is zero or negative.
    """
    if not label_counts:
        return {}
    for label, count in label_counts.items():
        if count <= 0:
            raise ValueError(f"count for label {label!r} must be positive, got {count}")

    most_frequent = max(label_counts.values())
    weights = {}
    for label, count in label_counts.items():
        weight = most_frequent / count
        # Entity-level key, as in the original hand-written dictionary.
        weights[label] = weight
        # NOTE(review): the tagger's own tag dictionary lists BIOES-prefixed
        # names (S-Drug, B-Drug, ...). flair appears to match loss_weights keys
        # against those names, in which case the bare entity keys alone would
        # never apply - confirm against the installed flair version.
        for prefix in tag_prefixes:
            weights[prefix + label] = weight
    return weights


weight_dict = build_weight_dict(LABEL_COUNTS)

### Initialize/Load Embeddings & Model

In [None]:
# Saved tagger on Drive. The folder is the same one passed as base_path to
# trainer.train() in the training cell.
TAGGER_CHECKPOINT = Path(MODEL_PATH) / "taggers" / "clinicalbert-crf" / "final-model.pt"

if TAGGER_CHECKPOINT.exists():
    # A copy of the model has been saved to Drive: load it instead of
    # re-initializing, so the notebook runs top to bottom without having to
    # comment code in and out.
    tf_tagger = SequenceTagger.load(str(TAGGER_CHECKPOINT))
else:
    # No saved copy yet: initialize embeddings and model from scratch.
    ## Fine-tuneable transformer embeddings WITH document context
    tf_embeddings = TransformerWordEmbeddings(
        model='emilyalsentzer/Bio_ClinicalBERT',
        layers="-1",
        subtoken_pooling="mean",
        fine_tune=True,
        use_context=True,
        model_max_length=256 # required to fix a weird tensor size mismatch error
    )
    embedding_types = [
        # Word embeddings trained on PubMed and PMC can be stacked here too:
        # WordEmbeddings("pubmed"),
        tf_embeddings
    ]
    embeddings = StackedEmbeddings(embeddings=embedding_types)
    ## Sequence tagger with a CRF on top of the embeddings (no RNN, no reprojection)
    # NOTE(review): with use_crf=True flair appears to use its Viterbi loss,
    # which may ignore loss_weights entirely - confirm against the installed
    # flair version before relying on the class weights.
    # NOTE(review): hidden_size presumably only matters when use_rnn=True - confirm.
    tf_tagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type='ner',
        tag_format='BIOES',
        use_crf=True,
        use_rnn=False,
        reproject_embeddings=False,
        loss_weights=weight_dict
    )

2023-04-15 02:21:10,106 SequenceTagger predicts: Dictionary with 39 tags: O, S-Drug, B-Drug, E-Drug, I-Drug, S-Strength, B-Strength, E-Strength, I-Strength, S-Form, B-Form, E-Form, I-Form, S-Frequency, B-Frequency, E-Frequency, I-Frequency, S-Route, B-Route, E-Route, I-Route, S-Dosage, B-Dosage, E-Dosage, I-Dosage, S-Reason, B-Reason, E-Reason, I-Reason, S-Duration, B-Duration, E-Duration, I-Duration, S-ADE, B-ADE, E-ADE, I-ADE, <START>, <STOP>


### Train Model

In [None]:
# Initialize trainer
trainer = ModelTrainer(tf_tagger, corpus)

# Train on corpus
trainer.train(
    base_path=f"{MODEL_PATH}/taggers/clinicalbert-crf",
    train_with_dev=False,
    max_epochs=50,
    learning_rate=0.005,
    mini_batch_size=16,
    embeddings_storage_mode='none'
)

### Evaluate Model

In [6]:
# Reload the saved tagger from Drive so this cell can be run on its own,
# without the initialization/training cells having been executed.
final_model_file = f"{MODEL_PATH}/taggers/clinicalbert-crf/final-model.pt"
tf_tagger = SequenceTagger.load(final_model_file)

# Score the tagger on the test split against the gold "ner" labels and print
# the detailed report.
result = tf_tagger.evaluate(
    corpus.test,
    gold_label_type='ner',
    mini_batch_size=64,
)
print(result.detailed_results)

Downloading (…)lve/main/config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

2023-04-18 23:18:11,439 SequenceTagger predicts: Dictionary with 39 tags: O, S-Drug, B-Drug, E-Drug, I-Drug, S-Strength, B-Strength, E-Strength, I-Strength, S-Form, B-Form, E-Form, I-Form, S-Frequency, B-Frequency, E-Frequency, I-Frequency, S-Route, B-Route, E-Route, I-Route, S-Dosage, B-Dosage, E-Dosage, I-Dosage, S-Reason, B-Reason, E-Reason, I-Reason, S-Duration, B-Duration, E-Duration, I-Duration, S-ADE, B-ADE, E-ADE, I-ADE, <START>, <STOP>


100%|██████████| 397/397 [13:19<00:00,  2.01s/it]


2023-04-18 23:31:32,356 Evaluating as a multi-label problem: False

Results:
- F-score (micro) 0.9121
- F-score (macro) 0.8242
- Accuracy 0.8446

By class:
              precision    recall  f1-score   support

        Drug     0.8968    0.9390    0.9174     61167
    Strength     0.9414    0.9587    0.9500     42957
        Form     0.9257    0.9212    0.9234     41417
   Frequency     0.8670    0.8726    0.8698     36495
       Route     0.9457    0.9617    0.9536     30583
      Dosage     0.9236    0.9473    0.9353     23506
      Reason     0.6905    0.7782    0.7317      9533
    Duration     0.7659    0.7876    0.7766      1982
         ADE     0.4224    0.3141    0.3603      1299

   micro avg     0.9018    0.9227    0.9121    248939
   macro avg     0.8199    0.8312    0.8242    248939
weighted avg     0.9020    0.9227    0.9121    248939



In [None]:
# Example sentence; both prediction passes below annotate this same object.
sentence = Sentence("Patients on 40 mg of Topelfate and Topoxy twice a day for stomachache generally suffer from headache")

# First pass: token-level predictions (force_token_predictions=True).
# Second pass: default prediction.
# The tagged sentence is printed after each pass.
for predict_options in ({"force_token_predictions": True}, {}):
    tf_tagger.predict(sentence, **predict_options)
    print(sentence.to_tagged_string())

Sentence[15]: "Patients on 40 mg of Topelfate and Topoxy twice a day generally suffer from headache" → ["40"/B-Strength, "mg"/E-Strength, "Topelfate"/S-Drug, "Topoxy"/S-Drug, "twice"/B-Frequency, "a"/I-Frequency, "day"/E-Frequency, "headache"/S-Reason]
Sentence[15]: "Patients on 40 mg of Topelfate and Topoxy twice a day generally suffer from headache" → ["40 mg"/Strength, "Topelfate"/Drug, "Topoxy"/Drug, "twice a day"/Frequency, "headache"/Reason]
