### Install & Import Packages

In [2]:
%%capture
!pip install "flair" -q

In [3]:
# Google Drive helper provided by the google.colab package
from google.colab import drive

# flair: sentence container, corpus reader, embeddings, tagger and trainer
import flair
from flair.data import Sentence
from flair.datasets import ColumnCorpus
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Mount Drive so the data and model folders under /content/drive are reachable
drive.mount('/content/drive')

# Last expression of the cell: shows the installed flair version
flair.__version__

'0.12.2'

### Import Data

In [4]:
# Folder holding the column-formatted train/test files read by ColumnCorpus
DATA_PATH: str = "/content/drive/Shareddrives/CIS522-Project/data"
# Root folder under which tagger checkpoints are loaded from and saved to
MODEL_PATH: str = "/content/drive/Shareddrives/CIS522-Project/models"

In [5]:
# Column layout of the data files: column 0 is the token text, column 1 its NER tag
column_format = {0: "text", 1: "ner"}

# Read the augmented training file and the test file from DATA_PATH
corpus = ColumnCorpus(
    DATA_PATH,
    column_format,
    train_file="flair_ner_train_augmented.txt",
    test_file="flair_ner_test.txt",
)

# Build the dictionary of 'ner' labels (without an <unk> entry) and list its items
tag_dictionary = corpus.make_label_dictionary(label_type="ner", add_unk=False)
print(tag_dictionary.get_items())

2023-04-18 23:08:35,034 Reading data from /content/drive/Shareddrives/CIS522-Project/data
2023-04-18 23:08:35,039 Train: /content/drive/Shareddrives/CIS522-Project/data/flair_ner_train_augmented.txt
2023-04-18 23:08:35,043 Dev: None
2023-04-18 23:08:35,047 Test: /content/drive/Shareddrives/CIS522-Project/data/flair_ner_test.txt
2023-04-18 23:09:01,917 Computing label dictionary. Progress:


2776it [00:00, 34206.42it/s]

2023-04-18 23:09:02,047 Dictionary created for label 'ner' with 9 values: Drug (seen 5601 times), ADE (seen 4545 times), Reason (seen 1268 times), Strength (seen 953 times), Route (seen 809 times), Frequency (seen 709 times), Form (seen 607 times), Dosage (seen 584 times), Duration (seen 123 times)
['Drug', 'ADE', 'Reason', 'Strength', 'Route', 'Frequency', 'Form', 'Dosage', 'Duration']





### Initialize Weight Dictionary
This is the weight dictionary used by the loss function. The weight for a given entity is set to the ratio between the frequency of the most represented entity and the frequency of the given entity.

In [None]:
# Number of times each entity label was seen when the label dictionary was built
label_counts = {
    'Drug': 5601,
    'Strength': 953,
    'Form': 607,
    'Frequency': 709,
    'Route': 809,
    'Dosage': 584,
    'Reason': 1268,
    'Duration': 123,
    'ADE': 4545,
}

# Count of the most represented label; it is the numerator of every weight
most_frequent = max(label_counts.values())

# weight(label) = count(most represented label) / count(label),
# so the most represented label gets 1.0 and rarer labels get larger weights
weight_dict = {
    label: most_frequent / count
    for label, count in label_counts.items()
}

### Load Model

In [None]:
# Checkpoint of the previously trained LSTM-CRF tagger
base_model_file = f"{MODEL_PATH}/taggers/lstm-crf/final-model.pt"
tagger = SequenceTagger.load(base_model_file)

# Attach the per-entity weights to the loaded tagger.
# NOTE(review): in flair 0.12.x the loss-weight tensor appears to be built inside
# SequenceTagger.__init__ from the `loss_weights` argument, and a CRF tagger
# uses a Viterbi loss. Assigning `weight_dict` after loading may therefore not
# change the training loss. The keys here are also plain entity names, while
# the tagger predicts BIOES-prefixed tags (S-Drug, B-Drug, ...). Confirm.
tagger.weight_dict = weight_dict

2023-04-15 19:00:47,079 SequenceTagger predicts: Dictionary with 39 tags: O, S-Drug, B-Drug, E-Drug, I-Drug, S-Strength, B-Strength, E-Strength, I-Strength, S-Form, B-Form, E-Form, I-Form, S-Frequency, B-Frequency, E-Frequency, I-Frequency, S-Route, B-Route, E-Route, I-Route, S-Dosage, B-Dosage, E-Dosage, I-Dosage, S-Reason, B-Reason, E-Reason, I-Reason, S-Duration, B-Duration, E-Duration, I-Duration, S-ADE, B-ADE, E-ADE, I-ADE, <START>, <STOP>


### Fine-Tune Model

In [None]:
# Directory that receives the fine-tuned model and training artefacts
output_dir = f"{MODEL_PATH}/taggers/lstm-crf-augmented"

# Fine-tuning hyper-parameters, gathered in one place so they are easy to tweak
train_settings = dict(
    train_with_dev=False,
    max_epochs=20,
    patience=2,
    learning_rate=0.1,
    mini_batch_size=64,
    embeddings_storage_mode='none',
)

# Pair the loaded tagger with the corpus
trainer = ModelTrainer(tagger, corpus)

# Continue training the tagger on the corpus
trainer.train(base_path=output_dir, **train_settings)

### Evaluate Model

In [6]:
# Load the final model from the lstm-crf-augmented directory
augmented_model_file = f"{MODEL_PATH}/taggers/lstm-crf-augmented/final-model.pt"
tagger = SequenceTagger.load(augmented_model_file)

# Evaluate on the corpus test split against the gold 'ner' labels
result = tagger.evaluate(
    corpus.test,
    gold_label_type='ner',
    mini_batch_size=64,
)

# Print the detailed report (overall scores and the per-class table)
print(result.detailed_results)

2023-04-18 23:09:34,137 SequenceTagger predicts: Dictionary with 39 tags: O, S-Drug, B-Drug, E-Drug, I-Drug, S-Strength, B-Strength, E-Strength, I-Strength, S-Form, B-Form, E-Form, I-Form, S-Frequency, B-Frequency, E-Frequency, I-Frequency, S-Route, B-Route, E-Route, I-Route, S-Dosage, B-Dosage, E-Dosage, I-Dosage, S-Reason, B-Reason, E-Reason, I-Reason, S-Duration, B-Duration, E-Duration, I-Duration, S-ADE, B-ADE, E-ADE, I-ADE, <START>, <STOP>


100%|██████████| 397/397 [05:23<00:00,  1.23it/s]


2023-04-18 23:14:58,866 Evaluating as a multi-label problem: False

Results:
- F-score (micro) 0.9026
- F-score (macro) 0.8208
- Accuracy 0.8303

By class:
              precision    recall  f1-score   support

        Drug     0.8785    0.9489    0.9124     61167
    Strength     0.9333    0.9606    0.9468     42957
        Form     0.9123    0.9108    0.9116     41417
   Frequency     0.8539    0.8562    0.8551     36495
       Route     0.9592    0.9429    0.9510     30583
      Dosage     0.9084    0.9172    0.9128     23506
      Reason     0.8111    0.7028    0.7531      9533
         ADE     0.2376    0.6474    0.3476      1299
    Duration     0.8208    0.7740    0.7967      1982

   micro avg     0.8907    0.9149    0.9026    248939
   macro avg     0.8128    0.8512    0.8208    248939
weighted avg     0.8963    0.9149    0.9046    248939



In [None]:
# Free-text example to run through the tagger
example_text = "Patients on 40 mg of Topelfate and Topoxy twice a day generally suffer from headache"
sentence = Sentence(example_text)

# First pass: force per-token predictions and print the tagged sentence
tagger.predict(sentence, force_token_predictions=True)
print(sentence.to_tagged_string())

# Second pass: default prediction on the same sentence, printed again
tagger.predict(sentence)
print(sentence.to_tagged_string())

Sentence[15]: "Patients on 40 mg of Topelfate and Topoxy twice a day generally suffer from headache" → ["40"/B-Strength, "mg"/E-Strength, "Topelfate"/S-Drug, "Topoxy"/S-Drug, "twice"/B-Frequency, "a"/I-Frequency, "day"/E-Frequency, "headache"/S-ADE]
Sentence[15]: "Patients on 40 mg of Topelfate and Topoxy twice a day generally suffer from headache" → ["40 mg"/Strength, "Topelfate"/Drug, "Topoxy"/Drug, "twice a day"/Frequency, "headache"/ADE]
