In [1]:
import json
import os
from datetime import date
from medcat.cat import CAT
from medcat.meta_cat import MetaCAT
from medcat.config_meta_cat import ConfigMetaCAT
from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBERT

In [2]:
# Optional: run this cell to see INFO-level log messages in the notebook.
# force=True replaces any handlers that were already attached to the root logger.
import logging

logging.basicConfig(force=True, level=logging.INFO)

### Load the model pack with MetaCATs


In [None]:
# Location of the .zip model pack that contains the MetaCAT models.
model_pack = "<enter path to the model pack>"
# MedCATtrainer (MCT) export that is handed to train_from_json further down.
mctrainer_export = "<enter mct export location>"

In [4]:
# Load model
# Builds the CAT object from the model pack path configured in `model_pack`;
# the MetaCAT models inspected below are read from this object.
cat = CAT.load_model_pack(model_pack)

In [5]:

# Check what meta cat models are in this model pack.
# `cat._meta_cats` is the list of MetaCAT models held by the loaded CAT object.
print(f'There are: {len(cat._meta_cats)} meta cat models in this model pack.')

There are: 3 meta cat models in this model pack.


In [6]:
# Summary of the first MetaCAT model: category name, class-to-index mapping and model type.
print(cat._meta_cats[0])

{
  "Category Name": "Temporality",
  "Description": "No description",
  "Classes": {
    "Past": 0,
    "Recent": 1,
    "Future": 2
  },
  "Model": "bert"
}


In [7]:
# Summary of the second MetaCAT model: category name, class-to-index mapping and model type.
print(cat._meta_cats[1])

{
  "Category Name": "Presence",
  "Description": "No description",
  "Classes": {
    "Hypothetical (N/A)": 1,
    "Not present (False)": 0,
    "Present (True)": 2
  },
  "Model": "bert"
}


In [8]:
# Summary of the third MetaCAT model: category name, class-to-index mapping and model type.
print(cat._meta_cats[2])

{
  "Category Name": "Experiencer",
  "Description": "No description",
  "Classes": {
    "Family": 1,
    "Other": 0,
    "Patient": 2
  },
  "Model": "bert"
}


<b> NOTE: </b> 
 The name for the classification task can vary. E.g. the Category Name for 'Experiencer' can be 'Subject', because it was configured and annotated that way in MedCATTrainer, while the model expects 'Experiencer'.
 
 To accommodate this, we have a list that stores the alternative names. This attribute can be found under `mc.config.general.alternative_category_names`

E.g. for Experiencer, it will be pre-loaded as alternative_category_names = ['Experiencer','Subject']

Set this list to ensure that, during training / fine-tuning, the model is aware of the alternative names for its category.

In [None]:
# Alternative category names currently configured for the first MetaCAT model.
print(cat._meta_cats[0].config.general.alternative_category_names)

💡 In case you are using older modelpacks, the above field will be empty. In that case, please run the following code:

In [None]:
# Only run in case the above output is an empty list
# Each inner list groups the names under which one classification task may appear.
category_name_mapping = [["Presence"],["Temporality","Time"],["Experiencer","Subject"]]
# Map every known name to its whole group, so the lookup works from any of the aliases.
lookup = {item: group for group in category_name_mapping for item in group}

for meta_model in cat._meta_cats:
    category_name = meta_model.config.general.category_name
    # Fall back to the model's own name: without a default, an unknown category
    # would overwrite the list field with None.
    meta_model.config.general.alternative_category_names = lookup.get(category_name, [category_name])

<b> NOTE: </b> 
 The name for the classes can vary too. Some sites may have trained a MetaCAT model for the same task, but called a class value a slightly different name.
 
 E.g: For the Presence task, the class name can be 'Not present (False)' or 'False'
 
 To accommodate this, we have a mapping that stores the alternative names. This attribute can be found under `mc.config.general.alternative_class_names`

 E.g. for Presence, it will be pre-loaded as alternative_class_names = [["Hypothetical (N/A)","Hypothetical"],["Not present (False)","False"],["Present (True)","True"]]

In [None]:
# Alternative class names currently configured for the first MetaCAT model.
print(cat._meta_cats[0].config.general.alternative_class_names)

💡 In case you are using older modelpacks, the above field will be empty. In that case, please run the following code:

In [None]:
# Only run in case the above output is an empty list
# Keyed by category name (aliases of a category get their own entry); each inner
# list groups the labels that refer to the same class.
class_name_mapping =  {
    "Temporality": [["Past"], ["Recent", "Present"], ["Future"]],
    "Time": [["Past"], ["Recent", "Present"], ["Future"]],
    "Experiencer": [["Family"], ["Other"], ["Patient"]],
    "Subject": [["Family"], ["Other"], ["Patient"]],
    "Presence": [["Hypothetical (N/A)", "Hypothetical"], ["Not present (False)", "False"], ["Present (True)", "True"]]
}

for meta_model in cat._meta_cats:
    general_config = meta_model.config.general
    class_names = class_name_mapping.get(general_config.category_name)
    if class_names is None:
        # Unknown category: keep what the model pack already has instead of
        # aborting with a KeyError after some models were already updated.
        print(f"No alternative class names known for '{general_config.category_name}'; leaving it unchanged.")
        continue
    general_config.alternative_class_names = class_names

# For LSTM and BERT model

In [None]:
# Train the first meta cat model - 'Temporality' Task.
meta_cat = cat._meta_cats[0]

# to overwrite the existing model, resave the fine-tuned model with the same model pack dir
meta_cat_task = meta_cat.config.general.category_name
model_pack_dir = '<enter path to meta model pack>'
save_dir_path = os.path.join(model_pack_dir, "meta_" + meta_cat_task)

# to save the new model elsewhere, uncomment the below line
#save_dir_path= "test_meta_"+meta_cat_task # Where to save the meta_model and results. 

# train the meta_model
results = meta_cat.train_from_json(mctrainer_export, save_dir_path=save_dir_path)

# Save results
# Make sure the target directory exists, and write through a context manager so
# the file handle is flushed and closed even if serialisation fails.
os.makedirs(save_dir_path, exist_ok=True)
results_path = os.path.join(save_dir_path, 'meta_' + meta_cat_task + '_results.json')
with open(results_path, 'w') as results_file:
    json.dump(results['report'], results_file)

## If you don't have a model pack and are training from scratch
<b>⚠️ This is very rare; it is recommended to always start from a model pack and then fine-tune it.</b>

In [None]:
config = ConfigMetaCAT()
# make sure to change the following parameters:
# config.model['nclasses']
# config.general['category_name']

# change model name if training BERT for the first time
config.model['model_name'] = 'bert'

tokenizer = TokenizerWrapperBERT.load("", config.model['model_variant'])

# Take the task name from this cell's own config. The fine-tuning cell's
# `meta_cat_task` only exists when a model pack was loaded, which is exactly
# what this from-scratch path does not have.
scratch_task_name = config.general['category_name']
if not scratch_task_name:
    raise ValueError("Set config.general['category_name'] before training from scratch.")

save_dir_path = "test_meta_" + scratch_task_name  # Where to save the meta_model and results.

# Initialise and train meta_model
mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)
results = mc.train_from_json(mctrainer_export, save_dir_path=save_dir_path)

# Save results
# Make sure the target directory exists, and write through a context manager so
# the file handle is flushed and closed even if serialisation fails.
os.makedirs(save_dir_path, exist_ok=True)
results_path = os.path.join(save_dir_path, 'meta_' + scratch_task_name + '_results.json')
with open(results_path, 'w') as results_file:
    json.dump(results['report'], results_file)