In [None]:
import json
import os
from datetime import date
from medcat.meta_cat import MetaCAT
from medcat.config_meta_cat import ConfigMetaCAT
from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBERT

In [2]:
# Optional: surface INFO-level log messages in the notebook output.
# force=True replaces any handlers already attached to the root logger.
import logging

logging.basicConfig(force=True, level=logging.INFO)

# Set parameters

In [3]:
# relative path to working_with_cogstack folder
_rel_path = os.path.join("..", "..", "..")
# absolute path to working_with_cogstack folder
base_path = os.path.abspath(_rel_path)

# Load mct export
ann_dir = os.path.join(base_path, "data", "medcattrainer_export")
mct_export_name = ""  # name of your mct export
# Joined with os.path.join so the file name ends up *inside* ann_dir; plain
# string concatenation would glue it onto the directory name.
mctrainer_export_path = os.path.join(ann_dir, mct_export_name)

# Load model
model_dir = os.path.join(base_path, "models", "modelpack")
modelpack = ''  # name of modelpack
model_pack_path = os.path.join(model_dir, modelpack)

# will be used to date the trained model (YYYYMMDD)
today = date.today().strftime("%Y%m%d")
# e.g. output_modelpack = os.path.join(model_dir, f"{today}_trained_model")

# Initialise meta_ann models
# If `modelpack` names the .zip archive, the meta models are looked up in the
# folder of the same name without the extension (assumes the pack has already
# been unpacked next to the archive).
if model_pack_path.endswith('.zip'):
    base_dir_meta_models = model_pack_path[:-len('.zip')]
else:
    base_dir_meta_models = model_pack_path


def list_meta_model_names(model_pack_dir):
    """Return the task names of the meta models stored in a model pack folder.

    A meta model is any direct sub-directory of ``model_pack_dir`` whose name
    starts with ``meta_``; the task name is the remainder of that directory
    name. Only direct children are considered because the cells below load
    each model from ``<model_pack_dir>/meta_<name>``, so a nested match could
    never be opened.

    Args:
        model_pack_dir: Path of the unpacked model pack folder.

    Returns:
        Alphabetically sorted list of task names. Empty when the folder does
        not exist or contains no ``meta_*`` sub-directories.
    """
    prefix = 'meta_'
    if not os.path.isdir(model_pack_dir):
        return []
    return sorted(
        entry[len(prefix):]
        for entry in os.listdir(model_pack_dir)
        if entry.startswith(prefix)
        and os.path.isdir(os.path.join(model_pack_dir, entry))
    )


# These Meta_annotation tasks should correspond to the ones labelled in the mcttrainer export
meta_model_names = list_meta_model_names(base_dir_meta_models)
if not meta_model_names:
    # Without this hint the loops below would silently do nothing.
    print(f"No meta_* models found in {base_dir_meta_models!r}; "
          "check `modelpack` and that the model pack has been unpacked.")

In [None]:
# Report which underlying model each meta-annotation task in the pack was built with
for meta_model in meta_model_names:
    meta_model_dir = os.path.join(base_dir_meta_models, f"meta_{meta_model}")
    with open(os.path.join(meta_model_dir, "config.json"), 'r') as config_handle:
        model_section = json.load(config_handle)['model']
    print(f"Model used for meta_{meta_model}:", model_section['model_name'])

<b> NOTE: </b> 
 The name for the classification task can vary. <br> E.g: Task name for 'Experiencer' can be 'Subject'.
 <br>To accommodate this, we have a list that stores the variations for the alternate names. This attribute can be found under `mc.config.general.alternative_category_names`
<br> E.g. for Experiencer, it will be pre-loaded as alternative_category_names = ['Experiencer','Subject']

<b> NOTE: </b> 
 The name for the classes can vary too. <br> E.g: For Presence task, the class name can be 'Not present (False)' or 'False'
 <br>To accommodate this, we have a mapping that stores the variations for the alternate names. This attribute can be found under `mc.config.general.alternative_class_names`
<br> E.g. for Presence, it will be pre-loaded as alternative_class_names = [["Hypothetical (N/A)","Hypothetical"],["Not present (False)","False"],["Present (True)","True"]]

# For LSTM and BERT model

In [None]:
# Fine-tune every meta model found in the model pack on the MedCATtrainer export
for meta_model in meta_model_names:
    # folder of this meta model inside the model pack
    meta_model_dir = os.path.join(base_dir_meta_models, "meta_" + meta_model)

    # load the meta_model
    mc = MetaCAT.load(save_dir_path=meta_model_dir)

    # changing parameters
    mc.config.train['nepochs'] = 15

    # current model will be overwritten
    save_dir_path = meta_model_dir
    # to save the new model elsewhere, uncomment the below line
    #save_dir_path= "test_meta_"+meta_model # Where to save the meta_model and results.

    # train the meta_model
    results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)

    # Save results
    # Make sure the target folder exists (no-op if it is already there), and
    # write through a context manager so the report file is flushed and closed.
    os.makedirs(save_dir_path, exist_ok=True)
    results_file = os.path.join(save_dir_path, 'meta_' + meta_model + '_results.json')
    with open(results_file, 'w') as results_handle:
        json.dump(results['report'], results_handle)

## If you don't have the model packs and are training from scratch
<b>This is rarely needed; it is recommended to always start from a model pack and fine-tune it.</b>

In [None]:
# Train a single meta model from scratch (no existing model pack required)
config = ConfigMetaCAT()
# make sure to change the following parameters:
# config.model['nclasses']
# config.general['category_name']

# change model name if training BERT for the first time
config.model['model_name'] = 'bert'

tokenizer = TokenizerWrapperBERT.load("", config.model['model_variant'])

# Name the outputs after the task configured above rather than relying on a
# `meta_model` variable left over from the fine-tuning loops: without a model
# pack those loops never run, so that variable would not exist.
# NOTE(review): set config.general['category_name'] first, otherwise the folder
# name will contain whatever default the config holds.
meta_model = config.general['category_name']
save_dir_path = f"test_meta_{meta_model}"  # Where to save the meta_model and results.

# Initialise and train meta_model
mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)
results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)

# Save results
# Make sure the target folder exists (no-op if it is already there), and
# write through a context manager so the report file is flushed and closed.
os.makedirs(save_dir_path, exist_ok=True)
results_file = os.path.join(save_dir_path, f"meta_{meta_model}_results.json")
with open(results_file, 'w') as results_handle:
    json.dump(results['report'], results_handle)