In [None]:
import os
import sys
# Put the local medgpt and MedCAT checkouts at the front of the import path.
# NOTE(review): machine-specific absolute paths; adjust them for your environment.
sys.path.insert(0, "/data/zeljko/projects/medgpt/")
sys.path.insert(0, "/data/zeljko/projects/MedCAT/")

# Uncomment to restrict the notebook to a single GPU.
#os.environ['CUDA_VISIBLE_DEVICES'] = "0"
# Both Hugging Face cache variables point at the same directory.
os.environ['HF_DATASETS_CACHE'] = "/data/zeljko/.cache/huggingface"
os.environ['TRANSFORMERS_CACHE'] = "/data/zeljko/.cache/huggingface"

# Reload edited modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
from transformers import GPT2Config, Trainer, TrainingArguments, AutoTokenizer, pipeline, GPT2Tokenizer, LlamaForCausalLM, AutoModelForCausalLM, DataCollatorWithPadding
from medgpt.tokenizers.simple_map_tokenizer import SimpleMapTokenizer
from medgpt.models.utils import add_cuis_to_model_and_tokenizer
from medgpt.tokenizers.utils import pack_text, create_labels, pack_examples
import re
import pickle
from medcat.cat import CAT
import pandas as pd
import datasets
import random
import math
from medgpt.config import Config
from transformers import BitsAndBytesConfig
import torch
from medgpt.metrics.next_concept_prediction import ComputePrecisionHF
from medgpt.datasets.data_collator import CollataAndPad
import collections
import json

In [None]:
# Build the experiment config from the main YAML file plus one extra YAML file.
# NOTE(review): these paths live under /home/ubuntu while the first cell uses /data/zeljko;
# both are machine-specific.
config = Config(yaml_path='/home/ubuntu/projects/medgpt/configs/mimic-mistral.yaml', 
                extra_yaml_paths=['/home/ubuntu/projects/medgpt/configs/mimic-seq-len-4096.yaml'])

In [None]:
# Load the MedCAT model pack; the MetaCAT device comes from the config.
cat = CAT.load_model_pack(config.path.cat, meta_cat_config_dict={'general': {'device': config.cat.meta.device}})
# Concept database of the model pack, used below to turn concept ids into names.
cdb = cat.cdb

In [None]:
# Load the just saved models
tokenizer = AutoTokenizer.from_pretrained(config.path.tokenizer.self)

In [None]:
# Prepared risk dataset; the cells below read its 'test' split.
dataset = datasets.load_from_disk(config.path.dataset.prepared_risk_dataset)
dataset

# Automatic risk validation | fuzzy

In [None]:
import os

from openai import AzureOpenAI

# Credentials are read from the environment so that no secret is stored in the notebook.
# Set AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT before running this cell; a missing
# variable raises KeyError here instead of failing later on the first request.
client = AzureOpenAI(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-02-15-preview",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
)

In [None]:
# First system message: persona and general answering instructions.
# NOTE(review): the text contains the typo "You have the follow"; it is left as-is here
# because editing the string changes what is sent to the model.
sc = '''
You are now playing the role of a medical doctor taking an exam,
your goal is to be as accurate as possible and make sure you do not make any mistakes. If you
are unsure about something, think step by step and then answer. You have the follow the instructions
precisely.'''
# Second system message: describes the fuzzy-match counting task and the JSON keys
# ('explanation', 'number_of_matches') that the loops below read from the reply.
sc2 = '''Your first task is to compare how many of the predicted disorders marked as `Predictions:` match the labels marked as `Labels:` in the input.
Something is a match if it is approximately the same disorder (based on the definition of the disorder). 
For example `Diabetes` and `T1DM` are a match, T1DM and T2DM are types of `Diabetes`, i.e. they are more specific. The reverse is also fine, T1DM is a match for Diabetes.
The output should be a json file formatted as follows: {'explanation': <your brief explanation>, 'number_of_matches': <number>}'''

# User message template; filled with the label text and prediction text of one example.
validation_prompt = '''Labels: {labels}
Predictions: {predictions}'''

In [None]:
limit = 5
def ask_openai(prompt, sc, sc2, model='gpt-4-turbo'):
    """Send two system messages and one user message to the chat API and return the reply text.

    Args:
        prompt: Content of the user message.
        sc: Content of the first system message.
        sc2: Content of the second system message.
        model: Model name passed to the API.

    Returns:
        The content of the first choice when its finish_reason is 'stop', otherwise None.
    """
    conversation = [
        {"role": "system", "content": sc},
        {"role": "system", "content": sc2},
        {"role": "user", "content": prompt},
    ]
    # Sampling settings are fixed for every request made through this helper.
    sampling = dict(temperature=0.7, top_p=0.95, frequency_penalty=0, presence_penalty=0, stop=None)
    response = client.chat.completions.create(model=model, messages=conversation, **sampling)

    first_choice = response.choices[0]
    return first_choice.message.content if first_choice.finish_reason == 'stop' else None

In [None]:
limit = 5
def ask_openai_json(prompt, sc, sc2, model='gpt-4-turbo'):
    """Same request as ask_openai, but with the API's JSON-object response format enabled.

    Args:
        prompt: Content of the user message.
        sc: Content of the first system message.
        sc2: Content of the second system message.
        model: Model name passed to the API.

    Returns:
        The raw reply string of the first choice when its finish_reason is 'stop',
        otherwise None. The string is not parsed here; callers run json.loads on it.
    """
    request = {
        'model': model,
        'messages': [
            {"role": "system", "content": sc},
            {"role": "system", "content": sc2},
            {"role": "user", "content": prompt},
        ],
        'response_format': {"type": "json_object"},
        'temperature': 0.7,
        'top_p': 0.95,
        'frequency_penalty': 0,
        'presence_penalty': 0,
        'stop': None,
    }
    response = client.chat.completions.create(**request)

    choice = response.choices[0]
    if choice.finish_reason != 'stop':
        return None
    return choice.message.content

# Risk | Foresight test

In [None]:
from medgpt.sight import Sight
from medgpt.metrics.next_concept_prediction import ComputePrecisionHF

In [None]:
# Load the trained risk model; device placement is left to device_map='auto'.
model = AutoModelForCausalLM.from_pretrained(config.path.trained_model_risk, device_map='auto')

In [None]:
# Same load as the earlier dataset cell (presumably repeated so this section can be run alone).
dataset = datasets.load_from_disk(config.path.dataset.prepared_risk_dataset)
dataset

In [None]:
sight = Sight(tokenizer=tokenizer, model=model, device=model.device, cat=cat)
# Load the pickle inside a context manager so the file handle is closed right after reading.
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
with open(config.path.tokenizer.token_type2tokens, 'rb') as f:
    token_type2tokens = pickle.load(f)
# Reverse lookup of the tokenizer vocabulary: token id -> token string.
id2tkn = {v:k for k,v in tokenizer.vocab.items()}

In [None]:
# Text-generation pipeline around the already loaded model and tokenizer.
gen = pipeline(model=model, tokenizer=tokenizer, task='text-generation')#, device=model.device)

In [None]:
# Compare the model's top-`limit` predictions with the true risk labels of each test example.
# The first tuple in out_ds is the header row; one tuple per kept example follows.
out_ds = [('index', 'past', 'labels', 'preds', 'correct', 'wrong', 'fuzzy_matches')]
limit =  5     # number of predictions per example and minimum number of labels required
cors = []      # per example: predictions that appear in the labels
icors = []     # per example: predictions that do not appear in the labels
all_diseases = []
n_pts = 800    # stop once this many examples have been collected
at_least_one = 0  # number of examples with at least one exact hit

u_gen = False # This switches between using sight or generation

for ind, pt in enumerate(dataset['test']):
    # Find the end
    # Scan backwards for token id 29901; `end` becomes the index right after it,
    # i.e. where the risk labels start.
    end = len(pt['input_ids']) - 1 # I know, not the nicest way to do things, it was late
    for i in range(len(pt['input_ids'])):
        if pt['input_ids'][end - i] == 29901:#28747: # The last token before risks
            end = end - i + 1
            break
    #print(len([x for x in tokenizer.convert_ids_to_tokens(pt['input_ids'][0:end]) if x in cat.cdb.cui2names]))
    # Everything after the marker except the final token (presumably end-of-sequence - confirm).
    labels = tokenizer.convert_ids_to_tokens(pt['input_ids'][end:-1])
    # NOTE(review): predictions are computed before the label-count check below, so examples
    # that are later skipped still pay for a model call.
    if not u_gen:
        # Rank candidate concepts of type 'T-11' (mapped to 'Disorders' later in this notebook).
        preds = [x[0] for x in sight.next_concepts(input_ids=pt['input_ids'][0:end], type_ids=['T-11'], n=limit, token_type2tokens=token_type2tokens, tkn2id=tokenizer.vocab, id2token=id2tkn)]
    else:
        # Free generation: keep the last `limit` generated tokens as the predictions.
        preds = tokenizer.convert_ids_to_tokens(model.generate(torch.tensor([pt['input_ids'][0:end]]).to(model.device), max_length=len(pt['input_ids'][0:end]) + limit).detach().to('cpu')[0][-limit:].tolist())

    # Only examples with at least `limit` labels are evaluated.
    if len(labels) >= limit:
        cor = []
        icor = []
        all_diseases.extend(labels)
        for p in preds:
            if p in labels:
                cor.append(p)
            else:
                icor.append(p)
        cors.append(cor)
        icors.append(icor)

        if len(cor) > 0:
            at_least_one += 1
    
        # Store human-readable names; 'fuzzy_matches' is left empty here.
        out_ds.append((ind, 
                       '\n'.join([cdb.get_name(tkn) for tkn in tokenizer.convert_ids_to_tokens(pt['input_ids'][0:end]) if tkn in cdb.cui2names]),
                       '\n'.join([cdb.get_name(x) for x in labels]), 
                       '\n'.join([cdb.get_name(x) for x in preds]),
                       '\n'.join([cdb.get_name(x) for x in cor]), 
                       '\n'.join([cdb.get_name(x) for x in icor]),
                       ''))
        print(len(out_ds)-1, len(cor), len(icor))
        # out_ds includes the header row, hence '>' rather than '>='.
        if len(out_ds) > n_pts:
            break

In [None]:
# First tuple of out_ds is the header, the rest are data rows.
df = pd.DataFrame(columns=out_ds[0], data=out_ds[1:])
# NOTE(review): the ./metrics directory must already exist.
df.to_csv("./metrics/fs2_risk_validation_{}.csv".format(config.id), index=False)

### Do the fuzzy matching

In [None]:
# Run this cell once; the loop cell below can then be re-run to resume from last_i.
output = []
last_i = 0

In [None]:
# Ask GPT-4, row by row, how many predictions match the labels.
# Re-running this cell resumes at `last_i`; `output` keeps the answers collected so far.
for i in range(last_i, len(df)):
    try:
        # Fix: `sc` and `sc2` are required positional arguments of ask_openai_json. Without
        # them every call raised a TypeError that the except below only printed, so no row
        # was ever evaluated.
        reply = ask_openai_json(validation_prompt.format(labels=df['labels'][i], predictions=df['preds'][i]), sc, sc2, model='gpt-4-turbo')
        # json.loads raises on a None reply (finish_reason other than 'stop'); handled below.
        o = json.loads(reply)
        if 'number_of_matches' in o:
            output.append(o)
            print(i, o)
    except Exception as e:
        print(i, e)
    # Fix: point at the next unprocessed row, so a resumed run does not evaluate row i twice.
    last_i = i + 1

In [None]:
df2 = pd.DataFrame(output)
# Fraction of evaluated rows with more than 0, more than 1 and more than 2 matches.
tuple(len(df2[df2.number_of_matches > k]) / len(df2) for k in (0, 1, 2))

In [None]:
# Persist the GPT-4 judgements; the file name carries the config id.
df2.to_csv("./metrics/fs2_risk_validation_via-gpt_{}.csv".format(config.id), index=False)

# Risk | GPT-4 via Azure

In [None]:
# Reload the prepared risk dataset for the GPT-4 baseline.
dataset = datasets.load_from_disk(config.path.dataset.prepared_risk_dataset)

In [None]:
# NOTE: this cell overwrites the `sc` / `sc2` validation prompts defined earlier.
# First system message: persona and general answering instructions.
sc = '''
You are now playing the role of a medical doctor taking an exam,
your goal is to be as accurate as possible and make sure you do not make any mistakes. If you
are unsure about something, think step by step and then answer. You have the follow the instructions
precisely.'''
# Second system message: the prediction task.
# NOTE(review): contains a `{limit}` placeholder that must be filled before the text is sent.
sc2 = '''Your first question in this medical quiz will consist of a patient history, your goal is to predict {limit} specific disorders
the patient is at risk for in the next month. Please take care that the disorders you are predicting cannot be part of the patient's past. They
have to be new disorders that will most likely affect the patient in the next month. You have to predict specific disorders, for example: you should never say "pulmunary problems"
as this is not a specific disorder, but you can say "pneumonia" as that is a specific disorder.'''

# User message template: patient history followed by the question.
prompt = '''{history}

Given the above patient history, what {limit} specific new disorders is this patient at risk for in the next month?'''

In [None]:
limit = 5
def ask_openai(prompt, sc, sc2, model='gpt-4-turbo'):
    """Redefinition of ask_openai that additionally requests the JSON-object response format.

    This replaces the ask_openai defined earlier in the notebook once the cell has run.

    NOTE(review): the OpenAI API documents that JSON mode requires the word "JSON" to appear
    in the messages; the prediction prompts of this section do not contain it - confirm that
    the requests made through this function succeed.

    Args:
        prompt: Content of the user message.
        sc: Content of the first system message.
        sc2: Content of the second system message.
        model: Model name passed to the API.

    Returns:
        The content of the first choice when its finish_reason is 'stop', otherwise None.
    """
    system_messages = [{"role": "system", "content": text} for text in (sc, sc2)]
    user_message = {"role": "user", "content": prompt}
    response = client.chat.completions.create(
        model=model,
        messages=system_messages + [user_message],
        response_format={"type": "json_object"},
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    answer = response.choices[0]
    if answer.finish_reason == 'stop':
        return answer.message.content
    return None

In [None]:
# State for the GPT-4 prediction loop below; run once, then the loop cell can be resumed.
# Header row first; the four 'correct'/'wrong' columns are filled with empty strings by the loop.
out_ds = [('index', 'past', 'labels', 'preds', 'correct-top-5', 'wrong-top-5', 'correct-top-3', 'wrong-top-3')]
limit =  5     # number of disorders requested and minimum number of labels required
cors = []
icors = []
n_pts = 1000   # stop after this many collected examples
past = []
last_i = 0     # resume position for the loop cell

In [None]:
# Ask GPT-4 for `limit` risk predictions per test example and keep the raw reply.
# Re-running this cell resumes at `last_i`.
# NOTE(review): the ask_openai defined in this section requests JSON mode, and the OpenAI API
# documents that JSON mode needs the word "JSON" in the messages; these prompts do not
# contain it - confirm the requests succeed (failures end up in the except branch as "Skip").
for i in range(last_i, len(dataset['test'])):
    # Turn the token ids back into text, replacing concept ids by their names.
    # The [4:-4] slice trims the ends of the decoded string (presumably special tokens - confirm).
    _text = tokenizer.convert_tokens_to_string([' ' + cat.cdb.get_name(x) if x in cat.cdb.cui2names else x for x in tokenizer.convert_ids_to_tokens(dataset['test'][i]['input_ids'])])[4:-4]
    try:
        # Exactly two parts are expected: history and labels. Anything else raises and is skipped.
        text, labels = [x.strip() for x in _text.split('In the next month the patient is at risk of:')]
        labels = [x.strip() for x in labels.split('(disorder)') if x]
        t = prompt.format(history=text, limit=limit)

        if len(labels) >= limit:
            # Fix: fill the `{limit}` placeholder of the system prompt; it used to be sent to
            # the model as the literal text "{limit}". str.replace is used instead of
            # str.format so that any other braces in sc2 are left alone.
            response = ask_openai(t, sc, sc2.replace('{limit}', str(limit)), model='gpt-4-turbo')
            #response = json.loads(ask_openai(t, sc, sc2, model='gpt-4-turbo'))
            out_ds.append((i, 
                           text,
                           '\n'.join(labels), 
                           response,#'\n'.join([x[0] for x in response['predictions']]),
                           '', 
                           '',
                           '',
                           ''))
            print(i, '; '.join(labels), response)
            # out_ds includes the header row, hence '>' rather than '>='.
            if len(out_ds) > n_pts:
                break
    except Exception as e:
        print(e)
        print("Skip: ", i)
    # Fix: point at the next unprocessed example, so a resumed run does not repeat example i.
    last_i = i + 1

In [None]:
# Fix: use the complete header. Every row in out_ds has 8 fields, so passing only 7 column
# names (out_ds[0][:-1]) made pandas reject the data with a column-count mismatch.
df = pd.DataFrame(columns=out_ds[0], data=out_ds[1:])
df.to_csv("./metrics/fs2_risk_predictions_gpt-4-turbo.csv", index=False)
df

In [None]:
# Switch `sc` / `sc2` back to the validation (fuzzy matching) prompts; later sections
# (BioMistral, MedAlpaca, Meditron) rely on these values.
# NOTE(review): the prompt text contains typos ("You have the follow", "revrse", "is match");
# they are left untouched because editing the strings changes what is sent to the model.
sc = '''
You are now playing the role of a medical doctor taking an exam,
your goal is to be as accurate as possible and make sure you do not make any mistakes. If you
are unsure about something, think step by step and then answer. You have the follow the instructions
precisely.'''
# Task description and the JSON keys ('explanation', 'number_of_matches') expected in the reply.
sc2 = '''Your first task is to compare how many of the predicted disorders marked as `Predictions:` match the labels marked as `Labels:` in the input.
Something is a match if it is approximately the same disorder (based on the definition of the disorder). 
For example `Diabetes` and `T1DM` are a match, T1DM and T2DM are types of `Diabetes`, i.e. they are more specific. The revrse is also fine, T1DM is match for Diabetes.
The output should be a json file formatted as follows: {'explanation': <your brief explanation>, 'number_of_matches': <number>}'''

# User message template; filled with the label text and prediction text of one example.
validation_prompt = '''Labels: {labels}
Predictions: {predictions}'''

In [None]:
# Let GPT-4 count the matches between labels and GPT-4's own predictions for every row.
output = []
for i in range(len(df)):
    msg = validation_prompt.format(labels=df['labels'][i], predictions=df['preds'][i])
    # Fix: one bad reply no longer aborts the whole run. ask_openai_json returns None when the
    # completion did not finish with 'stop', which makes json.loads raise; the API call itself
    # can fail too. Such rows are reported and skipped, like in the other validation loops.
    try:
        o = json.loads(ask_openai_json(msg, sc, sc2, model='gpt-4-turbo'))
    except Exception as e:
        print(i, e)
        continue
    # Keep only answers that carry the field the statistics cell reads.
    if 'number_of_matches' in o:
        output.append(o)
        print(i, o)

In [None]:
df2 = pd.DataFrame(output)
# Fraction of evaluated rows with more than 0, more than 1 and more than 2 matches.
tuple(len(df2[df2.number_of_matches > k]) / len(df2) for k in (0, 1, 2))

In [None]:
# Persist the GPT-4 judgements of the GPT-4 predictions.
df2.to_csv("./metrics/fs2_risk_validation_gpt-4-turbo.csv", index=False)

# Make data

In [None]:
# Build (history text, label list) pairs for the open-source model baselines.
data = []
ds = dataset['test']
for i in range(len(ds)):
    # Decode the ids to text, with concept ids replaced by their names; [4:-4] trims both ends.
    tokens = tokenizer.convert_ids_to_tokens(ds[i]['input_ids'])
    named_tokens = [' ' + cat.cdb.get_name(x) if x in cat.cdb.cui2names else x for x in tokens]
    _text = tokenizer.convert_tokens_to_string(named_tokens)[4:-4]
    parts = [x.strip() for x in _text.split('In the next month the patient is at risk of:')]
    # Keep only examples that split cleanly into history and labels ...
    if len(parts) != 2:
        continue
    labels = [x.strip() for x in parts[1].split('(disorder)') if x]
    # ... and that have at least five labels.
    if len(labels) < 5:
        continue
    text = parts[0].strip()
    data.append((text, labels))

In [None]:
# Number of kept examples versus the size of the test split.
len(data), len(ds)

# Test BioMistral

In [None]:
# NOTE(review): AutoModel and MistralForCausalLM are imported but not used in this cell.
from transformers import AutoModel, AutoTokenizer, MistralForCausalLM
import json

# BioMistral tokenizer and model from the Hugging Face hub; device placement is automatic.
mistral_tokenizer = AutoTokenizer.from_pretrained("BioMistral/BioMistral-7B")
mistral_model = AutoModelForCausalLM.from_pretrained("BioMistral/BioMistral-7B", device_map='auto')

In [None]:
# Text-generation pipeline around the loaded BioMistral model.
gen_mistral = pipeline(model=mistral_model, tokenizer=mistral_tokenizer, task='text-generation')#, device=model.device)

In [None]:
# Prompt template for BioMistral. The evaluation loop splits the generated text on
# "The answer is:" to separate the model's answer from the prompt.
mistral_prompt = '''<s>Please truthfully answer the following question. Please ensure that
your choice is socially unbiased and positive. If you don’t know the answer to a question, 
please don’t share false information.

<patient_history>
{history}
</patient_history>

Given the above patient history, what {limit} specific new disorders is this patient at risk for in the next month? The answer is:'''

In [None]:
# Collected GPT-4 judgements for BioMistral; run this cell to start a fresh evaluation.
output = []

In [None]:
# Run this 10 times or more, get an average and use that - outputs are a bit unpredictable
limit = 5
max_seq_len = 2048

# Fix: resume right after the last example that produced a stored answer and start at 0 on a
# fresh run. The previous start, len(output) + 1, always skipped data[0] and did not
# correspond to a position in `data`, because not every example ends up in `output`.
start_ind = output[-1].get('data_index', -1) + 1 if output else 0
for ind in range(start_ind, len(data)): # subset if needed to the 100 from the test set used by gpt-4
    t = mistral_prompt.format(history=data[ind][0], limit=limit)
    labels = data[ind][1]
    # Leave room (128 tokens) for the generated answer within max_seq_len.
    if len(mistral_tokenizer(t)['input_ids']) < max_seq_len - 128 and len(labels) >= limit:
        o = gen_mistral(t, max_length=max_seq_len, do_sample=True)
        # The generated text contains the prompt; keep what follows the answer marker.
        text_predictions = o[0]['generated_text'].split("The answer is:")[1].strip() #.split(".")[0]   
        if len(text_predictions) > 10: # Just make sure there is something, otherwise skip
            text_labels = ", ".join(labels)

            _prompt = validation_prompt.format(labels=text_labels, predictions=text_predictions)
            try:
                # Fix: `sc` and `sc2` are required positional arguments of ask_openai_json.
                # Without them every call raised a TypeError that was only printed below, so
                # nothing was ever appended to `output`.
                o = json.loads(ask_openai_json(_prompt, sc, sc2, model='gpt-4-1106-preview'))
                print(ind, o)
                if 'number_of_matches' in o:
                    o['prompt'] = _prompt # so we have everything saved
                    o['data_index'] = ind # position in `data`; also used to resume
                    output.append(o)
            except Exception as e:
                print(e)

In [None]:
df = pd.DataFrame(output)
# Fraction of evaluated examples with more than 0, more than 1 and more than 2 matches.
tuple(len(df[df.number_of_matches > k]) / len(df) for k in (0, 1, 2))

In [None]:
# Persist the GPT-4 judgements of the BioMistral predictions.
df.to_csv("./metrics/fs2_risk_validation_biomistral.csv", index=False)

# Test MedAlpaca

In [None]:
# NOTE(review): AutoModel and MistralForCausalLM are imported but not used in this cell.
from transformers import AutoModel, AutoTokenizer, MistralForCausalLM

# The pipeline loads its own model and tokenizer by name; the separately loaded tokenizer
# below is only used to measure the prompt length in the evaluation loop.
gen_med_llama = pipeline("text-generation", model="medalpaca/medalpaca-7b", tokenizer="medalpaca/medalpaca-7b")
tokenizer_med_llama = AutoTokenizer.from_pretrained("medalpaca/medalpaca-7b", model_max_length=2048) # 2048 was the one set in the paper

In [None]:
# Prompt template for MedAlpaca. The evaluation loop splits the generated text on "Answer:"
# to separate the model's answer from the prompt.
medalpaca_prompt = '''Context: {history}

Question: Given the above patient history, what {limit} specific new disorders is this patient at risk for in the next month?

Answer: '''

In [None]:
# Collected GPT-4 judgements for MedAlpaca; run this cell to start a fresh evaluation.
output = []

In [None]:
limit = 5
max_seq_len = 2048
# Fix: resume right after the last example that produced a stored answer and start at 0 on a
# fresh run. The previous start, len(output) + 1, always skipped data[0] and did not
# correspond to a position in `data`, because not every example ends up in `output`.
start_ind = output[-1].get('data_index', -1) + 1 if output else 0
for ind in range(start_ind, len(data)):
    t = medalpaca_prompt.format(history=data[ind][0], limit=limit)
    labels = data[ind][1]
    # Leave room (128 tokens) for the generated answer within max_seq_len.
    if len(tokenizer_med_llama(t)['input_ids']) < max_seq_len - 128 and len(labels) >= limit:
        o = gen_med_llama(t, max_length=max_seq_len, do_sample=True)
        # The generated text contains the prompt; keep what follows the answer marker.
        text_predictions = o[0]['generated_text'].split("Answer:")[1].strip() #.split(".")[0]   
        if len(text_predictions) > 10: # Just make sure there is something, otherwise skip
            text_labels = ", ".join(labels)

            _prompt = validation_prompt.format(labels=text_labels, predictions=text_predictions)
            try:
                # Fix: `sc` and `sc2` are required positional arguments of ask_openai_json.
                # Without them every call raised a TypeError that was only printed below, so
                # nothing was ever appended to `output`.
                o = json.loads(ask_openai_json(_prompt, sc, sc2, model='gpt-4-1106-preview'))
                print(ind, o)
                if 'number_of_matches' in o:
                    o['prompt'] = _prompt # so we have everything saved
                    o['data_index'] = ind # position in `data`; also used to resume
                    output.append(o)
            except Exception as e:
                print(e)

In [None]:
# Persist and display the GPT-4 judgements of the MedAlpaca predictions.
df = pd.DataFrame(output)
df.to_csv("./metrics/fs2_risk_validation_medalpaca.csv", index=False)
df

In [None]:
# Fraction of examples with more than 0, 1 and 2 matches, followed by the number of examples.
tuple(len(df[df.number_of_matches > k]) / len(df) for k in (0, 1, 2)) + (len(df),)

# Test MediTron

In [None]:
# NOTE(review): AutoModel and MistralForCausalLM are imported but not used in this cell.
from transformers import AutoModel, AutoTokenizer, MistralForCausalLM

# The pipeline loads its own model and tokenizer by name; the separately loaded tokenizer
# below is only used to measure the prompt length in the evaluation loop.
gen_meditron = pipeline("text-generation", model="epfl-llm/meditron-7b", tokenizer="epfl-llm/meditron-7b")
tokenizer_meditron = AutoTokenizer.from_pretrained("epfl-llm/meditron-7b")

In [None]:
# System part of the Meditron prompt; {history} is filled with the patient history text.
system_prompt = '''You are a medical doctor answering real-world medical entrance exam questions. Based
on your understanding of basic and clinical science, medical knowledge, and mechanisms
underlying health, disease, patient care, and modes of therapy, answer the question below given the following context:

<patient history>
{history}
</patient history>'''

In [None]:
# Question part. NOTE(review): the number 5 is hard-coded here instead of using `limit`.
user_prompt = 'Given the above patient history, what 5 specific new disorders is this patient at risk for in the next month?'

In [None]:
# Chat-style wrapper combining both parts. The evaluation loop splits the generated text on
# "<|im_start|>answer" to separate the model's answer from the prompt.
meditron_prompt = '''<|im_start|>system
{system}<|im_end|>
<|im_start|>question
{prompt}<|im_end|>
<|im_start|>answer '''

In [None]:
# Collected GPT-4 judgements for Meditron; run this cell to start a fresh evaluation.
output = []
# Fix: reset the resume index. The bare `last_i` expression only displayed a value left over
# from an earlier section, so the Meditron loop (which starts at last_i + 1) began wherever
# that other loop had stopped. With -1 the loop starts at data[0].
last_i = -1

In [None]:
# NOTE(review): leftover debugging expression. It inspects whatever `o` was left holding by
# a previously run loop, so its result depends on execution history; consider removing it.
o[0]['generated_text'].split("<|im_start|>answer")[1]

In [None]:
limit = 5
max_seq_len = 2048
# Resumes after `last_i`, the index of the last example this loop has looked at.
for ind in range(last_i + 1, len(data)):
    t = meditron_prompt.format(system=system_prompt.format(history=data[ind][0]), prompt=user_prompt)
    labels = data[ind][1]
    # Leave room (128 tokens) for the generated answer within max_seq_len.
    if len(tokenizer_meditron(t)['input_ids']) < max_seq_len - 128 and len(labels) >= limit:
        o = gen_meditron(t, max_length=max_seq_len, do_sample=True)
        # The generated text contains the prompt; keep what follows the answer marker.
        text_predictions = o[0]['generated_text'].split("<|im_start|>answer")[1].strip() #.split(".")[0]   
        if len(text_predictions) > 10: # Just make sure there is something, otherwise skip
            text_labels = ", ".join(labels)

            _prompt = validation_prompt.format(labels=text_labels, predictions=text_predictions)
            try:
                # Fix: `sc` and `sc2` are required positional arguments of ask_openai_json.
                # Without them every call raised a TypeError that was only printed below, so
                # nothing was ever appended to `output`.
                o = json.loads(ask_openai_json(_prompt, sc, sc2, model='gpt-4-1106-preview'))
                print(ind, o)
                if 'number_of_matches' in o:
                    o['prompt'] = _prompt # so we have everything saved
                    o['data_index'] = ind # position in `data`, to align answers with examples
                    output.append(o)
            except Exception as e:
                print(e)
    last_i = ind

In [None]:
# Persist and display the GPT-4 judgements of the Meditron predictions.
df = pd.DataFrame(output)
df.to_csv("./metrics/fs2_risk_validation_meditron.csv", index=False)
df

In [None]:
# Fraction of examples with more than 0, 1 and 2 matches, followed by the number of examples.
tuple(len(df[df.number_of_matches > k]) / len(df) for k in (0, 1, 2)) + (len(df),)

# Test next concept prediction

In [None]:
from datasets import Dataset

In [None]:
# Rebuild the config with the same two YAML files as at the top of the notebook.
config = Config(yaml_path='/home/ubuntu/projects/medgpt/configs/mimic-mistral.yaml', 
                extra_yaml_paths=['/home/ubuntu/projects/medgpt/configs/mimic-seq-len-4096.yaml'])

In [None]:
# NOTE: this replaces `model` with the model stored at config.path.trained_model
# (the risk model loaded earlier came from config.path.trained_model_risk).
model = AutoModelForCausalLM.from_pretrained(config.path.trained_model, use_flash_attention_2=False, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.path.tokenizer.self)

In [None]:
# NOTE: this replaces `dataset` with the prepared dataset split (not the risk dataset).
dataset = datasets.load_from_disk(config.path.dataset.prepared_dataset_split)

In [None]:
# Drop the patient id column from all splits.
# NOTE(review): re-running this cell fails because the column is already gone.
dataset = dataset.remove_columns(['patient_id'])

In [None]:
# The split that all metrics below are computed on.
test_set_to_use = dataset['test']
dataset

In [None]:
# Add labels, if not added loss makes no sense but metrics are still fine
# Load the pickle inside a context manager so the file handle is closed right after reading.
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
with open(config.path.dataset.cuis_in_text, 'rb') as f:
    cuis = pickle.load(f)
# Token ids of all concepts that occur in the text.
cui_ids = set(tokenizer.convert_tokens_to_ids(list(cuis)))
test_set_to_use = test_set_to_use.map(
    lambda examples: create_labels(examples, config, cui_ids),
    batched=True,
    batch_size=1000,
    num_proc=16,
)

In [None]:
def get_metrics(metrics_data=None, test_set_to_use=None, trainer=None, m_file=None, f_name=None):
    """Run prediction over the test set in chunks of 20 and accumulate the metrics.

    Uses the notebook-global `compute_metrics` object: before every chunk its `time_data`
    and `type_data` attributes are set to the chunk's 'time' and 'token_type' columns.

    Args:
        metrics_data: Running metrics passed to the first compute_metrics call (None to start fresh).
        test_set_to_use: Dataset to evaluate; must support len() and slicing.
        trainer: Object whose predict(dataset) output is passed to compute_metrics.
        m_file: Open text file; one comma-separated summary line is written to it.
        f_name: Path the final metrics are pickled to; also the first field of the summary line.

    Returns:
        The accumulated metrics_data.
    """
    size = 20
    n_chunks = int(math.ceil(len(test_set_to_use) / size))
    for chunk_no in range(n_chunks):
        lo = chunk_no * size
        _dataset = Dataset.from_dict(test_set_to_use[lo:lo + size])
        compute_metrics.time_data = _dataset['time']
        compute_metrics.type_data = _dataset['token_type']
        if not len(_dataset):
            continue
        p = trainer.predict(_dataset)
        metrics_data = compute_metrics(p, metrics_data)['metrics_data']

    # Order: precision all/new/old, then recall all/new/old.
    summary = tuple(metrics_data[kind][scope]
                    for kind in ('precision', 'recall')
                    for scope in ('all', 'new', 'old'))
    m_file.write("{}, {}, {}, {}, {}, {}, {}\n".format(f_name, *summary))
    print(f_name, *summary)
    with open(f_name, 'wb') as f:
        pickle.dump(metrics_data, f)

    return metrics_data

In [None]:
# Load the pickle inside a context manager so the file handle is closed right after reading.
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
with open(config.path.tokenizer.token_type2tokens, 'rb') as f:
    token_type2tokens = pickle.load(f)
# Reverse lookup of the tokenizer vocabulary: token id -> token string.
id2tkn = {v:k for k,v in tokenizer.vocab.items()}

In [None]:
# Every token type present in the mapping.
all_types = set(token_type2tokens.keys())
all_types

In [None]:
# Build a Trainer; in this notebook it is only used through trainer.predict inside get_metrics.
targs = config.train.hf_training_arguments.to_dict()
# Set the dynamic dir for output
targs['output_dir'] = config.path.dataset.hf_output_folder
training_args = TrainingArguments(**targs)
# Collator that pads batches using the tokenizer's pad token id.
dc = CollataAndPad(max_seq_len=config.train.max_timeline_len, pad_id=tokenizer.pad_token_id)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=None,
    # Metrics are not computed by the Trainer; get_metrics calls compute_metrics itself.
    compute_metrics=None,
    data_collator=dc,
)

In [None]:
import os

summary_path = "./metrics/summary-mistral.txt"
# Fix: write the CSV header when the summary file is new or empty. Later cells read this file
# with pd.read_csv and access `file_name` and ' precision new' style columns, which only
# exist if the header line is present; it used to be commented out.
write_header = not os.path.exists(summary_path) or os.path.getsize(summary_path) == 0

# (topk, time range in days) combinations, in the same order as before:
# topk=1 for 30 / 365 / 1000000 days, then topk=5 and topk=10 for 30 days.
settings = [(1, 30), (1, 365), (1, 1000000), (5, 30), (10, 30)]

# The context manager closes the file even when a run fails half way through.
with open(summary_path, 'a', buffering=1) as m_file:
    if write_header:
        m_file.write("file_name, precision all, precision new, precision old, recall all, recall new, recall old\n")

    for types in [{'T-55'}, {'T-18'}, {'T-39'}]: #all_types, {'T-11'}, 
        _types = list(types)[0] if len(types) == 1 else 'all_types'
        for topk, timerange in settings:
            # `compute_metrics` must stay a notebook global: get_metrics reads it by name.
            compute_metrics = ComputePrecisionHF(id2tkn, 
                                             prediction_scope='time_range', 
                                             topk=topk, # 1, 5, 10
                                             start=0, # 0, 10, 20, 50, 100
                                             return_all_metrics=True, 
                                             batch_size=1000, 
                                             select_token_types=types,
                                             type_data=test_set_to_use['token_type'],
                                             token_type2tokens=token_type2tokens,
                                             time_data=test_set_to_use['time'], 
                                             time_range=timerange*24*60*60, # days -> seconds
                                             ignore_label_status=False,
                                             min_time_left=24*60*60)
            f_name = f"./metrics/mistral-start-0_topk-{topk}_time_range-{timerange}_types-{_types}.pickle"
            get_metrics(None, test_set_to_use, trainer, m_file, f_name)

In [None]:
# Read the summary written by the metrics sweep; the first line of the file is used as the header.
df = pd.read_csv("./metrics/summary-mistral.txt")

# Test prompts

In [None]:
from medgpt.sight import Sight

In [None]:
sight = Sight(tokenizer=tokenizer, model=model, device=model.device, cat=cat)
# Load the pickle inside a context manager so the file handle is closed right after reading.
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
with open(config.path.tokenizer.token_type2tokens, 'rb') as f:
    token_type2tokens = pickle.load(f)
# Reverse lookup of the tokenizer vocabulary: token id -> token string.
id2tkn = {v:k for k,v in tokenizer.vocab.items()}

In [None]:
# Free-text prompt to try out; empty by default.
t = ''''''

In [None]:
# Print the 50 highest ranked next concepts for the prompt: x[1], concept name, x[0].
# type_ids=None places no restriction on the concept type (presumably - confirm in Sight).
for x in sight.next_concepts(t, type_ids=None, n=50, token_type2tokens=token_type2tokens, tkn2id=tokenizer.vocab, id2token=id2tkn):
    print(x[1], cat.cdb.get_name(x[0].strip()), x[0])

# Get the metrics into the right format

In [None]:
# Read the metrics summary; the first line of the file is used as the header.
df = pd.read_csv("./metrics/summary-mistral.txt")

In [None]:
# Extracts topk, time range (days) and type code from a metrics pickle file name.
pattern = r"topk-(?P<topk>\d+)_time_range-(?P<time_range>\d+)_types-(?P<types>.+?)\.pickle"

In [None]:
# Display names for the type codes that appear in the metrics file names.
type_names = {
    'all_types': 'All',
    'T-11': 'Disorders',
    'T-18': 'Findings',
    'T-55': 'Substances',
    'T-39': 'Procedures',
}

at = []
t = []
t_days = []
for val in df.file_name:
    found = re.search(pattern, val)
    # Fail with a clear message instead of an AttributeError on None.
    if found is None:
        raise ValueError("Cannot parse metrics file name: {!r}".format(val))
    params = found.groupdict()
    at.append(params['topk'])
    # Fix: an unmapped type code used to append nothing, leaving `t` shorter than the frame
    # and breaking the column assignment below. Unknown codes are now kept as-is.
    t.append(type_names.get(params['types'], params['types']))
    # 1000000 days is the notebook's stand-in for "no time limit".
    t_days.append('inf' if params['time_range'] == '1000000' else params['time_range'])
df['@'] = at
df['Type'] = t
df['T - days'] = t_days

In [None]:
# "precision/recall" strings with two decimals, for new and for recurring concepts.
# The column names carry a leading space because of the ", " separator in the summary file.
_new = ["{:.2f}/{:.2f}".format(precision, recall)
        for precision, recall in zip(df[' precision new'], df[' recall new'])]
_recurring = ["{:.2f}/{:.2f}".format(precision, recall)
              for precision, recall in zip(df[' precision old'], df[' recall old'])]
df['New P/R'] = _new
df['Recurring P/R'] = _recurring

In [None]:
# Emit the summary table as LaTeX source (print keeps the line breaks intact).
print(df[['Type', 'T - days', '@', 'New P/R', 'Recurring P/R']].to_latex(index=False))

# Top and Bottom 10

In [None]:
# Load the pickle inside a context manager so the file handle is closed right after reading.
# NOTE(review): this file name has no "mistral-" prefix and uses type T-11, which the metrics
# sweep in this notebook does not produce - presumably it comes from an earlier run; confirm.
with open("./metrics/start-0_topk-1_time_range-30_types-T-11.pickle", 'rb') as f:
    m = pickle.load(f)

In [None]:
# Display the loaded metrics. NOTE(review): this may print a very large structure.
m

In [None]:
# Per concept: (precision, positives, negatives) for "new" predictions.
new_positives = m['positives']['new']
new_negatives = m['negatives']['new']
prec = {}
for cui, n_pos in new_positives.items():
    # Concepts without an entry in the negatives count as 0 negatives.
    n_neg = new_negatives.get(cui, 0)
    prec[cui] = (n_pos / (n_pos + n_neg), n_pos, n_neg)

In [None]:
# Sort concepts by precision, lowest first.
sorted_data = sorted(prec.items(), key=lambda x: x[1][0], reverse=False)

In [None]:
# (name, concept id, precision, positives, negatives) for concepts with more than 10 positives.
[(cat.cdb.get_name(x[0]), x[0], x[1][0], x[1][1], x[1][2]) for x in sorted_data if x[1][1] > 10]