## Imports

In [1]:
from dotenv import load_dotenv
import json
import os
import pandas as pd
import pprint
import requests as req
import urllib.request

import chromadb

from llama_index.core import Document, Settings, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.schema import MetadataMode
from llama_index.embeddings.nomic import NomicEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore

from utils_15B import extract_from_json

In [2]:
%reload_ext watermark
%watermark -v -p llama_index.core

# Python implementation: CPython
# Python version       : 3.11.7
# IPython version      : 8.20.0

# llama_index.core: 0.10.12

Python implementation: CPython
Python version       : 3.11.7
IPython version      : 8.20.0

llama_index.core: 0.10.12



In [3]:
! pip list | grep ^l

# llama_cpp_python                         0.2.53
# llama-index-core                         0.10.12
# llama-index-embeddings-openai            0.1.6
# llama-index-llms-llama-cpp               0.1.3
# llama-index-llms-openai                  0.1.6
# llama-index-vector-stores-chroma         0.0.1
# llamaindex-py-client                     0.1.13

llama_cpp_python                         0.2.53
llama-index-core                         0.10.12
llama-index-embeddings-huggingface       0.1.4
llama-index-embeddings-nomic             0.1.6
llama-index-embeddings-openai            0.1.6
llama-index-llms-llama-cpp               0.1.3
llama-index-llms-openai                  0.1.6
llama-index-vector-stores-chroma         0.0.1
llamaindex-py-client                     0.1.13
loguru                                   0.7.2


## Verify API tokens are available

In [4]:
load_dotenv()  # This loads the variables from .envz
nomic_api_key = os.getenv("NOMIC_API_KEY")
# print(nomic_api_key)

## (Optional) Remove previous JSON files and Chroma DB before starting
<span style="color: darkred; font-size: 18px;">using macOS/Linux %%bash

In [5]:
%%bash
find ./ -type f -name "*.json" -delete

In [6]:
%%bash
rm -rf chroma_db

## Fetch data corresponding to Pfizer PLS
source:  https://www.pfizer.com/science/clinical-trials/plain-language-study-results-summaries/

In [7]:
# Nine trials to consider (has Pfizer PLS, phase 3, completed with results, 2 arms):

# "NCT01720524" A Study To Evaluate Safety And Efficacy Of IV Sildenafil In The Treatment Of Neonates With Persistent Pulmonary Hypertension Of The Newborn
# "NCT01942135" Palbociclib (PD-0332991) Combined With Fulvestrant In Hormone Receptor+ HER2-Negative Metastatic Breast Cancer After Endocrine Failure (PALOMA-3)
# "NCT01945775" A Study Evaluating Talazoparib (BMN 673), a PARP Inhibitor, in Advanced and/​or Metastatic Breast Cancer Patients With BRCA Mutation (EMBRACA Study) (EMBRACA)
# "NCT01964716" 13vPnC Multidose Vial Safety, Tolerability and Immunogenicity Study in Healthy Infants.
# "NCT02130557" A Multicenter Phase 3, Open-Label Study of Bosutinib Versus Imatinib in Adult Patients With Newly Diagnosed Chronic Phase Chronic Myelogenous Leukemia
# "NCT02187744" A Study Of PF-05280014 Or Trastuzumab Plus Taxotere® And Carboplatin In HER2 Positive Breast Cancer In The Neoadjuvant Setting (REFLECTIONS B327-04)
# "NCT02367456" A Combination Study of PF-04449913 (Glasdegib) and Azacitidine In Untreated MDS, AML and CMML Patients (BRIGHT 1012)
# "NCT02603432" A Study Of Avelumab In Patients With Locally Advanced Or Metastatic Urothelial Cancer (JAVELIN Bladder 100)
# "NCT03090191" Clostridium Difficile Vaccine Efficacy Trial (Clover)

In [8]:
def get_trial(nct_id):
    trial = req.get(f"https://clinicaltrials.gov/api/v2/studies/{nct_id}")
    trial_json = trial.json()
    return trial_json

In [9]:
list_of_nct_id = [
    "NCT01720524",
    "NCT01942135",
    "NCT01945775",
    "NCT01964716",
    "NCT02130557",
    "NCT02187744",
    "NCT02367456",
    "NCT02603432",
    "NCT03090191",
]

def get_downloaded_json(list_of_nct_id):
    downloaded_json = []
    for nct_id in list_of_nct_id:
        trial = get_trial(nct_id)
        downloaded_json.append(trial)
        # save locally for reference
        with open(f"{nct_id}.json", "w") as f:
            json.dump(trial, f, indent=4)
    return downloaded_json

downloaded_json = get_downloaded_json(list_of_nct_id)
# downloaded_json[3]

## For each trial, extract a  subset of the data, save to a list
extract_from_json() function found in imported utils

In [10]:
def list_from_extracted_json(downloaded_json):    
    documents_list  = []
    for json_file in downloaded_json:
        extracted_json = extract_from_json(json_file)
        nct_id = json_file['protocolSection']['identificationModule']['nctId']
        # save manipulated JSON file to disk for review
        save_path = f"{nct_id}_extracted.json"
        with open(save_path, "w") as f:
            json.dump(extracted_json, f, indent=4)
        # prepare for indexing
        documents_list.append(extracted_json)
    return documents_list
    
documents_list = list_from_extracted_json(downloaded_json)
# len(documents_list)

### Metadata fields

In [13]:
# all the keys (for metadata)
all_keys = list(documents_list[0].keys())
# all_keys

In [16]:
# to adjust the metadata keys used
llm_keys_to_incude = [
    "Brief title",
    "National Clinical Identification NCT ID",
    "Lead sponsor",
    "Arms group 0 intervention names",
    "Enrollment count",
]

llm_keys_to_exclude = [key for key in all_keys if key not in llm_keys_to_incude]

# for simplicity, do the same for embedding_keys_to_exclude (in this example)
embedding_keys_to_exclude = llm_keys_to_exclude

In [17]:
# llm_keys_to_exclude 
# embedding_keys_to_exclude

['Organization study identification',
 'EudraCT number',
 'Organization',
 'Organization class',
 'Official title',
 'Overall status',
 'Start date',
 'Primary completion date',
 'Completion date',
 'Verification date',
 'Study first submitted date',
 'Results first submitted date',
 'Last update submitted date',
 'Last update posted date',
 'Lead sponsor class',
 'Brief summary',
 'Detailed description',
 'Condition',
 'Conditions keywords',
 'Study type',
 'Phases',
 'Allocation',
 'Intervention model',
 'Primary purpose',
 'Masking',
 'Who is masked',
 'Enrollment type',
 'Arms group 0 label',
 'Arms group 0 type',
 'Arms group 1 label',
 'Arms group 1 type',
 'Arms group 1 intervention names',
 'Arms group 0 intervention type',
 'Arms group 0 intervention name',
 'Arms group 0 intervention labels',
 'Arms group 1 intervention type',
 'Arms group 1 intervention name',
 'Arms group 1 intervention labels',
 'Primary outcome',
 'Primary outcome time frame',
 'Secondary outcome 0 measur

## Llama index

### embedding type

In [18]:
# embed_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=256)

# embed_model = NomicEmbedding(
#     api_key=nomic_api_key,
#     dimensionality=128,
#     model_name="nomic-embed-text-v1.5",
# )
# embedding = embed_model.get_text_embedding("Nomic Embeddings")

# using this as baseline standard
embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

### model type

In [19]:
# using this as baseline standard
# llm = OpenAI()
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo", max_tokens=512)

### global settings

In [20]:
Settings.llm = llm
Settings.embed_model = embed_model

### add metadata

In [21]:
# NOTE:  metata data must be one of (str, int, float, None)
# use json.dumps() to convert lists and dictionaries into strings

def create_llama_docs(documents_list):
    llama_documents = []

    for trial in documents_list:
        trial["Brief title"] = json.dumps(trial["Brief title"])
        trial["Official title"] = json.dumps(trial["Official title"])
        trial["Brief summary"] = json.dumps(trial["Brief summary"])
        trial["Detailed description"] = json.dumps(trial["Detailed description"])
        trial["Arms group 0 intervention name"] = json.dumps(trial["Arms group 0 intervention name"])
        trial["Arms group 1 intervention name"] = json.dumps(trial["Arms group 1 intervention name"])
        trial["Eligibility minimum age"] = json.dumps(trial["Eligibility minimum age"])
        trial["Organization"] = json.dumps(trial["Organization"])

        # create a Llama Document object 
        # with text and excluded meta data for llm and embedding model
        llama_document = Document(
            text=trial["Detailed description"],
#             text=json.dumps(trial), #<== testing
            metadata=trial,
            excluded_llm_metadata_keys=llm_keys_to_exclude,
            excluded_embed_metadata_keys=embedding_keys_to_exclude ,
            metadata_template="{key}=>{value}",
            text_template="Metadata:\n{metadata_str}\n===========================\nContent: \n{content}"
        )
        llama_documents.append(llama_document)
    
    return llama_documents

llama_documents = create_llama_docs(documents_list)

In [22]:
# Example —LLM sees this:
print(llama_documents[4].get_content(metadata_mode=MetadataMode.LLM))

Metadata:
National Clinical Identification NCT ID=>NCT02130557
Brief title=>"A Multicenter Phase 3, Open-Label Study of Bosutinib Versus Imatinib in Adult Patients With Newly Diagnosed Chronic Phase Chronic Myelogenous Leukemia"
Lead sponsor=>Pfizer
Enrollment count=>536
Arms group 0 intervention names=>['Drug: Bosutinib']
Primary outcome group identifications=>['OG000', 'OG001']
Primary outcome group description=>A total sample size of 500 Ph+ participants is required for the study to provide \>= 90% power to detect at least 15% difference (assuming 25% in the imatinib vs 40% in the bosutinib arm) in the MMR rates at 12 months (48 weeks) with a 1-sided alpha of 2.5%, and 2 interim futility analyses at 33% and 66% of patients with adequate follow-up with early stopping for futility only (non-binding, O'Brien-Fleming analog beta spending function).
Primary outcome group confidence interval percentage value=>95
Primary outcome group confidence interval lower limit=>1.072
Primary outcome 

In [23]:
# Example — Embedding model sees this:
print(llama_documents[4].get_content(metadata_mode=MetadataMode.EMBED))

Metadata:
National Clinical Identification NCT ID=>NCT02130557
Brief title=>"A Multicenter Phase 3, Open-Label Study of Bosutinib Versus Imatinib in Adult Patients With Newly Diagnosed Chronic Phase Chronic Myelogenous Leukemia"
Lead sponsor=>Pfizer
Enrollment count=>536
Arms group 0 intervention names=>['Drug: Bosutinib']
Primary outcome group identifications=>['OG000', 'OG001']
Primary outcome group description=>A total sample size of 500 Ph+ participants is required for the study to provide \>= 90% power to detect at least 15% difference (assuming 25% in the imatinib vs 40% in the bosutinib arm) in the MMR rates at 12 months (48 weeks) with a 1-sided alpha of 2.5%, and 2 interim futility analyses at 33% and 66% of patients with adequate follow-up with early stopping for futility only (non-binding, O'Brien-Fleming analog beta spending function).
Primary outcome group confidence interval percentage value=>95
Primary outcome group confidence interval lower limit=>1.072
Primary outcome 

## Embedding

In [24]:
def create_nodes(llama_documents):
#     parser = SentenceSplitter(chunk_size=2560,chunk_overlap=32) # <== adjust
    parser = SentenceSplitter(chunk_size=512,chunk_overlap=20) # <== adjust
    nodes = parser.get_nodes_from_documents(llama_documents)

    for node in nodes:
        node_embedding = embed_model.get_text_embedding(
            node.get_content(metadata_mode=MetadataMode.EMBED)
        )
        node.embedding = node_embedding

    return nodes

nodes = create_nodes(llama_documents)

## Chroma

In [25]:
# Chroma DB collection name
COLLECTION_NAME = "CLINICAL_RAG"

db = chromadb.PersistentClient(path="chroma_db")
print(f"Looking for the {COLLECTION_NAME} collection in the database..." )
if COLLECTION_NAME not in [col.name for col in db.list_collections()]:
    print(f"{COLLECTION_NAME} collection WAS NOT FOUND in Chroma DB, creating...")
    chroma_collection = db.create_collection(COLLECTION_NAME)
    print("Creating vector store...")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    print("Creating vector store index")
    VectorStoreIndex(
        nodes=nodes,
        storage_context=storage_context,
        store_nodes_override=True
    )
    record_count = chroma_collection.count()
    print(f"record count: {record_count}")   
    
else:
    print(f"{COLLECTION_NAME} collection WAS FOUND in Chroma DB")
    COLLECTION_NAME = db.get_collection(COLLECTION_NAME)
    vector_store = ChromaVectorStore(chroma_collection=COLLECTION_NAME)
    print("Restoring vector store index from the collection...")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        embed_model=embed_model,
        store_nodes_override=True
    )
    record_count = COLLECTION_NAME.count()
    print(f"record count: {record_count}")

Looking for the CLINICAL_RAG collection in the database...
CLINICAL_RAG collection WAS NOT FOUND in Chroma DB, creating...
Creating vector store...
Creating vector store index
record count: 9


###  retrieve from Chroma DB and Query

In [26]:
index = VectorStoreIndex.from_vector_store(vector_store)  

#### Q1

In [27]:
query_engine = index.as_query_engine(similarity_top_k=record_count) # <== set k=total number of records

query = "List the different NCT ID in ascending order. How many is that?"
response = query_engine.query(query)
display_response(response)


**`Final Response:`** NCT01720524, NCT01942135, NCT01945775, NCT01964716, NCT02130557, NCT02187744, NCT02367456, NCT02603432, NCT03090191. There are 9 different NCT IDs in total.

In [None]:
# True answer
list_of_nct_id = [
    "NCT01720524",
    "NCT01942135",
    "NCT01945775",
    "NCT01964716",
    "NCT02130557",
    "NCT02187744",
    "NCT02367456",
    "NCT02603432",
    "NCT03090191",
]
# response: High Precision - all correct, High Recall - all retrieved

#### Q2

In [28]:
query_engine = index.as_query_engine(similarity_top_k=3) 
query = "How many NCT ID related to Leukemia? Which are they?"
response = query_engine.query(query)
display_response(response)


**`Final Response:`** There are two NCT IDs related to Leukemia. They are NCT02130557 and NCT02367456.

In [29]:
# True answer
# "NCT02130557",  "NCT02367456",
# response: correct

### Focus on one study

#### Q3

In [30]:
query_engine = index.as_query_engine(similarity_top_k=3)

query = "Who was the lead sponsor of NCT02130557?"
response = query_engine.query(query)
display_response(response)

**`Final Response:`** The lead sponsor of NCT02130557 was Pfizer.

In [31]:
# Correct, but too easy, right?

#### Q4

In [32]:
query_engine = index.as_query_engine(similarity_top_k=6) # <==note k

query = "What intervention was studied in NCT02130557?"
response = query_engine.query(query)
display_response(response)

**`Final Response:`** The intervention studied in NCT02130557 was Drug: Bosutinib.

In [33]:
# True answer: Bosutinib
# response:  correct
# in a previous iteration, wrong answers with these embeddings:</span><br>
# ("text-embedding-3-small", dimensions=256)<br>
# ("Nomic Embeddings")

#### Q5

In [34]:
query_engine = index.as_query_engine(similarity_top_k=7) # <==note k

query = "What was the enrollment count for NCT02130557?"
response = query_engine.query(query)
display_response(response)

**`Final Response:`** 536

In [35]:
# True answer: 536
# response:  correct

###  Q6 - [WIP] not checked, not developed, but fun to do

In [36]:
twenty_questions = [
    "What type of cancer does CML stand for?",
    "Where in the body does CML cancer start?",
    "What protein causes CML cancer?",
    "What does the drug bosutinib target or block?",
    "What two drugs were compared in this clinical trial?",
    "What was the purpose of conducting this clinical trial?",
    "What is a major molecular response (MMR)?",
    "How long was the initial treatment period in this trial?",
    "How many countries participated in this clinical trial?",
    "When did this clinical trial start and end?",
    "What were the inclusion criteria to participate in this trial?",
    "How many total participants started the study?",
    "What percentage of participants achieved MMR at 48 weeks on bosutinib vs imatinib?",
    "What percentage of participants had at least 1 medical problem during the trial?",
    "What were the most common medical problems reported in each treatment group?",
    "How many participants experienced serious medical problems?",
    "How many deaths occurred during the trial and were any attributed to the drugs?",
    "Why did participants stop or discontinue treatment during the trial?",
    "How long could participants receive treatment for after the initial 5 year study period?",
    "Where can you learn more details about this clinical trial?",
]

counter = 0
query_engine = index.as_query_engine(similarity_top_k=3)
for question in twenty_questions:
    counter += 1
    print(f"{counter}: {question}")
    query = question
    response = query_engine.query(query)
    display_response(response)

1: What type of cancer does CML stand for?


**`Final Response:`** Chronic Myelogenous Leukemia

2: Where in the body does CML cancer start?


**`Final Response:`** CML cancer starts in the bone marrow.

3: What protein causes CML cancer?


**`Final Response:`** BCR-ABL oncogene

4: What does the drug bosutinib target or block?


**`Final Response:`** The drug bosutinib targets or blocks certain proteins known as tyrosine kinases.

5: What two drugs were compared in this clinical trial?


**`Final Response:`** Bosutinib and Imatinib were compared in this clinical trial.

6: What was the purpose of conducting this clinical trial?


**`Final Response:`** The purpose of conducting this clinical trial was to evaluate the safety and efficacy of IV Sildenafil in the treatment of neonates with persistent pulmonary hypertension of the newborn.

7: What is a major molecular response (MMR)?


**`Final Response:`** A major molecular response (MMR) is typically defined as a significant reduction in the amount of BCR-ABL transcripts in the blood of patients with chronic myelogenous leukemia (CML). It is an important indicator of treatment response and is often measured by the percentage decrease in BCR-ABL transcripts from baseline levels.

8: How long was the initial treatment period in this trial?


**`Final Response:`** Patients in this trial were planned to be treated and/or followed for approximately 5 years (240 weeks) after randomization until the study closed.

9: How many countries participated in this clinical trial?


**`Final Response:`** The clinical trial mentioned in the context does not provide information about the number of countries that participated.

10: When did this clinical trial start and end?


**`Final Response:`** The clinical trial started when the planned number of approximately 500 Philadelphia Chromosome Positive (Ph+) patients were randomized and is expected to end after approximately 5 years (240 weeks) after randomization until the study has closed.

11: What were the inclusion criteria to participate in this trial?


**`Final Response:`** The inclusion criteria to participate in this trial were being an adult patient with newly diagnosed chronic phase chronic myelogenous leukemia.

12: How many total participants started the study?


**`Final Response:`** Approximately 536 participants started the study.

13: What percentage of participants achieved MMR at 48 weeks on bosutinib vs imatinib?


**`Final Response:`** The percentage of participants who achieved MMR at 48 weeks on bosutinib was estimated to be between 1.072% and 2.233%, while on imatinib, it was assumed to be 25%.

14: What percentage of participants had at least 1 medical problem during the trial?


**`Final Response:`** The percentage of participants who had at least 1 medical problem during the trial is not explicitly provided in the context information.

15: What were the most common medical problems reported in each treatment group?


**`Final Response:`** The most common medical problems reported in the Clostridium difficile vaccine group were related to gastrointestinal issues, while in the Bosutinib group, the most common medical problems reported were associated with hematologic abnormalities.

16: How many participants experienced serious medical problems?


**`Final Response:`** None of the provided context information mentions any participants experiencing serious medical problems.

17: How many deaths occurred during the trial and were any attributed to the drugs?


**`Final Response:`** There is no information provided in the context regarding the number of deaths that occurred during the trial or whether any deaths were attributed to the drugs being studied.

18: Why did participants stop or discontinue treatment during the trial?


**`Final Response:`** Participants stopped or discontinued treatment during the trial due to reasons such as disease progression or intolerance to the study medication.

19: How long could participants receive treatment for after the initial 5 year study period?


**`Final Response:`** Participants could receive treatment for up to approximately 5 years (240 weeks) after the initial 5-year study period.

20: Where can you learn more details about this clinical trial?


**`Final Response:`** You can learn more details about this clinical trial by visiting the official website of the National Institutes of Health (NIH) and searching for the National Clinical Trial (NCT) ID provided in the metadata section.

## Enhanced Prompts

In [37]:
PLS_prompt = "Using everyday language to make the clinical results of a study meaningful and understandable to a lay person, rephrase this: "
expert_prompt = "Emulate a PhD scientist and expert statistician to elaborate on the following: "

In [38]:
def get_response(query, prompt_1, prompt_2):

    query_engine = index.as_query_engine(similarity_top_k=7) # <==note k

    response = query_engine.query(query)
    print(f"Original response:\n{response}")
 
    query_2 = prompt_1 + response.response
    response_2 = query_engine.query(query_2)
    print(f"\nPlain Language Summary:\n{response_2}")
    
    query_2 = prompt_2 + response.response
    response_2 = query_engine.query(query_2)
    print(f"\nExpert Analysis:\n{response_2}")

In [39]:
query = "What was the purpose of study NCT02130557?"
get_response(query, PLS_prompt, expert_prompt)

Original response:
The purpose of study NCT02130557 was to compare the efficacy of Bosutinib versus Imatinib in adult patients with newly diagnosed Chronic Phase Chronic Myelogenous Leukemia.

Plain Language Summary:
The study NCT02130557 aimed to see if Bosutinib works better than Imatinib in adults who were recently diagnosed with Chronic Phase Chronic Myelogenous Leukemia.

Expert Analysis:
The purpose of study NCT02130557 was to investigate and compare the effectiveness of Bosutinib versus Imatinib in adult patients who were newly diagnosed with Chronic Phase Chronic Myelogenous Leukemia. The study aimed to enroll approximately 500 Philadelphia Chromosome Positive (Ph+) patients, with around 250 patients in each treatment arm. The primary outcome of the study was to detect a significant difference in the Major Molecular Response (MMR) rates at 12 months between the two treatment arms. The study design included interim futility analyses to assess the efficacy of the treatments and a

## add record(s)
no error thrown if records are dupes

In [40]:
print(f"record_count as is: {record_count}")

record_count as is: 9


In [41]:
# new unrelateds trial
list_of_nct_id = [
    "NCT00094887", # Anemia, Sickle Cell
    "NCT00108953", # Carcinoma, Hepatocellular
]

downloaded_json = get_downloaded_json(list_of_nct_id)
documents_list = list_from_extracted_json(downloaded_json)

In [42]:
# using previous functions 
llama_documents = create_llama_docs(documents_list)
nodes = create_nodes(llama_documents)
index.insert_nodes(nodes)

In [44]:
try:
    print(f"new DB record count: {chroma_collection.count()}")
    record_count = chroma_collection.count()
    print(record_count)
except:
    print(f"established DB record count: {COLLECTION_NAME.count()}")
    record_count = COLLECTION_NAME.count()
    print(record_count)

new DB record count: 11
11


In [45]:
query_engine = index.as_query_engine(similarity_top_k=record_count) # <== set k=total number of records

query = "List the different NCT ID in ascending order. How many is that?"
response = query_engine.query(query)
display_response(response)


**`Final Response:`** NCT00094887, NCT00108953, NCT01720524, NCT01942135, NCT01945775, NCT01964716, NCT02130557, NCT02187744, NCT02367456, NCT02603432, NCT03090191. There are 11 different NCT IDs in total.

## Using CallbackManager & LlamaDebugHandler

In [46]:
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

# initiaalize debug handler and save to Settings
llm_debug = LlamaDebugHandler()
callback_manager = CallbackManager([llm_debug])
Settings.callback_manager = callback_manager

In [47]:
# helper function
def print_in_out(in_out):
	print(in_out[0][0].payload['messages'][0])
	print("\n\n")
	print(in_out[0][0].payload['messages'][1])

# whenever query_engine.query() is called
query_engine = index.as_query_engine()
query = "How many NCT ID related to Leukemia? Which are they?"
response = query_engine.query(query)
print(response)

# print LLM debug info 
in_out = llm_debug.get_llm_inputs_outputs()
print_in_out(in_out)

**********
Trace: query
    |_CBEventType.QUERY ->  2.085385 seconds
      |_CBEventType.SYNTHESIZE ->  1.202301 seconds
        |_CBEventType.TEMPLATING ->  4.1e-05 seconds
        |_CBEventType.LLM ->  1.199059 seconds
**********
There are two NCT IDs related to Leukemia. They are NCT02130557 and NCT02367456.
system: You are an expert Q&A system that is trusted around the world.
Always answer the query using the provided context information, and not prior knowledge.
Some rules to follow:
1. Never directly reference the given context in your answer.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.



user: Context information is below.
---------------------
Metadata:
National Clinical Identification NCT ID=>NCT02130557
Brief title=>"A Multicenter Phase 3, Open-Label Study of Bosutinib Versus Imatinib in Adult Patients With Newly Diagnosed Chronic Phase Chronic Myelogenous Leukemia"
Lead sponsor=>Pfizer
Enrollment coun