In [66]:
pip install ipywidgets

Collecting ipywidgets
  Downloading ipywidgets-8.1.2-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.10 (from ipywidgets)
  Downloading widgetsnbextension-4.0.10-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.10 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.10-py3-none-any.whl.metadata (4.1 kB)
Downloading ipywidgets-8.1.2-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.4/139.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading jupyterlab_widgets-3.0.10-py3-none-any.whl (215 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.0/215.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading widgetsnbextension-4.0.10-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: widgetsnbextension, jupyt

In [4]:
%%bash
find ./ -type f -name "*.json" -delete

In [1]:
from dotenv import load_dotenv
import json
import os
import pandas as pd
import pprint
import requests as req
import urllib.request

import chromadb

from llama_index.core import Document, Settings, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.schema import MetadataMode
#from llama_index.embeddings.nomic import NomicEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore
import requests as req

from utils_15B import extract_from_json

load_dotenv()  # This loads the variables from .envz
nomic_api_key = os.getenv("NOMIC_API_KEY")
# print(nomic_api_key)

In [2]:
import requests as req

def get_trial(nct_id=None, query_cond=None):
    if nct_id:
        trial = req.get(f"https://clinicaltrials.gov/api/v2/studies/{nct_id}")
        trial_json = trial.json()
        return trial_json
    elif query_cond:
        trials = req.get(f"https://clinicaltrials.gov/api/v2/studies?query.cond={query_cond}")
        trials_json = trials.json()
        return trials_json
    else:
        return None

In [3]:
list_of_nct_id = [
    "NCT01720524",
    "NCT01942135",
    "NCT01945775",
    "NCT01964716",
    "NCT02130557",
    "NCT02187744",
    "NCT02367456",
    "NCT02603432",
    "NCT03090191",
]

def get_downloaded_json(list_of_nct_id):
    downloaded_json = []
    for nct_id in list_of_nct_id:
        trial = get_trial(nct_id)
        downloaded_json.append(trial)
        # save locally for reference
        with open(f"{nct_id}.json", "w") as f:
            json.dump(trial, f, indent=4)
    return downloaded_json

downloaded_json = get_downloaded_json(list_of_nct_id)
# downloaded_json[3]

In [4]:
def get_downloaded_json(list_of_nct_id=None, query_cond=None):
    downloaded_json = []
    if list_of_nct_id:
        for nct_id in list_of_nct_id:
            trial = get_trial(nct_id=nct_id)
            downloaded_json.append(trial)
            # save locally for reference
            with open(f"{nct_id}.json", "w") as f:
                json.dump(trial, f, indent=4)
    elif query_cond:
        trials = get_trial(query_cond=query_cond)
        for trial in trials:
            nct_id = trial['NCTId']  # assuming the NCTId is available in the trial data
            downloaded_json.append(trial)
            # save locally for reference
            with open(f"{nct_id}.json", "w") as f:
                json.dump(trial, f, indent=4)
    return downloaded_json

In [5]:
def list_from_extracted_json(downloaded_json):    
    documents_list  = []
    for json_file in downloaded_json:
        extracted_json = extract_from_json(json_file)
        nct_id = json_file['protocolSection']['identificationModule']['nctId']
        # save manipulated JSON file to disk for review
        save_path = f"{nct_id}_extracted.json"
        with open(save_path, "w") as f:
            json.dump(extracted_json, f, indent=4)
        # prepare for indexing
        documents_list.append(extracted_json)
    return documents_list
    
documents_list = list_from_extracted_json(downloaded_json)
# len(documents_list)

In [6]:
# all the keys (for metadata)
all_keys = list(documents_list[0].keys())
#all_keys

In [7]:
# to adjust the metadata keys used
llm_keys_to_incude = [
    "Brief title",
    "National Clinical Identification NCT ID",
    "Lead sponsor",
    "Arms group 0 intervention names",
    "Enrollment count",
]

llm_keys_to_exclude = [key for key in all_keys if key not in llm_keys_to_incude]

# for simplicity, do the same for embedding_keys_to_exclude (in this example)
embedding_keys_to_exclude = llm_keys_to_exclude

In [8]:
# using this as baseline standard
embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

In [9]:
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo", max_tokens=512)

In [10]:
# NOTE:  metata data must be one of (str, int, float, None)
# use json.dumps() to convert lists and dictionaries into strings

def create_llama_docs(documents_list):
    llama_documents = []

    for trial in documents_list:
        trial["Brief title"] = json.dumps(trial["Brief title"])
        trial["Official title"] = json.dumps(trial["Official title"])
        trial["Brief summary"] = json.dumps(trial["Brief summary"])
        trial["Detailed description"] = json.dumps(trial["Detailed description"])
        trial["Arms group 0 intervention name"] = json.dumps(trial["Arms group 0 intervention name"])
        trial["Arms group 1 intervention name"] = json.dumps(trial["Arms group 1 intervention name"])
        trial["Eligibility minimum age"] = json.dumps(trial["Eligibility minimum age"])
        trial["Organization"] = json.dumps(trial["Organization"])

        # create a Llama Document object 
        # with text and excluded meta data for llm and embedding model
        llama_document = Document(
            text=trial["Detailed description"],
#             text=json.dumps(trial), #<== testing
            metadata=trial,
            excluded_llm_metadata_keys=llm_keys_to_exclude,
            excluded_embed_metadata_keys=embedding_keys_to_exclude ,
            metadata_template="{key}=>{value}",
            text_template="Metadata:\n{metadata_str}\n===========================\nContent: \n{content}"
        )
        llama_documents.append(llama_document)
    
    return llama_documents

llama_documents = create_llama_docs(documents_list)

In [11]:
# Example —LLM sees this:
#print(llama_documents[4].get_content(metadata_mode=MetadataMode.LLM))

In [12]:
# Example — Embedding model sees this:
#print(llama_documents[4].get_content(metadata_mode=MetadataMode.EMBED))

In [13]:
def create_nodes(llama_documents):
#     parser = SentenceSplitter(chunk_size=2560,chunk_overlap=32) # <== adjust
    parser = SentenceSplitter(chunk_size=512,chunk_overlap=20) # <== adjust
    nodes = parser.get_nodes_from_documents(llama_documents)

    for node in nodes:
        node_embedding = embed_model.get_text_embedding(
            node.get_content(metadata_mode=MetadataMode.EMBED)
        )
        node.embedding = node_embedding

    return nodes

nodes = create_nodes(llama_documents)

In [14]:
# Chroma DB collection name
COLLECTION_NAME = "CLINICAL_RAG"

db = chromadb.PersistentClient(path="chroma_db")
print(f"Looking for the {COLLECTION_NAME} collection in the database..." )
if COLLECTION_NAME not in [col.name for col in db.list_collections()]:
    print(f"{COLLECTION_NAME} collection WAS NOT FOUND in Chroma DB, creating...")
    chroma_collection = db.create_collection(COLLECTION_NAME)
    print("Creating vector store...")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    print("Creating vector store index")
    VectorStoreIndex(
        nodes=nodes,
        storage_context=storage_context,
        store_nodes_override=True
    )
    record_count = chroma_collection.count()
    print(f"record count: {record_count}")   
    
else:
    print(f"{COLLECTION_NAME} collection WAS FOUND in Chroma DB")
    COLLECTION_NAME = db.get_collection(COLLECTION_NAME)
    vector_store = ChromaVectorStore(chroma_collection=COLLECTION_NAME)
    print("Restoring vector store index from the collection...")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        embed_model=embed_model,
        store_nodes_override=True
    )
    record_count = COLLECTION_NAME.count()
    print(f"record count: {record_count}")

Looking for the CLINICAL_RAG collection in the database...
CLINICAL_RAG collection WAS FOUND in Chroma DB
Restoring vector store index from the collection...
record count: 9


In [15]:
index = VectorStoreIndex.from_vector_store(vector_store)  

In [26]:
query_engine = index.as_query_engine(similarity_top_k=record_count) # <== set k=total number of records

query = "List the different NCT ID in ascending order. How many is that?"
response = query_engine.query(query)
display_response(response)

**`Final Response:`** NCT01720524, NCT01942135, NCT01945775, NCT01964716, NCT02130557, NCT02187744, NCT02367456, NCT02603432, NCT03090191. There are 9 different NCT IDs in total.

# Going to try chat engine

In [16]:
from llama_index.llms.openai import OpenAI
from llama_index.core.memory import ChatMemoryBuffer

In [40]:
chat_engine = index.as_chat_engine(
    chat_mode = 'context',
    memory = memory,
    system_prompt =(
        "You are a chatbot, who is an expert in clinical trials and translating clinical trial informaiton into layman's terms."
    )
)

In [18]:
chat_engine.reset()

In [49]:
response = chat_engine.chat("Hello!")
print(response)

Hello! How can I assist you today?


In [50]:
response = chat_engine.chat("What is the NCT ID for the first clinical trial?")
print(response)

The NCT ID for the first clinical trial is NCT01964716.


In [51]:
response = chat_engine.chat("Can you write me a Plain Language Summary of this trial?")
print(response)

Certainly! Here is a Plain Language Summary of the clinical trial with NCT ID NCT01964716:

Title: "A Study Of PF-05280014 Or Trastuzumab Plus Taxotere and Carboplatin In HER2 Positive Breast Cancer In The Neoadjuvant Setting"

This study is looking at a new drug called PF-05280014 compared to the standard treatment for HER2 positive breast cancer. Patients with this type of breast cancer will receive either PF-05280014 or the standard treatment which includes Trastuzumab, Taxotere, and Carboplatin before their main treatment (neoadjuvant setting).

The study aims to see if PF-05280014 is as effective as the standard treatment in treating HER2 positive breast cancer. The researchers will enroll 226 patients in the study.

The results of the study will help doctors understand if PF-05280014 can be a good alternative to the standard treatment for HER2 positive breast cancer in the neoadjuvant setting.

If you have any more questions or need further information, feel free to ask!


In [52]:
response = chat_engine.chat("Okay thanks, how many people are enrolled in this trial?")
print(response)

There are 226 patients enrolled in the clinical trial with NCT ID NCT01964716.


In [47]:
print(reponse)

The trial NCT01964716 has 240 participants enrolled.


# Trying chat w/ context and condense

In [17]:
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=memory,
    llm=llm,
    context_prompt=(
        "You are a chatbot, who is an expert in clinical trials and translating clinical trial informaiton into layman's terms."
        "Here are the relevant documents for the context:\n"
        "{context_str}"
        "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
    ),
    verbose=False,
)

In [18]:
response = chat_engine.chat("What trials are about covid?")
print(response)

I don't have specific information on COVID trials in the provided context. However, I can help you find information on COVID trials. ClinicalTrials.gov is a good resource to search for clinical trials related to COVID-19. You can visit the website and use keywords like "COVID-19" or "coronavirus" to find relevant trials. If you have any specific questions or need assistance with understanding a particular COVID trial, feel free to ask!


In [19]:
response = chat_engine.chat("What trials are about breat cancer?")
print(response)

I have information on two clinical trials related to breast cancer:

1. **Trial 1:**
   - **NCT ID:** NCT02187744
   - **Brief Title:** "A Study Of PF-05280014 Or Trastuzumab Plus Taxotere® And Carboplatin In HER2 Positive Breast Cancer In The Neoadjuvant Setting (REFLECTIONS B327-04)"
   - **Lead Sponsor:** Pfizer
   - **Enrollment Count:** 226
   - **Interventions:** Biological: PF-05280014, Drug: Taxotere®, Drug: Paraplatin®
   - **Primary Outcome:** The primary outcome group estimate comment mentions that the stratified analysis was based on the normal approximation to the binomial distribution, adjusting for the randomization strata of primary tumor size, estrogen receptor status, and progesterone receptor status.

2. **Trial 2:**
   - **NCT ID:** NCT01945775
   - **Brief Title:** "A Study Evaluating Talazoparib (BMN 673), a PARP Inhibitor, in Advanced and/or Metastatic Breast Cancer Patients With BRCA Mutation (EMBRACA Study)"
   - **Lead Sponsor:** Pfizer
   - **Enrollment Count

In [20]:
response = chat_engine.chat("Can you write me a Plain Language Summary of the NCT01945775 trial?")
print(response)

Certainly! Here is a Plain Language Summary of the NCT01945775 trial:

Title: A Study Evaluating Talazoparib (BMN 673), a PARP Inhibitor, in Advanced and/or Metastatic Breast Cancer Patients With BRCA Mutation (EMBRACA Study)

This study, known as the EMBRACA Study, is looking at a drug called talazoparib (also known as BMN 673) in patients with advanced and/or metastatic breast cancer who have a specific genetic mutation called BRCA. Talazoparib is a type of drug known as a PARP inhibitor, which works by targeting specific weaknesses in cancer cells.

The main goal of this study is to see how effective talazoparib is in treating advanced and/or metastatic breast cancer in patients with BRCA mutations. The study will also look at the safety of talazoparib and how well patients tolerate the treatment.

Patients participating in this study will receive talazoparib as the main treatment. The study will measure how long it takes for the cancer to progress while on talazoparib treatment and

In [21]:
response = chat_engine.chat("Okay thanks, can you give me a summary of the NCT0218774 trail?")
print(response)

Certainly! Here is a Plain Language Summary of the NCT02187744 trial:

Title: A Study Of PF-05280014 Or Trastuzumab Plus Taxotere® And Carboplatin In HER2 Positive Breast Cancer In The Neoadjuvant Setting (REFLECTIONS B327-04)

This study, known as REFLECTIONS B327-04, is investigating the effectiveness of a drug called PF-05280014 compared to the standard treatment of trastuzumab plus Taxotere® and carboplatin in patients with HER2 positive breast cancer in the neoadjuvant setting. HER2 positive breast cancer is a type of breast cancer that has a higher amount of a protein called HER2, which can promote the growth of cancer cells.

PF-05280014 is being studied to see if it is as effective as the standard treatment in shrinking tumors before surgery (neoadjuvant setting). The study aims to determine the response rate of tumors to PF-05280014 compared to trastuzumab plus Taxotere® and carboplatin. The study will also evaluate the safety and tolerability of PF-05280014 in these patients.

In [58]:
response = chat_engine.chat("Sorry, I meant can you give me a summary of the NCT02187744 trail?")
print(response)

In the NCT02187744 trial, researchers are conducting a study to investigate the use of PF-05280014 (a biosimilar to Trastuzumab) or Trastuzumab in combination with Taxotere and Carboplatin in patients with HER2-positive breast cancer in the neoadjuvant setting. This study, known as the REFLECTIONS B327-04 study and sponsored by Pfizer, aims to enroll 226 participants.

HER2-positive breast cancer is a specific type of breast cancer that overexpresses a protein called HER2, which can promote the growth of cancer cells. Trastuzumab is a targeted therapy that specifically targets HER2-positive breast cancer cells.

The goal of this trial is to evaluate the effectiveness and safety of PF-05280014 (a biosimilar to Trastuzumab) compared to Trastuzumab when used in combination with Taxotere and Carboplatin as neoadjuvant treatment for HER2-positive breast cancer. Neoadjuvant treatment is given before surgery to shrink the tumor and improve surgical outcomes.

Participants in the study will re

In [22]:
response = chat_engine.chat("Okay, can you tell me the major differences between these two trials?")
print(response)

Certainly! Here are some major differences between the two clinical trials, NCT01945775 and NCT02187744:

1. **Disease Focus:**
   - NCT01945775 focuses on advanced and/or metastatic breast cancer patients with BRCA mutation.
   - NCT02187744 focuses on HER2 positive breast cancer patients in the neoadjuvant setting.

2. **Interventions:**
   - NCT01945775 involves the drug talazoparib, a PARP inhibitor.
   - NCT02187744 compares the effectiveness of PF-05280014 to the standard treatment of trastuzumab plus Taxotere® and carboplatin.

3. **Study Design:**
   - NCT01945775 is evaluating the efficacy and safety of talazoparib in a specific genetic subgroup of breast cancer patients.
   - NCT02187744 is comparing the response rate of tumors to PF-05280014 versus the standard treatment in a specific subtype of breast cancer patients.

4. **Patient Population:**
   - NCT01945775 includes patients with BRCA mutations and advanced/metastatic breast cancer.
   - NCT02187744 includes patients w

In [23]:
response = chat_engine.chat("Okay, can you tell me more about the PARP inhibitor?")
print(response)

PARP inhibitors are a class of drugs that have shown promise in the treatment of certain types of cancer, particularly in patients with BRCA mutations. Here are some key points about PARP inhibitors:

1. **Mechanism of Action:** PARP (poly ADP-ribose polymerase) is a protein involved in DNA repair. PARP inhibitors work by blocking the activity of PARP enzymes, which prevents cancer cells from repairing damaged DNA. This can lead to the accumulation of DNA damage in cancer cells, ultimately causing their death.

2. **BRCA Mutations:** PARP inhibitors are particularly effective in cancer cells that have mutations in the BRCA genes. BRCA genes are involved in DNA repair, and when these genes are mutated, cancer cells become more reliant on alternative DNA repair pathways, such as PARP-mediated repair. By inhibiting PARP, the cancer cells with BRCA mutations are unable to repair their DNA effectively, leading to cell death.

3. **Indications:** PARP inhibitors have been approved for the tr

# Working on building lightwieght chat bot

In [63]:
from IPython.display import clear_output

def chat_with_bot(chat_engine):
    """
    Starts an interactive chat session in the Jupyter Notebook with a continuously growing output.
    
    Args:
    - chat_engine: The chat engine initialized with LLamaIndex and other configurations.
    """
    chat_history = [] 
    
    print("Chatbot is ready to chat! Type 'quit' to end the session.")
    
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'quit':
            print("Chat session ended.")
            break
        
        chat_history.append(f"You: {user_input}")
        
        response = chat_engine.chat(user_input)
        
        chat_history.append(f"Bot: {response}")
        
        for line in chat_history:
            print(line)


In [64]:
chat_with_bot(chat_engine)


Chatbot is ready to chat! Type 'quit' to end the session.
You: Hey, which trials are about Breat cancer?
Bot: In the provided context, the two clinical trials related to breast cancer are:

1. NCT01945775: A Study Evaluating Talazoparib (BMN 673), a PARP Inhibitor, in Advanced and/or Metastatic Breast Cancer Patients With BRCA Mutation (EMBRACA Study)
- This study focuses on patients with advanced and/or metastatic breast cancer who have a specific genetic mutation called BRCA. It is evaluating the effectiveness and safety of Talazoparib in this patient population.

2. NCT02187744: A Study Of PF-05280014 Or Trastuzumab Plus Taxotere and Carboplatin In HER2 Positive Breast Cancer In The Neoadjuvant Setting (REFLECTIONS B327-04)
- This study is investigating the use of PF-05280014 (a biosimilar to Trastuzumab) or Trastuzumab in combination with Taxotere and Carboplatin in patients with HER2-positive breast cancer in the neoadjuvant setting.

If you have any specific questions about these

In [24]:
import ipywidgets as widgets
from IPython.display import display

text_input = widgets.Text(description="You:", width=300)
send_button = widgets.Button(description="Send")
output_area = widgets.Output(layout={'height': '300px', 'overflow_y': 'scroll'})

# initialize a list to keep track of the chat history
chat_history = []

def send_chat(button):
    user_input = text_input.value
    global chat_history  
    
    if user_input.lower() == 'quit':
        with output_area:
            display(widgets.HTML(value="<b>Chat session ended.</b>"))
        
        # reset the chat engine to clear the chat history / state
        chat_engine.reset()
        
        # clear the chat history and output area for a new session
        chat_history = []
        output_area.clear_output()
        display(widgets.HTML(value="<b>Chatbot reset! Type anything to start a new chat session.</b>"))
        return
    
    chat_history.append(f"You: {user_input}")
    
    response = chat_engine.chat(user_input)  
    chat_history.append(f"Bot: {response}")
    
    # display chat history without clearing previous messages
    with output_area:
        output_area.clear_output()  
        for message in chat_history:
            display(widgets.HTML(value=message.replace("\n", "<br>")))  
    text_input.value = ''

send_button.on_click(send_chat)

chat_box = widgets.VBox([text_input, send_button, output_area])
display(chat_box)


VBox(children=(Text(value='', description='You:'), Button(description='Send', style=ButtonStyle()), Output(lay…

## Print out from a conversation I had with the bot

You: What studies are about breast cancer?
Bot: Two studies related to breast cancer are:

1. Study NCT01945775 titled "A Study Evaluating Talazoparib (BMN 673), a PARP Inhibitor, in Advanced and/or Metastatic Breast Cancer Patients With BRCA Mutation (EMBRACA Study)" sponsored by Pfizer. This study aims to evaluate the effectiveness of talazoparib in patients with advanced and/or metastatic breast cancer who have a BRCA mutation. The study has 431 participants enrolled.

2. Study NCT02187744 titled "A Study Of PF-05280014 Or Trastuzumab Plus Taxotere and Carboplatin In HER2 Positive Breast Cancer In The Neoadjuvant Setting (REFLECTIONS B327-04)" also sponsored by Pfizer. This study focuses on HER2 positive breast cancer in the neoadjuvant setting and involves the use of PF-05280014, Taxotere, and Carboplatin. The study has 226 participants enrolled.
You: Can you give me a plain language summary of trial NCT02187744?
Bot: Certainly! Trial NCT02187744, also known as the "REFLECTIONS B327-04" study, is looking at a treatment for a specific type of breast cancer called HER2 positive breast cancer. The study is being conducted in the neoadjuvant setting, which means the treatment is given before surgery to shrink the tumor.

In this study, researchers are comparing the effectiveness of a drug called PF-05280014 with the standard treatment of Trastuzumab plus Taxotere and Carboplatin in patients with HER2 positive breast cancer. The goal is to see which treatment works better in reducing the size of the tumor before surgery.

The study involves 226 participants and is sponsored by Pfizer. The results of this study will help doctors and patients better understand the benefits of different treatment options for HER2 positive breast cancer in the neoadjuvant setting.
You: Alright, can you give me a study about the NCT01945775 trial please?
Bot: Certainly! Study NCT01945775 is titled "A Study Evaluating Talazoparib (BMN 673), a PARP Inhibitor, in Advanced and/or Metastatic Breast Cancer Patients With BRCA Mutation (EMBRACA Study)" sponsored by Pfizer.

This study is focused on evaluating the effectiveness of a drug called talazoparib, which is a PARP inhibitor, in patients with advanced and/or metastatic breast cancer who have a specific genetic mutation called BRCA. The study aims to assess how well talazoparib works in treating this type of breast cancer and to determine its safety profile.

The study has 431 participants enrolled, and the results will provide valuable information on the potential benefits of talazoparib as a treatment option for patients with advanced and/or metastatic breast cancer and BRCA mutations.
You: Can you compare the two studies now and exlpain the key differences please?
Bot: Certainly! Here is a comparison of the two studies, NCT02187744 and NCT01945775:

1. NCT02187744 (REFLECTIONS B327-04):
- Title: A Study Of PF-05280014 Or Trastuzumab Plus Taxotere and Carboplatin In HER2 Positive Breast Cancer In The Neoadjuvant Setting
- Sponsor: Pfizer
- Enrollment: 226 participants
- Focus: HER2 positive breast cancer in the neoadjuvant setting
- Interventions: PF-05280014, Trastuzumab, Taxotere, Carboplatin
- Goal: Comparing the effectiveness of PF-05280014 with standard treatment in reducing tumor size before surgery

2. NCT01945775 (EMBRACA Study):
- Title: A Study Evaluating Talazoparib (BMN 673), a PARP Inhibitor, in Advanced and/or Metastatic Breast Cancer Patients With BRCA Mutation
- Sponsor: Pfizer
- Enrollment: 431 participants
- Focus: Advanced and/or metastatic breast cancer with BRCA mutation
- Intervention: Talazoparib (PARP inhibitor)
- Goal: Evaluating the effectiveness and safety of talazoparib in treating breast cancer with BRCA mutation

Key Differences:
- NCT02187744 focuses on HER2 positive breast cancer in the neoadjuvant setting, while NCT01945775 focuses on advanced and/or metastatic breast cancer with BRCA mutation.
- The interventions in the two studies are different, with NCT02187744 using PF-05280014, Trastuzumab, Taxotere, and Carboplatin, and NCT01945775 using Talazoparib.
- The enrollment counts are also different, with NCT01945775 having a larger number of participants (431) compared to NCT02187744 (226).

These key differences highlight the specific populations being studied and the different treatment approaches being evaluated in each trial.
You: Can you tell me more about the PARP inhibitor?
Bot: PARP inhibitors are a class of drugs that are used in cancer treatment, particularly in the treatment of certain types of breast and ovarian cancers. PARP stands for poly (ADP-ribose) polymerase, which is a protein involved in DNA repair within cells.

When PARP inhibitors are used in cancer treatment, they work by blocking the PARP enzyme, which prevents cancer cells from repairing their damaged DNA. This leads to the accumulation of DNA damage in the cancer cells, ultimately causing them to die.

PARP inhibitors are particularly effective in cancers that have defects in other DNA repair pathways, such as cancers with BRCA mutations. These mutations make the cancer cells more reliant on the PARP pathway for DNA repair, making them more vulnerable to PARP inhibition.

Talazoparib, which is the PARP inhibitor being studied in the NCT01945775 trial, is one such drug that has shown promise in treating breast cancer with BRCA mutations. By targeting the DNA repair mechanisms in cancer cells, PARP inhibitors like talazoparib offer a targeted and effective treatment option for certain types of cancer.

You: What studies are about covid?
Bot: I can provide you with information on clinical trials related to COVID-19. However, based on the context provided, the studies mentioned are not specifically related to COVID-19. Would you like me to search for COVID-19 related clinical trials for you?

In [15]:
display(chat_box)

VBox(children=(Text(value='', description='You:'), Button(description='Send', style=ButtonStyle()), Output(lay…

In [16]:
display(chat_box)

VBox(children=(Text(value='', description='You:'), Button(description='Send', style=ButtonStyle()), Output(lay…

# Working on fetching additional information

In [21]:
import re

def extract_query_details(user_query, bot_response):
    # Regex pattern for NCT_ID
    nct_id_pattern = r"NCT[0-9]+"
    
    # check if bot's response indicates a lack of specific information
    if "I don't have information" in bot_response or "I don't have specific details" in bot_response:
        # Try to extract NCT_ID
        nct_id_match = re.search(nct_id_pattern, user_query)
        if nct_id_match:
            return ("NCT_ID", nct_id_match.group(0))
        
        # If no NCT_ID, treat the user query as a topic
        return ("topic", user_query.split("about")[-1].strip())
    
    # Rrturn None if bot's response does not indicate a lack of information
    return (None, None)


user_query = "Can you list clinical trials about covid?"
bot_response = "I can provide you with information about clinical trials related to COVID-19. However, ..."

query_type, query_value = extract_query_details(user_query, bot_response)

if query_type == "NCT_ID":
    # Fetch and process data for the NCT_ID
    print(f"Fetching information for clinical trial {query_value}...")
elif query_type == "topic":
    # Fetch and process data for the topic
    print(f"Searching for clinical trials about {query_value}...")


In [22]:
query_type, query_value = extract_query_details(user_query, bot_response)
print(query_type, query_value)

None None
