In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

In [None]:
# Pick the compute device once: CUDA when torch can see a GPU, CPU otherwise.
_has_gpu = torch.cuda.is_available()
print("Using GPU" if _has_gpu else "Using CPU")
device = "cuda" if _has_gpu else "cpu"

In [None]:
# Stream each generated token to stdout as it is produced.
stdout_streamer = StreamingStdOutCallbackHandler()
callback_manager = CallbackManager([stdout_streamer])

In [None]:
# Location of the GGUF model file -- adjust this path for your system.
model_file = "Mistral-7B-Instruct-v0.1.gguf/mistral-7b-instruct-v0.1.Q5_K_M.gguf"

# Generation settings passed straight through to LlamaCpp.
sampling_options = {
    "temperature": 0.75,
    "max_tokens": 2000,
    "top_p": 1,
}

llm = LlamaCpp(
    model_path=model_file,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
    **sampling_options,
)

In [None]:
# Sample question / context pair used to try out the prompt template below.
question = "How has this trial helped?"
context = "R DE was established as LHC165 600 g biweekly in combination with PDR001 400 mg Q4W. In single agent arm , no DLT was observed . In combination arm , one DLT of pancreatitis was reported . Comparable exposures of LHC165 were observed at same dose levels in single agent and combination arms . There was no impact on PK of LHC165 when given in combination with PDR001. The safety profile of LHC165 was well characterized in all treatment groups evaluated in this study . No major differences were observed between LHC165 as single agent vs . LHC165 + PDR001 combination . Overall , the safety profile of the doses explored was generally manageable ."
# Reference answer for the sample above; it is not passed to the prompt in this cell.
answer = "This trial helped learn about the safety of different doses of LHC165 given alone or with PDR001 in participants with advanced cancers . The researchers concluded that 600 g LHC165 was the highest dose that was safe for participants to receive alone or with 400 mg PDR001. Because enrollment ended early and there were too few participants , the researchers could not make any conclusions about the effects of LHC165 given alone or with PDR001 on shrinking cancer . The sponsor has no plans for other trials of LHC165 in people with advanced cancers ."
# Prompt layout sent to the LLM: the text is wrapped in [INST] ... [/INST] tags
# and exposes two placeholders, {context} and {question}.
# NOTE(review): the instruction text contains typos ("users", "ground"); it is
# left untouched here because changing it changes what the model receives.
template = """
[INST] 
CONTEXT:
{context}

****************************************************************

QUESTION:
{question}

****************************************************************

INSTRUCTIONS:
Answer the users QUESTION using the CONTEXT text above.
Keep your answer ground in the facts of the CONTEXT and using plain language.
[/INST]
"""
# Build the reusable template and print the fully rendered sample prompt for inspection.
prompt = PromptTemplate.from_template(template)
print(prompt.format(context=context, question=question))

In [None]:
# Render the sample prompt, then run it through the model once.
sample_prompt = prompt.format(context=context, question=question)
result = llm.invoke(sample_prompt)

In [None]:
# Bare expression: in a notebook this echoes the value of `result` as the cell output.
result

In [None]:
import json

def generate_llm_answer(context, question):
    """Return the model's answer to ``question`` given ``context``.

    Fills the module-level ``prompt`` template with the two values and sends
    the rendered text to the module-level ``llm`` via ``invoke``.
    """
    rendered_prompt = prompt.format(context=context, question=question)
    return llm.invoke(rendered_prompt)

def create_llm_answers(input_file, output_file):
    """Add an ``llm_answer`` field to every record of a JSON dataset.

    Reads a JSON array from ``input_file``, calls ``generate_llm_answer`` with
    each item's ``context`` and ``question`` (a missing key falls back to an
    empty string), stores the returned value under the item's ``llm_answer``
    key, and writes the whole array to ``output_file`` with 4-space indentation.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
    """
    # Explicit UTF-8 so reading does not depend on the platform's default encoding.
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Iterate over each object in the JSON array and add the 'llm_answer'
    for item in data:
        # The outer quotes must differ from the ones inside the replacement
        # field: reusing the same quote is a SyntaxError before Python 3.12.
        print(f"Current trial accessed: {item.get('trial_name', '')}")
        context = item.get('context', '')
        question = item.get('question', '')
        item['llm_answer'] = generate_llm_answer(context, question)

    # Save the updated JSON data to a new file
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

    print(f"Updated JSON data has been saved to {output_file}")

In [None]:
# One dataset per question type; each entry is a file name without its extension.
files = [
    "How_has_this_trial_helped",
    "How_long_was_the_trial",
    "What_adverse_events_did_participants_report",
    "What_happened_during_the_trial",
    "What_treatments_did_the_participants_take",
    "What_were_the_results_of_the_trial",
    "Who_was_in_this_clinical_trial",
    "Why_was_the_research_needed",
]

# Derive the input/output paths for every dataset. The generation call itself
# is currently disabled; uncomment it to actually run the model over each file.
for file in files:
    print(f'Current file accessed: {file}')

    input_file = f'FinalDataset/FullDataset2/{file}.json'
    output_file = f'FinalDataset/Results/{file}_with_llm_answers.json'
    # create_llm_answers(input_file, output_file)