In [None]:
# The packages to be installed before running the code
! pip3 install -qU guardrails-ai openai langchain_community langchain_experimental langchain-upstage sentence-transformers langchainhub langchain-chroma langchain matplotlib python-dotenv tavily-python ragas faiss-cpu tokenizers 
!pip install -qU python-dotenv
!pip install -qU PyPDF2
!pip install -qU langchain
!pip install -qU langchain-community
!pip install -qU langchain-core
!pip install -qU langchain-text-splitters
!pip install -qU langchain_upstage
!pip install -qU oracledb
print("======== Job Completed ========")

In [None]:
# Load the python-dotenv IPython extension, then run its `%dotenv` magic so that
# settings from a local .env file become available through os.environ.
%load_ext dotenv
%dotenv

In [None]:
import os

# Fail fast when a setting needed by the following cells is missing.
# Only the variable NAMES are reported: the values (API key, DB password, DSN)
# must never end up in the saved cell output.
REQUIRED_ENV_VARS = ("UPSTAGE_API_KEY", "DB_USER", "DB_PASSWORD", "DSN")

missing_env_vars = [name for name in REQUIRED_ENV_VARS if name not in os.environ]
if missing_env_vars:
    # KeyError matches what a direct os.environ[...] lookup raises,
    # but lists every missing name at once.
    raise KeyError(
        "Missing required environment variables: " + ", ".join(missing_env_vars)
    )

print("Required environment variables are set:", ", ".join(REQUIRED_ENV_VARS))

In [None]:
import sys
import array
import time
import os
from dotenv import load_dotenv

import oracledb
from langchain_community.vectorstores import oraclevs
from langchain_community.vectorstores.oraclevs import OracleVS

from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_core.documents import BaseDocumentTransformer, Document

from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

import warnings
warnings.filterwarnings("ignore")

print("Successfully imported libraries and modules")

In [None]:
# Database credentials come from the environment (populated from .env above).
username = os.environ["DB_USER"]
password = os.environ["DB_PASSWORD"]
dsn = os.environ["DSN"]

# Open a single session. On failure, report the reason and re-raise so the notebook
# stops here instead of failing later with a NameError on `conn23c`.
try:
    conn23c = oracledb.connect(user=username, password=password, dsn=dsn)
except Exception as e:
    print(f"Connection failed! {e}")
    raise
else:
    print("Connection successful!", conn23c.version)

# `con` is kept as an alias for code that still refers to it; it no longer opens
# a second, unused database session.
con = conn23c

In [None]:
from langchain_upstage import UpstageEmbeddings

# Embedding model used to vectorise queries for the vector store.
upstage_embeddings = UpstageEmbeddings(model="solar-embedding-1-large")

# Vector store bound to the `pre_diagnostic_db` table over the open connection.
# NOTE(review): the distance strategy presumably has to match the one used when
# the table was populated — confirm against the ingestion notebook.
vector_store = OracleVS(
    client=conn23c,
    embedding_function=upstage_embeddings,
    table_name="pre_diagnostic_db",
    distance_strategy=DistanceStrategy.DOT_PRODUCT,
)

# Retriever view of the same store, built with default settings.
retriever = vector_store.as_retriever()

In [None]:
from langchain_core.tools import tool
import requests
import os
from tavily import TavilyClient
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_upstage import ChatUpstage

# Chat model built with default arguments.
# NOTE(review): presumably picks up UPSTAGE_API_KEY from the environment — confirm.
llm = ChatUpstage()
# Web-search client; the API key is read from the TAVILY_API_KEY environment
# variable (a KeyError is raised here if it is not set).
tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

def intelligence_rag(question, k=2, max_web_results=2):
    """Collect three kinds of supporting material for ``question``.

    Uses the module-level ``llm``, ``tavily`` and ``vector_store`` objects.

    Args:
        question (str): The user's question; sent unchanged to all three sources.
        k (int): Number of chunks requested from the vector store (default: 2).
        max_web_results (int): Maximum number of web-search snippets to keep
            (default: 2). Fewer are returned if the search yields fewer hits.

    Returns:
        list: A three-element list, in this order:
            [0] str - the LLM's direct answer to the question,
            [1] list - the ``content`` field of the first search hits,
            [2] list - the chunks returned by ``vector_store.similarity_search``.
    """
    # 1. Direct LLM answer (no retrieved context, no chat history).
    direct_answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant."),
            ("human", "{input}"),
        ]
    )
    direct_answer_chain = direct_answer_prompt | llm | StrOutputParser()
    llm_answer = direct_answer_chain.invoke({"input": question})

    # 2. Web/news search snippets. A response without a "results" entry is
    #    treated as "no hits" instead of raising.
    search_response = tavily.search(query=question)
    search_hits = search_response.get("results") or []
    web_snippets = [hit["content"] for hit in search_hits[:max_web_results]]

    # 3. Similar chunks from the vector store.
    similar_chunks = vector_store.similarity_search(question, k=k)

    return [llm_answer, web_snippets, similar_chunks]

In [None]:
# Gather everything that will be fed to the prompt as context for the fever case question
# (direct LLM answer, web-search snippets, and vector-store chunks).
context = intelligence_rag('너는 일차진료기관의 아프리카의 의사야. 어제부터 38도 이상의 발열이 지속된 26세 여성 환자가 내원하였어. 필요한 검사와 진단, 치료를 말해주고 그 이유를 step-by-step 으로 알려줘. ')

In [None]:
# Show the model's answer for the fever case, using the gathered context.
fever_case_question = '너는 일차진료기관의 아프리카의 의사야. 어제부터 38도 이상의 발열이 지속된 26세 여성 환자가 내원하였어. 필요한 검사와 진단, 치료를 말해주고 그 이유를 step-by-step 으로 알려줘. '

# Prompt text with two placeholders: {question} and {context}.
fever_case_template = """
    Please provide answer for question from the following context. 
    ---
    Question: {question}
    ---
    Context: {context}
    """

prompt_template = PromptTemplate.from_template(fever_case_template)
chain = prompt_template | llm | StrOutputParser()

# Last expression of the cell, so the answer is displayed as the cell output.
chain.invoke({"context": context, "question": fever_case_question})

In [None]:
# Retrieval step of the RAG flow: find the stored chunks closest to the question.
# "RDT" (rapid diagnostic test) replaces the earlier "RCT" typo in the question text.
user_question = ("A 30-year-old male with no unusual medical history presents with a fever and a positive malaria RDT test. Which drug would you prescribe? Answer with reasons.  ")
country = "Korea"
print("The prompt to the LLM will be:", user_question)

# Defined up front so later cells still work (with no context) if the question is empty.
result_chunks = []

if user_question:
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    search_started = time.perf_counter()
    # Look up the chunks that are most similar to the user's question (AI vector search).
    # k is the top-k parameter.
    result_chunks = vector_store.similarity_search(user_question, k=2)
    search_elapsed = time.perf_counter() - search_started
    print(f"Search for the user question in the Oracle Database 23ai and return similar chunks duration: {round(search_elapsed, 1)} sec.")

In [None]:
# Prompt text for the RAG answer. Placeholders: {country} (sets the doctor's role),
# {question} (the user's question) and {context} (the retrieved material).
rag_answer_template = """
    Please provide most correct answer from the following context. 

    Think step by step and look the html tags and table values carefully to provide the most correct answer.

    ---
    role : You are a doctor in a primary care organization in a {country}.
    ---
    Question: {question}
    ---
    Context: {context}
    ---
    Tell me the necessary tests, diagnoses, treatments, and prescriptions (pharmaceuticals) and tell me why.

    Let's think step by step.
    """

# Rebinds `prompt_template`, replacing the simpler template defined earlier.
prompt_template = PromptTemplate.from_template(rag_answer_template)

In [None]:
# Pass only the text of each retrieved chunk to the prompt. Interpolating the raw
# list would insert the Document reprs (metadata, escaped newlines) instead of the
# readable chunk content the prompt asks the model to inspect.
context_text = "\n\n".join(doc.page_content for doc in result_chunks)

chain = prompt_template | llm | StrOutputParser()
chain.invoke({"context": context_text, "question": user_question, "country": country})

# Proceed with model evaluation

In [None]:
from bert_score import score

# Two English answer texts about malaria treatment; they are compared with BERTScore
# in the cell below.
# NOTE(review): both texts refer to Pakistan, while the RAG cell above sets
# country = "Korea" — confirm these are the intended texts to compare.

# s1: the longer, multi-paragraph answer.
s1 = """According to the WHO guidelines for malaria treatment, artemisinin-based combination therapy (ACT) is recommended as the first-line treatment for Plasmodium falciparum in Pakistan. 
Currently, the most commonly used ACT in Pakistan is artesunate-amodiaquine (AS-AQ). 
However, in this patient's case, treatment for Plasmodium vivax must also be considered. Plasmodium vivax is one of the prevalent malaria species in the region. 
For Plasmodium vivax, chloroquine (CQ) is recommended as the first-line treatment.
However, considering the possibility of CQ-resistant Plasmodium vivax, it is essential to review recent treatment efficacy studies conducted in Pakistan to verify if CQ remains highly effective in the region.
If CQ is found to be highly effective, it should be prescribed for 3 days (25mg/kg on the first day, followed by 12.5mg/kg for the next 2 days), and the treatment efficacy should be monitored.
If the efficacy of CQ is low or resistance has developed, alternative treatments such as artesunate-mefloquine (AS-MQ) or tafenoquine should be considered.
The efficacy of the prescribed treatment should be monitored, and appropriate follow-up actions should be taken in case of treatment failure or adverse effects.
Therefore, it is advisable to verify the current efficacy of CQ in the region, considering the possibility of CQ-resistant Plasmodium vivax. If CQ is effective, it should be prescribed for 3 days and the treatment efficacy monitored. If CQ efficacy is low or resistance has developed, alternative treatments should be considered."""

# s2: the shorter, single-paragraph answer.
s2 = "Although I am not a doctor, according to the given context, if a 30-year-old male with no significant medical history tests positive for malaria by RDT, it is recommended to prescribe Artemisinin-based combination therapy (ACT). In Pakistan, combinations such as Artesunate-Amodiaquine (AS-AQ) or Artesunate-Mefloquine (AS-MQ) are commonly used. The physician will prescribe either AS-AQ or AS-MQ and provide guidance on the appropriate dosage and schedule. Additionally, the physician will monitor for any potential side effects during treatment and follow up with the patient for a certain period to confirm the treatment's effectiveness and monitor for any recurrence."

In [None]:
from bert_score import score

def calculate_bertscore(sentence1, sentence2, lang="en"):
    """
    Compute the BERTScore between a pair of texts.

    Each text is wrapped in a one-element list because ``score`` works on
    batches; ``sentence1`` is passed as its first argument and ``sentence2``
    as its second.

    Args:
    sentence1 (str): First text.
    sentence2 (str): Second text.
    lang (str): Language code forwarded to ``score`` (default: 'en').

    Returns:
    dict: Plain Python scalars under the keys "Precision", "Recall" and "F1 Score".
    """
    precision, recall, f1 = score([sentence1], [sentence2], lang=lang)

    # One pair in, one value per metric out: unwrap each with .item().
    metrics = (("Precision", precision), ("Recall", recall), ("F1 Score", f1))
    return {label: value.item() for label, value in metrics}

# Example with two short sentences:
# sentence1 = "The quick brown fox jumps over the lazy dog."
# sentence2 = "A fast, dark-colored fox leaps over a sleepy dog."

# Score the two answer texts (s1 as sentence1, s2 as sentence2) and print the metrics.
result = calculate_bertscore(s1, s2, lang="en")
print(result)
