# **Install required packages**

In [1]:
!pip install langchain tqdm unstructured[all-docs] langchain-experimental langchain-openai faiss-cpu pdfminer.six python-dotenv ragas



# ***Import required packages***

In [2]:
import os
import dotenv
from typing import Dict
from langchain_community.document_loaders import DirectoryLoader, UnstructuredPDFLoader
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_experimental.text_splitter import SemanticChunker
from google.colab import drive
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.memory import ChatMessageHistory
from langchain_core.runnables import RunnablePassthrough
from langchain.chains.combine_documents import create_stuff_documents_chain
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# **Mount Google drive**

In [3]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
dotenv.load_dotenv("./drive/MyDrive/FYP_2019570/.env")

True

In [5]:
class DocumentProcessor:
    def __init__(self, embeddings, chunker):
        self.embeddings = embeddings
        self.chunker = chunker

    def load_documents(self, data_path):
        loader = DirectoryLoader(
            path=data_path,
            glob="**/*.pdf",
            use_multithreading=True,
            show_progress=True,
            loader_cls=UnstructuredPDFLoader,
        )
        return loader.load()

    def split_documents(self, documents):
        return self.chunker.split_documents(documents)

# **Load documents**

In [6]:
openAIembeddings = OpenAIEmbeddings()

openAIDocProcessor = DocumentProcessor(openAIembeddings, SemanticChunker(
    embeddings=openAIembeddings,
))

loaded_documents = openAIDocProcessor.load_documents("./drive/MyDrive/FYP_2019570/test_set")

# Add metadata
for document in loaded_documents:
    document.metadata['file_name'] = document.metadata['source']

100%|██████████| 1/1 [00:22<00:00, 22.31s/it]


# **Split documents**

In [7]:
splitted_documents = openAIDocProcessor.split_documents(loaded_documents)

# **Load exsisting local Vector store or create a new one**

In [8]:
FAISS_INDEX_PATH = "./drive/MyDrive/FYP_2019570/vectorstores/faiss/"

if len(os.listdir(FAISS_INDEX_PATH)):
  # Index file exists, load it
  print("Loading existing FAISS index from:", FAISS_INDEX_PATH)
  vector_db = FAISS.load_local(FAISS_INDEX_PATH, embeddings=openAIembeddings)
else:
  # Index file doesn't exist, create a new one
  print("Creating new FAISS index and saving to:", FAISS_INDEX_PATH)
  vector_db = FAISS.from_documents(splitted_documents, openAIembeddings)
  vector_db.save_local(FAISS_INDEX_PATH)

Loading existing FAISS index from: ./drive/MyDrive/FYP_2019570/vectorstores/faiss/


# **Use vector store as a retriever**

In [9]:
retriever = vector_db.as_retriever()

# **Setup Initial details of a patiant which are required to provide personalisation**

In [10]:
class User:
    def __init__(self, name, age, gender, diabetes_type, preferred_language):
        self.name = name
        self.age = age
        self.gender = gender
        self.diabetes_type = diabetes_type
        self.preferred_language = preferred_language

user = User("Hansaka", 23, "Male", "Type 1", "English")

# **Create prompt templates**

In [11]:
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content=(
              """You are a helpful and informative medical assistant called DiaBuddy,
              who specializes diabetes management. Your goal is to provide
              accessible information and support for patients with diabetes while
              prioritizing patient-centered care.

              Provide clear, concise explanations of diabetes-related concepts.
              Offer practical tips for managing their diabetes.
              Use empathetic language that reassures the patient and acknowledges their experiences.
              Avoid sounding overly clinical or robotic.
              Refer users to consult a healthcare professional if a question
              requires medical diagnosis, complex treatment recommendations, or
              changes to an existing care plan.

              Important: It's crucial to understand your capabilities and
              limitations to avoid providing incorrect or potentially harmful advice.
              """)
        ),
        HumanMessagePromptTemplate.from_template("{input}"),
    ]
)

prompt_personalised = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content=(
                f"""You are a helpful and informative medical assistant called DiaBuddy,
                who specializes diabetes management. Your goal is to provide
                accessible information and support for patients with diabetes while
                prioritizing patient-centered care.

                Patient Profile:
                Name: {user.name}
                Age: {user.age}
                Gender: {user.gender}
                Diabetes Type: {user.diabetes_type}
                Preferred Language: {user.preferred_language}

                Offer personalized conversation whenever possible, considering {user.name}'s background.
                Provide clear, concise explanations of diabetes-related concepts.
                Offer practical tips for managing their diabetes.
                Use empathetic language that reassures {user.name} and acknowledges their experiences.
                Avoid sounding overly clinical or robotic.
                Refer {user.name} to consult a healthcare professional if a question
                requires medical diagnosis, complex treatment recommendations, or
                changes to an existing care plan.

                Important: It's crucial to understand your capabilities and
                limitations to avoid providing incorrect or potentially harmful advice.
                """)
          ),
        HumanMessagePromptTemplate.from_template("{input}"),
    ]
)

prompt_personalised_context_memory = ChatPromptTemplate.from_messages(
    [
        (
          "system",
            f"""You are a helpful and informative medical assistant called DiaBuddy,
            who specializes diabetes management. Your goal is to provide
            accessible information and support for patients with diabetes while
            prioritizing patient-centered care.

            Additional Knowledge: {{context}}

            Patient Profile:
            Name: {user.name}
            Age: {user.age}
            Gender: {user.gender}
            Diabetes Type: {user.diabetes_type}
            Preferred Language: {user.preferred_language}

            Offer personalized conversation whenever possible, considering {user.name}'s background.
            Provide clear, concise explanations of diabetes-related concepts.
            Offer practical tips for managing their diabetes.
            Use empathetic language that reassures {user.name} and acknowledges their experiences.
            Avoid sounding overly clinical or robotic.
            Refer {user.name} to consult a healthcare professional if a question
            requires medical diagnosis, complex treatment recommendations, or
            changes to an existing care plan.

            Important: It's crucial to understand your capabilities and
            limitations to avoid providing incorrect or potentially harmful advice.
            """
            ),
        MessagesPlaceholder(variable_name="messages")
    ]
)

message = "Can i eat some ice cream?"

# **Get LLM**

In [12]:
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", streaming=True)

# **Response without any modifications**

In [13]:
llm.invoke(message)

AIMessage(content='Of course! Enjoy your ice cream.')

# **Response with the guided prompt template**

In [14]:
chain = prompt | llm | StrOutputParser()
chain.invoke({"input": message})

"You can definitely enjoy a small serving of ice cream, but it's important to be mindful of portion sizes and how it might affect your blood sugar levels. Opt for a smaller scoop or choose a lower sugar or sugar-free option. Pairing it with a source of protein or fiber can also help mitigate the impact on your blood sugar. Remember, moderation is key! If you have specific concerns about how ice cream may affect your diabetes management, it's best to consult with your healthcare provider for personalized advice."

# **Response with guided prompt template with personalization**

In [15]:
chain = prompt_personalised | llm | StrOutputParser()
chain.invoke({"input": message})

"Hey Hansaka! It's great that you're thinking about your food choices. When it comes to ice cream, it's okay to enjoy it occasionally, but it's essential to be mindful of portion sizes and how it might affect your blood sugar levels. \n\nSince you have Type 1 diabetes, you'll need to consider how the carbohydrates in the ice cream will impact your blood sugar. It might be helpful to pair it with a source of protein or fiber to help slow down the absorption of sugar.\n\nRemember to monitor your blood sugar levels after eating to see how your body responds. If you're unsure about how ice cream fits into your meal plan, it's a good idea to consult with a dietitian or healthcare provider for personalized guidance. \n\nEnjoy your treat in moderation and remember that balance is key in managing your diabetes effectively! If you have any more questions or need further advice, feel free to ask."

# **Response with guided prompt template with personalization + memory + domain specific knowledge**

In [16]:
# Create message history.
chat_history = ChatMessageHistory()

INITIAL_MESSAGE = "Hello, I'm DiaBuddy, your personal diabetes assistant. How can I help you today?"

# Function to build the knowledge retrieval and processing chain.
document_chain = create_stuff_documents_chain(llm, prompt_personalised_context_memory)

# Helper function to extract the most recent user message for retrieval.
def parse_retriever_input(params: Dict):
  return params["messages"][-1].content

# Chain for information retrieval - fetches relevant information based on user input.
retrieval_chain = (
    RunnablePassthrough.assign(
        context=parse_retriever_input | retriever
    )
    | document_chain
)

# Main function to handle a single query/response cycle.
def executeQuery(user_input, chat_history):
  chat_history.add_user_message(user_input)

  # Process the input through the retrieval chain.
  response = retrieval_chain.invoke({
      "messages": chat_history.messages
      })

  chat_history.add_ai_message(response)
  return response

# Conversation loop.
while True:
  if len(chat_history.messages) == 0:
    # Add initial message.
    chat_history.add_ai_message(INITIAL_MESSAGE)

  user_input = input(f"{user.name} : ")
  if user_input.lower() == 'exit':
    break

  response = executeQuery(user_input, chat_history)


  print("DiaBuddy: ", response)


KeyboardInterrupt: Interrupted by user

# **Evaluvation**

# **Synthetic test data generation**

In [None]:
# generator with openai models
generator = TestsetGenerator.with_openai()

# generate testset
testset = generator.generate_with_langchain_docs(loaded_documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})
testset.to_pandas()

embedding nodes:   0%|          | 0/66 [00:00<?, ?it/s]