**Installation and imports**

In [None]:
!pip install -U requests urllib3
!pip install -qU langchain-huggingface
!pip install langchain_milvus sentence-transformers langchain_community langchain_openai

In [None]:
from uuid import uuid4
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_milvus import Milvus
from langchain_core.documents import Document
from langchain_community.llms import Ollama
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

**Download embeddings**

In [None]:
# Hugging Face identifier of the embedding model.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Options forwarded to the model: run on the CPU.
model_kwargs = dict(device='cpu')
# Options forwarded to encoding: vectors are left un-normalized.
encode_kwargs = dict(normalize_embeddings=False)

# Embedding object handed to the Milvus vector stores in this notebook.
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

**Setting up Milvus Lite Vector database**

In [None]:
# Path of the local database file passed to Milvus as the connection URI.
URI = "./medical_information.db"

# Vector store with no explicit collection name, so the library default applies.
vector_store = Milvus(
    connection_args=dict(uri=URI),
    embedding_function=embeddings,
)

In [None]:
# Create the "medical_information" collection in the same database file,
# seeded with a single placeholder document.
vector_store_saved = Milvus.from_documents(
    [Document(page_content="foo!")], embeddings,
    connection_args=dict(uri=URI),
    collection_name="medical_information",
)

In [None]:
# Re-open the collection that `Milvus.from_documents` wrote to in this
# database file. The collection name has to match the one used when saving
# ("medical_information"); any other name refers to a different collection
# that does not contain the saved documents.
vector_store_loaded = Milvus(
    embedding_function=embeddings,
    connection_args={"uri": URI},
    collection_name="medical_information",
)

**Extracting all information from documents to insert into vector database**

In [None]:
def extract_text_from_txt(file_path):
    """Return the entire contents of a UTF-8 encoded text file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file's full text as a single string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.read()

# Locations of the two text files to ingest.
file_path_avulsion, file_path_hairline = (
    "./docs/avulsion.txt",
    "./docs/hairline.txt",
)

# Read each file fully into a string.
page_content_avulsion = extract_text_from_txt(file_path=file_path_avulsion)
page_content_hairline = extract_text_from_txt(file_path=file_path_hairline)

**Insert documents with extracted information into database**

In [None]:
# Wrap each extracted text in a Document; both carry the same "source" tag.
document_avulsion = Document(
    metadata={"source": "medical_record"},
    page_content=page_content_avulsion,
)
document_hairline = Document(
    metadata={"source": "medical_record"},
    page_content=page_content_hairline,
)

documents = [document_avulsion, document_hairline]

# One freshly generated random UUID string per document, used as its id.
uuids = [str(uuid4()) for _ in documents]

# Insert into the vector store; the call's return value is the cell output.
vector_store.add_documents(ids=uuids, documents=documents)

**Create retriever from vector database for RAG chain**

In [None]:
# Retriever over the vector store using the "mmr" search type, limited to
# a single result (k=1).
milvus_retriever = vector_store.as_retriever(
    search_kwargs=dict(k=1),
    search_type="mmr",
)

# Quick check query; the retrieved documents are shown as the cell output.
# NOTE(review): whether the `filter` dict is honoured depends on the
# retriever/vector-store version — Milvus searches usually filter through an
# `expr` string. Confirm the filter is actually applied.
milvus_retriever.invoke(
    "Avulsion",
    k=1,
    filter={"source": "medical_record"},
)

**Define the large language model**

In [None]:
# LLM client for the "mistral" model served by Ollama at the given base URL.
llm = Ollama(
    model="mistral",
    base_url="http://llm.ic-shared-llm.svc.cluster.local:11434",
    # Generation settings.
    temperature=0.01,
    top_p=0.92,
    repeat_penalty=1.03,
    num_predict=512,
    # Callback handler; presumably streams generated tokens to stdout.
    callbacks=[StreamingStdOutCallbackHandler()],
)

**Perform Retrieval-Augmented Generation**

In [None]:
# The chain retrieves through the Milvus retriever.
retriever = milvus_retriever

# System instructions for the model. `{context}` is a template placeholder
# (presumably filled with the retrieved documents by the documents chain).
system_prompt = "".join([
    "Use the given context to answer the question. ",
    "If you don't know the answer, say you don't know. ",
    "Use three sentence maximum and keep the answer concise. ",
    "Context: {context}",
])

# Chat prompt: the system instructions followed by the user's `{input}`.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Combine the LLM and prompt into a documents chain, then put the retriever
# in front of it to form the full retrieval chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

# Example usage (replace the placeholder with a real question):
# chain.invoke({"input": "<<INPUT>>"})