In [1]:
import os
from pymilvus import MilvusClient
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_milvus import Milvus
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain import hub

import warnings
from langchain_ollama import OllamaLLM

In [2]:
path_pdfs = "megapidoc/"

# Load every PDF found directly inside `path_pdfs` and collect the loaded
# documents in one flat list.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the ingestion order (and everything derived from it) reproducible.
documents = []
for file in sorted(os.listdir(path_pdfs)):
    # Compare the extension case-insensitively so files such as "Manual.PDF"
    # are not silently skipped.
    if file.lower().endswith(".pdf"):
        pdf_path = os.path.join(path_pdfs, file)
        loader = PyPDFLoader(pdf_path)
        documents.extend(loader.load())

In [3]:
## Connect to Milvus
MILVUS_URL = "./rag101.db"

client = MilvusClient(uri=MILVUS_URL)

# The ingestion cell builds the vector store with drop_old=False, i.e. it
# appends to whatever is already stored. A collection left over from a previous
# run would therefore receive every document a second time, so it is dropped
# here when it EXISTS (dropping only in the "does not exist" branch is a no-op).
# NOTE(review): "LangChainCollection" is presumably the default collection name
# used by langchain_milvus, since the ingestion cell does not pass one — confirm.
if client.has_collection("LangChainCollection"):
    print("Collection exists")
    client.drop_collection("LangChainCollection")

Collection exists


In [4]:
# Load the "all-MiniLM-L6-v2" sentence-embedding model.
# NOTE(review): `embeddings` is rebound to "jinaai/jina-embeddings-v2-base-de"
# in a later cell before it is ever passed to the vector store, so this model
# load looks redundant — confirm and remove if it is not needed.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange


In [5]:
# Break the loaded documents into overlapping chunks: a target size of 500 with
# an overlap of 100 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
all_splits = text_splitter.split_documents(documents)

In [6]:
# Run the embedding model on the CPU.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to be executed locally — only keep it for model sources you trust.
model_kwargs = {"device": "cpu", "trust_remote_code": True}

embeddings = HuggingFaceEmbeddings(
    model_name="jinaai/jina-embeddings-v2-base-de",
    model_kwargs=model_kwargs,
)

# Index the chunks produced by the text splitter (`all_splits`), not the raw
# loaded `documents`; otherwise the chunking step has no effect on what is
# embedded and retrieved.
# drop_old=False keeps an existing collection and appends to it.
vectorstore = Milvus.from_documents(
    documents=all_splits,
    embedding=embeddings,
    connection_args={
        "uri": MILVUS_URL,
    },
    drop_old=False,
)


In [9]:
# Suppress all warnings from the langsmith.client module
warnings.filterwarnings("ignore", module="langsmith.client")

def run_query(query: str) -> dict:
    """Answer `query` with a retrieval-augmented QA chain over `vectorstore`.

    A fresh LLM client, prompt and chain are built on every call. The answer is
    streamed to stdout while it is generated (via the streaming callback) and
    is also returned.

    Args:
        query: The natural-language question to answer.

    Returns:
        The mapping produced by the chain's `invoke`; it holds the original
        question under "query" and the generated answer under "result".
    """
    llm = OllamaLLM(
        model="llama3.2",
        # Echo tokens to stdout as they arrive.
        callbacks=[StreamingStdOutCallbackHandler()],
        # NOTE(review): presumably the model's end-of-turn marker — confirm.
        stop=["<|eot_id|>"],
    )

    # Pulled from the LangChain Hub on each call.
    prompt = hub.pull("rlm/rag-prompt")

    # `vectorstore` is the notebook-level Milvus store built in an earlier cell.
    qa_chain = RetrievalQA.from_chain_type(
        llm, retriever=vectorstore.as_retriever(), chain_type_kwargs={"prompt": prompt}
    )

    result = qa_chain.invoke({"query": query})
    return result

In [10]:
# Ask a sample question. run_query streams the answer to stdout while it is
# generated and returns the chain's result, which is kept in `response`.
query = "What is the name of the function that lists all songs from a specific album in the music_library table?"
response = run_query(query)

The function that lists all songs from a specific album in the music_library table is `list_all_songs_from_album`.

In [11]:
# Show the full result mapping (the question under 'query', the answer under 'result').
response

{'query': 'What is the name of the function that lists all songs from a specific album in the music_library table?',
 'result': 'The function that lists all songs from a specific album in the music_library table is `list_all_songs_from_album`.'}

In [13]:
# Show only the generated answer text.
response["result"]

'The function that lists all songs from a specific album in the music_library table is `list_all_songs_from_album`.'