In [62]:
from langchain_cohere.embeddings import CohereEmbeddings
import os

In [63]:
import torch
from langchain_huggingface import HuggingFaceEmbeddings

# Choose the torch device at runtime instead of hard-coding Apple's "mps"
# backend, so this cell also runs on CUDA or CPU-only machines.
# `getattr` guards against torch builds that do not expose `torch.backends.mps`.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    EMBEDDING_DEVICE = "mps"
elif torch.cuda.is_available():
    EMBEDDING_DEVICE = "cuda"
else:
    EMBEDDING_DEVICE = "cpu"

model_name = "Alibaba-NLP/gte-large-en-v1.5"
# NOTE(review): trust_remote_code=True lets the checkpoint run its own Python
# code at load time; presumably this model needs it. Confirm before changing.
model_kwargs = {'device': EMBEDDING_DEVICE, "trust_remote_code": True}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: Alibaba-NLP/gte-large-en-v1.5


In [None]:
# Connection settings and credentials come from the environment so that no
# secret is stored in the notebook. A missing variable raises KeyError.
MILVUS_URL = os.environ['MILVUS_URL']
# Fixed: the token was previously read from 'MILVUS_URL', so the endpoint URL
# was being passed as the auth token.
MILVUS_KEY = os.environ['MILVUS_KEY']
# Presumably the embedding dimensionality; not referenced in the visible cells.
DIMS = 1024
# Cohere model name and key; only the commented-out Cohere cells refer to them.
EMBEDDING_MODEL = "embed-english-v3.0"
COHERE_KEY=os.environ['COHERE_KEY']

In [64]:
# embedding_fn = CohereEmbeddings(model=EMBEDDING_MODEL, cohere_api_key=COHERE_KEY)

In [65]:
# Embed one sample query with the HuggingFace model and keep the result in `e`.
# Presumably a smoke test: `e` is not used by any cell visible here.
e = hf.embed_query("What is flink")

In [66]:
from langchain_community.vectorstores.zilliz import Zilliz

# Vector-store handle for the "FlinkNEW" collection; `hf` supplies the
# embeddings and the endpoint/token come from the config cell.
zilliz = Zilliz(
    collection_name="FlinkNEW",
    embedding_function=hf,
    auto_id=True,
    connection_args={
        "uri": MILVUS_URL,
        "token": MILVUS_KEY,
    },
)

In [67]:
# Expose the vector store as a retriever; search_kwargs requests k=25 results per query.
retriever = zilliz.as_retriever(search_kwargs={"k": 25})

In [68]:
# Print only the metadata of each retrieved document to see which pages were hit.
retrieved_docs = retriever.invoke(
    "List down all the commands used in the flink documenatation along with explanation of the command."
)
for doc in retrieved_docs:
    print(doc.metadata)

{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/rest_api/', 'type': 'document', 'pk': 450143955092270970}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/rest_api/', 'type': 'document', 'pk': 450143955092270992}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/rest_api/', 'type': 'document', 'pk': 450143955092270958}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/rest_api/', 'type': 'document', 'pk': 450143955092271030}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/state_backends/', 'type': 'document', 'pk': 450143955092270598}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/state_backends/#incremental-checkpoints', 'type': 'document', 'pk': 450143955092272290}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/state_backends/#timers-heap-vs-rocksdb', 'type': 'document'

In [69]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  """Return `text` as an IPython Markdown object rendered as a block quote.

  Bullet characters ('•') are replaced with an indented '*' and every line,
  including blank ones, is prefixed with '> '.
  """
  bulleted = text.replace('•', '  *')
  # The always-true predicate makes textwrap.indent prefix empty lines too.
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda _: True)
  return Markdown(quoted)

In [70]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
# from langchain_cohere import CohereRerank
# compressor = CohereRerank(top_n=5, cohere_api_key=COHERE_KEY)


In [71]:
from langchain_community.document_compressors.flashrank_rerank import FlashrankRerank
# from flashrank import Ranker
# ranker = Ranker()
# Flashrank reranker configured with top_n=5.
compressor = FlashrankRerank(top_n=5)


In [72]:
# Combine the base retriever with the reranking compressor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [73]:
# Run a sample query through the reranking retriever; the returned documents are the cell output.
compression_retriever.invoke("What is flink")

[Document(page_content='is that Flink might immediately build an incremental checkpoint on top of the restored one. Therefore,', metadata={'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/savepoints/', 'type': 'document', 'pk': 450143955092270490, 'relevance_score': 0.99825484}),
 Document(page_content='is that Flink might immediately build an incremental checkpoint on top of the restored one. Therefore,', metadata={'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/savepoints/#resuming-from-savepoints', 'type': 'document', 'pk': 450143955092271434, 'relevance_score': 0.99825484}),
 Document(page_content='is that Flink might immediately build an incremental checkpoint on top of the restored one. Therefore,', metadata={'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/savepoints/#savepoint-format', 'type': 'document', 'pk': 450143955092271548, 'relevance_score': 0.99825484}),
 Document(page_c

In [74]:
from langchain import hub
# Pull the "rlm/rag-prompt" prompt from the LangChain Hub.
# The RAG chain in this notebook is built with `example_prompt`, not this one.
prompt = hub.pull("rlm/rag-prompt")


In [75]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document

# from langchain_cohere import ChatCohere

In [76]:
# llm = ChatCohere(model="command-r-plus", temperature=0.0, cohere_api_key=COHERE_KEY)

In [77]:
def format_docs(docs: list[Document]):
    """Render documents as one string, each wrapped in a tag named after its URL.

    Args:
        docs: Documents whose ``metadata['url']`` and ``page_content`` are read.

    Returns:
        One section per document, in input order: an opening tag holding the
        lower-cased URL, the page content, the matching closing tag and a
        blank line. An empty string when ``docs`` is empty.
    """

    def _wrap(doc):
        # The lower-cased URL serves as both the opening and closing tag name.
        tag = doc.metadata['url'].lower()
        return f"<{tag}>\n{doc.page_content}\n</{tag}>\n\n"

    return "".join(_wrap(doc) for doc in docs)

In [78]:
# Read the Gemini API key from the environment, like the other credentials in
# this notebook, instead of embedding it in the source. The key that was
# previously hard-coded here should be treated as leaked and revoked.
# A missing variable raises KeyError.
GEMINI_KEY = os.environ['GEMINI_KEY']

In [79]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [80]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [81]:
# Gemini chat model used by the RAG chain, with temperature set to 0.0.
google_llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.0,
    google_api_key=GEMINI_KEY,
)

In [82]:
from langchain_core.prompts import PromptTemplate
# Prompt template with {question} and {context} placeholders. It asks for a
# concise answer followed by a numbered "Citation:" list of source URLs, which
# the model is told to take from the XML-style tags around each context chunk.
example_prompt = PromptTemplate.from_template("""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.Keep the answer concise and to the point. Write down the citation at the end of the answer that you have taken reference from. The citation names are in form of urls, that are provided in the xml tags.
Follow below mention format for citation
Citation:
        (1) Source URL 1
        (2) Source URL 2
Only provide citation if you have used the information from the document.
Question: {question} \nContext: {context} \nAnswer""")

In [83]:
# RAG chain. The input question is fed to the retriever, whose documents are
# turned into the context string by format_docs, and is also passed through
# unchanged as the question. The filled prompt goes to the Gemini model and
# the reply is parsed into a plain string.
# NOTE(review): the context comes from `retriever`, not from the reranking
# `compression_retriever` built earlier. Confirm that bypassing the reranker
# is intended.
google_rag = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | example_prompt
    | google_llm
    | StrOutputParser()
)

In [84]:
# Print an ASCII diagram of the chain's runnable graph.
google_rag.get_graph().print_ascii()

              +---------------------------------+           
              | Parallel<context,question>Input |           
              +---------------------------------+           
                    ****                ****                
                 ***                        ***             
               **                              ***          
+----------------------+                          **        
| VectorStoreRetriever |                           *        
+----------------------+                           *        
            *                                      *        
            *                                      *        
            *                                      *        
+---------------------+                     +-------------+ 
| Lambda(format_docs) |                     | Passthrough | 
+---------------------+                     +-------------+ 
                    ****                ****                
                        

In [88]:
# Run the RAG chain on a sample question; the answer string is stored in `response`.
response = google_rag.invoke("List down all the commands used in the flink documenatation along with explanation of the command.")

In [89]:
# Render the answer as block-quoted Markdown in the cell output.
to_markdown(response)

> This document does not contain the answer to this question. It provides information about the structure of the JSON objects used in the Flink REST API. It does not list or explain any Flink commands. Citation: 
>         (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/rest_api/
>         (2) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/rest_api/

In [92]:
# Run the RAG chain on a second question; this overwrites the previous `response`.
response = google_rag.invoke("How to handle backpressure situations. WHat options are available.List down each of the configueration")

In [93]:
# Render the answer as block-quoted Markdown in the cell output.
to_markdown(response)

> To handle backpressure situations in Flink, you have three options:
> 
> 1. **Remove the backpressure source:** Optimize the Flink job, adjust Flink or JVM configurations, or scale up resources.
> 2. **Reduce buffered in-flight data:** Use techniques like buffer debloating to control the amount of data being held in buffers.
> 3. **Enable unaligned checkpoints:** This allows checkpoint barriers to progress faster, even under backpressure, but it's important to understand its trade-offs. 
> 
> Citation:
>         (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/checkpointing_under_backpressure/ 
>         (2) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/checkpointing_under_backpressure/#unaligned-checkpoints 
>         (3) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/checkpointing_under_backpressure/#buffer-debloating 


In [50]:
# Display the prompt object pulled from the LangChain Hub.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])