In [2]:
from langchain_cohere.embeddings import CohereEmbeddings
import os

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used for both indexing-side compatibility and query embedding.
model_name = "Alibaba-NLP/gte-large-en-v1.5"
# NOTE(review): 'mps' presumably targets Apple-silicon GPUs and will not exist on
# other hardware -- confirm before running elsewhere. trust_remote_code=True lets
# the model repository supply its own code; confirm the source is trusted.
model_kwargs = dict(device="mps", trust_remote_code=True)
encode_kwargs = dict(normalize_embeddings=False)

hf = HuggingFaceEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Connection settings and credentials come from the environment so that no
# secret is stored in the notebook itself. A missing variable raises KeyError.
MILVUS_URL = os.environ['MILVUS_URL']
# Fixed: this used to read 'MILVUS_URL', which sent the endpoint URL as the
# auth token when connecting to the vector store.
MILVUS_KEY = os.environ['MILVUS_KEY']
# NOTE(review): presumably the embedding vector size; not referenced by the
# visible cells -- confirm it matches the collection schema.
DIMS = 1024
# Cohere settings: only referenced by the commented-out Cohere embedding /
# rerank / chat lines in this notebook.
EMBEDDING_MODEL = "embed-english-v3.0"
COHERE_KEY = os.environ['COHERE_KEY']

In [4]:
# embedding_fn = CohereEmbeddings(model=EMBEDDING_MODEL, cohere_api_key=COHERE_KEY)

In [5]:
# Embed a single sample query as a quick check that the model loads and runs.
e = hf.embed_query(
    "What is flink"
)

In [6]:
from langchain_community.vectorstores.zilliz import Zilliz

# Handle to the existing "Flink" collection, using `hf` as the embedding function
# and the URL/token read from the environment for the connection.
zilliz = Zilliz(
    embedding_function=hf,
    collection_name="Flink",
    connection_args=dict(uri=MILVUS_URL, token=MILVUS_KEY),
    auto_id=True,
)

In [7]:
# Base retriever over the vector store; each query is issued with k=25.
retriever = zilliz.as_retriever(
    search_kwargs=dict(k=25),
)

In [8]:
# Inspect the raw (un-reranked) hits for a sample question.
retriever.invoke(
    "How does elastic scaling works in FLink. "
    "What are the various configuration used for scaling."
)

[Document(page_content='how to configure the relevant components. The size of those components always has to be between its maximum and minimum value, otherwise Flink startup will fail.', metadata={'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/memory/mem_setup/', 'type': 'document', 'pk': 450143955092274740}),
 Document(page_content='â\x96¾ Monitoring Checkpointing Monitoring Back Pressure Upgrading Applications and Flink Versions Production Readiness Checklist Flink Development â\x96¾ Importing Flink into an IDE Building Flink from Source Internals â\x96¾ Jobs and Scheduling Task Lifecycle File Systems Project Homepage JavaDocs ScalaDocs PyDocs Pick Docs Version â\x96¾ 1.16 (â\x9c\x93) v1.16 v1.15 All Versions ä¸\xadæ\x96\x87ç\x89\x88 Elastic Scaling On This Page Reactive Mode Getting started Usage Limitations Adaptive Scheduler Usage Limitations Adaptive Batch Scheduler Usage Performance tuning Limitations Elastic Scaling # Apache Flink allows you

In [9]:
# Print only the metadata of each hit to see which pages were retrieved.
for doc in retriever.invoke(
    "List down all the commands used in the flink documenatation "
    "along with explanation of the command."
):
    print(doc.metadata)

{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/docker/', 'type': 'document', 'pk': 450143955092274102}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/kubernetes/', 'type': 'document', 'pk': 450143955092274178}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/cli/', 'type': 'document', 'pk': 450143955092274966}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/native_kubernetes/', 'type': 'document', 'pk': 450143955092274216}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/kubernetes/', 'type': 'document', 'pk': 450143955092274124}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/docker/', 'type': 'document', 'pk': 450143955092274088}
{'url

In [10]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
    """Return ``text`` wrapped in a ``Markdown`` object, formatted as a blockquote.

    Bullet characters are first replaced by an indented ``*`` marker, then every
    line -- blank lines included -- is prefixed with ``> ``.
    """
    bulleted = text.replace('•', '  *')
    # The always-true predicate makes textwrap.indent prefix empty lines too.
    quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
    return Markdown(quoted)

In [11]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
# from langchain_cohere import CohereRerank
# compressor = CohereRerank(top_n=5, cohere_api_key=COHERE_KEY)


In [12]:
from langchain_community.document_compressors.flashrank_rerank import FlashrankRerank
# from flashrank import Ranker
# ranker = Ranker()
# Reranking compressor configured with top_n=10.
compressor = FlashrankRerank(
    top_n=10,
)


In [13]:
# Combine the base `retriever` with the reranking `compressor` into one retriever.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [14]:
# Run the same sample question through the reranking retriever for comparison.
compression_retriever.invoke(
    "How does elastic scaling works in FLink. "
    "What are the various configuration used for scaling"
)

[Document(page_content='scheduler are also available with it. The Flink community is working on addressing these limitations. No support for the Elastic Scaling . The elastic scaling only supports slot requests without specified-resource at the moment. No support for task manager redundancy . The slotmanager.redundant-taskmanager-num is used to start redundant TaskManagers to speed up job recovery. This config option will not take effect in fine-grained resource management at the moment. No support for evenly spread out slot strategy . This strategy tries to spread out the slots evenly across all available TaskManagers. The strategy is not supported in the first version of fine-grained resource management and cluster.evenly-spread-out-slots will not take effect in it at the moment. Limited integration with Flinkâ\x80\x99s Web UI . Slots in fine-grained resource management can have different resource specs. The web UI only shows the slot number without its details at the moment. Limited

In [15]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document

# from langchain_cohere import ChatCohere

In [16]:
# llm = ChatCohere(model="command-r-plus", temperature=0.0, cohere_api_key=COHERE_KEY)

In [17]:
def format_docs(docs: list[Document]):
    """Concatenate documents into one context string with URL-named tags.

    Each document's ``page_content`` is placed on its own line between an
    opening and a closing tag whose name is the lower-cased ``metadata['url']``,
    and each wrapped document is followed by a blank line. An empty list
    produces an empty string.
    """

    def _wrap(document):
        # The source URL doubles as the tag name so the model can cite it.
        tag = document.metadata['url'].lower()
        return f"<{tag}>\n{document.page_content}\n</{tag}>\n\n"

    return "".join(_wrap(item) for item in docs)

In [18]:
# Read the Gemini API key from the environment, like the other credentials in
# this notebook, instead of embedding it in the source.
# SECURITY: a literal key was previously committed on this line; treat that key
# as leaked and revoke/rotate it.
GEMINI_KEY = os.environ['GEMINI_KEY']

In [19]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [20]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [45]:
# Gemini chat model used to generate answers; temperature is pinned to 0.0.
google_llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.0,
    google_api_key=GEMINI_KEY,
)

In [47]:
from langchain_core.prompts import PromptTemplate
# Prompt for the RAG chain. It takes {question} and {context} and asks for a
# concise answer followed by a "Citation:" list of the source URLs actually used.
# The prompt tells the model that those URLs are the names of the xml tags in the context.
# NOTE(review): the wording contains small slips ("Follow below mention format",
# no space in "know.Keep", no colon after the final "Answer"). They are left
# untouched here because editing the prompt text changes model behavior.
example_prompt = PromptTemplate.from_template("""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.Keep the answer concise and to the point. Write down the citation at the end of the answer that you have taken reference from. The citation names are in form of urls, that are provided in the xml tags.
Follow below mention format for citation
Citation:
        (1) Source URL 1
        (2) Source URL 2
Only provide citation if you have used the information from the document.
Question: {question} \nContext: {context} \nAnswer""")

In [48]:
# RAG chain: retrieve + format context, pass the question through unchanged,
# fill the prompt, call Gemini, and return the answer as a plain string.
# NOTE(review): the context comes from the base `retriever`, not from
# `compression_retriever`, so the reranker is not part of this chain --
# confirm that this is intentional.
google_rag = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | example_prompt
    | google_llm
    | StrOutputParser()
)

In [49]:
# Print an ASCII diagram of the chain's graph to check how the steps are wired.
google_rag.get_graph().print_ascii()

              +---------------------------------+           
              | Parallel<context,question>Input |           
              +---------------------------------+           
                    ****                ****                
                 ***                        ***             
               **                              ***          
+----------------------+                          **        
| VectorStoreRetriever |                           *        
+----------------------+                           *        
            *                                      *        
            *                                      *        
            *                                      *        
+---------------------+                     +-------------+ 
| Lambda(format_docs) |                     | Passthrough | 
+---------------------+                     +-------------+ 
                    ****                ****                
                        

In [50]:
# Ask the chain a general question about elastic scaling.
response = google_rag.invoke(
    "How does elastic scaling works in FLink"
)

In [51]:
# Display the answer as a Markdown blockquote.
to_markdown(response)

> Apache Flink offers elastic scaling, allowing you to adjust your job's parallelism dynamically. You can manually rescale by stopping a job, creating a savepoint, and restarting it with a different parallelism. Flink also provides automatic parallelism adjustment options, such as Reactive Mode.  Citation:
>         (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/elastic_scaling/ 


In [52]:
# Ask about elastic scaling for batch jobs and the configuration it needs.
response = google_rag.invoke(
    "How does elastic scaling works for the batch job. "
    "WHat configuration are needed for scaling the batch job"
)

In [53]:
# Display the batch-scaling answer as a Markdown blockquote.
to_markdown(response)

> To enable elastic scaling for batch jobs in Apache Flink, you need to configure the Adaptive Batch Scheduler. This involves setting `jobmanager.scheduler: AdaptiveBatch` and ensuring that the `execution.batch-shuffle-mode` is unset or explicitly set to `ALL_EXCHANGES_BLOCKING`. Additionally, you can fine-tune the scaling behavior using parameters like `jobmanager.adaptive-batch-scheduler.min-parallelism`, `jobmanager.adaptive-batch-scheduler.max-parallelism`, and `jobmanager.adaptive-batch-scheduler.avg-data-volume-per-task`. 
> 
> Citation: 
> (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/elastic_scaling/ 


In [54]:
# Ask about configuring the history server, specifically on Azure.
response = google_rag.invoke(
    "How to configure history server. "
    "wHat configurations are needed to setup history server on azure?"
)

In [55]:
# Display the history-server answer as a Markdown blockquote.
to_markdown(response)

> This document does not contain the answer for how to setup history server on azure. However, it explains how to integrate history server with log archiving and browsing services. Citation:
>         (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/advanced/historyserver/

In [58]:
# Ask how to configure Flink on Azure.
response = google_rag.invoke(
    "How to configure flink on azure"
)

In [59]:
# Display the Azure-configuration answer as a Markdown blockquote.
to_markdown(response)

> To configure Flink on Azure, you can configure the Azure Blob storage key in the `flink-conf.yaml` file.
> Citation:
>  (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/filesystems/azure/ 
