In [1]:
from langchain_cohere.embeddings import CohereEmbeddings
import os

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings

model_name = "Alibaba-NLP/gte-large-en-v1.5"
model_kwargs = {'device': 'mps', "trust_remote_code": True}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs)

  from tqdm.autonotebook import tqdm, trange


In [22]:
MILVUS_URL = os.environ['MILVUS_URL']
MILVUS_KEY = os.environ['MILVUS_KEY']
DIMS = 1024
EMBEDDING_MODEL = "embed-english-v3.0"
COHERE_KEY=os.environ['COHERE_KEY']

In [4]:
# embedding_fn = CohereEmbeddings(model=EMBEDDING_MODEL, cohere_api_key=COHERE_KEY)

In [5]:
e = hf.embed_query("What is flink")

In [21]:
MILVUS_KEY

'https://in03-a472fda6cef4f90.api.gcp-us-west1.zillizcloud.com'

In [23]:
from langchain_community.vectorstores.zilliz import Zilliz

zilliz = Zilliz(
    embedding_function = hf,
    collection_name="Flink",
    connection_args={"uri": MILVUS_URL, "token": MILVUS_KEY},
    auto_id=True
)

In [24]:
retriever = zilliz.as_retriever(search_kwargs={"k": 25})

In [25]:
retriever.invoke("How does elastic scaling works in FLink. What are the various configuration used for scaling.")

[Document(page_content='how to configure the relevant components. The size of those components always has to be between its maximum and minimum value, otherwise Flink startup will fail.', metadata={'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/memory/mem_setup/', 'type': 'document', 'pk': 450143955092274740}),
 Document(page_content='â\x96¾ Monitoring Checkpointing Monitoring Back Pressure Upgrading Applications and Flink Versions Production Readiness Checklist Flink Development â\x96¾ Importing Flink into an IDE Building Flink from Source Internals â\x96¾ Jobs and Scheduling Task Lifecycle File Systems Project Homepage JavaDocs ScalaDocs PyDocs Pick Docs Version â\x96¾ 1.16 (â\x9c\x93) v1.16 v1.15 All Versions ä¸\xadæ\x96\x87ç\x89\x88 Elastic Scaling On This Page Reactive Mode Getting started Usage Limitations Adaptive Scheduler Usage Limitations Adaptive Batch Scheduler Usage Performance tuning Limitations Elastic Scaling # Apache Flink allows you

In [26]:
for doc in retriever.invoke("List down all the commands used in the flink documenatation along with explanation of the command."):
    print(doc.metadata)

{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/docker/', 'type': 'document', 'pk': 450143955092274102}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/kubernetes/', 'type': 'document', 'pk': 450143955092274178}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/cli/', 'type': 'document', 'pk': 450143955092274966}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/native_kubernetes/', 'type': 'document', 'pk': 450143955092274216}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/kubernetes/', 'type': 'document', 'pk': 450143955092274124}
{'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/resource-providers/standalone/docker/', 'type': 'document', 'pk': 450143955092274088}
{'url

In [27]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [28]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
# from langchain_cohere import CohereRerank
# compressor = CohereRerank(top_n=5, cohere_api_key=COHERE_KEY)


In [29]:
from langchain_community.document_compressors.flashrank_rerank import FlashrankRerank
# from flashrank import Ranker
# ranker = Ranker()
compressor = FlashrankRerank(top_n=10)


INFO:flashrank.Ranker:Downloading ms-marco-MultiBERT-L-12...
ms-marco-MultiBERT-L-12.zip: 100%|██████████| 98.7M/98.7M [00:20<00:00, 4.98MiB/s]


In [30]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

In [31]:
compression_retriever.invoke("How does elastic scaling works in FLink. What are the various configuration used for scaling")

[Document(page_content='parallelism when re-scaling the program (via a savepoint). Flink’s internal bookkeeping tracks parallel state in the granularity of max-parallelism-many key groups .\nFlink’s design strives to make it efficient to have a very high value for the maximum parallelism, even if\nexecuting the program with a low parallelism. Compression # Flink offers optional compression (default: off) for all checkpoints and savepoints. Currently, compression always uses\nthe snappy compression algorithm (version 1.1.10.x) but we are planning to support\ncustom compression algorithms in the future. Compression works on the granularity of key-groups in keyed state, i.e.', metadata={'url': 'https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/ops/state/large_state_tuning/', 'type': 'document', 'pk': 450143955092273188, 'relevance_score': 0.99941087}),
 Document(page_content='scheduler are also available with it. The Flink community is working on addressing these limitations

In [34]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document

# from langchain_cohere import ChatCohere

In [35]:
# llm = ChatCohere(model="command-r-plus", temperature=0.0, cohere_api_key=COHERE_KEY)

In [36]:
def format_docs(docs: list[Document]):
    
    text = ""

    for doc in docs:
        xml_tag_start = f"<{doc.metadata['url'].lower()}>"
        xml_tag_end = f"</{doc.metadata['url'].lower()}>"
        content = doc.page_content
        text += f"{xml_tag_start}\n{content}\n{xml_tag_end}\n\n"

    return text

In [45]:
GEMINI_KEY = os.enviorn["GEMINI_KEY"]

In [46]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [47]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [48]:
google_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GEMINI_KEY, temperature=0.0)

In [49]:
from langchain_core.prompts import PromptTemplate
example_prompt = PromptTemplate.from_template("""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.Keep the answer concise and to the point. Write down the citation at the end of the answer that you have taken reference from. The citation names are in form of urls, that are provided in the xml tags.
Follow below mention format for citation
Citation:
        (1) Source URL 1
        (2) Source URL 2
Only provide citation if you have used the information from the document.
Question: {question} \nContext: {context} \nAnswer""")

In [50]:
google_rag = (
    {"context": compression_retriever | format_docs, "question": RunnablePassthrough()}
    | example_prompt
    | google_llm
    | StrOutputParser()
)

In [51]:
google_rag.get_graph().print_ascii()

                      +---------------------------------+              
                      | Parallel<context,question>Input |              
                      +---------------------------------+              
                           ****                  ****                  
                       ****                          ****              
                     **                                  ****          
+--------------------------------+                           **        
| ContextualCompressionRetriever |                            *        
+--------------------------------+                            *        
                 *                                            *        
                 *                                            *        
                 *                                            *        
      +---------------------+                         +-------------+  
      | Lambda(format_docs) |                         | Passthro

In [54]:
response = google_rag.invoke("How does elastic scaling works in FLink")

In [55]:
to_markdown(response)

> Elastic scaling in Flink can be achieved using the Reactive Mode. In this mode, Flink automatically adjusts the parallelism of a job to utilize all available cluster resources. Adding or removing TaskManagers will scale the job up or down, respectively. This mode relies on checkpoints for job restoration during rescaling. Citation:
>         (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/elastic_scaling/ 


In [56]:
response = google_rag.invoke("How does elastic scaling works for the batch job. WHat configuration are needed for scaling the batch job")

In [57]:
to_markdown(response)

> The provided text discusses Adaptive Batch Scheduler for parallelism tuning in batch jobs, but it doesn't contain information about elastic scaling for batch jobs. Thus, I cannot answer your question. 


In [58]:
response = google_rag.invoke("How to configure history server. wHat configurations are needed to setup history server on azure?")

In [59]:
to_markdown(response)

> To configure the Flink HistoryServer, you need to set the `jobmanager.archive.fs.dir` in `flink-conf.yaml` on the JobManager and `historyserver.archive.fs.dir` on the HistoryServer. The HistoryServer can then monitor the configured directory for archived jobs. Citation:
> (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/advanced/historyserver/ 


In [58]:
response = google_rag.invoke("How to configure flink on azure")

In [59]:
to_markdown(response)

> To configure Flink on Azure, you can configure the Azure Blob storage key in the `flink-conf.yaml` file.
> Citation:
>  (1) https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/deployment/filesystems/azure/ 


In [60]:
import os
from huggingface_hub import login
from huggingface_hub import snapshot_download
os.environ["CURL_CA_BUNDLE"]=""
login(token=os.enviorn["HF_TOKEN"])
# Hugging Face model identifier
model_id = 'meta-llama/Llama-2-7b-chat-hf'  # Replace with your model id


# Download model files from Hugging Face
model_dir = snapshot_download(repo_id=model_id, ignore_patterns="*.bin")
print(f"Model files downloaded to {model_dir}")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/dipak/.cache/huggingface/token
Login successful


Fetching 14 files:  43%|████▎     | 6/14 [00:07<00:10,  1.28s/it]


In [3]:
import os

In [5]:
LANGCHAIN_TRACING_V2="true"
LANGCHAIN_API_KEY=os.environ['LANGSMITH_KEY']

In [2]:
from langchain_community.tools.tavily_search import TavilySearchResults
search = TavilySearchResults()

In [3]:
search.invoke("what is the weather in SF")

[{'url': 'https://www.weatherapi.com/',
  'content': "{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.78, 'lon': -122.42, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1717854186, 'localtime': '2024-06-08 6:43'}, 'current': {'last_updated_epoch': 1717853400, 'last_updated': '2024-06-08 06:30', 'temp_c': 13.3, 'temp_f': 55.9, 'is_day': 1, 'condition': {'text': 'Overcast', 'icon': '//cdn.weatherapi.com/weather/64x64/day/122.png', 'code': 1009}, 'wind_mph': 12.5, 'wind_kph': 20.2, 'wind_degree': 240, 'wind_dir': 'WSW', 'pressure_mb': 1012.0, 'pressure_in': 29.88, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 93, 'cloud': 100, 'feelslike_c': 12.0, 'feelslike_f': 53.7, 'windchill_c': 10.4, 'windchill_f': 50.7, 'heatindex_c': 11.9, 'heatindex_f': 53.5, 'dewpoint_c': 10.0, 'dewpoint_f': 49.9, 'vis_km': 16.0, 'vis_miles': 9.0, 'uv': 4.0, 'gust_mph': 12.6, 'gust_kph': 20.3}}"},
 {'url': 'https://world-weather.info/forecast/u

In [6]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
docs = loader.load()
documents = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
).split_documents(docs)
vector = FAISS.from_documents(documents, OpenAIEmbeddings())
retriever = vector.as_retriever()

In [7]:
retriever.invoke("how to upload a dataset")[0]

Document(page_content='description="A sample dataset in LangSmith.")client.create_examples(    inputs=[        {"postfix": "to LangSmith"},        {"postfix": "to Evaluations in LangSmith"},    ],    outputs=[        {"output": "Welcome to LangSmith"},        {"output": "Welcome to Evaluations in LangSmith"},    ],    dataset_id=dataset.id,)# Define your evaluatordef exact_match(run, example):    return {"score": run.outputs["output"] == example.outputs["output"]}experiment_results = evaluate(    lambda input: "Welcome " + input[\'postfix\'], # Your AI system goes here    data=dataset_name, # The data to predict and grade over    evaluators=[exact_match], # The evaluators to score the results    experiment_prefix="sample-experiment", # The name of the experiment    metadata={      "version": "1.0.0",      "revision_id": "beta"    },)import { Client, Run, Example } from "langsmith";import { evaluate } from "langsmith/evaluation";import { EvaluationResult } from "langsmith/evaluation";co

In [9]:
from langchain.tools.retriever import create_retriever_tool

In [10]:
retriever_tool = create_retriever_tool(
    retriever,
    "langsmith_search",
    "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!",
)

In [11]:
tools = [search, retriever_tool]

In [12]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

In [13]:
from langchain import hub

# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-functions-agent")
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [14]:
from langchain.agents import create_tool_calling_agent

agent = create_tool_calling_agent(llm, tools, prompt)

In [15]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [16]:
agent_executor.invoke({"input": "hi!"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHello! How can I assist you today?[0m

[1m> Finished chain.[0m


{'input': 'hi!', 'output': 'Hello! How can I assist you today?'}

In [17]:
agent_executor.invoke({"input": "how can langsmith help with testing?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `langsmith_search` with `{'query': 'how can LangSmith help with testing'}`


[0m[33;1m[1;3mGet started with LangSmith | 🦜️🛠️ LangSmith

Skip to main contentLangSmith API DocsSearchGo to AppQuick startTutorialsHow-to guidesConceptsReferencePricingSelf-hostingQuick startOn this pageGet started with LangSmithLangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!1. Install LangSmith​PythonTypeScriptpip install -U langsmithyarn add langchain langsmith2. Create an API key​To create an API key head to the Settings page. Then click Create API Key.3. Set up your environment​Shellexport LANGCHAIN_TRACING_V2=trueexport LANGCHAIN_API_KEY=<your-api-key># The below examples use the OpenAI API, though it's not necessary in generalexport OPE

{'input': 'how can langsmith help with testing?',
 'output': 'LangSmith is a platform that can help with testing by allowing you to closely monitor and evaluate your application. It enables you to build production-grade LLM (Large Language Model) applications, ensuring that you can ship your products quickly and with confidence. Here are some ways LangSmith can assist with testing:\n\n1. **Installation**: You can install LangSmith using Python or TypeScript by running the appropriate commands.\n2. **Create an API Key**: Generate an API key in the Settings page to authenticate and interact with LangSmith.\n3. **Environment Setup**: Set up your environment variables to configure LangSmith for your specific needs.\n4. **Logging Traces**: LangSmith provides multiple methods to log traces, which are essential for monitoring and evaluating your application during testing.\n5. **Evaluation**: You can run evaluations on your test cases using LangSmith, ensuring that your application meets the 

In [18]:
agent_executor.invoke({"input": "whats the weather in sf?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `tavily_search_results_json` with `{'query': 'weather in San Francisco'}`


[0m[36;1m[1;3m[{'url': 'https://www.nws.noaa.gov/wtf/MapClick.php?CityName=San+Francisco&state=CA&site=MTR&textField1=37.775&textField2=-122.418&e=0&lg=ep', 'content': 'San Francisco CA. NWS. Point Forecast: San Francisco CA. 37.77°N 122.41°W. Mobile Weather Information | En Español. Last Update: 1:59 pm PDT May 14, 2024. Forecast Valid: 4pm PDT May 14, 2024-6pm PDT May 21, 2024.'}, {'url': 'https://www.accuweather.com/en/us/san-francisco/94103/weather-forecast/347629', 'content': 'Get the current and future weather conditions for San Francisco, CA, including temperature, precipitation, wind, air quality and more. See the hourly and 10-day outlook, radar maps, alerts and allergy information.'}, {'url': 'https://www.accuweather.com/en/us/san-francisco/94103/weather-forecast/6-347629_1_al', 'content': 'Get the current weather, air quality,

{'input': 'whats the weather in sf?',
 'output': 'The weather in San Francisco is currently around 37.77°N 122.41°W. You can find more detailed and up-to-date information on the [National Weather Service website](https://www.nws.noaa.gov/wtf/MapClick.php?CityName=San+Francisco&state=CA&site=MTR&textField1=37.775&textField2=-122.418&e=0&lg=ep).'}