In [29]:
## RAG Application With GPT-4 Turbo (Azure OpenAI)

In [1]:
import os

from dotenv import load_dotenv

# Load key/value pairs from a local .env file into the process environment.
load_dotenv()

# The Azure OpenAI clients built later in this notebook take their endpoint and
# key from these variables. Copying os.getenv(X) back into os.environ[X] is a
# no-op when X is set and raises an opaque TypeError when it is not, so check
# explicitly and fail with a message that names what is missing.
REQUIRED_ENV_VARS = ("AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_API_KEY")
missing_vars = [name for name in REQUIRED_ENV_VARS if name not in os.environ]
if missing_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_vars)
        + ". Define them in the shell or in a .env file before running this notebook."
    )

# API version requested from the Azure OpenAI service.
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"


# 1. Get a Data Loader


In [2]:
from langchain_community.document_loaders import WebBaseLoader


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Fetch the LangSmith user guide page and parse it into LangChain documents.
loader = WebBaseLoader(web_path="https://docs.smith.langchain.com/user_guide")
data = loader.load()
# Echo the loaded documents as the cell output.
data

[Document(page_content="\n\n\n\n\nLangSmith User Guide | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\nSkip to main contentGo to API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookThis is outdated documentation for \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith, which is no longer actively maintained.For up-to-date documentation, see the latest version.User GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we‚Äôll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they‚Äôre just starting their journey.Prototyping‚ÄãPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other p

In [None]:
import os
from pathlib import Path

from langchain_community.document_loaders import UnstructuredPDFLoader

# The PDF location can be overridden with the RAG_PDF_PATH environment variable
# so the notebook is not tied to one machine. The fallback is the path this
# notebook was originally written against.
file_path = os.environ.get(
    "RAG_PDF_PATH",
    "/Users/hcaap/Desktop/Kod/Siemens/Trying_rag/pdfs/bilaga-A.pdf",
)

# Fail early with a clear message instead of an error from inside the loader.
if not Path(file_path).is_file():
    raise FileNotFoundError(
        f"PDF not found: {file_path!r}. Set RAG_PDF_PATH to the document to load."
    )

# NOTE: this rebinds `loader`, which previously referred to the web page loader.
loader = UnstructuredPDFLoader(file_path)
data_1 = loader.load()

# 2. Convert data to Vector Database


In [4]:
from langchain_chroma import Chroma ##vector Database
from langchain_openai import AzureChatOpenAI




In [26]:
# Chat model client bound to the Azure deployment named below; the endpoint,
# key and API version are not passed here and come from the environment.
llm = AzureChatOpenAI(deployment_name="gpt-4-turbo-1106-preview")

# Print the client's configuration as a quick check of what was picked up.
print(llm)

client=<openai.resources.chat.completions.Completions object at 0x000001F01DFDCA60> async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001F04951ABC0> openai_api_key=SecretStr('**********') openai_proxy='' azure_endpoint='https://openai-resource-france.openai.azure.com/' deployment_name='gpt-4-turbo-1106-preview' openai_api_version='2023-03-15-preview' openai_api_type='azure'


In [27]:
# Quick sanity check: send one free-form prompt straight to the chat model.
llm.invoke(input="tell me about boby")

AIMessage(content='It seems like you may be referring to a name, "Boby," which could refer to a person, pet, or character. However, without more context, it\'s challenging to provide detailed information.\n\nIf "Boby" is a person\'s name or nickname, it could be short for Robert or Roberto, or it could be an independent given name. In any case, I would need more details to tell you anything specific about an individual named Boby.\n\nIf "Boby" refers to a pet, it\'s not uncommon for pet owners to give their animals unique or affectionate names, but without further context, I can\'t provide any information.\n\nIf "Boby" is a character from a book, movie, game, or other forms of media, I would need additional details to identify the character and provide relevant information.\n\nPlease provide more context or details about "Boby" so I can assist you appropriately.', response_metadata={'token_usage': {'completion_tokens': 186, 'prompt_tokens': 12, 'total_tokens': 198}, 'model_name': 'gpt-

## Text splitter

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded web documents into chunks using the splitter's default settings.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(documents=data)

In [23]:
import pandas as pd
# One row per chunk, with the chunk text and each metadata field in its own
# named column. Passing the Document objects directly to pd.DataFrame lets
# pandas iterate them, which yields unnamed positional columns instead.
df = pd.DataFrame(
    [{**doc.metadata, "page_content": doc.page_content} for doc in documents]
)
df.head()


## Embedding function

In [28]:
from langchain_openai import AzureOpenAIEmbeddings

# Embedding client used to vectorise the document chunks; only the model name
# is given explicitly here.
embedding = AzureOpenAIEmbeddings(model="text-embedding-ada-002")

In [30]:
# Embed every chunk and index it in a Chroma vector store.
vector_store = Chroma.from_documents(documents=documents, embedding=embedding)

# Inspect what was stored. This uses its own name rather than `data`, because
# rebinding `data` would overwrite the loaded web documents and break
# re-running the text-splitter cell.
vector_store_contents = vector_store.get()
vector_store_contents

# 3. Make a RAG pipeline


In [13]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain import hub

In [15]:
# Pull the shared RAG prompt template from the LangChain hub.
prompt = hub.pull(owner_repo_commit="rlm/rag-prompt")
# Echo the template so its input variables are visible in the output.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [32]:
# Assemble the retrieval QA chain: the vector store's retriever supplies the
# context and the pulled prompt is forwarded to the underlying chain.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_store.as_retriever(),
    chain_type_kwargs=dict(prompt=prompt),
)

In [33]:
question = "Explain what is langsmith"
# Run the chain through `invoke`, matching how `llm` is called earlier in this
# notebook; calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
# The response is a dict holding the original query and the generated answer.
result

{'query': 'Explain what is langsmith',
 'result': 'LangSmith is a platform designed for the development, monitoring, and testing of large language model (LLM) applications. It supports a variety of workflows throughout the application development lifecycle, including prototyping, debugging, and running tests. Features of LangSmith include a playground for rapid experimentation, comparison views for evaluating changes, and the ability to create and run test datasets on LLM applications.'}

In [19]:
# Show only the generated answer text from the chain's response dict.
result["result"]

'LangSmith is a platform for LLM application development, monitoring, and testing. It allows for rapid iteration and experimentation, beta testing, capturing feedback, annotating traces, and monitoring and A/B testing. Additionally, it provides automations for processing traces at production scale.'

In [20]:
import pprint
# Pretty-printer with a five-space indent, used to wrap the long answer string
# across several output lines.
pp = pprint.PrettyPrinter(indent=5)
print(pp.pformat(result["result"]))

('LangSmith is a platform for LLM application development, monitoring, and '
 'testing. It allows for rapid iteration and experimentation, beta testing, '
 'capturing feedback, annotating traces, and monitoring and A/B testing. '
 'Additionally, it provides automations for processing traces at production '
 'scale.')


In [21]:
question = "Explain Monitoring and A/B Testing in langsmith"
# Run the chain through `invoke`, matching how `llm` is called earlier in this
# notebook; calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
# The response is a dict holding the original query and the generated answer.
result

{'query': 'Explain Monitoring and A/B Testing in langsmith',
 'result': 'Monitoring in LangSmith allows users to track key metrics over time and drill down into specific data points for debugging production issues. A/B Testing in LangSmith involves marking different versions of applications with different identifiers and viewing their performance side-by-side within each chart. Automations in LangSmith enable users to perform actions on traces in near real-time, such as automatically scoring traces or sending them to datasets.'}