# Build a RAG Application with LangChain, Part 2



In [None]:
import os
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment so
# that later cells can pick up their settings via os.getenv.
load_dotenv()

In [None]:
from langchain_openai import AzureChatOpenAI

# Chat model used by every chain in this notebook. The Azure deployment name
# is looked up in the environment (None if the variable is not set).
model = AzureChatOpenAI(
    azure_deployment=os.environ.get("AZURE_OPENAI_MODEL_DEPLOYMENT_NAME"),
    openai_api_version="2023-05-15",
)

In [None]:
import pandas as pd

# Location of the prepared JSON file that this notebook reads.
DATASET_NAME = "./prep/output/master.json"

# Parse the JSON file with pandas; the result is inspected in the next cell
# and later handed to DataFrameLoader.
transcripts_dataset = pd.read_json(path_or_buf=DATASET_NAME)

In [None]:
# Show the loaded dataset as the cell output for a quick visual check.
transcripts_dataset

In [None]:
from langchain_community.document_loaders import DataFrameLoader
# Wrap the dataset in a LangChain loader; the "text" column supplies each
# document's page content.
loader = DataFrameLoader(
    transcripts_dataset,
    page_content_column="text",
)

# Materialise the documents produced by the loader.
transcripts = loader.load()

In [None]:
# Show the loaded documents as the cell output.
transcripts

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Demonstration splitter with deliberately small chunks (100 characters,
# 20 overlapping) so the effect of splitting is easy to see.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)

# Only the first five chunks are shown as the cell output.
text_splitter.split_documents(transcripts)[:5]

In [None]:
# Splitter actually used for indexing: 256-character chunks with a
# 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=256,
    chunk_overlap=20,
)

# Chunked documents that are embedded and stored further down.
documents = text_splitter.split_documents(transcripts)

In [None]:
# Number of chunks produced by the splitter.
len(documents)

In [None]:
from langchain_openai.embeddings import AzureOpenAIEmbeddings

# Embedding client used to vectorise the chunks.
# NOTE(review): no arguments are passed, so endpoint, key and deployment are
# presumably taken from environment variables loaded earlier - confirm.
embeddings = AzureOpenAIEmbeddings()


In [None]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
#from langchain.schema.runnable import RunnablePassthrough


In [None]:
# Embed every chunk and index the vectors in an in-memory store.
vectorstore2 = DocArrayInMemorySearch.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Output parser placed at the end of the chains below so they yield a plain
# string rather than the model's message object.
parser = StrOutputParser()

from langchain.prompts import ChatPromptTemplate


In [None]:
#from langchain.retrievers.multi_query import MultiQueryRetriever

In [None]:
#retriever_from_llm = MultiQueryRetriever.from_llm(
#    retriever=vectorstore2.as_retriever(), llm=model
#)

In [None]:
# Expose the vector store through the retriever interface; no arguments are
# passed, so the store's default search settings apply.
retriever = vectorstore2.as_retriever()

In [None]:
# Fetch the chunks most relevant to a sample question. `invoke` is the
# standard Runnable entry point (used elsewhere in this notebook) and replaces
# the deprecated `get_relevant_documents` method.
unique_docs = retriever.invoke("What is langchain?")

In [None]:
# Number of documents the retriever returned for the sample question.
len(unique_docs)

In [None]:
# Show the retrieved documents as the cell output.
unique_docs

In [None]:
# Prompt text with two placeholders: {context} receives the retrieved chunks
# and {question} the user's question. The model is told to admit when it does
# not know instead of inventing an answer.
template = (
    "Use the following pieces of context to answer the question at the end.\n"
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer:"
)

# Turn the raw text into a chat prompt template usable inside a chain.
prompt = ChatPromptTemplate.from_template(template)


In [None]:
# Fan the incoming question out into the two inputs the prompt expects:
# "context" comes from the retriever, "question" is passed through unchanged.
setup = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)

# Run the step on its own to inspect the dictionary it produces.
setup.invoke("What is langchain?")

In [None]:
# Full RAG pipeline: gather inputs -> fill the prompt -> call the model ->
# reduce the reply to a string.
chain = (
    setup
    | prompt
    | model
    | parser
)

# The answer is shown as the cell output.
chain.invoke("What is langchain?")

In [None]:
# Shorter chain without retrieval or output parsing: the context is supplied
# by hand from the documents fetched earlier, and the raw model reply is shown.
chain = prompt | model
chain.invoke({"context": unique_docs, "question": "What is langchain?"})

In [None]:
# Walk through the pipeline by hand, one step at a time.
question = "What is langchain?"

# Retrieve supporting chunks. `invoke` is the standard Runnable entry point
# (used elsewhere in this notebook) and replaces the deprecated
# `get_relevant_documents` method.
context = retriever.invoke(question)

# Fill the prompt template with the retrieved context and the question.
actual_prompt = prompt.invoke({"context": context, "question": question})

# Send the finished prompt to the model; the reply is shown as the cell output.
model.invoke(actual_prompt)


In [None]:
# Re-create the retriever from the vector store and call it directly; the
# documents it returns are shown as the cell output.
retriever = vectorstore2.as_retriever()
retriever.invoke("What is langchain?")

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Build the prompt inputs in parallel: the retriever fills "context" while the
# original question is forwarded untouched as "question".
setup = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)

# Inspect the intermediate dictionary for a sample question.
setup.invoke("What is LangChain?")

In [None]:
# Complete question-answering chain: inputs -> prompt -> model -> string.
chain = (
    setup
    | prompt
    | model
    | parser
)

# The answer is shown as the cell output.
chain.invoke("What is LangChain?")

In [None]:
retriever = vectorstore2.as_retriever()

# The chain does not depend on the query, so build it once up front instead
# of on every loop iteration, and reuse the retriever created above.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

# Simple interactive loop: read a query, answer it, repeat until "exit".
while True:
    query = input("Enter a query: ").strip()
    if query == "exit":
        break
    if not query:
        # Nothing was typed; ask again rather than sending an empty question.
        continue
    # Inside a loop the value of a bare expression is not displayed, so the
    # answer has to be printed explicitly for the user to see it.
    print(chain.invoke(query))