# RAG Langchain Web Example - BBC Strictly Wiki

This example loads content from a web page, splits the content into chunks, loads the chunks into a vector store, then uses a retriever to
answer natural language questions about the page.

This is based on the [LangChain RAG tutorial](https://python.langchain.com/docs/tutorials/rag/).

In [1]:
# Smoke test for the environment: each import raises an import error if the
# package is not installed, so the message below is printed only when both
# packages can be imported.
import langchain
import chromadb

print("Langchain and ChromaDB modules are successfully installed!")


Langchain and ChromaDB modules are successfully installed!


In [2]:
import textwrap

In [3]:
import os

from langchain_openai import ChatOpenAI

# Chat model that serves as the generation step of the RAG chain built below.
# NOTE(review): presumably the OpenAI API key is picked up from the
# environment (e.g. OPENAI_API_KEY), since none is passed here — confirm.
llm = ChatOpenAI(model="gpt-4o-mini")

In [4]:
import bs4
from langchain import hub
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Load content from the specified Wikipedia page.
# No HTML filtering is configured on the loader, so the resulting page_content
# also carries site navigation text (e.g. "Jump to content", "Main menu").
loader = WebBaseLoader(
    web_paths=("https://en.wikipedia.org/wiki/Strictly_Come_Dancing",)
)
docs = loader.load()
# Bare expression: the notebook displays the loaded Document list.
docs

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Strictly_Come_Dancing', 'title': 'Strictly Come Dancing - Wikipedia', 'language': 'en'}, page_content='\n\n\n\nStrictly Come Dancing - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\n Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n

In [6]:
# Split the loaded page into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
# Embed every chunk with OpenAI embeddings and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieve and generate using the relevant snippets of the web page.
retriever = vectorstore.as_retriever()
# Pull the shared RAG prompt from the LangChain hub; its template expects
# `context` and `question` variables.
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by
        a blank line (two newlines).
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Assemble the RAG pipeline as a chain of runnables:
#   1. "context": the retriever's documents are passed through format_docs;
#      "question": the input is forwarded unchanged.
#   2. The prompt template is filled from those two values.
#   3. The chat model generates a reply.
#   4. StrOutputParser turns the model output into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Bare expression: the notebook displays the composed chain.
rag_chain

  prompt = loads(json.dumps(prompt_object.manifest))


{
  context: VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000022894B25BE0>, search_kwargs={})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])
| ChatOpenAI(client=<op

In [8]:
# Sample question to put to the RAG chain.
test_prompt1 = "What is Strictly Come Dancing?"

In [9]:
# Run the question through the chain, then wrap the answer to 120 columns
# so it prints readably.
response = rag_chain.invoke(test_prompt1)
wrapped_response = textwrap.fill(response, width=120)
print(wrapped_response)

Strictly Come Dancing is a British dance contest show where celebrities partner with professional dancers to compete
primarily in ballroom and Latin dance. The couples are judged by a panel, and the show has been highly successful,
leading to adaptations in 60 other countries as Dancing with the Stars. It is currently presented by Tess Daly and
Claudia Winkleman.
