# RAG Langchain Web Example - BBC Strictly Wiki

This example loads content from a web page, splits the content into chunks, loads the chunks into a vector store, then uses a retriever
and an LLM to answer natural language questions about the page.

This is based on the [LangChain RAG tutorial](https://python.langchain.com/docs/tutorials/rag/).

In [6]:
#!pip install bs4 openai pypdf python-dotenv langchain langchain-community langchain-openai langchain-text-splitters chromadb

In [None]:
# Environment setup. load_dotenv() runs before the library imports below so
# that any settings from .env are in place first.
# NOTE(review): presumably .env supplies OPENAI_API_KEY for the OpenAI calls
# made later in this notebook - confirm.
import os
from dotenv import load_dotenv
load_dotenv() # Load the .env file

# Import-only smoke test: these two modules are not used directly in this
# cell; a failed import here means the package is missing.
import langchain
import chromadb

print("Langchain and ChromaDB modules are successfully installed!")


In [8]:
import textwrap

In [9]:
import os
from langchain_openai import ChatOpenAI
# Chat model used as the generation step of the RAG chain built further down
# (it is piped after the prompt in `rag_chain`).
# NOTE(review): no API key is passed explicitly - presumably it is picked up
# from the environment populated by load_dotenv(); confirm.
llm = ChatOpenAI(model="gpt-4o-mini")

In [None]:
import bs4
from langchain import hub
# Chroma is imported from `langchain_community` (already a dependency of this
# notebook) instead of the deprecated `langchain.vectorstores` re-export.
# NOTE(review): newer LangChain releases recommend the separate
# `langchain-chroma` package; switching would add a new dependency.
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [None]:
# Load content from the specified Wikipedia page
loader = WebBaseLoader(
    web_paths=("https://en.wikipedia.org/wiki/Strictly_Come_Dancing",)
)
docs = loader.load()
docs

In [None]:
# Break the loaded page into overlapping chunks (size 1000, overlap 200) so
# that neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed the chunks with OpenAI embeddings and index them in a Chroma store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# The retriever looks up chunks of the page relevant to a question; the
# prompt template "rlm/rag-prompt" is pulled via the LangChain hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Documents are separated by a blank line (two newlines). An empty
    iterable yields an empty string.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Inputs for the prompt: "context" is the retrieved chunks joined into one
# string by format_docs; "question" is the user's question passed through
# unchanged.
retrieval_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build prompt inputs -> fill the prompt -> call the chat
# model -> reduce the model output to a plain string.
rag_chain = retrieval_inputs | prompt | llm | StrOutputParser()

# Last expression in the cell, so the notebook displays the composed chain.
rag_chain

In [13]:
# Sample question passed to rag_chain.invoke() in the next cell.
test_prompt1 = "What is Strictly Come Dancing?"

In [None]:
# Ask the sample question and print the answer wrapped to 120 columns.
response = rag_chain.invoke(test_prompt1)
# textwrap.fill() turns every newline into a space, which would merge a
# multi-paragraph or bulleted answer into one block. Wrapping each line
# separately keeps the answer's own line breaks; a single-paragraph answer
# prints exactly as before.
wrapped_response = "\n".join(
    textwrap.fill(line, width=120) for line in response.splitlines()
)
print(wrapped_response)