In [None]:
# Install required packages
!pip install langchain-community langchain-openai chromadb

In [None]:
# Importing necessary libraries for RAG application
import getpass  # Prompt for secrets without echoing them
import os  # OS interactions and environment variables

# Document loading and vector store components
from langchain_community.document_loaders import WebBaseLoader  # Load documents from web
from langchain_community.vectorstores import Chroma  # Vector storage and retrieval
from langchain_text_splitters import RecursiveCharacterTextSplitter  # Splitting documents into chunks

# Embedding and language model components
from langchain_openai import OpenAIEmbeddings  # Generate vector embeddings
from langchain_openai import ChatOpenAI  # OpenAI language model

# LangChain core components for RAG pipeline
from langchain_core.prompts import ChatPromptTemplate  # Create prompt templates
from langchain_core.output_parsers import StrOutputParser  # Parse model outputs
from langchain_core.runnables import RunnablePassthrough  # Create runnable sequences

In [None]:
# Set the OpenAI API key without storing it in the notebook.
# An OPENAI_API_KEY already present in the environment is reused; otherwise the
# user is prompted for it (getpass does not echo the typed value).
# SECURITY: a literal key was previously committed in this cell. Treat that key
# as compromised and revoke/rotate it in the OpenAI dashboard.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [None]:
# Fetch the page at the given URL and load it as LangChain documents.
loader = WebBaseLoader(web_path="https://docs.smith.langchain.com")
docs = loader.load()

In [None]:
# Break the loaded documents into overlapping chunks:
# chunk_size=1000 with chunk_overlap=200 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

In [None]:
# Build a Chroma vector store from the chunks, embedding them with OpenAIEmbeddings.
vectorstore = Chroma.from_documents(
    embedding=OpenAIEmbeddings(),
    documents=splits,
)

In [None]:
# Expose the vector store as a retriever ("similarity" is the default search type,
# spelled out here for the reader).
retriever = vectorstore.as_retriever(search_type="similarity")

In [None]:
# Define RAG prompt.
# The template has two placeholders: {context} and {question}.
_RAG_TEMPLATE = """
Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(_RAG_TEMPLATE)

In [None]:
# Chat model used to generate answers; temperature=0 is passed for the sampling temperature.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

In [None]:
# Create RAG chain
def _format_docs(retrieved_docs):
    """Join the text of retrieved documents into a single context string.

    Without this step the retriever's list of Document objects is interpolated
    into the prompt as its Python repr (metadata, escaped newlines and all),
    which adds noise to the context the model sees.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values separated by blank lines ("" when empty).
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The input question is fanned out two ways: through the retriever (then
# flattened to plain text) for {context}, and passed through unchanged for
# {question}. The prompt is then sent to the model and parsed to a string.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Convenience wrapper around the chain
def ask_question(question):
    """Run ``question`` through the module-level ``rag_chain``.

    Args:
        question: Value forwarded unchanged to ``rag_chain.invoke``.

    Returns:
        Whatever ``rag_chain.invoke`` returns for that input.
    """
    answer = rag_chain.invoke(question)
    return answer

In [None]:

# Run one sample query through ask_question and print the question and its answer.
print("Question: What is LangSmith?")
response = ask_question(question="What is LangSmith?")
print("\nResponse:", response)

In [None]:
# Show the documents the retriever returns for the sample question.
print("\nSources:")
sources = retriever.invoke("What is LangSmith?")
for i, source in enumerate(sources, start=1):
    print(f"\nSource {i}:")
    # Print at most the first 500 characters; "..." marks truncated content.
    if len(source.page_content) > 500:
        preview = source.page_content[:500] + "..."
    else:
        preview = source.page_content
    print("Content:", preview)
    print("Source URL:", source.metadata.get('source', 'Unknown source'))