In [48]:
!pip install -q langchain_nvidia_ai_endpoints langchain-community langchain-text-splitters faiss-cpu

In [87]:
from google.colab import userdata
import os
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import chain

In [88]:
os.environ['NVIDIA_API_KEY'] = userdata.get('NVIDIA_API_KEY')

In [111]:
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()

In [112]:
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="\n\n\n\n\nGet started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet started with LangSmith\nLangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and w

In [113]:
embeddings = NVIDIAEmbeddings()

In [114]:
embeddings

NVIDIAEmbeddings(base_url='https://integrate.api.nvidia.com/v1', model='nvidia/nv-embedqa-e5-v5', truncate='NONE', dimensions=None, max_batch_size=50)

In [93]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)
retriever = vector.as_retriever()

In [94]:
retriever

VectorStoreRetriever(tags=['FAISS', 'NVIDIAEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7bcc0a0a5b10>, search_kwargs={})

In [95]:
model = ChatNVIDIA(model="openai/gpt-oss-20b")



In [96]:
model

ChatNVIDIA(base_url='https://integrate.api.nvidia.com/v1', model='openai/gpt-oss-20b')

In [97]:
model.invoke("tell me more about you?")

AIMessage(content='', additional_kwargs={}, response_metadata={'role': 'assistant', 'reasoning_content': 'The user says: "tell me more about you?" They want more information about the assistant. So I need to respond with a friendly explanation: I\'m ChatGPT, a large language model trained by OpenAI. I can answer questions, provide information, etc. I can mention my training data, my capabilities, my limitations, my purpose, etc. I should also be mindful of the policies. There\'s no policy violation. Just provide a friendly, helpful answer. I should keep it concise but thorough. I can mention that I\'m not a human, I don\'t have consciousness, I\'m a tool, I was trained on a mixture of licensed data, data created by human trainers, and publicly available data. I can answer a wide range of topics. I can mention that I don\'t have personal experiences, I don\'t have feelings, I can\'t access the internet, etc. I can mention that I am a language model. I can mention that I am not perfect, 

In [98]:
hyde_template = """Even if you do not know the full answer, generate a one-paragraph hypothetical answer to the below question:

{question}"""

In [99]:
hyde_prompt = ChatPromptTemplate.from_template(hyde_template)

In [100]:
hyde_query_transformer = hyde_prompt | model | StrOutputParser()

In [101]:
@chain
def hyde_retriever(question):
    hypothetical_document = hyde_query_transformer.invoke({"question": question})
    return retriever.invoke(hypothetical_document)

In [102]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

In [103]:
prompt = ChatPromptTemplate.from_template(template)

In [104]:
answer_chain = prompt | model | StrOutputParser()

In [105]:
@chain
def final_chain(question):
    documents = hyde_retriever.invoke(question)

    # FIXED: Check if any document has non-empty page_content
    if not documents or not any(getattr(doc, "page_content", "").strip() for doc in documents):
        raise ValueError("No valid documents retrieved for the question.")

    for s in answer_chain.stream({"question": question, "context": documents}):
        yield s

In [106]:
print("Retrieved documents:", documents)

Retrieved documents: [Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith'), Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Skip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDa

In [107]:
for s in final_chain.stream("how can langsmith help with testing"):
    print(s, end="")

LangSmith helps you test LLM applications in several ways:

* **Evals** – The LangSmith SDK and UI let you build evaluation datasets and metrics, run those tests against your production traffic, score the results, and capture human feedback on the data.  
* **Observability** – Tracing, dashboards, and alerts give you real‑time visibility into key metrics (RPS, error rates, cost) so you can spot regressions or failures as soon as they happen.  
* **Prompt engineering** – Automatic version control and collaboration features let you iterate on prompts and see how changes affect test outcomes.  

Together, these tools give you a complete testing workflow that keeps your model’s quality high and your deployment confidence strong.

In [108]:
import gradio as gr

In [109]:
def ask_langsmith(question):
    return "".join(final_chain.stream(question))

with gr.Blocks() as demo:
    gr.Markdown("## 🧠 GPT-OSS RAG")

    with gr.Row():
        inp = gr.Textbox(placeholder="Ask your question here...", label="Your Question")
    out = gr.Textbox(label="Answer", lines=10)

    btn = gr.Button("Submit")
    btn.click(fn=ask_langsmith, inputs=inp, outputs=out)

In [110]:
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f797a586a559f805ff.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


