# LangChain tracing and API key

In [1]:
import os

# Switch on LangChain's V2 tracing flag for this kernel's environment.
os.environ.update({"LANGCHAIN_TRACING_V2": "true"})

In [2]:
import getpass

# Ask for the key interactively so it is never written into the notebook.
# The prompt is skipped when the variable is already set (exported before
# starting Jupyter, or entered on an earlier run of this cell), so that
# "Restart & Run All" does not block on input unnecessarily.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangChain API key: ")

# Using Ollama Mistral Language Model

In [9]:
from langchain_community.llms import Ollama

# Mistral served through Ollama. The temperature is pinned to 0 so that
# repeated runs give the same result.
llm = Ollama(
    model="mistral",
    temperature=0,
)

In [11]:
# Load the dataset: a blog post about LangGraph.

from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(web_path="https://blog.langchain.dev/langgraph/")
docs = loader.load()

# Split dataset

In [12]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split into chunks of 1000 characters, with 200 characters of overlap
# between neighbouring chunks so text cut at a boundary appears in both.
# add_start_index=True asks the splitter to record where each chunk starts
# in the source document (presumably in the chunk metadata; TODO confirm).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

In [13]:
all_splits = text_splitter.split_documents(docs)

# Fail here with a clear message if the page produced no text, rather than
# later when the empty list reaches the vector store.
assert all_splits, "No chunks produced - check that the web page loaded correctly"

In [14]:
# Number of chunks produced by the split (displayed as the cell output)
len(all_splits)

21

# Embedding the chunks and storing them in a vector database

In [15]:
from langchain_community.embeddings import OllamaEmbeddings

# Embeddings come from the nomic-embed-text model served by Ollama.
embedding = OllamaEmbeddings(model="nomic-embed-text")

In [16]:
from langchain_community.vectorstores import Chroma

# Build a Chroma vector store from the chunks, embedding them with `embedding`.
# NOTE(review): re-running this cell in the same kernel may add the same
# chunks to the store a second time - confirm, and restart the kernel if
# retrieval starts returning duplicates.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding)

In [17]:
# Wrap the vector store in LangChain's retriever interface:
# similarity search, with k=6 results requested per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

# Running RAG: building the chain from input question to output answer

In [18]:
# Use the "rlm/rag-prompt" RAG prompt pulled from the LangChain hub.
# The prompt needs two inputs: the question and the context. Only the
# question is passed to .invoke(); the chain built further down fills in the
# context itself by running the retriever and format_docs on that question.
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

In [19]:
# Formatting docs from retrieval
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable gives an empty string.
    """
    chunk_texts = [doc.page_content for doc in docs]
    return "\n\n".join(chunk_texts)

In [20]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# The chain is invoked with the question. The first step builds the prompt
# inputs from it; each later step consumes the previous step's output.
rag_chain = (
    {
        # the question goes through the retriever, then format_docs
        "context": retriever | format_docs,
        # the question itself, passed through
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [24]:
# Try the chain end to end with a sample question; the answer is the cell output.
rag_chain.invoke("What is StateGraph?")

" StateGraph is a tool that allows you to define and compile a graph of nodes, edges, and states, similar to an AgentExecutor in LangChain. It's designed for more controlled human-in-the-loop workflows and multi-agent workflows. The state of the graph includes concepts like input, chat_history, intermediate_steps, and agent_outcome. You can add nodes and edges to create the graph, with special cases like a starting edge and conditional edges."

# UI with Gradio

In [27]:
import gradio as gr
import time
import textwrap

In [30]:
def process_question(user_question):
    """Answer a question with the RAG chain and report how long it took.

    Args:
        user_question: Question text entered by the user.

    Returns:
        The chain's answer, a blank line, then a
        "Response time: X.XX seconds." line. If the question is missing,
        empty, or only whitespace, a short hint is returned instead and the
        chain is not invoked.
    """
    # A blank question has nothing to retrieve or answer; skip the slow
    # retrieval + LLM call entirely.
    if not user_question or not user_question.strip():
        return "Please enter a question."

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # durations; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    # Feed the user's question into the RAG chain
    response = rag_chain.invoke(user_question)

    elapsed = time.perf_counter() - start_time
    response_time = f"Response time: {elapsed:.2f} seconds."

    # Final output shown to the user: answer first, timing underneath
    return f"{response}\n\n{response_time}"

# Setup the Gradio interface

# Shut down interfaces left running by earlier executions of this cell.
# Without this, every re-run keeps the old server alive and launches a new
# one on the next free port.
gr.close_all()

iface = gr.Interface(
    fn=process_question,
    inputs=gr.Textbox(lines=2, placeholder="Type your question here..."),
    outputs=gr.Textbox(),
    title="Basic RAG Chatbot",
    description="This RAG references LangGraph, documentation here: https://blog.langchain.dev/langgraph/",
)

# NOTE(review): share=True also publishes the app on a temporary public
# gradio.live URL, so anyone with the link can query the local model.
# Use share=False if only local access is wanted.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://dd4178c427174ec9e9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


