In [None]:
# |default_exp langchain_rag

Please refer to [this blog post](https://nbdev.fast.ai/blog/posts/2022-11-07-spaces) for instructions on how to use this notebook.

## Install dependencies

## Make an app with Gradio

In [None]:
# |export
import gradio as gr
from dotenv import load_dotenv
import os
import time
import getpass

from openai import api_key


In [None]:
#| export
from llama_index.llms.openrouter import OpenRouter
from llama_index.core.llms import ChatMessage


In [None]:
# |export
# Load variables from a local .env file into the process environment.
load_dotenv()
#os.environ['HTTP_PROXY'] = ''
#os.environ['HTTPS_PROXY'] = ''
#os.environ['NO_PROXY'] = 'localhost, 127.0.0.1'

# Report the configuration without echoing secrets: API keys (and proxy URLs,
# which may embed credentials) are shown only as "set" / "not set" so they
# never end up in saved notebook output or in the logs of the exported app.
for _var in ("OPENROUTER_API_KEY", "PINECONE_API_KEY", "HTTP_PROXY", "HTTPS_PROXY"):
    print(f"{_var}: {'set' if os.environ.get(_var) else 'not set'}")
print(f"OPENROUTER_API_URL: {os.environ.get('OPENROUTER_API_URL')}")

In [None]:
from pinecone import Pinecone, ServerlessSpec, Index
import os
import pinecone

# Pinecone client; the API key is read from the PINECONE_API_KEY environment
# variable (None is passed if the variable is unset).
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

In [None]:
# Name of the Pinecone index used by this notebook.
dataset_name = 'quickstart'

# Create the serverless index only when it is missing, so the cell can be re-run.
if dataset_name not in pc.list_indexes().names():
    # NOTE(review): `dimension` must equal the length of the vectors upserted
    # into this index; confirm that the embedding model used in this notebook
    # really produces 1536-dimensional vectors.
    pc.create_index(
        dataset_name,
        dimension=1536,
        metric="euclidean",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Handle used by the following cells to talk to the index.
pinecone_index = pc.Index(dataset_name)

In [None]:
from llama_index.vector_stores.pinecone import PineconeVectorStore
# Wrap the Pinecone index handle in a LlamaIndex vector store.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

In [None]:

# Sample passages to embed; each record carries its own vector id.
data = [
    {"id": "vec1", "text": "Apple is a popular fruit known for its sweetness and crisp texture."},
    {"id": "vec2", "text": "The tech company Apple is known for its innovative products like the iPhone."},
    {"id": "vec3", "text": "Many people enjoy eating apples as a healthy snack."},
    {"id": "vec4", "text": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces."},
    {"id": "vec5", "text": "An apple a day keeps the doctor away, as the saying goes."},
]

# Request one embedding per text through the Pinecone inference endpoint.
passage_texts = [record['text'] for record in data]
embeddings = pc.inference.embed(
    model="llama-text-embed-v2",
    inputs=passage_texts,
    parameters={"input_type": "passage"},
)

# Pair every record with its embedding and keep the source text as metadata.
vectors = [
    {
        "id": record['id'],
        "values": embedding['values'],
        "metadata": {'text': record['text']},
    }
    for record, embedding in zip(data, embeddings)
]

In [None]:
# Write the prepared vectors into namespace "ns1" of the index.
# `pinecone_index` is the handle created from `pc.Index(dataset_name)`;
# no variable named `index` exists in this notebook.
pinecone_index.upsert(
    vectors=vectors,
    namespace="ns1"
)

In [None]:
# Show index statistics; uses the `pinecone_index` handle created from
# `pc.Index(dataset_name)` (no variable named `index` exists in this notebook).
pinecone_index.describe_index_stats()

In [None]:
# LlamaIndex LLM client that talks to OpenRouter.
# Alternative model ids kept for reference:
#   model="deepseek/deepseek-r1:free"
#   model="gryphe/mythomax-l2-13b:free"
llm = OpenRouter(
    model="qwen/qwen2.5-vl-32b-instruct:free",
    api_key=os.environ.get('OPENROUTER_API_KEY'),
    context_window=4096,
    max_tokens=256,
)

In [None]:
from openai import OpenAI
from os import getenv

# OpenAI-compatible client pointed at the endpoint named by OPENROUTER_API_URL.
client = OpenAI(
    api_key=os.environ.get('OPENROUTER_API_KEY'),
    base_url=os.environ.get('OPENROUTER_API_URL'),
)

# Single-turn chat completion; the two extra headers are sent with the request.
completion = client.chat.completions.create(
    # model="bytedance-research/ui-tars-72b:free",
    model="google/gemini-2.5-pro-exp-03-25:free",
    messages=[{"role": "user", "content": "给我讲个川普和普京的笑话吧."}],
    extra_headers={"HTTP-Referer": "binjian.github.io", "X-Title": "My Test"},
)

# Print the text of the first returned choice.
print(completion.choices[0].message.content)

In [None]:
# Rebind `llm` to an OpenRouter client using a different model.
# Alternative model ids kept for reference:
#   model="google/gemini-2.5-pro-exp-03-25:free"
#   model="deepseek/deepseek-r1:free"
#   model="gryphe/mythomax-l2-13b:free"
llm = OpenRouter(
    model="google/gemma-3-12b-it:free",
    api_key=os.environ.get('OPENROUTER_API_KEY'),
    context_window=4096,
    max_tokens=256,
)

# Send one user message and print the chat response.
message = ChatMessage(role="user", content="Tell me a joke.")
resp = llm.chat([message])
print(resp)

In [None]:
# Stream a chat response and print each incremental delta as it arrives,
# without newlines between pieces (flush so the text shows up immediately).
message = ChatMessage(role="user", content="请讲述一个250字的科幻小说故事")
resp = llm.stream_chat([message])
for r in resp:
    print(r.delta, end='', flush=True)


In [None]:
# Plain text completion (no chat messages) with the current `llm`.
resp = llm.complete("Tell me a joke in Feynman style.")
print(resp)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from os import getenv
from dotenv import load_dotenv
load_dotenv()

# Prompt with a single {question} slot that nudges step-by-step reasoning.
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])

# Optional attribution headers. Variables that are unset are dropped so that
# no header with a None value is ever sent.
_openrouter_headers = {
    header: value
    for header, value in (
        ("HTTP-Referer", getenv("YOUR_SITE_URL")),
        ("X-Title", getenv("YOUR_SITE_NAME")),
    )
    if value
}

# LangChain chat model routed through OpenRouter.
# - Bound to `chain_llm` rather than `llm`, so the LlamaIndex `llm` used by the
#   surrounding cells (which call `llm.chat(...)`) is not overwritten.
# - Reads the base URL from OPENROUTER_API_URL, the variable the rest of this
#   notebook uses.
# - Uses a concrete model id instead of the "<model_name>" template placeholder.
# - Passes headers via `default_headers`; entries in `model_kwargs` are forwarded
#   as arguments of the completion call, which does not accept `headers`.
chain_llm = ChatOpenAI(
    openai_api_key=getenv("OPENROUTER_API_KEY"),
    openai_api_base=getenv("OPENROUTER_API_URL"),
    model_name="google/gemma-3-12b-it:free",
    default_headers=_openrouter_headers,
)
llm_chain = LLMChain(prompt=prompt, llm=chain_llm)
question = "What NFL team won the Super Bowl in the year Justin Beiber was born?"
print(llm_chain.run(question))

In [None]:
# Send one user message through `llm.chat` and print the response.
# NOTE(review): this requires `llm` to expose a `.chat(messages=...)` method
# accepting `ChatMessage` objects (e.g. the OpenRouter client); it fails if
# `llm` has been rebound to an object without that method.
message = ChatMessage(role="user", content="给我讲个少林寺笑话吧.")
resp = llm.chat(messages=[message])
print(resp)

In [None]:
#| export
# Ask for the OpenAI key interactively only when the environment has none.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain.chat_models import init_chat_model

# Chat model used by the RAG `generate` step.
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

In [None]:
#| export
# This cell must be exported: the exported vector-store cell builds
# `InMemoryVectorStore(embeddings)`, so the generated module needs `embeddings`.
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

# Embedding model used to index and query documents in the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
#| export
from langchain_core.vectorstores import InMemoryVectorStore
# In-memory vector store built on the `embeddings` model; rebinds `vector_store`.
vector_store = InMemoryVectorStore(embeddings)


In [None]:
#| export
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
import validators

In [None]:
#| export
# Fetch the blog post, parsing only the elements whose CSS class is one of
# post-content / post-title / post-header.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
all_splits = text_splitter.split_documents(docs)

# Add the chunks to the vector store; the return value is not needed.
_ = vector_store.add_documents(documents=all_splits)

# Pull the question-answering prompt from the hub.
prompt = hub.pull("rlm/rag-prompt")

In [None]:
#| export
# Define state for application
class State(TypedDict):
    """State dictionary passed between the graph steps `retrieve` and `generate`."""

    # Question text; read by both `retrieve` and `generate`.
    question: str
    # Documents returned by the similarity search in `retrieve`.
    context: List[Document]
    # Content of the LLM response produced by `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up documents similar to the question.

    Runs a similarity search on the module-level ``vector_store`` with
    ``state["question"]`` and returns the hits under the ``"context"`` key.
    """
    question = state["question"]
    return {"context": vector_store.similarity_search(question)}


def generate(state: State):
    """Produce an answer from the retrieved context.

    Joins the page content of every document in ``state["context"]`` with blank
    lines, fills the module-level ``prompt`` with the question and that text,
    invokes the module-level ``llm`` and returns the response content under
    the ``"answer"`` key.
    """
    context_parts = [doc.page_content for doc in state["context"]]
    prompt_input = {
        "question": state["question"],
        "context": "\n\n".join(context_parts),
    }
    llm_reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}


# Compile application and test
# Build a state graph over `State` with the sequence retrieve -> generate,
# add an edge from START to the "retrieve" node, then compile it.
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
#| export
# URLs that `answer` has already loaded into `vector_store` in this process.
# Re-adding the same page on every request would fill the store with duplicate
# chunks and crowd out other results in the similarity search.
_INDEXED_URLS = set()


def answer(message, history, system_prompt, tokens):
    """Stream a reply for the Gradio chat interface.

    Args:
        message: Multimodal chat input, a dict with a ``"text"`` entry and a
            ``"files"`` list of file paths.
        history: Earlier turns as role/content dicts; user turns whose content
            is a tuple are treated as file uploads (first item is the path).
        system_prompt: The question to answer about the indexed content.
        tokens: Maximum number of characters of the reply to stream.

    Yields:
        Growing prefixes of the reply, one more character per step.

    Behavior:
        * ``message["text"]`` is a valid URL: the page is loaded, chunked and
          added to ``vector_store`` (once per URL), then ``graph`` is invoked
          with ``system_prompt`` as the question.
        * otherwise, if any file is present: the name of the latest file is
          reported.
        * otherwise: a summary of the received input is echoed back, so the
          reply is always defined.
    """
    # Files from earlier user turns first, then the files attached to this turn.
    files = [
        msg["content"][0]
        for msg in history
        if msg["role"] == "user" and isinstance(msg["content"], tuple)
    ]
    files.extend(message.get("files") or [])
    file_names = [path.rsplit("/", 1)[-1] for path in files]

    text = message.get("text") or ""
    # Joined outside the f-string: a backslash inside an f-string expression is
    # a syntax error before Python 3.12.
    joined_names = "\n".join(file_names)
    user_input = f"Question: {system_prompt}\n Website: {text}\n File:\n{joined_names}"

    if text and validators.url(text):
        if text not in _INDEXED_URLS:
            loader = WebBaseLoader(
                web_paths=(text,),
                bs_kwargs=dict(
                    parse_only=bs4.SoupStrainer(
                        class_=("post-content", "post-title", "post-header")
                    )
                ),
            )
            docs = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            all_splits = text_splitter.split_documents(docs)
            # A page without any of the selected elements yields no chunks;
            # skip indexing instead of sending an empty batch to the store.
            if all_splits:
                vector_store.add_documents(documents=all_splits)
            _INDEXED_URLS.add(text)

        reply = graph.invoke({"question": system_prompt})
        response_i = reply["answer"]
    elif files:
        response_i = f"File: {file_names[-1]}\n"
    else:
        response_i = user_input

    # Emit the reply one character at a time, capped at `tokens` characters.
    for i in range(min(len(response_i), int(tokens))):
        time.sleep(0.05)
        yield response_i[: i + 1]

In [None]:
# |export
# Chat UI around `answer`: a multimodal textbox pre-filled with a blog URL,
# plus two additional inputs (a question textbox and a reply-length slider).
demo = gr.ChatInterface(
    fn=answer,
    multimodal=True,
    type="messages",
    title="智能问答RAG",
    description="输入一个网址，查询或询问其中的内容。",
    textbox=gr.MultimodalTextbox(
        value="https://lilianweng.github.io/posts/2023-06-23-agent/",
        sources=["upload", "microphone"],
        file_types=["image", ".pdf", ".txt"],
        file_count="multiple",
    ),
    additional_inputs=[
        gr.Textbox(value="What is Task Decomposition?", label="你的问题在此输入！"),
        gr.Slider(minimum=10, maximum=400, value=300, label="回答长度"),
    ],
)

# Start the app without creating a public share link.
demo.launch(share=False)

In [None]:
# Shut down the app launched in the previous cell.
# This is only necessary when running inside a notebook.
demo.close()

## Create a `requirements.txt` file

In [None]:
%%writefile ../requirements.txt
# Packages imported by the exported langchain_rag app.
fastcore
gradio
python-dotenv
openai
llama-index-core
llama-index-llms-openrouter
langchain
langchain-core
langchain-community
langchain-openai
langchain-text-splitters
langgraph
beautifulsoup4
validators
typing-extensions

## Convert this notebook into a Gradio app

In [None]:
# from nbdev.export import nb_export
# nb_export('01_gradio.ipynb', lib_path='.', name='gradio')

In [None]:
# | hide
import nbdev

# Run nbdev's export step for this project.
nbdev.nbdev_export()

<div>
<link rel="stylesheet" href="https://gradio.s3-us-west-2.amazonaws.com/2.6.5/static/bundle.css">
<div id="target"></div>
<script src="https://gradio.s3-us-west-2.amazonaws.com/2.6.5/static/bundle.js"></script>
<script>
launchGradioFromSpaces("abidlabs/question-answering", "#target")
</script>
</div>