# RAG with OpenAI

## Load OpenAI Vector Store

In [None]:
from openai import OpenAI
import getpass
import os

# Ask for the API key interactively only when the environment does not provide one.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")
client = OpenAI()
vector_store = client.vector_stores.create(name="VECTOR STORE NAME HERE")

# Ready the files for upload to OpenAI
file_paths = ["Example_data/Prompt_Engineering.pdf"]  # example file path, replace with your own files

# Open the files one by one inside the try block so that every handle that was
# opened gets closed again, even if a later open() or the upload itself fails.
file_streams = []
try:
    for path in file_paths:
        file_streams.append(open(path, "rb"))

    # Use the upload and poll SDK helper to upload the files, add them to the vector store,
    # and poll the status of the file batch for completion.
    file_batch = client.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )
finally:
    for stream in file_streams:
        stream.close()

# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

## Ask questions!

In [None]:
# Describe the file_search tool once, pointing it at the vector store created above,
# then hand it to the Responses API together with the question.
file_search_tool = {"type": "file_search", "vector_store_ids": [vector_store.id]}
response = client.responses.create(
    model="gpt-4o",
    input="What are the 3 best ways to prompt a LLM?",
    tools=[file_search_tool],
)
print(response.output_text)

# RAG with LangChain and Custom Vector Store

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import init_chat_model
import getpass
import os
import dotenv
# Load environment variables from a .env file.
dotenv.load_dotenv()

# Prompt for the OpenAI key only when it is missing or empty in the environment.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

# A vector store is assumed to exist already.
# Replace YOUR_VECTOR_STORE with your actual vector store instance; the `retrieve`
# node further down calls `vector_store.similarity_search(...)` on it.
# NOTE(review): the original note suggested letting OpenAI create one "in the next
# chapter", but the OpenAI vector-store section comes before this one. Confirm that
# object offers `similarity_search` before reusing it here.
vector_store = YOUR_VECTOR_STORE

# Retrieval and Generation

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from typing_extensions import List, TypedDict, Tuple
from langgraph.graph import START, StateGraph
from langchain_core.messages import AIMessage, HumanMessage

## Prompt - custom
# The template is assembled from adjacent string literals so that every newline and
# the trailing space after "USER." are explicit rather than hidden in a
# triple-quoted block.
template = (
    "Use the following pieces of context to answer the question given by the USER. \n"
    "Be concise and accurate in your response. Admit if you do not know the answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "{chat_history}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Helpful Answer:"
)
prompt = PromptTemplate.from_template(template)

# Render the template once with placeholder values to show what the prompt looks like.
example_messages = prompt.invoke(
    {
        "context": "(context goes here)",
        "question": "(question goes here)",
        "chat_history": "(messages go here)",
    }
).to_messages()

## State and Nodes
class State(TypedDict):
    """State dictionary shared by the `retrieve` and `generate` graph nodes."""

    # The user's question; read by both `retrieve` and `generate`.
    question: str
    # Documents returned by `retrieve`; `generate` joins their `page_content`.
    context: List[Document]
    # The model's reply, written by `generate`.
    answer: str
    # Earlier turns as (human, ai) pairs, supplied by the caller of the graph.
    chat_history: List[Tuple[str, str]]

def retrieve(state: State):
    """Look up documents similar to the question and return them as `context`."""
    query = state["question"]
    return {"context": vector_store.similarity_search(query)}

def generate(state: State):
    """Answer the question using the retrieved context and the earlier conversation.

    Args:
        state: Graph state. Reads `question`, `context` (the retrieved documents)
            and, when present, `chat_history` as (human, ai) string pairs.

    Returns:
        A partial state update `{"answer": <text of the model's reply>}`.
    """
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    # `prompt` is a plain string template, so a list of message objects would be
    # interpolated as its Python repr. Render the history as a readable transcript
    # instead; a missing or empty history yields an empty string.
    history_text = "\n".join(
        f"Human: {human}\nAI: {ai}" for human, ai in (state.get("chat_history") or [])
    )
    messages = prompt.invoke(
        {
            "question": state["question"],
            "context": docs_content,
            "chat_history": history_text,
        }
    )
    response = llm.invoke(messages)
    return {"answer": response.content}

## Compile the graph
# Register the two nodes in order (retrieve, then generate), make "retrieve" the
# entry point, and compile the builder into a runnable graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()


### Usage

In [18]:
from langgraph.graph.state import CompiledStateGraph
class ChatBotWithMemory:
    """Wrap a compiled graph and remember every question/answer pair asked so far."""

    def __init__(
        self,
        graph: "CompiledStateGraph",
        chat_history: "List[Tuple[str, str]] | None" = None,
    ):
        """Store the graph and the starting conversation history.

        Args:
            graph: Compiled graph whose `invoke` takes a dict with `question` and
                `chat_history` and returns a dict containing `answer`.
            chat_history: Optional existing (question, answer) pairs. The list is
                kept by reference and extended in place by `ask_question`. When
                omitted, each instance gets its own fresh list.
        """
        self.graph = graph
        # A `[]` default would be created once and shared by every instance, so
        # separate chatbots would leak history into each other.
        self.chat_history = [] if chat_history is None else chat_history

    def ask_question(self, question: str):
        """Run the graph for `question`, record the exchange, and return the answer."""
        state = self.graph.invoke({"question": question, "chat_history": self.chat_history})
        self.chat_history.append((question, state["answer"]))
        return state["answer"]

In [None]:
# Ask a short series of questions through one chatbot so that later questions can
# rely on the history accumulated from earlier ones.
chatbot = ChatBotWithMemory(graph)
for question in (
    "How tall is the eiffel tower?",
    "What is the capital of France?",
    "Name a city there.",
    "Name another city.",
):
    print(chatbot.ask_question(question))