# Notebook 3: H2OGPTe with RAG
## Favio Vázquez

![](front.png)

## Configuration and setup

In [None]:
from h2ogpte import H2OGPTE
from dotenv import load_dotenv
import os

In [None]:
# Load environment variables (python-dotenv) before reading the settings below.
load_dotenv()

# Connection settings for the H2OGPTE client, both read from the environment.
h2ogpte_keys = dict(
    address=os.getenv("H2OGPTE_ADDRESS"),
    api_key=os.getenv("H2OGPTE_KEY"),
)

# Shared client used by every cell that follows.
client = H2OGPTE(**h2ogpte_keys)

# Model name passed as `llm=` to all the queries in this notebook.
LLM = "gpt-35-turbo-1106"

## Creating the collection and ingesting the document

In [None]:
# Preview the recent collections. The positional arguments are presumably
# (offset, limit) — confirm against the h2ogpte client documentation.
client.list_recent_collections(0, 10)

In [None]:
name = "meetup"

# Only a header is printed here; the collections themselves are not listed.
print("Recent collections:")
recent_collections = client.list_recent_collections(0, 1000)

# Reuse the first collection that has the wanted name AND already holds at
# least one document; otherwise fall through with None.
collection_id = next(
    (
        candidate.id
        for candidate in recent_collections
        if candidate.name == name and candidate.document_count
    ),
    None,
)

# Nothing reusable was found: create a fresh collection.
if collection_id is None:
    print(f"Creating collection: {name} ...")
    collection_id = client.create_collection(
        name=name,
        description="Meetup demo collection",
    )
    print(f"New collection: {collection_id} ...")

In [None]:
# Display the id of the collection that was reused or created in the previous cell.
collection_id

In [None]:
# Show the documents currently stored in the collection (offset 0, limit 1000).
client.list_documents_in_collection(collection_id, offset=0, limit=1000)

In [None]:
filepath = 'blackholes.pdf'

# Derive the bare file name with the standard library instead of splitting on
# '/', so paths written with the platform's own separator are handled too.
filename = os.path.basename(str(filepath))
print(f'Ingesting file {filename}')

# Ids of documents already in the collection whose name matches this file.
documents = client.list_documents_in_collection(collection_id, 0, 1000)
document_ids = [d.id for d in documents if d.name == filename]

if not document_ids:
    # Not in the collection yet: upload the raw bytes, then ingest the upload.
    with open(filepath, 'rb') as f:
        # `doc` holds whatever client.upload returns (presumably an upload
        # id — confirm); it is passed straight to ingest_uploads.
        doc = client.upload(filename, f)
    client.ingest_uploads(collection_id, [doc])
    print(f'File {filename} ingested')
else:
    # A document with the same name exists; skip the upload entirely.
    print(f'File {filename} already ingested')

## Talking to the model and PDF

In [None]:
# No collection is involved in this call, so the model answers without
# using the PDF.
reply = client.answer_question(question="What is the document about?", llm=LLM)
answer = reply.content
print(f"{LLM}: {answer}", flush=True)

In [None]:
# Custom prompt texts reused by the summarization and chat cells.
prompts = {
    # Role and ground rules for the assistant (three lines joined by "\n").
    'system_prompt': (
        "You are a virtual assistant that helps physicists in interpretations and analysis of report and documents.\n"
        "Only answer the questions in English and never use another language.\n"
        "Answer the questions with information provided in the context, do not create any information."
    ),
    # Passed as `pre_prompt_query` to chat queries and as
    # `pre_prompt_summary` to the summarization call.
    'pre_prompt_query': "Consider the following document and answer the following questions based on the provided document.",
    # Passed as `prompt_query` to chat queries.
    'prompt_query': "Answer the following question based on the provided document.",
}

In [None]:
# Fetch the documents in the collection and keep the first one; `doc.id` is
# what the summarization cell later passes as `document_id`.
documents = client.list_documents_in_collection(collection_id, offset=0, limit=99)
# NOTE(review): raises IndexError if the collection is empty, and assumes the
# first entry is the ingested PDF — confirm the ordering returned by the API.
doc = documents[0]
# Bare expression so the notebook displays the selected document.
doc

## Asking questions to the document itself with a chat session

In [None]:
# Display the collection id that the chat sessions in the next cells are created against.
collection_id

In [None]:
question = "What is the document about?"

# Create a chat session tied to the collection for this single question.
chat_session_id = client.create_chat_session(collection_id)
try:
    with client.connect(chat_session_id) as session:
        response = session.query(
            question,
            llm=LLM).content
        print(response, flush=True)
finally:
    # Delete the throwaway session even when connecting or querying fails,
    # so failed runs do not leave orphaned chat sessions behind.
    client.delete_chat_sessions([chat_session_id])

In [None]:
question = "Can black holes evaporate?"

# Create a chat session tied to the collection for this single question.
chat_session_id = client.create_chat_session(collection_id)
try:
    with client.connect(chat_session_id) as session:
        response = session.query(
            question,
            llm=LLM).content
        print(response, flush=True)
finally:
    # Delete the throwaway session even when connecting or querying fails,
    # so failed runs do not leave orphaned chat sessions behind.
    client.delete_chat_sessions([chat_session_id])

## Summarizing the document

In [None]:
# Summarize the selected document using the custom prompts.

message = "Create a summary of the document, including the most important information and conclusions"

# Gather the call arguments first; `message` is used as the summary prompt
# and the query pre-prompt is reused as the summary pre-prompt.
summary_kwargs = dict(
    document_id=doc.id,
    max_num_chunks=20,
    llm=LLM,
    system_prompt=prompts['system_prompt'],
    pre_prompt_summary=prompts['pre_prompt_query'],
    prompt_summary=message,
)
summary = client.summarize_document(**summary_kwargs)

print(summary.content)

## Configuring the chat session

In [None]:
# Generation settings forwarded to the model: sampling on, temperature 0.1.
llm_args = {
    "do_sample": True,
    "temperature": 0.1,
}

message = "Create a summary of the document, including the most important information and conclusions"

chat_session_id = client.create_chat_session(collection_id)

# All keyword arguments for session.query: the custom prompts, the message
# itself, a timeout of 120, the model name, the RAG configuration, and the
# generation settings above.
args = {
    "system_prompt": prompts['system_prompt'],
    "pre_prompt_query": prompts['pre_prompt_query'],
    "prompt_query": prompts['prompt_query'],
    "message": message,
    "timeout": 120,
    "llm": LLM,
    'rag_config':{"rag_type": 'rag+'},
    "llm_args": llm_args
    }

try:
    with client.connect(chat_session_id) as session:
        response = session.query(**args)
finally:
    # Delete the throwaway session even when connecting or querying fails,
    # so failed runs do not leave orphaned chat sessions behind.
    client.delete_chat_sessions([chat_session_id])

print(response.content)

In [None]:
# Generation settings forwarded to the model: sampling on, temperature 1.
llm_args = {
    "do_sample": True,
    "temperature": 1,
}

message = "Create a summary of the document, including the most important information and conclusions"

chat_session_id = client.create_chat_session(collection_id)

# All keyword arguments for session.query: the custom prompts, the message
# itself, a timeout of 120, the model name, the RAG configuration, and the
# generation settings above.
args = {
    "system_prompt": prompts['system_prompt'],
    "pre_prompt_query": prompts['pre_prompt_query'],
    "prompt_query": prompts['prompt_query'],
    "message": message,
    "timeout": 120,
    "llm": LLM,
    'rag_config':{"rag_type": 'rag+'},
    "llm_args": llm_args
    }

try:
    with client.connect(chat_session_id) as session:
        response = session.query(**args)
finally:
    # Delete the throwaway session even when connecting or querying fails,
    # so failed runs do not leave orphaned chat sessions behind.
    client.delete_chat_sessions([chat_session_id])

print(response.content)