# RAG Building Blocks

## Useful links

- https://platform.openai.com/api-keys
- https://python.langchain.com/docs/integrations/document_loaders/
- https://python.langchain.com/docs/how_to/#text-splitters
- https://platform.openai.com/docs/guides/embeddings/embedding-models#embedding-models
- https://python.langchain.com/docs/integrations/vectorstores/chroma/
- https://smith.langchain.com/hub
- https://www.promptingguide.ai/
- https://github.com/promptslab/Awesome-Prompt-Engineering
- https://python.langchain.com/docs/integrations/chat/openai/#model-features
- https://platform.openai.com/docs/quickstart?api-mode=chat

In [None]:
pip install gradio langchain_community pypdf beautifulsoup4 langchain-text-splitters langchain_openai langchain-chroma



In [None]:
import os
import re
import gradio
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI



In [None]:
from google.colab import userdata

In [None]:
import chromadb.api

In [None]:
# Copy the OpenAI key from Colab's secret store (userdata) into the process
# environment. Presumably this is how the OpenAI clients created later, which
# are given no explicit key, find their credentials - confirm.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [None]:
def predict_old(message, history):
  """Answer ``message`` by running the whole RAG pipeline from scratch.

  Legacy all-in-one version: every call reloads the PDF, splits it, stores
  the chunks in a Chroma collection created without a persist directory,
  retrieves the 3 chunks closest to the question and asks the chat model.

  Args:
    message: The user's question. Used as the similarity-search query and
      inserted into the prompt.
    history: Unused. NOTE(review): presumably kept so the function matches
      the ``(message, history)`` callback shape used with
      ``gradio.ChatInterface`` - confirm.

  Returns:
    The ``content`` of the chat model's reply.
  """
  # Phase 1 - Indexing

  # Step 1 - Document loader
  file_path = "/content/solana-whitepaper-en.pdf"
  loader = PyPDFLoader(file_path, mode='single')
  docs = loader.load()

  # Step 1.5 - Document cleaning
  # Replace each line break with a space (not an empty string) so the last
  # word of one line is not glued to the first word of the next.
  for doc in docs:
    doc.page_content = doc.page_content.replace('\n', ' ')

  # Step 2 - Document Transform / Splitting
  chunk_size = 600
  chunk_overlap = chunk_size // 5  # 20% overlap, kept as an int
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
  )
  chunks = text_splitter.split_documents(docs)

  # Step 3 - Embedding model
  embedding_model = OpenAIEmbeddings(
      model="text-embedding-3-small"
  )

  # Step 4 - Vector DB Storage
  chromadb.api.client.SharedSystemClient.clear_system_cache()
  # pinecone or mongo are DB for production
  vector_db = Chroma(
      collection_name='solana-text-embedding-3-small',
      embedding_function=embedding_model,
  )
  # The DB embeds the chunks itself through embedding_function, so a separate
  # embed_documents() call would only embed (and bill) every chunk twice.
  vector_db.add_documents(chunks)

  # Phase 2 - RAG - Generation
  # Step 2.1 Similarity Search
  relevant_chunks = vector_db.similarity_search(
      query=message,
      k=3
  )

  # Step 2.2 create final prompt
  # Pass only the chunk text: formatting the Document objects directly would
  # put their Python repr (metadata and escaped text) into the prompt.
  context = "\n\n".join(chunk.page_content for chunk in relevant_chunks)
  prompt = f"""
    Instructions:

    Answer the user Query based on the provided context, or use the provided context to response logically.
    Do not answer question that are not provided on the provided context.
    If the question is not provided in the provided context, answer with 'I don't know'.
    Use many emojis to answer the question.

    End of Instruction

    User Query:
    {message}
    End of User Query

    Context:
    {context}
    End of Context

  """

  # Step 2.3 Call the LLM
  llm = ChatOpenAI(
      model="gpt-4o-mini",
      temperature=0.7,
  )

  ai_msg = llm.invoke(prompt)

  return ai_msg.content


In [None]:
# Embedding client shared by load_information() and generation_phase().
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Module-level placeholder; starts out unset.
vector_db = None

In [None]:
def load_information():
  """Index the Solana whitepaper PDF into the persistent Chroma collection.

  Loads ``/content/solana-whitepaper-en.pdf``, normalises its line breaks,
  splits the text (``chunk_size=600``, ``chunk_overlap=120``) and stores the
  chunks in the ``solana-text-embedding-3-small`` collection persisted under
  ``./db``. Embeddings are computed by the module-level ``embedding_model``.

  Returns:
    None. The result is the data written to ``./db``.
  """
  # Phase 1 - Indexing

  # Step 1 - Document loader
  file_path = "/content/solana-whitepaper-en.pdf"
  loader = PyPDFLoader(file_path, mode='single')
  docs = loader.load()

  # Step 1.5 - Document cleaning
  # Replace each line break with a space (not an empty string) so the last
  # word of one line is not glued to the first word of the next.
  for doc in docs:
    doc.page_content = doc.page_content.replace('\n', ' ')

  # Step 2 - Document Transform / Splitting
  chunk_size = 600
  chunk_overlap = chunk_size // 5  # 20% overlap, kept as an int
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
  )
  chunks = text_splitter.split_documents(docs)

  # Step 3 - Embedding
  # No explicit embed_documents() call: the vector store embeds the chunks
  # itself through embedding_function, so embedding here as well would only
  # embed (and bill) every chunk twice.

  # Step 4 - Vector DB Storage
  chromadb.api.client.SharedSystemClient.clear_system_cache()
  # pinecone or mongo are DB for production
  collection_name = 'solana-text-embedding-3-small'
  vector_db = Chroma(
      collection_name=collection_name,
      embedding_function=embedding_model,
      persist_directory='./db'
  )

  # Position-based ids make re-running this function overwrite the previous
  # entries instead of appending a second copy of every chunk to ./db.
  # NOTE: entries written by earlier runs without explicit ids, or left over
  # from a run that produced more chunks, are not removed - delete ./db to
  # start from a clean store.
  chunk_ids = [f"{collection_name}-{index}" for index in range(len(chunks))]

  # the DB will embed and store the chunks
  vector_db.add_documents(chunks, ids=chunk_ids)

In [None]:
def generation_phase(message, history):
  """Answer ``message`` using chunks retrieved from the persisted store.

  Opens the ``solana-text-embedding-3-small`` Chroma collection under
  ``./db``, retrieves the 3 chunks closest to the question, builds the
  prompt and returns the chat model's reply.

  Args:
    message: The user's question. Used as the similarity-search query and
      inserted into the prompt.
    history: Chat history argument of the chat callback; unused here.

  Returns:
    The ``content`` of the chat model's reply.
  """
  vector_db = Chroma(
      collection_name='solana-text-embedding-3-small',
      embedding_function=embedding_model,
      persist_directory='./db'
  )

  # Phase 2 - RAG - Generation
  # Step 2.1 Similarity Search
  relevant_chunks = vector_db.similarity_search(
      query=message,
      k=3
  )

  # Step 2.2 create final prompt
  # Pass only the chunk text: formatting the Document objects directly would
  # put their Python repr (metadata and escaped text) into the prompt.
  context = "\n\n".join(chunk.page_content for chunk in relevant_chunks)
  prompt = f"""
    Instructions:

    Answer the user Query based on the provided context, or use the provided context to response logically.
    Do not answer question that are not provided on the provided context.
    If the question is not provided in the provided context, answer with 'I don't know'.
    Use many emojis to answer the question.

    End of Instruction

    User Query:
    {message}
    End of User Query

    Context:
    {context}
    End of Context

  """

  # Step 2.3 Call the LLM
  llm = ChatOpenAI(
      model="gpt-4o-mini",
      temperature=0.7,
  )

  ai_msg = llm.invoke(prompt)

  return ai_msg.content


In [None]:
# Populate the './db' Chroma store that generation_phase() queries.
load_information()

In [None]:
# Serve generation_phase as a chat UI. With debug=True the cell keeps running
# so errors and logs stay visible (see the Colab notice in the cell output).
gradio.ChatInterface(generation_phase).launch(debug=True)

  self.chatbot = Chatbot(


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://9da3ac4060bcf9a07a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://9da3ac4060bcf9a07a.gradio.live




In [None]:
# Deliberate stop marker - presumably here to halt a "Run all" before the
# agents section. `raise` needs an exception instance or class; raising a
# plain string fails with "TypeError: exceptions must derive from
# BaseException" instead of reporting the intended message.
raise RuntimeError('Stopping')

TypeError: exceptions must derive from BaseException

# Implementing Agents

In [None]:
# Use termcolor to make it easy to colorize the outputs.
!pip install termcolor > /dev/null
!pip install langchain
!pip install openai
!pip install langchain_experimental
!pip install tiktoken
!pip install faiss-cpu==1.7.4
from datetime import datetime, timedelta
from typing import List
import math
import faiss
import os
import logging
logging.basicConfig(level=logging.ERROR)
from langchain.chat_models import ChatOpenAI
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import TimeWeightedVectorStoreRetriever
from langchain.vectorstores import FAISS
from termcolor import colored
from langchain_experimental.generative_agents import (

    GenerativeAgent,
    GenerativeAgentMemory,
)

Collecting langchain_experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Downloading langchain_experimental-0.3.4-py3-none-any.whl (209 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain_experimental
Successfully installed langchain_experimental-0.3.4
Collecting faiss-cpu==1.7.4
  Downloading faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m68.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4


In [None]:
# Copy the OpenAI key from Colab's secret store into the environment for the
# agents section (userdata comes from the google.colab import earlier in the
# notebook).
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [None]:
USER_NAME = "Eduardo"  # The name you want to use when interviewing the agent.

# Chat model handed to both the agent and its memory below. No model name is
# passed, so the library default applies; max_tokens is set to 1500.
LLM = ChatOpenAI(max_tokens=1500)  # Can be any LLM you want.

  LLM = ChatOpenAI(max_tokens=1500)  # Can be any LLM you want.


## Implementing Your First Generative Agent

In [None]:
def relevance_score_fn(score: float) -> float:
    """Convert a vector-store distance into a similarity score.

    The right conversion depends on the distance / similarity metric of the
    vector store and on the scale of the embeddings (OpenAI's are unit norm,
    many others are not). This one treats ``score`` as the euclidean norm of
    normalized embeddings, where 0 is most similar and sqrt(2) is most
    dissimilar, and maps that range onto similarities from 1 down to 0.

    Args:
        score: Distance reported by the vector store.

    Returns:
        ``1.0`` for a distance of 0, ``0.0`` for a distance of sqrt(2).
    """
    max_distance = math.sqrt(2)
    normalized_distance = score / max_distance
    return 1.0 - normalized_distance


def create_new_memory_retriever():
    """Build a fresh, empty time-weighted retriever for a single agent.

    Each call creates its own embedding client, FAISS index and in-memory
    docstore, so nothing is shared between retrievers.

    Returns:
        A ``TimeWeightedVectorStoreRetriever`` over an empty FAISS store,
        configured with ``other_score_keys=["importance"]`` and ``k=15``.
    """
    embedder = OpenAIEmbeddings()

    # NOTE(review): 1536 presumably matches the vector size produced by the
    # default OpenAIEmbeddings model - confirm if the model is changed.
    empty_index = faiss.IndexFlatL2(1536)

    # Start with no stored documents and no index-to-document-id mapping.
    docstore = InMemoryDocstore({})
    index_to_docstore_id = {}

    store = FAISS(
        embedder.embed_query,
        empty_index,
        docstore,
        index_to_docstore_id,
        relevance_score_fn=relevance_score_fn,
    )

    retriever = TimeWeightedVectorStoreRetriever(
        vectorstore=store,
        other_score_keys=["importance"],
        k=15,
    )
    return retriever

In [None]:
# Memory for the agent: backed by its own freshly created retriever and the
# shared LLM.
alexis_memory = GenerativeAgentMemory(
    llm=LLM,
    memory_retriever=create_new_memory_retriever(),
    verbose=False,
    reflection_threshold=8,  # we will give this a relatively low number to show how reflection works
)

# Defining the Generative Agent: Alexis
# NOTE(review): memory_retriever below is a second, separate retriever in
# addition to the one already inside alexis_memory - confirm GenerativeAgent
# actually uses this argument.
alexis = GenerativeAgent(
    name="Alexis",
    age=30,
    traits="curious, creative writer, world traveler",  # Persistent traits of Alexis
    status="exploring the intersection of technology and storytelling",  # Current status of Alexis
    memory_retriever=create_new_memory_retriever(),
    llm=LLM,
    memory=alexis_memory,
)

  embeddings_model = OpenAIEmbeddings()


In [None]:

# The current "Summary" of a character can't be made because the agent hasn't made
# any observations yet.
# NOTE(review): text is still printed at this point (see the cell output), but
# with no memories stored it is presumably not based on any observation.
print(alexis.get_summary())

Name: Alexis (age: 30)
Innate traits: curious, creative writer, world traveler
Alexis is a disciplined and focused individual who is organized and detail-oriented. She is ambitious and driven, with a strong work ethic and a desire to achieve her goals. She is also independent and self-reliant, able to work effectively on her own.


In [None]:
# We can add memories directly to the memory object

# Seed observations, stored one by one in the loop below.
alexis_observations = [
    "Alexis recalls her morning walk in the park",
    "Alexis feels excited about the new book she started reading",
    "Alexis remembers her conversation with a close friend",
    "Alexis thinks about the painting she saw at the art gallery",
    "Alexis is planning to learn a new recipe for dinner",
    "Alexis is looking forward to her weekend trip",
    "Alexis contemplates her goals for the month."
]

# One add_memory call per observation.
for observation in alexis_observations:
    alexis.memory.add_memory(observation)



# We will see how this summary updates after more observations to create a more rich description.
# force_refresh=True presumably makes the agent rebuild the summary instead of
# reusing an earlier one - confirm against the library docs.
print(alexis.get_summary(force_refresh=True))

Name: Alexis (age: 30)
Innate traits: curious, creative writer, world traveler
Alexis is reflective, goal-oriented, enjoys nature, looks forward to new experiences, appreciates art, loves reading, and enjoys trying new things.
