# Creating a RAG system with the Gemini API

* Financial Q&A dataset from Kaggle - https://www.kaggle.com/datasets/yousefsaeedian/financial-q-and-a-10k

* API key for Gemini can be obtained from Google AI Studio.

In [None]:
#Installing pinned versions of the Gemini SDK (google-genai) and the Chroma vector database (chromadb)
!pip install -qU "google-genai==1.7.0" "chromadb==0.6.3"

In [None]:
#Importing the dataset
import kagglehub

#Fetch the latest version of the dataset and report where its files were stored
path = kagglehub.dataset_download("yousefsaeedian/financial-q-and-a-10k")
print(f"Path to dataset files: {path}")

In [None]:
!ls /root/.cache/kagglehub/datasets/yousefsaeedian/financial-q-and-a-10k/versions/1

In [None]:
import pandas as pd
#Loading the CSV file from the downloaded dataset folder and previewing the first rows
csv_file = f"{path}/Financial-QA-10k.csv"
df = pd.read_csv(csv_file)
df.head()

In [None]:
#Building the text to embed in a new 'train' column: the answer followed by its ticker and filing information
ticker_suffix = ' Ticker: ' + df['ticker']
filing_suffix = ". Filing: " + df['filing']
df['train'] = df['answer'] + ticker_suffix + filing_suffix

#Showing the combined text of the row labelled 0
df.loc[0, 'train']

In [None]:
#Duplicate rows and rows with missing values can cause errors during embedding generation,
#so both are removed from the dataframe in place (duplicates first, keeping the first occurrence)
df.drop_duplicates(keep = 'first', inplace = True)
df.dropna(how = 'any', inplace = True)

In [None]:
#Importing libraries
from google import genai
from google.genai import types
from IPython.display import Markdown

In [None]:
#Reading the Gemini API key from the Colab secrets store.
#Note - Make sure that the API key has been added to Google Colab Secrets under the name GEMINI_API_KEY
from google.colab import userdata
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

In [None]:
#Creating the Gemini client (reused by the later cells) and listing all available models that can work with embeddings
client = genai.Client(api_key = GEMINI_API_KEY)

for m in client.models.list():
  #supported_actions is optional in the SDK's model type, so a missing list is treated as "no actions"
  if "embedContent" in (m.supported_actions or []):
    print(m.name)

In [None]:
#Using the text-embedding-004 model to create embeddings
from chromadb import Documents, EmbeddingFunction, Embeddings
from google.api_core import retry
from google.genai import types

#Predicate for the retry decorator below: only Gemini API errors with code 429 or 503 are retried,
#e.g. when the per-minute quota is reached
def is_retriable(e):
  if not isinstance(e, genai.errors.APIError):
    return False
  return e.code in (429,503)

#Chroma embedding function that delegates to the Gemini text-embedding-004 model
class GeminiEmbeddings(EmbeddingFunction):
  #True -> inputs are embedded as documents; False -> inputs are embedded as queries
  document_mode = True

  #The call is retried whenever is_retriable accepts the raised exception
  @retry.Retry(predicate = is_retriable)
  def __call__(self, input: Documents) -> Embeddings:
    """Embed the given texts with Gemini and return the values of each returned embedding."""
    #The task type tells the model whether the texts are documents or queries
    embedding_task = "retrieval_document" if self.document_mode else "retrieval_query"
    embed_config = types.EmbedContentConfig(task_type = embedding_task)

    #Uses the notebook-level Gemini client
    response = client.models.embed_content(
        model = "models/text-embedding-004",
        contents = input,
        config = embed_config,
    )

    #Keep only the raw values of every embedding in the response
    return [embedding.values for embedding in response.embeddings]

In [None]:
#Create a database client for chromadb and populate it with the embeddings from class created above
import chromadb

db_name = 'financial_db'

#Only the first MAX_DOCUMENTS documents are added, due to API restrictions on creating more embeddings
MAX_DOCUMENTS = 100

embed_fn = GeminiEmbeddings() #Embedding function to be used is the class that was defined above
embed_fn.document_mode = True #Documents (not queries) are being embedded while populating the collection

#Creating a client and a collection with specified name and embedding function
chroma_client = chromadb.Client()
db = chroma_client.get_or_create_collection(name = db_name, embedding_function = embed_fn)

#iloc keeps the slice positional, independent of the index labels left after rows were dropped
documents = df['train'].iloc[:MAX_DOCUMENTS].to_list()

#Ids are derived from the documents actually selected, so both lists always have the same length
#even when fewer than MAX_DOCUMENTS rows are available
ids = [str(i) for i in range(len(documents))]

#Adding the documents in the database
db.add(documents = documents, ids = ids)

In [None]:
#Verifying that the documents were added by checking the number of records now in the collection
db.count()

In [None]:
#Viewing the first document added (the peek is limited to 1 record)
db.peek(1)

In [None]:
#The collection is about to be queried, so the embedding function is flipped to query mode
embed_fn.document_mode = False

#Ask the chroma db for the 5 most relevant answers to the specified query
query = "Explain NVIDIA's CUDA programming model and when did it create the GPU?"
results = db.query(query_texts=[query], n_results = 5)

#A single query was sent, so exactly one list of documents is unpacked; the other result data is not needed
(all_passages,) = results['documents']

#Display the 1st result
top_passage = all_passages[0]
Markdown(top_passage)

In [None]:
#The relevant passages for the query can be obtained from the database, so they are now passed to Gemini to generate the final result

#Collapsing line breaks so that the query and every passage each sit on a single line
query_oneline = " ".join(query.split("\n"))
passages_oneline = [" ".join(passage.split("\n")) for passage in all_passages]

#Crafting the instruction part of the prompt, ending with the question
prompt = f"""You are a helpful and informative bot that answers questions using text from the reference passage included below.
Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.
Be sure to break down complicated concepts and strike a friendly and converstional tone. If the passage is irrelevant to the answer, you may ignore it.

QUESTION: {query_oneline}"""

#Appending every retrieved passage on its own line
prompt += "".join(f"\nPassage: {passage_oneline}" for passage_oneline in passages_oneline)

print(prompt)

In [None]:
#Sending the crafted prompt to the gemini-2.0-flash model from Google
answer = client.models.generate_content(model = "gemini-2.0-flash", contents = prompt)

#Rendering the text of the answer as Markdown
response_text = answer.text
Markdown(response_text)

### Embeddings - 2nd Method
* Since the embedding API restricts the number of calls, we can instead create our own embeddings locally with the `sentence-transformers` Python library.
* These embeddings can then be saved with pickle and reloaded later, even after the session has ended, to avoid recomputing them.

In [None]:
from sentence_transformers import SentenceTransformer
#Loading the all-MiniLM-L6-v2 sentence-transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

#Creating the embeddings for every training text, in batches of 64 with a progress bar
train_texts = df['train'].tolist()
embeddings = model.encode(train_texts, batch_size=64, show_progress_bar=True)

In [None]:
#Saving the embeddings using pickle in the directory
import pickle
#Writing the embeddings to embeddings.pkl in the working directory
with open("embeddings.pkl","wb") as pickle_file:
  pickle.Pickler(pickle_file).dump(embeddings)

In [None]:
#Creating a collection and adding the newly created embeddings
db2 = chroma_client.get_or_create_collection(name = "financial_db2")

documents = list(df['train'])
ids = [str(i) for i in range(len(documents))]

#Chroma rejects a single add() larger than its maximum batch size (which depends on the local SQLite build),
#so the documents, ids and precomputed embeddings are added in aligned batches
batch_size = chroma_client.get_max_batch_size()
for start in range(0, len(documents), batch_size):
  end = start + batch_size
  db2.add(documents = documents[start:end], ids = ids[start:end], embeddings = embeddings[start:end])

In [None]:
#Number of records stored in the second collection
db2.count()

In [None]:
#Number of rows in the cleaned dataframe, for comparison with the collection count
len(df)

In [None]:
#Querying the collection
query = "When did NVIDIA invent the GPU?"

#db2 was created without an embedding function and its vectors come from the SentenceTransformer `model`,
#so the query is embedded with that same model instead of relying on Chroma's default embedding function
query_embedding = model.encode([query]).tolist()
results = db2.query(query_embeddings = query_embedding, n_results = 5)

#all_passages stays nested (one list of passages per query); the passages of this query are all_passages[0]
all_passages = results['documents']
Markdown(all_passages[0][0])

In [None]:
#Preparing the prompt from which Gemini generates the final result

#Collapsing line breaks so that the query and every passage each sit on a single line;
#all_passages holds one list per query, so the passages of the single query are at index 0
query_oneline = " ".join(query.split("\n"))
passages_oneline = [" ".join(passage.split("\n")) for passage in all_passages[0]]

#Crafting the instruction part of the prompt, ending with the question
prompt = f"""You are a helpful and informative bot that answers questions using text from the reference passage included below.
Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.
Be sure to break down complicated concepts and strike a friendly and converstional tone. If the passage is irrelevant to the answer, you may ignore it.

QUESTION: {query_oneline}"""

#Appending every retrieved passage on its own line
prompt += "".join(f"\nPassage: {passage_oneline}" for passage_oneline in passages_oneline)

print(prompt)

In [None]:
#Creating a function to take user queries and returning the results using Gemini
def query_gemini(query:str, n_results:int) -> None:
  """Answer a user query with Gemini, using passages retrieved from the db2 collection.

  Args:
    query: The question to answer.
    n_results: Number of passages to retrieve from the collection and include in the prompt.

  Returns:
    None. The text of the generated answer is printed.
  """
  #db2 stores vectors produced by the SentenceTransformer `model` and has no embedding function of its own,
  #so the query is embedded with that same model rather than with Chroma's default embedding function
  query_embedding = model.encode([query]).tolist()
  results = db2.query(query_embeddings = query_embedding, n_results = n_results)

  #A single query was sent, so its passages are the first (only) entry of the documents
  retrieved_passages = results['documents'][0]

  #Crafting the prompt
  query_oneline = query.replace("\n"," ")

  #Built from separate literals so that the function's indentation does not leak into the prompt text
  prompt = (
      "You are a helpful and informative bot that answers questions using text from the reference passage included below.\n"
      "Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.\n"
      "Be sure to break down complicated concepts and strike a friendly and converstional tone. If the passage is irrelevant to the answer, you may ignore it.\n"
      "\n"
      f"QUESTION: {query_oneline}"
  )

  #Add the retrieved passages to the prompt, each collapsed onto a single line
  passages_oneline = [passage.replace("\n"," ") for passage in retrieved_passages]
  prompt += "".join(f"\nPassage: {passage_oneline}" for passage_oneline in passages_oneline)

  #Generating answer
  answer = client.models.generate_content(
      model = "gemini-2.0-flash",
      contents = prompt
  )

  print(answer.text)

In [None]:
#Example call: answering a question with the 5 most relevant passages from the collection
query_gemini("Provide the financial results for Nvidia in 2023", n_results = 5)