## Install the SDK

The Python SDK for the Gemini API is contained in the [`google-generativeai`](https://pypi.org/project/google-generativeai/) package. Install the dependency using pip:

In [None]:
!pip install -q -U google-generativeai

## Set up your API key

To use the Gemini API, you'll need an API key. If you don't already have one, create a key in Google AI Studio.

<a class="button" href="https://aistudio.google.com/app/apikey" target="_blank" rel="noopener noreferrer">Get an API key</a>

In Colab, add the key to the secrets manager under the "🔑" in the left panel. Give it the name `GOOGLE_API_KEY`. Then pass the key to the SDK:

In [None]:
# Import the Python SDK
import google.generativeai as genai
# Used to securely store your API key
from google.colab import userdata

# Fetch the key stored under the name `GOOGLE_API_KEY` in the Colab secrets
# manager, then register it with the Gemini SDK.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
!pip install langchain  PyPDF2



Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [None]:
#import os
import PyPDF2
#from langchain.text_splitter import CharacterTextSplitter
#from google import genai
#from langchain.vectorstores import FAISS
#from langchain.embeddings.base import Embeddings
#from langchain.chains import RetrievalQA
#from langchain_google_genai import ChatGoogleGenerativeAI
#from langchain.vectorstores import Chroma  # import Chroma class
#import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
#import time

# Chunking parameters, measured in characters because the length function is `len`.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

# Shared splitter used by get_documents() to cut the PDF text into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)


def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: path of the PDF file; it is opened in binary mode.

    Returns:
        The per-page results of `extract_text()` concatenated in page order,
        with no separator inserted between pages.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_pages = PyPDF2.PdfReader(pdf_file).pages
        # Extraction must finish while the file handle is still open.
        return "".join(pdf_page.extract_text() for pdf_page in pdf_pages)

def get_documents(pdf_path="abc.pdf"):
    """Extract the text of a PDF and split it into chunks.

    Args:
        pdf_path: path of the PDF to load. Defaults to "abc.pdf", the file
            this notebook originally hard-coded, so existing calls without
            arguments behave as before.

    Returns:
        A list of strings, one per chunk produced by `text_splitter`.
    """
    document_text = extract_text_from_pdf(pdf_path)
    # split_text returns the chunk strings directly, so there is no need to
    # wrap the text in a Document and unwrap `page_content` afterwards.
    chunks = text_splitter.split_text(document_text)
    print("Number of Chunks= ",len(chunks))
    return chunks

## Initialize the Generative Model

Before you can make any API calls, you need to initialize the Generative Model.

In [None]:
# Gemini model handle; answer_question() calls generate_content() on it.
model = genai.GenerativeModel(model_name='gemini-2.0-flash')

In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [None]:
from google.colab import userdata
from huggingface_hub import login

# Log in to your Hugging Face account.
# The access token is read from the Colab secrets manager (add a secret named
# `HF_TOKEN` under the "🔑" panel) instead of being hardcoded in the notebook.
# SECURITY: a literal token used to be committed in this cell; treat it as
# leaked and revoke it in the Hugging Face account settings.
login(token=userdata.get('HF_TOKEN'))

In [None]:
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, pipeline
import torch
import faiss
import numpy as np

# 1. Load tokenizer and models
# Single source of truth for the checkpoint: the tokenizer and the model must
# always come from the same id, so both loads reference this variable.
embedding_model_id = "google/gemma-2-9b-it"

tokenizer = AutoTokenizer.from_pretrained(embedding_model_id)
embedding_model = AutoModel.from_pretrained(
    embedding_model_id,
    device_map="auto",           # let the library decide where the weights are placed
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
)



# 2. Documents: the text chunks returned by get_documents(). FAISS results are
# mapped back to text through positions in this list.
documents = get_documents() # List of texts

# 3. Generate embeddings
def embed(texts):
  """Embed each text with the loaded model and return the stacked vectors.

  Each text is tokenized and encoded on its own. The embedding is the mean of
  the last hidden states over the attended tokens.

  Why not the first position: the checkpoint is a causal decoder, so position 0
  (the BOS token) attends only to itself and its hidden state is essentially
  the same for every input. Indexing `[:, 0, :]` therefore gave every text the
  same vector and made nearest-neighbour search meaningless.

  Args:
    texts: iterable of strings.

  Returns:
    np.ndarray built from one float32 array of shape (1, hidden_dim) per text,
    i.e. shape (len(texts), 1, hidden_dim) for non-empty input (callers
    flatten it with np.vstack or by indexing).
  """
  emps = []
  for i, text in enumerate(texts):
    print(i)  # progress indicator: index of the text being embedded
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = embedding_model(**inputs)
    # Pool in float32: the model runs in bfloat16, which is too coarse for averaging.
    hidden = outputs.last_hidden_state.type(torch.float32)
    mask = inputs["attention_mask"].to(hidden.device).unsqueeze(-1).type(torch.float32)
    # Masked mean over the sequence axis; clamp guards against an all-zero mask.
    pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)
    # .cpu() makes the conversion safe when the output lives on an accelerator.
    emps.append(pooled.cpu().numpy())
  return np.array(emps)

# Embed every chunk and flatten the per-text (1, dim) arrays into one 2-D matrix.
doc_embeddings = np.vstack(embed(documents))
print(doc_embeddings.shape)

# 4. Create FAISS index
# Exact L2 index sized to the embedding width, filled with all chunk vectors.
_, dimension = doc_embeddings.shape
index = faiss.IndexFlatL2(dimension)
index.add(doc_embeddings)

# Persist the index to disk so it can be reloaded with faiss.read_index.
faiss.write_index(index, "my_faiss_index.bin")


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Number of Chunks=  12
0
1
2
3
4
5
6
7
8
9
10
11
(12, 3584)


In [None]:
# Reload the FAISS index from "my_faiss_index.bin". Its row order must match
# `documents`, because retrieve() uses the returned ids as positions in that list.
index = faiss.read_index("my_faiss_index.bin")


# 5. Retrieve relevant docs
def retrieve(query, k=1):
    """Return the text chunks whose embeddings are closest to the query.

    Args:
        query: the question text to embed and search for.
        k: number of nearest neighbours to request from the FAISS index.

    Returns:
        A list of at most `k` strings from `documents`, nearest first. It is
        shorter than `k` when the index holds fewer than `k` vectors.
    """
    # embed() returns one (1, dim) array per input text; [0] selects the
    # query's (1, dim) row matrix, which is the 2-D shape index.search expects.
    query_embedding = embed([query])[0]
    distances, indices = index.search(query_embedding, k)
    # FAISS pads missing neighbours with id -1; without the filter,
    # documents[-1] would silently return the last chunk as a "match".
    return [documents[i] for i in indices[0] if i >= 0]

# 6. RAG: Retrieve, then generate
def answer_question(query, k=1):
    """Answer a question using retrieved chunks as context for the Gemini model.

    Args:
        query: the user's question.
        k: how many chunks to retrieve and place in the prompt.

    Returns:
        The `text` of the response returned by `model.generate_content`.
    """
    # Retrieved chunks are stacked one per line to form the context section.
    context = "\n".join(retrieve(query, k))
    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
    return model.generate_content(prompt).text

# 🔍 Example
# Alternative sample questions (uncomment one to try it):
#question = "Where is the Eiffel Tower located?"
#question ="how many mile the Great Wall of China? "
# Active question, in Arabic: "Who is Ibn Khaldun?"
question = "من هو ابن خلدون"
response = answer_question(question, k=2)
print(f"Q: {question}\nA: {response}")



0
Q: من هو ابن خلدون
A: عبد الرحمن محمد بن خلدون، ولد في تونس عام 1332 م وتوفي في مصر عام 1404 م.

