In [1]:
import pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.schema import Document
import os
import langchain
import pinecone
from langchain_pinecone import PineconeVectorStore  # CORRECT IMPORT
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()
pc = pinecone.Pinecone(api_key=os.environ['PINECONE_API_KEY'])

In [3]:
## reading the document
def read_doc(directory):
    file_loader = PyPDFDirectoryLoader(directory)
    documents = file_loader.load()
    return documents

In [4]:
doc = read_doc('documents/')
len(doc)

21

In [5]:
## divide docs into chunks so the llm could process it

def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc = text_splitter.split_documents(docs)
    return docs

In [6]:
documents = chunk_data(docs=doc)
len(documents)

21

In [7]:
## embeddings technique of google gemini
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ['GEMINI_API_KEY']
)
embeddings

GoogleGenerativeAIEmbeddings(client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x00000259EFA50050>, model='models/embedding-001', task_type=None, google_api_key=SecretStr('**********'), credentials=None, client_options=None, transport=None, request_options=None)

In [8]:
vectors = embeddings.embed_query("how are you ?")
len(vectors)

768

In [9]:
index_name = "llmvectordb"

In [10]:
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # Gemini embedding dimension
        metric="cosine",
        spec=pinecone.ServerlessSpec(
            cloud="aws",
            region="us-west-2"
        )
    )
    print("Created new index")
    # Wait for index to be ready
    time.sleep(30)
else:
    print("Index already exists")

Index already exists


In [11]:
vectorstore = PineconeVectorStore.from_documents(
    documents=documents,
    embedding=embeddings,
    index_name=index_name
)

In [12]:
def retrieve_query(query, k=2):
    matching_results = vectorstore.similarity_search(query, k=k)
    return matching_results

In [16]:
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash",
    google_api_key=os.environ['GEMINI_API_KEY'],
    temperature=0.5
)

In [14]:
import google.generativeai as genai
from google.api_core import client_options

# Configure with explicit options
genai.configure(
    api_key=os.environ['GEMINI_API_KEY'],
    # client_options=client_options.ClientOptions(api_endpoint="generativelanguage.googleapis.com")
)

# List available models
print("Available models:")
for model in genai.list_models():
    if 'generateContent' in model.supported_generation_methods:
        print(f"Model: {model.name}")
        print(f"Display name: {model.display_name}")
        print("---")

# Use the exact model name from the list

Available models:
Model: models/gemini-1.5-pro-latest
Display name: Gemini 1.5 Pro Latest
---
Model: models/gemini-1.5-pro-002
Display name: Gemini 1.5 Pro 002
---
Model: models/gemini-1.5-pro
Display name: Gemini 1.5 Pro
---
Model: models/gemini-1.5-flash-latest
Display name: Gemini 1.5 Flash Latest
---
Model: models/gemini-1.5-flash
Display name: Gemini 1.5 Flash
---
Model: models/gemini-1.5-flash-002
Display name: Gemini 1.5 Flash 002
---
Model: models/gemini-1.5-flash-8b
Display name: Gemini 1.5 Flash-8B
---
Model: models/gemini-1.5-flash-8b-001
Display name: Gemini 1.5 Flash-8B 001
---
Model: models/gemini-1.5-flash-8b-latest
Display name: Gemini 1.5 Flash-8B Latest
---
Model: models/gemini-2.5-pro-preview-03-25
Display name: Gemini 2.5 Pro Preview 03-25
---
Model: models/gemini-2.5-flash-preview-05-20
Display name: Gemini 2.5 Flash Preview 05-20
---
Model: models/gemini-2.5-flash
Display name: Gemini 2.5 Flash
---
Model: models/gemini-2.5-flash-lite-preview-06-17
Display name: Ge

In [17]:
# Test the connection
response = llm.invoke("Hello, how are you?")
print(response.content)

I am doing well, thank you for asking!  How are you today?


In [18]:
chain = load_qa_chain(llm, chain_type="stuff")

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm, chain_type="stuff")


In [19]:
def retrieve_answers(query):
    doc_search = retrieve_query(query)
    print(doc_search)
    response = chain.run(input_documents=doc_search, question=query)
    return response


In [20]:
our_query = "give me the definition of acv"
answer = retrieve_answers(our_query)
print(answer)

[Document(id='9ad87013-d869-4429-b8f6-51efc239d202', metadata={'creationdate': 'D:20250823115734', 'creator': 'PDFium', 'page': 0.0, 'page_label': '1', 'producer': 'PDFium', 'source': 'documents\\doc.pdf', 'total_pages': 21.0}, page_content="ANALYSE CYCLE DE VIE ENVIRONNEMENTAL – EMPREINTE CARBONE\nTaieb BEN ROMDHANE     IMI4                                                                                                                                       Page 1 | 1\nObjectifs du cours :\uf0b7 Comprendre les principes de base de l'analyse du cycle de vie.\uf0b7 Apprendre les étapes clés du processus ACV.\uf0b7 Identifier les outils et méthodologies utilisés pour effectuer une ACV.\uf0b7 Savoir interpréter les résultats d’une ACV et appliquer les conclusions pour une gestion durable des produits1. Définition et objectifs de l'ACVL'analyse du cycle de vie (ACV) est un outil qui permet d’évaluer les impacts environnementaux d'un produit ou d'un service tout au long de son existence. Cel

  response = chain.run(input_documents=doc_search, question=query)


Based on the provided text, ACV (Analyse du Cycle de Vie) is a tool used to evaluate the environmental impacts of a product or service throughout its entire life cycle.  This includes the extraction of raw materials, manufacturing, transportation, use, and end-of-life (recycling, incineration, or landfilling).
