In [16]:
import os
import streamlit as st
import pickle
import time
import google.generativeai as genai
import langchain
#from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS

In [2]:
os.environ['GOOGLE_API_KEY'] = 'api-key'

In [3]:
#!pip install genai

In [4]:
genai.configure(api_key = os.environ['GOOGLE_API_KEY'])

In [5]:
for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(m.name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-001
models/gemini-1.5-flash-latest
models/gemini-1.5-pro
models/gemini-1.5-pro-001
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [6]:
#llm = genai.GenerativeModel('gemini-1.5-flash')

In [7]:
loaders = UnstructuredURLLoader(urls = [
    "https://www.moneycontrol.com/news/business/personal-finance/nvidias-stock-surge-a-potential-boon-for-indian-it-services-providers-12767075.html",
    "https://blogs.nvidia.com/blog/huang-zuckerberg-siggraph-2024/"
])

data = loaders.load()
len(data)

2

In [8]:
text_splitter = RecursiveCharacterTextSplitter (
    chunk_size = 1000,
    chunk_overlap = 200
)

docs = text_splitter.split_documents(data)
len(docs)

25

In [12]:
for m in genai.list_models():
  if 'embedContent' in m.supported_generation_methods:
    print(m.name)


models/embedding-001
models/text-embedding-004


In [60]:
# import faiss
# embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# vectorIndex = FAISS.from_documents(docs, embeddings)

In [35]:
# from langchain.embeddings import HuggingFaceInstructEmbeddings

# instructor_embeddings = HuggingFaceInstructEmbeddings()

# vectordb = FAISS.from_documents(documents=docs, embedding=instructor_embeddings)

In [57]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
vectorindex_genai = FAISS.from_documents(docs, embeddings)


# Extract components for pickling
faiss_index = vectorindex_genai.index
docstore = vectorindex_genai.docstore
index_to_docstore_id = vectorindex_genai.index_to_docstore_id

# Serialize components to a file
file_path = "vector_index_components.pkl"
with open(file_path, "wb") as f:
    pickle.dump((faiss_index, docstore, index_to_docstore_id), f)

print("Vector database components saved successfully!")

Vector database components saved successfully!


In [58]:
# Load components from the pickle file
with open(file_path, "rb") as f:
    loaded_faiss_index, loaded_docstore, loaded_index_to_docstore_id = pickle.load(f)

# Reconstruct the FAISS vector store
loaded_vectorindex_genai = FAISS(
    index=loaded_faiss_index,
    docstore=loaded_docstore,
    index_to_docstore_id=loaded_index_to_docstore_id,
    embedding_function=embeddings.embed_query,  # Pass the embedding function here
)

print("Vector database loaded successfully!")

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


Vector database loaded successfully!


In [42]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.7)

In [59]:
chain = RetrievalQAWithSourcesChain.from_llm(llm = llm, retriever = loaded_vectorindex_genai.as_retriever())
chain



In [61]:
query = "Who are the largest customers of Nvidia?"
langchain.debug = False

chain({"question":query}, return_only_outputs = True)

{'answer': 'FINAL ANSWER: The largest customers of Nvidia are estimated to be Microsoft (19%), Meta (13%), Amazon (6%), and Alphabet (6%). \n',
 'sources': 'https://www.moneycontrol.com/news/business/personal-finance/nvidias-stock-surge-a-potential-boon-for-indian-it-services-providers-12767075.html'}