## Generative AI with LangChain - Gemini Pro, RAG and Pinecone

In [1]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this hides all warnings, including any raised by the
# libraries imported below.
import warnings
warnings.filterwarnings('ignore')

# Google Gemini client and its LangChain embeddings wrapper.
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Loader that builds LangChain documents from a pandas DataFrame.
from langchain.document_loaders import DataFrameLoader

# Pinecone client, the LangChain Pinecone vector store, and the text splitter.
import pinecone
from langchain.vectorstores import Pinecone
from langchain.text_splitter import CharacterTextSplitter

# Used to prompt for API keys without echoing them.
import getpass

import pandas as pd
import os, sys

# Load variables from a .env file into the environment, when one is found.
from dotenv import load_dotenv
load_dotenv()

False

In [2]:
# Prompt for the Gemini API key only when it is not already in the environment
# (for example, loaded from a .env file by load_dotenv()). A key that is
# already configured is no longer overwritten, and the cell can run unattended.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(prompt="Enter your GenAI API key: ")

Enter your GenAI API key: ········


In [3]:
# Read the book catalogue from the Excel workbook and preview the first rows.
books_path = 'books_data.xlsx'
df = pd.read_excel(books_path)
df.head()

Unnamed: 0,ISBN,Genre,Title,Description
0,978-9364499739,Kids,My First 100 Learnings,This is a box set of 5 early learning board bo...
1,978-9388369999,Kids,My First Five Minutes Fairy Tales,This amazing box containing a set of 20 beauti...
2,978-9399391746,Kids,The Universe within Space,Ever looked up into the sky and wondered what’...
3,978-1639969993,Health,Natural Cures,Hundreds of thousands of readers have relied o...
4,978-1639999161,Health,If Your Mouth Could Talk with me,Your mouth is the gateway to your body and is ...


In [4]:
from langchain.document_loaders import DataFrameLoader

# Use the "Description" column as each document's text; the recorded output
# of the following cells shows the other columns ending up in the metadata.
df_loader = DataFrameLoader(
    df,
    page_content_column="Description",
)
df_loader


<langchain_community.document_loaders.dataframe.DataFrameLoader at 0x240020f1f70>

In [5]:
# Load the DataFrame rows as LangChain Document objects and print them all.
loaded_rows = df_loader.load()
documents = list(loaded_rows)
print(documents)

[Document(page_content='This is a box set of 5 early learning board books, each comprising 100 well-researched and attractive images. These books will help your child build vocabulary, observation skills, and prepare them for school', metadata={'ISBN': '978-9364499739', 'Genre': 'Kids', 'Title': 'My First 100 Learnings'}), Document(page_content='This amazing box containing a set of 20 beautiful classic children fairy tales will be a delightful read for modern readers. Each fairy tale selected in this set has been retold for centuries. These stories are bound to create memories to treasure and encourage your little ones to read books.  The stories with vibrant, captivating illustrations shall spark their imagination, develop speech and language skills', metadata={'ISBN': '978-9388369999', 'Genre': 'Kids', 'Title': 'My First Five Minutes Fairy Tales'}), Document(page_content='Ever looked up into the sky and wondered what’ s out there in the universe? How many stars are there in the sky? 

In [6]:
# Inspect the first loaded Document (its page content and metadata).
documents[0]

Document(page_content='This is a box set of 5 early learning board books, each comprising 100 well-researched and attractive images. These books will help your child build vocabulary, observation skills, and prepare them for school', metadata={'ISBN': '978-9364499739', 'Genre': 'Kids', 'Title': 'My First 100 Learnings'})

In [7]:
# NOTE(review): these three imports repeat ones already made in the first
# cell; re-importing is harmless but redundant.
import pinecone
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone

In [8]:
# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=20)
# and preview the first resulting chunk.
text_splitter = CharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents)
docs[0]

Document(page_content='This is a box set of 5 early learning board books, each comprising 100 well-researched and attractive images. These books will help your child build vocabulary, observation skills, and prepare them for school', metadata={'ISBN': '978-9364499739', 'Genre': 'Kids', 'Title': 'My First 100 Learnings'})

In [9]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model used both for indexing the chunks and for embedding queries.
embedding_model_name = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)
embeddings

GoogleGenerativeAIEmbeddings(model='models/embedding-001', task_type=None, google_api_key=None)

In [10]:
# Prompt for the Pinecone API key only when it is not already in the
# environment (for example, loaded from a .env file by load_dotenv()). A key
# that is already configured is no longer overwritten, and the cell can run
# unattended.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass(prompt="Enter your Pinecone API key: ")

Enter your Pinecone API key: ········


In [12]:
# Connect the Pinecone client using the key stored in the environment.
pinecone.init(
    environment="gcp-starter",
    api_key=os.getenv("PINECONE_API_KEY"),
)

index_name = "gemini-pro-index"

# Create the index only when it does not exist yet.
# NOTE(review): dimension=768 presumably has to match the size of the vectors
# produced by the embedding model - confirm.
existing_indexes = pinecone.list_indexes()
if index_name not in existing_indexes:
    pinecone.create_index(index_name, metric='cosine', dimension=768)
    print(f"Pinecone index: {index_name} is created")

# Open a handle to the index and show its current statistics.
index = pinecone.Index(index_name)
index.describe_index_stats()

Pinecone index: gemini-pro-index is created


{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [13]:
# Embed the chunks with the embedding model and store them in the Pinecone
# index, keeping a vector-store handle for the searches below.
docsearch = Pinecone.from_documents(
    documents=docs,
    embedding=embeddings,
    index_name=index_name,
)

In [15]:
# Re-check the index statistics now that the documents have been stored.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0001,
 'namespaces': {'': {'vector_count': 10}},
 'total_vector_count': 10}

In [16]:
# Re-attach to the already populated Pinecone index without uploading the
# documents again.

docsearch = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [17]:
query = "Improve communication and build confidence"

# Run a plain similarity search and show the metadata of the first result.
response = docsearch.similarity_search(query)
best_match = response[0]
best_match.metadata

{'Genre': 'Self-Learning',
 'ISBN': '978-1619693184',
 'Title': 'How to Win Friends and Influence People'}

In [18]:
# Number of documents the similarity search returned (no explicit `k` was passed).
len(response)

4

In [19]:
# Show the metadata of the first three search results.

top_hits = response[:3]
for resp in top_hits:
    print(resp.metadata)

{'Genre': 'Self-Learning', 'ISBN': '978-1619693184', 'Title': 'How to Win Friends and Influence People'}
{'Genre': 'Business', 'ISBN': '978-1644691166', 'Title': 'Work Less, Make More'}
{'Genre': 'Health', 'ISBN': '978-1639999161', 'Title': 'If Your Mouth Could Talk with me'}


## Maximal Marginal Relevance (MMR)

MMR is a technique used to diversify search results by balancing the relevance and dissimilarity of retrieved items. The goal is to provide a set of results that not only contains highly relevant items but also covers a diverse range of information.

**Example:** Suppose a similarity search over a collection of articles has returned several close matches. MMR can then be applied to ensure that the selected articles are not too similar to each other.

For instance, if the first article is about AI in healthcare, MMR might select a second article that covers a different aspect of AI, such as its impact on education. This ensures a more comprehensive and diverse set of results.

**Similarity Search:** Aims to retrieve items with high similarity to the query.

**MMR:** Aims to provide a balanced set of results, considering both relevance and dissimilarity.

In [32]:
query = "Learning boxset for children"

# Build a retriever that ranks with MMR; switch to the commented-out line
# below to compare against plain similarity ranking.
retriever = docsearch.as_retriever(search_type="mmr")
#retriever = docsearch.as_retriever(search_type="similarity")

response = retriever.get_relevant_documents(query)

# Show the metadata of the first three retrieved documents.
top_three = response[:3]
for resp in top_three:
    print(resp.metadata)

{'Genre': 'Kids', 'ISBN': '978-9364499739', 'Title': 'My First 100 Learnings'}
{'Genre': 'Business', 'ISBN': '978-1644691166', 'Title': 'Work Less, Make More'}
{'Genre': 'Kids', 'ISBN': '978-9399391746', 'Title': 'The Universe within Space'}


## Retrieval Augmented Generation (RAG)

In [35]:
# NOTE(review): `resp` is the loop variable left over from the `for` loop in
# the previous cell, so this shows the title of the last document that loop
# printed.
resp.metadata['Title']

'The Universe within Space'

In [36]:
# Authenticate the Gemini client with the key stored in the environment.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

model = genai.GenerativeModel('gemini-pro')

# For each of the first three retrieved documents, ask Gemini for related
# best-selling books. The model's reply is kept in its own variable
# (`generation`) instead of being assigned to `response`. Overwriting
# `response` replaced the list of retrieved documents with a single Gemini
# reply, so `response[:3]` no longer referred to the retrieved documents when
# this cell was run again.
for resp in response[:3]:
    title = resp.metadata['Title']
    generation = model.generate_content(f"Top 3 selling books related to {title} along with its ISBN")
    print(f"The Related books of title: {title}")
    print("-"*50)
    print(generation.text)
    print("="*100)

The Related books of title: My First 100 Learnings
--------------------------------------------------
1. My First 100: Animals (ISBN: 978-1402785666)
2. My First 100: Numbers (ISBN: 978-1402785628)
3. My First 100: Words (ISBN: 978-1402785642)
The Related books of title: Work Less, Make More
--------------------------------------------------
1. **The 4-Hour Workweek: Escape 9-5, Live Anywhere, and Join the New Rich**

* By: Timothy Ferriss
* ISBN: ‎978-0307465358
* Summary: This book challenges the traditional notions of career and work-life balance, offering a set of tools and strategies for escaping the 9-to-5 grind and creating a more flexible, fulfilling, and productive lifestyle.


2. **The Power of Less: The Minimalist Guide to a Meaningful Life**

* By: Leo Babauta
* ISBN: ‎978-1623366286
* Summary: This book advocates for a minimalist lifestyle, emphasizing the importance of decluttering both physical and mental spaces to gain more clarity, focus, and freedom. It provides pract