In [2]:
import google.generativeai as genai
import pinecone

import pandas as pd
import numpy as np
import os, sys
from uuid import uuid4

from dotenv import load_dotenv
load_dotenv()

True

In [3]:
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [4]:
df = pd.read_excel('books_data.xlsx')
df.head()

Unnamed: 0,ISBN,Genre,Title,Description
0,978-9364499739,Kids,My First 100 Learnings,This is a box set of 5 early learning board bo...
1,978-9388369999,Kids,My First Five Minutes Fairy Tales,This amazing box containing a set of 20 beauti...
2,978-9399391746,Kids,The Universe within Space,Ever looked up into the sky and wondered what’...
3,978-1639969993,Health,Natural Cures,Hundreds of thousands of readers have relied o...
4,978-1639999161,Health,If Your Mouth Could Talk with me,Your mouth is the gateway to your body and is ...


In [5]:
df['Embeddings']=df.apply(lambda x: genai.embed_content(model='models/embedding-001',
                                                        content=x['Description'],
                                                        task_type="retrieval_document",
                                                        )["embedding"], axis=1)

df.head()

Unnamed: 0,ISBN,Genre,Title,Description,Embeddings
0,978-9364499739,Kids,My First 100 Learnings,This is a box set of 5 early learning board bo...,"[-0.0039749513, -0.012792546, -0.051348966, 0...."
1,978-9388369999,Kids,My First Five Minutes Fairy Tales,This amazing box containing a set of 20 beauti...,"[0.020358495, 0.009027424, -0.04788269, -0.010..."
2,978-9399391746,Kids,The Universe within Space,Ever looked up into the sky and wondered what’...,"[0.028995216, -0.009869022, -0.036497455, -0.0..."
3,978-1639969993,Health,Natural Cures,Hundreds of thousands of readers have relied o...,"[0.044540714, -0.016545665, -0.022143817, -0.0..."
4,978-1639999161,Health,If Your Mouth Could Talk with me,Your mouth is the gateway to your body and is ...,"[0.058522005, -0.009796286, -0.020162895, 0.02..."


In [6]:
len(df['Embeddings'][0])

768

In [7]:
df['id'] = [str(uuid4()) for i in range (len(df))]
df.to_excel('Data_Embeddings.xlsx')
df.head()

Unnamed: 0,ISBN,Genre,Title,Description,Embeddings,id
0,978-9364499739,Kids,My First 100 Learnings,This is a box set of 5 early learning board bo...,"[-0.0039749513, -0.012792546, -0.051348966, 0....",e0a23d82-1c7a-45a0-966d-619d2291f99a
1,978-9388369999,Kids,My First Five Minutes Fairy Tales,This amazing box containing a set of 20 beauti...,"[0.020358495, 0.009027424, -0.04788269, -0.010...",189488d9-4ca8-43bb-a147-8cdc8e57e725
2,978-9399391746,Kids,The Universe within Space,Ever looked up into the sky and wondered what’...,"[0.028995216, -0.009869022, -0.036497455, -0.0...",73fc1664-541b-40fc-b810-3df18705ff47
3,978-1639969993,Health,Natural Cures,Hundreds of thousands of readers have relied o...,"[0.044540714, -0.016545665, -0.022143817, -0.0...",b1060988-f80d-4c89-8ce4-3f939b07efd9
4,978-1639999161,Health,If Your Mouth Could Talk with me,Your mouth is the gateway to your body and is ...,"[0.058522005, -0.009796286, -0.020162895, 0.02...",4f640ecd-3e32-4b9d-a0a0-6c5e1e2b861d


In [8]:
def find_top_result(query, df):

  query_embedding = genai.embed_content(model='models/embedding-001',
                                        content=query,
                                        task_type="retrieval_query")
  
  dot_products = np.dot(np.stack(df['Embeddings']), query_embedding["embedding"])

  idx = np.argmax(dot_products)
  return df.iloc[idx][['ISBN', 'Title', 'Description']] # Return text from index with max value

In [9]:
query = "How to become friendly with everyone"

passage = find_top_result(query, df)
passage

ISBN                                              978-1619625601
Title                           How to Talk to Anyone at anytime
Description    The author has spent her career teaching peopl...
Name: 8, dtype: object

In [10]:
query = "Any suggestion or artical on business and entrepreneur"

passage = find_top_result(query, df)
passage

ISBN                                              978-1619693184
Title                    How to Win Friends and Influence People
Description    You can learn to expand your social circle, po...
Name: 9, dtype: object

In [11]:
def find_top_n_results(query, df, n):

  query_embedding = genai.embed_content(model='models/embedding-001',
                                        content=query,
                                        task_type="retrieval_query")
  
  dot_products = np.dot(np.stack(df['Embeddings']), query_embedding["embedding"])

  idx = np.argsort(dot_products)[::-1][:n]

  top_n_results = []
  for id in idx:
    top_n_results.append(df.iloc[id][['ISBN', 'Title', 'Description']])
    top_n_results.append('=' * 75) 

  return top_n_results

In [12]:
query = "Any artical on business and entrepreneur"

passage = find_top_n_results(query, df, 3)
passage

[ISBN                                              978-1644691166
 Title                                       Work Less, Make More
 Description    The counter-intuitive approach to building a p...
 Name: 6, dtype: object,
 ISBN                                              978-1619693184
 Title                    How to Win Friends and Influence People
 Description    You can learn to expand your social circle, po...
 Name: 9, dtype: object,
 ISBN                                              978-1619681130
 Title                                Rich Dad Poor Dad by people
 Description    Explodes the myth that you need to earn a high...
 Name: 7, dtype: object,

#### Pinecone

In [13]:
import pinecone

pinecone.init(
    api_key = os.getenv("PINECONE_API_KEY"),
    environment = os.getenv("PINECONE_ENVIRONMENT")
)

index_name = "gemini-pro-index"

if index_name not in pinecone.list_indexes():
    pinecone.create_index(index_name, 
                          dimension = 768, 
                          metric='cosine')
    print(f"Pinecone index: {index_name} is created")
    
index = pinecone.Index(index_name)

index.describe_index_stats()

Pinecone index: gemini-pro-index is created


{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [14]:
df.head()

Unnamed: 0,ISBN,Genre,Title,Description,Embeddings,id
0,978-9364499739,Kids,My First 100 Learnings,This is a box set of 5 early learning board bo...,"[-0.0039749513, -0.012792546, -0.051348966, 0....",e0a23d82-1c7a-45a0-966d-619d2291f99a
1,978-9388369999,Kids,My First Five Minutes Fairy Tales,This amazing box containing a set of 20 beauti...,"[0.020358495, 0.009027424, -0.04788269, -0.010...",189488d9-4ca8-43bb-a147-8cdc8e57e725
2,978-9399391746,Kids,The Universe within Space,Ever looked up into the sky and wondered what’...,"[0.028995216, -0.009869022, -0.036497455, -0.0...",73fc1664-541b-40fc-b810-3df18705ff47
3,978-1639969993,Health,Natural Cures,Hundreds of thousands of readers have relied o...,"[0.044540714, -0.016545665, -0.022143817, -0.0...",b1060988-f80d-4c89-8ce4-3f939b07efd9
4,978-1639999161,Health,If Your Mouth Could Talk with me,Your mouth is the gateway to your body and is ...,"[0.058522005, -0.009796286, -0.020162895, 0.02...",4f640ecd-3e32-4b9d-a0a0-6c5e1e2b861d


In [15]:
items = [{'id': str(idx), 
          'values': embedding, 
          'metadata': {'isbn': str(isbn), 'title': str(title)}}
         for idx, embedding, isbn, title in zip(df['id'], df['Embeddings'], df['ISBN'], df['Title'])]

index.upsert(items)

{'upserted_count': 10}

In [17]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0001,
 'namespaces': {'': {'vector_count': 10}},
 'total_vector_count': 10}

In [18]:
query = "Improve communication and build confidence"

query_embedding=genai.embed_content(model='models/embedding-001',
                                    content=query,
                                    task_type="retrieval_query")



In [19]:
len(query_embedding['embedding'])

768

In [20]:
response = index.query(query_embedding['embedding'], top_k=3)
response

{'matches': [{'id': '37f5b0a1-b763-4746-b4e0-e24f70ba46c9',
              'score': 0.648864388,
              'values': []},
             {'id': 'dbb350f5-6554-4b4b-9a6b-f4e7f5ed4754',
              'score': 0.646759927,
              'values': []},
             {'id': '4f640ecd-3e32-4b9d-a0a0-6c5e1e2b861d',
              'score': 0.593314946,
              'values': []}],
 'namespace': ''}

In [21]:
metadata = index.fetch([response['matches'][0]['id']])
metadata

{'namespace': '',
 'vectors': {'37f5b0a1-b763-4746-b4e0-e24f70ba46c9': {'id': '37f5b0a1-b763-4746-b4e0-e24f70ba46c9',
                                                      'metadata': {'isbn': '978-1619625601',
                                                                   'title': 'How '
                                                                            'to '
                                                                            'Talk '
                                                                            'to '
                                                                            'Anyone '
                                                                            'at '
                                                                            'anytime'},
                                                      'values': [-0.0062955413,
                                                                 -0.041787412,
                                           

In [22]:
isbn = metadata['vectors'][response['matches'][0]['id']]['metadata']['isbn']
title = metadata['vectors'][response['matches'][0]['id']]['metadata']['title']


print(f'{isbn} ----> {title}')

978-1619625601 ----> How to Talk to Anyone at anytime


In [23]:
for match in response['matches']:
    metadata = index.fetch([match['id']])
    isbn = metadata['vectors'][match['id']]['metadata']['isbn']
    title = metadata['vectors'][match['id']]['metadata']['title']
    
    print(f'{isbn} ----> {title}')

978-1619625601 ----> How to Talk to Anyone at anytime
978-1619693184 ----> How to Win Friends and Influence People
978-1639999161 ----> If Your Mouth Could Talk with me


In [24]:
for result in response['matches']:  
    isbn = df[df['id'] == result['id']]['ISBN'].values[0]
    title = df[df['id'] == result['id']]['Title'].values[0]

    print(str(isbn) + " ---> " + str(title))

978-1619625601 ---> How to Talk to Anyone at anytime
978-1619693184 ---> How to Win Friends and Influence People
978-1639999161 ---> If Your Mouth Could Talk with me
