In [18]:
from pinecone import Pinecone
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv
import os

In [19]:

# Pull the variables declared in the local .env file into the environment
load_dotenv()

# Look up the Pinecone API key; this is None when PINECONE_KEY is not set
pc_api_key = os.environ.get("PINECONE_KEY")

# Build the Pinecone client that the following cells use
pc = Pinecone(api_key=pc_api_key)

In [20]:

# Name of the Pinecone index queried by this notebook
index_name = "genai-serverless"

# Describe the index first so its host can be read from the description
index_description = pc.describe_index(index_name)
host = index_description["host"]

# Open a handle to the index, targeting the host reported above
index = pc.Index(index_name, host=host)

In [21]:
# Embedding model shared by the helper functions below.
# normalize_embeddings=True is forwarded to the encoder through encode_kwargs.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en",
    encode_kwargs={"normalize_embeddings": True},
)

In [14]:
def get_sentence_embeddings(sentences):
    """Embed a batch of sentences with the module-level ``embeddings`` model.

    Args:
        sentences: The texts to embed, passed unchanged to
            ``embeddings.embed_documents``.

    Returns:
        Whatever ``embeddings.embed_documents`` returns for ``sentences``
        (callers in this notebook index it to get one embedding per sentence).
    """
    return embeddings.embed_documents(sentences)

def search_query(query, top_k=3):
    """Embed ``query`` and look up its nearest neighbours in the Pinecone index.

    Args:
        query: The search text to embed.
        top_k: Maximum number of matches to request (defaults to 3).

    Returns:
        The response object returned by ``index.query``; metadata is
        requested for every match via ``include_metadata=True``.
    """
    # get_sentence_embeddings works on batches, so wrap the single query in a
    # list and take the first (only) embedding back out.
    query_embedding = get_sentence_embeddings([query])[0]

    # Pinecone's ``vector`` argument is documented as one flat list of floats,
    # so pass the embedding itself rather than wrapping it in another list.
    return index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )

In [27]:

# Prompt interactively for the text to search for
user_query = input("Enter your search query: ")

# Run the search with the default number of results and keep the response
search_results = search_query(query=user_query)

In [28]:
# Dump the raw query response to inspect its matches, namespace and usage
print(search_results)

{'matches': [{'id': '2',
              'metadata': {'page_number': 3.0,
                           'sentence': 'The sky is clear and the sun is '
                                       'shining.'},
              'score': 0.887913585,
              'values': []},
             {'id': '5',
              'metadata': {'page_number': 6.0,
                           'sentence': 'There is a bear in the town square.'},
              'score': 0.757892907,
              'values': []},
             {'id': '3',
              'metadata': {'page_number': 4.0,
                           'sentence': 'Machine learning models require a lot '
                                       'of data.'},
              'score': 0.748565257,
              'values': []}],
 'namespace': '',
 'usage': {'read_units': 6}}


In [29]:
# Pretty-print each match: its score, then the sentence and page number
# stored in the match metadata, followed by a blank separator line.
print("\nSearch Results:")
for hit in search_results["matches"]:
    print(f"Score: {hit['score']}")
    hit_metadata = hit["metadata"]
    print(f"Sentence: {hit_metadata['sentence']}")
    print(f"Page Number: {hit_metadata['page_number']}")
    print()


Search Results:
Score: 0.887913585
Sentence: The sky is clear and the sun is shining.
Page Number: 3.0

Score: 0.757892907
Sentence: There is a bear in the town square.
Page Number: 6.0

Score: 0.748565257
Sentence: Machine learning models require a lot of data.
Page Number: 4.0

