In [None]:
# Uncomment to install the Pinecone client used by this notebook.
# NOTE(review): the cells below call pinecone.init(...); confirm that the
# installed client version still provides that entry point.
#!pip install pinecone-client

In [None]:
import os
import time

import dotenv
import pinecone

# Load variables from a local .env file (if present) before reading os.environ.
dotenv.load_dotenv()

# Fallbacks used only when the matching environment variable is unset or empty.
# Keep API_KEY blank in version control and supply the real key through
# PINECONE_API_KEY (environment or .env file).
API_KEY = ''
ENV = 'gcp-starter'

api_key = os.environ.get('PINECONE_API_KEY') or API_KEY
env = os.environ.get('PINECONE_ENVIRONMENT') or ENV

# Fail here with an actionable message instead of handing an empty key to
# pinecone.init().
if not api_key:
    raise RuntimeError(
        "Pinecone API key not found: set PINECONE_API_KEY in the environment "
        "or in a .env file (or fill in API_KEY above)."
    )

pinecone.init(api_key=api_key, environment=env)

  from tqdm.autonotebook import tqdm


In [None]:
# List the names of the indexes that currently exist.
pinecone.list_indexes()

['company-name']

In [None]:
# Manual cleanup, left disabled. Note: index_name is only assigned in a later
# cell, so it must be defined before this line is re-enabled.
# pinecone.delete_index(index_name)

In [None]:
# Name of the Pinecone index and the namespace the vectors are stored under.
index_name = "company-name"
namespace = "meeting-title"

# Dimensionality of the encoder's vectors.
dimension = 4

In [None]:
# WARNING: destructive. An existing index with this name is deleted here and
# then recreated from scratch below.
if index_name in pinecone.list_indexes():
    pinecone.delete_index(index_name)

# we create a new index
pinecone.create_index(
    name=index_name, # meeting_name
    metric='dotproduct', # need to check
    dimension=dimension,
    #pod_type, # need to check
    #metadata_config --> need to check
)

# Wait for the index to be initialized, but give up after a bounded time so a
# failed or stuck creation cannot hang the notebook forever.
ready_timeout_s = 300
ready_deadline = time.monotonic() + ready_timeout_s
while not pinecone.describe_index(index_name).status['ready']:
    if time.monotonic() >= ready_deadline:
        raise TimeoutError(
            f"Index {index_name!r} was not ready after {ready_timeout_s} seconds"
        )
    time.sleep(1)

print(pinecone.describe_index(index_name))

IndexDescription(name='company-name', metric='dotproduct', replicas=1, dimension=4.0, shards=1, pods=1, pod_type='starter', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')


In [None]:
# Handle to the index; used by the upsert, query and fetch cells below.
index = pinecone.Index(index_name)
# Sample records, one tuple per vector:
#     ("id", [vector_data], {metadata})
#     ("namespace_metadata", [0], {})
# Ids must be unique per record: reusing one id per person across meetings
# makes the later meeting's record overwrite the earlier one on upsert. Each id
# therefore combines speaker and meeting, and the speaker is also kept in the
# metadata of every record so it can still be read back and filtered on.
data = [
    ("person_1#meeting_1", [0.1, 0.2, 0.3, 0.4], {"timestamp": "04012024", "meeting_no": 1, "speaker": "person_1"}),
    ("person_2#meeting_1", [0.2, 0.3, 0.4, 0.5], {"timestamp": "04012024", "meeting_no": 1, "speaker": "person_2"}),

    ("person_1#meeting_2", [0.5, 0.5, 0.5, 0.5], {"timestamp": "05012024", "meeting_no": 2, "speaker": "person_1"}),
    ("person_2#meeting_2", [0.4, 0.4, 0.4, 0.4], {"timestamp": "05012024", "meeting_no": 2, "speaker": "person_2"}),
]

In [None]:
# function to insert data
# Write every record in `data` into the index under `namespace`.
upsert_response = index.upsert(namespace=namespace, vectors=data)

In [None]:
# a func to get relevale id's
# Query for the ids of the relevant records: top 2 matches for the probe
# vector, restricted to the namespace (meeting title) and to meetings 1 and 2.
# Metadata is returned with each match.
downstr_response = index.query(
    namespace=namespace,
    vector=[0.45, 0.45, 0.45, 0.45],
    top_k=2,
    filter={"meeting_no": {"$in": [1, 2]}},
    include_metadata=True,
)

In [None]:
# Display the raw query response (matches with id, score and metadata).
downstr_response

{'matches': [{'id': 'person_1',
              'metadata': {'meeting_no': 2.0, 'timestamp': '05012024'},
              'score': 0.9,
              'values': []},
             {'id': 'person_2',
              'metadata': {'meeting_no': 2.0, 'timestamp': '05012024'},
              'score': 0.719999969,
              'values': []}],
 'namespace': 'meeting-title'}

In [None]:
# a func to get data from id's : we need to filter by relevence scores!
# Fetch the stored records (values and metadata) for the ids the query returned.
# TODO: filter the matches by relevance score before fetching.
id_list = [hit['id'] for hit in downstr_response['matches']]
fetch_response = index.fetch(namespace=namespace, ids=id_list)
fetch_response

{'namespace': 'meeting-title',
 'vectors': {'person_1': {'id': 'person_1',
                          'metadata': {'meeting_no': 2.0,
                                       'timestamp': '05012024'},
                          'values': [0.5, 0.5, 0.5, 0.5]},
             'person_2': {'id': 'person_2',
                          'metadata': {'meeting_no': 2.0,
                                       'timestamp': '05012024'},
                          'values': [0.4, 0.4, 0.4, 0.4]}}}