In [1]:
from pinecone import Pinecone, ServerlessSpec
import os
import dotenv
dotenv.load_dotenv()
import pandas as pd
import requests
import json

  from tqdm.autonotebook import tqdm


In [2]:
# Read the service credentials and the target index name from the environment.
# Each name is None when its variable is not set.
gemini_api_key, pinecone_api, index_name = (
    os.environ.get(var_name)
    for var_name in ('GEMINI_API_KEY', 'PINECONE_API_KEY', 'PINECONE_INDEX_NAME')
)

In [None]:
from google import genai
from google.genai import types

# Gemini client used by get_embedding; authenticated with the key read from
# the GEMINI_API_KEY environment variable.
client = genai.Client(api_key=gemini_api_key)

In [None]:
def get_embedding(user_input, model="text-embedding-004", output_dimensionality=768):
    """Embed ``user_input`` with the Gemini embedding API and return the vector.

    Args:
        user_input: Content to embed; passed unchanged as ``contents`` to
            ``client.models.embed_content``.
        model: Name of the embedding model. Defaults to
            ``"text-embedding-004"``.
        output_dimensionality: Requested length of the returned vector.
            Defaults to 768, the dimension this notebook's index is created
            with.

    Returns:
        The ``values`` of the first embedding in the API response.
    """
    result = client.models.embed_content(
        model=model,
        contents=user_input,
        config=types.EmbedContentConfig(output_dimensionality=output_dimensionality),
    )
    # Only the first embedding of the response is returned.
    return result.embeddings[0].values


In [5]:
# Pass the key through the `api_key` keyword so the client actually uses the
# value loaded into `pinecone_api`, instead of relying on an implicit
# PINECONE_API_KEY environment lookup.
pc = Pinecone(api_key=pinecone_api)


# trying embeddings

In [6]:
# Smoke test: embed a short sentence and print the raw vector.
output = get_embedding("This is life")
print(output)

[-0.04860152, -0.034702502, -0.057613764, -0.027804255, -0.063004814, 0.0109969815, 0.013630775, 0.055198826, -0.0005110966, 0.06497508, -0.021059845, -0.014600874, 0.058970124, -0.032176983, -0.033848956, -0.05377753, -0.06540487, 0.010489974, -0.017473018, -0.035617243, 0.035842445, 0.036073335, -0.015176278, -0.027828649, -0.012233386, 0.011216728, 0.0063017323, 0.0042645335, 0.0014638172, -0.024745107, 0.033255354, 0.043358292, 0.0049319738, 0.031842817, -0.0023280238, 0.0013260462, -0.02509834, 0.015194871, -0.030408023, -0.059862256, -0.033109497, 0.022382341, -0.033533335, 0.029142674, 0.005424468, -0.045283087, 0.010341908, -0.060548056, 0.0034888086, 0.10062463, -0.014363857, -0.038134363, -0.075842075, 0.021946978, 0.042442255, 0.0004601971, -0.034194056, -0.066280775, 0.06079436, -0.05134312, -0.025084337, -0.011910327, 0.009015025, -0.0027222177, -0.010203241, -0.0008148636, 0.0071312236, 0.04308618, -0.039141763, 0.032861847, -0.017023949, -0.0030653651, -0.0513789, 0.0303

In [7]:
# Length of the sample embedding vector.
len(output)

768

# wrangle dataset

In [8]:
# Load the pre-chunked document text from disk.
file_path = 'chunks.json'
# JSON is UTF-8 by specification; state the encoding explicitly so decoding
# does not depend on the platform's default locale encoding.
with open(file_path, 'r', encoding='utf-8') as file:
    texts = json.load(file)

In [9]:
# Peek at the second chunk to sanity-check the loaded text.
print(texts[1])

It also contains an Agreement to Arbitrate which will, with limited exception, require you to submit claims you have against us or related third parties to binding and final arbitration, unless you opt out of the Agreement to Arbitrate in accordance with section 19.B.9 (see Legal Disputes, Section B ("Agreement to Arbitrate")). If you do not opt out: (1) you will only be permitted to pursue claims against us or related third parties on an individual basis, not as a plaintiff or class member in any class or representative action or proceeding; (2) you will only be permitted to seek relief (including monetary, injunctive, and declaratory relief) on an individual basis; and (3) you are waiving your right to pursue disputes or claims and seek relief in a court of law and to have a jury trial. 2. About eBay eBay is a marketplace that allows users to offer, sell, and buy goods and services in various geographic locations using a variety of pricing formats. eBay is not a party to contracts fo

In [10]:
# Embed every chunk, keeping the vectors in the same order as `texts`
# (one get_embedding call per chunk).
embeddings = [get_embedding(chunk) for chunk in texts]


In [11]:
# Length of the second chunk's embedding vector.
len(embeddings[1])

768

# pushing data to vector database

In [12]:
# Create the serverless index only when it does not exist yet, so this cell
# can be re-run (e.g. Restart & Run All) without failing on a duplicate name.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # must equal the length of the vectors from get_embedding
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1',
        ),
    )

In [13]:
import time

# Poll once per second until the index reports it is ready.
while True:
    if pc.describe_index(index_name).status.ready:
        break
    print("index is not ready")
    time.sleep(1)

index = pc.Index(index_name)

# One record per chunk: a positional id, the embedding, and the source text
# kept as metadata so query results can show it.
vectors = [
    {"id": f"doc-{position}", "values": vector, "metadata": {"text": chunk}}
    for position, (chunk, vector) in enumerate(zip(texts, embeddings))
]

# Kept as the last expression so the cell displays the upsert response.
index.upsert(vectors=vectors, namespace='ns1')

{'upserted_count': 41}

# get closest documents

In [14]:
# Embed the question; `output` and `embedding` both refer to the same vector.
embedding = output = get_embedding("Is ebay a car broker?")

In [15]:
# Fetch the two closest records from namespace 'ns1', returning their stored
# metadata but not the vectors themselves.
results = index.query(
    vector=embedding,
    namespace='ns1',
    top_k=2,
    include_metadata=True,
    include_values=False,
)

In [16]:
# Display the raw query response (matched ids and their metadata text).
results

{'matches': [{'id': 'doc-6',
              'metadata': {'text': 'eBay has never: (1) held title for any '
                                   'vehicle listed on eBay sites; (2) '
                                   'inspected any vehicle listed on eBay '
                                   'sites; (3) set or negotiated prices for '
                                   'any vehicle listed on eBay sites; (4) had '
                                   'any vehicle listed on eBay sites in its '
                                   'possession; (5) shipped, distributed, or '
                                   'delivered any vehicle listed on eBay '
                                   'sites; or (6) sold a vehicle listed on '
                                   'eBay sites by consignment. IN ADDITION TO, '
                                   'AND WITHOUT LIMITING THE GENERALITY OF THE '
                                   'DISCLAIMERS OF WARRANTIES DETAILED IN '
                                   'SECTIO