In [40]:
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import pandas as pd
import time
import os
import dotenv
dotenv.load_dotenv()

True

In [20]:
token = os.getenv("RUNPOD_TOKEN")
open_ai_base_url=os.getenv("RUNPOD_EMBEDDING_URL")
model_name=os.getenv("MODEL_NAME")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("PINECONE_INDEX_NAME")

In [21]:
pc = Pinecone(api_key=pinecone_api_key)
client = OpenAI(
    api_key=token,
    base_url=open_ai_base_url
)

# Try out embeddings

In [22]:
output = client.embeddings.create(input=["hello world"], model=model_name)
embedding = output.data[0].embedding
print(embedding)

[0.015176702290773392, -0.02265065535902977, 0.00859504658728838, -0.0742514431476593, 0.003908572718501091, 0.0027112148236483335, -0.031238075345754623, 0.044630181044340134, 0.04405056685209274, -0.007908663712441921, -0.02519790083169937, -0.03337348997592926, 0.014375921338796616, 0.04639952629804611, 0.008610299788415432, -0.016137639060616493, 0.007561658509075642, -0.019020449370145798, -0.11458028852939606, -0.018105272203683853, 0.1262945681810379, 0.029728032648563385, 0.025274166837334633, -0.0342886708676815, -0.041091494262218475, 0.006669359747320414, 0.010303379036486149, 0.022437114268541336, 0.00444242637604475, -0.12727075815200806, -0.01610713265836239, -0.020255940034985542, 0.047375716269016266, 0.011561748571693897, 0.06815025955438614, 0.00739006232470274, -0.017998501658439636, 0.04084744676947594, -0.01028049923479557, 0.02373361587524414, 0.010509294457733631, -0.028538301587104797, 0.008137458004057407, -0.015138569287955761, 0.030948270112276077, -0.0659538

In [23]:
len(embedding)

384

# Wrangle dataset

In [24]:
df = pd.read_json("products/products.jsonl", lines=True)

In [25]:
df.head(2)

Unnamed: 0,name,category,description,price,rating,image_path
0,Barcelona Crew Socks,Crew Socks,"Comfortable and stylish, these Barcelona Crew ...",12.99,4.5,barcelona_crew_socks.jpg
1,Barcelona Women's Classic Cleats,Women's Cleats,"Designed for performance and style, the Barcel...",79.99,4.7,barcelona_womens_classic_cleats.jpg


In [27]:
df['text'] = df['name']+" : "+df["description"]+\
        " -- Price: "+df["price"].astype(str) +\
            " -- Rating: "+df["rating"].astype(str)

In [28]:
df['text'].head(2)

0    Barcelona Crew Socks : Comfortable and stylish...
1    Barcelona Women's Classic Cleats : Designed fo...
Name: text, dtype: object

In [29]:
texts = df['text'].tolist()

In [30]:
texts[:2]

['Barcelona Crew Socks : Comfortable and stylish, these Barcelona Crew Socks are perfect for fans looking to showcase their support. Made with breathable materials, they ensure your feet stay cool and dry during intense matches or casual wear. -- Price: 12.99 -- Rating: 4.5',
 "Barcelona Women's Classic Cleats : Designed for performance and style, the Barcelona Women's Classic Cleats offer superior traction and comfort on the field. Featuring the iconic club colors and emblem, these cleats are a must-have for dedicated female players. -- Price: 79.99 -- Rating: 4.7"]

In [33]:
with open('products/menu_items_text.txt') as f:
    menu_items_text = f.read()

menu_items_text = "Menu: " + menu_items_text
texts.append(menu_items_text)

In [35]:
with open('products/Barcelona_central_about_us.txt') as f:
    Barcelona_central_about_section = f.read()

Barcelona_central_about_section = "Soccer shop Barcelona Central about section: " + Barcelona_central_about_section
texts.append(Barcelona_central_about_section)

# Generating the Embeddings

In [36]:
output = client.embeddings.create(input=texts, model=model_name)

In [37]:
embeddings = output.data

# Push data to database

In [39]:
pc.create_index(
    name=index_name,
    dimension=384,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1",
    )
)

{
    "name": "soccershop",
    "metric": "cosine",
    "host": "soccershop-22zbnw4.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [41]:
# wait for the index to be ready
while not pc.describe_index(index_name).status.ready:
    time.sleep(1)

index = pc.Index(index_name)

vectors = []
for text,e in zip(texts,embeddings):
    entry_id = text.split(":")[0]
    vectors.append({
        "id": entry_id,
        "values": e.embedding,
        "metadata": {"text": text}
    })

index.upsert(vectors=vectors,
             namespace="ns1")
    

{'upserted_count': 18}

# Get Closest documents

In [48]:
output = client.embeddings.create(input=["Are the barcelona women's advanced cleats free?"], model=model_name)
embedding = output.data[0].embedding

In [49]:
results = index.query(
    namespace='ns1',
    vector=embedding,
    top_k=3,
    include_values=False,
    include_metadata=True
)

In [50]:
results

{'matches': [{'id': "Barcelona Women's Advanced Cleats ",
              'metadata': {'text': "Barcelona Women's Advanced Cleats : Step "
                                   "up your game with the Barcelona Women's "
                                   'Advanced Cleats. Featuring cutting-edge '
                                   'technology, these cleats provide superior '
                                   'support and flexibility for advanced '
                                   'players. -- Price: 92.99 -- Rating: 4.8'},
              'score': 0.836881399,
              'values': []},
             {'id': "Barcelona Women's Classic Cleats ",
              'metadata': {'text': "Barcelona Women's Classic Cleats : "
                                   'Designed for performance and style, the '
                                   "Barcelona Women's Classic Cleats offer "
                                   'superior traction and comfort on the '
                                   'field. Featu