In [1]:
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import time
import pandas as pd
import os
import dotenv
dotenv.load_dotenv()

  from tqdm.autonotebook import tqdm


True

In [3]:
token = os.getenv("RUNPOD_TOKEN")
open_ai_base_url= os.getenv("RUNPOD_EMBEDDING_URL")
model_name = os.getenv("MODEL_NAME")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("PINECONE_INDEX_NAME")

In [4]:
pc = Pinecone(api_key=pinecone_api_key)
client = OpenAI(api_key=token, base_url=open_ai_base_url)

## Try out embeddings

In [6]:
output = client.embeddings.create(input="Hello, world!", model=model_name)
embedding = output.data[0].embedding
print(embedding)

[-0.03214000165462494, -0.05115540325641632, 0.03696252778172493, -0.05100279301404953, 0.028492582961916924, 0.020663609728217125, 0.037206705659627914, 0.08094518631696701, 0.012170773930847645, 0.015406140126287937, 0.008057891391217709, -0.03305567055940628, 0.03259783610701561, 0.060128964483737946, 0.030064482241868973, 0.019992118701338768, 0.0056084757670760155, -0.023044349625706673, -0.13124597072601318, -0.006180769298225641, 0.06940774619579315, 0.0671490952372551, -0.014314967207610607, -0.05741247907280922, 0.0011436330387368798, -0.003426130162551999, -0.024982517585158348, 0.02060256339609623, 0.007386400830000639, -0.14614085853099823, -0.04419631510972977, -0.028111053630709648, 0.042517587542533875, -0.0014851015293970704, 0.011812136508524418, -0.022647559642791748, 0.043128035962581635, 0.013483233749866486, -0.08131145685911179, 0.021640323102474213, 0.03058336116373539, -0.03607738018035889, 0.029988177120685577, 0.00032215352985076606, 0.015841083601117134, -0.0

In [7]:
len(embedding)

384

# Wrangle dataset

In [8]:
df = pd.read_json("products/products.jsonl", lines=True)

In [9]:
df.head(2)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp


In [10]:
df['text'] = df['name']+" : "+df["description"]+\
    " -- Ingredients: "+df["ingredients"].astype(str) +\
    " -- Price: "+ df["price"].astype(str) +\
    " -- Rating: "+ df["rating"].astype(str)

In [11]:
df['text'].head(2)

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
Name: text, dtype: object

In [12]:
texts = df['text'].tolist()

In [13]:
with open("products/Merry's_way_about_us.txt") as file:
    Merry_way_about_section = file.read()

Merry_way_about_section = "Coffee shop Merry's Way about section: "+Merry_way_about_section
texts.append(Merry_way_about_section)

In [14]:
with open("products/menu_items_text.txt") as file:
    menu_items_text = file.read()

menu_items_text = "Menu Items: "+menu_items_text
texts.append(menu_items_text)

## Generate Embeddings

In [15]:
output = client.embeddings.create(input=texts, model=model_name)

In [16]:
embeddings = output.data

## Push data to database

In [None]:
pc.create_index(
    name=index_name, 
    dimension=384, 
    metric="cosine",
    spec= ServerlessSpec(
        cloud="aws",
        region="us-east-1", 
    )
    
)

In [20]:
while not pc.describe_index(index_name).status.ready:
    time.sleep(1)


index = pc.Index(index_name)

vectors = []
for text,e in zip(texts,embeddings):
    entry_id = text.split(":")[0]
    vectors.append({
        "id": entry_id,
        "values":e.embedding,
        "metadata": {"text": text}
    })
index.upsert(vectors=vectors,
             namespace='ns1')

{'upserted_count': 20}

# Get Closest Documents

In [21]:
output = client.embeddings.create(input=["Is Cappuccino lactuse-free?"], model=model_name)
embedding = output.data[0].embedding

In [22]:
results = index.query(
    namespace='ns1',
    vector=embedding,
    top_k=3,
    include_values=False,
    include_metadata=True
)

In [23]:
results

{'matches': [{'id': 'Cappuccino ',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- Rating: 4.7"},
              'score': 0.696032405,
              'values': []},
             {'id': 'Menu Items',
              'metadata': {'text': 'Menu Items: Menu Items\n'
                                   '\n'
                                   'Cappuccino - $4.50\n'
