In [1]:
import os
from time import sleep
from time import time

import dotenv
# Load .env before importing the libraries below so that any environment
# variables they read at import time are already populated.
dotenv.load_dotenv()

import numpy as np
import pandas as pd
import requests
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Runtime configuration pulled from environment variables (each is None when unset).
embedding_model = os.environ.get("HF_EMBEDDING_MODEL")  # embedding model name
pinecone_api_key = os.environ.get("PINECONE_API_KEY")   # credential for the Pinecone client
hf_token = os.environ.get("HF_API_TOKEN")               # Hugging Face access token

In [3]:
# Confirm which embedding model name was picked up from the environment.
print(embedding_model)

sentence-transformers/all-MiniLM-L6-v2


In [4]:
# Create the Pinecone client, authenticating with the API key read from the environment.
pc = Pinecone(api_key=pinecone_api_key)

## Try out Embeddings

In [5]:
# Load the embedding model named by HF_EMBEDDING_MODEL, falling back to the MiniLM
# checkpoint this notebook was written against when the variable is unset.
# `token` replaces the deprecated `use_auth_token` keyword of SentenceTransformer.
model = SentenceTransformer(
    embedding_model or "sentence-transformers/all-MiniLM-L6-v2",
    token=hf_token,
)



In [6]:
# Encode one sample sentence as a quick sanity check of the embedding model.
text_input = ["hello there"]
batch_embeddings = model.encode(text_input)  # numpy array, one row per input sentence
embedding = batch_embeddings[0]

print(embedding)  # Will print the 384-dimensional embedding vector

[-9.44389775e-02  7.32229427e-02  4.22687568e-02  5.43074869e-02
 -6.71972483e-02 -8.54509398e-02  5.02346195e-02  5.22130243e-02
 -3.30305360e-02 -1.92673616e-02  1.98906828e-02 -1.87793449e-02
 -4.50837091e-02 -2.30843984e-02 -8.01779889e-03 -5.37793851e-04
  1.77754201e-02 -7.73390476e-03 -1.33809924e-01  2.44181091e-03
 -3.39495279e-02  6.79327771e-02 -8.24581757e-02  5.16677983e-02
 -4.81917933e-02 -6.25122488e-02  3.12229022e-02  2.16497984e-02
 -9.58177354e-03 -4.12142463e-02  1.41428690e-02  4.40271683e-02
  4.06081267e-02  4.40277047e-02  4.67462204e-02  5.66849709e-02
 -9.73806158e-02 -5.96724376e-02  5.62413335e-02 -1.05548371e-02
  1.87592916e-02 -4.90627140e-02  1.64432230e-03 -1.40135549e-02
  4.78914641e-02 -8.65959674e-02  1.77434143e-02 -1.61414612e-02
  6.85802326e-02 -1.93422486e-03 -2.59225629e-02 -5.88065423e-02
 -8.67195576e-02 -1.13075897e-02  1.70807749e-01  1.87362339e-02
  9.57627594e-03 -6.21815175e-02  5.96843734e-02 -4.49172035e-02
 -2.04865038e-02  2.81824

In [7]:
# Inspect the dimensionality of the single-sentence embedding.
embedding.shape

(384,)

In [8]:
# output

## Wrangle Dataset

In [9]:
# Load the product catalogue; the file is read as JSON Lines (one record per line).
products_path = "products/products.jsonl"
df = pd.read_json(products_path, lines=True)

In [10]:
# Preview the first two product records.
df.head(2)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp


In [11]:
# Build one descriptive string per product; this is the text that gets embedded.
# Non-string columns are cast with astype(str) before concatenation.
df['text'] = (
    df['name'] + " : " + df['description']
    + " -- Ingredients: " + df['ingredients'].astype(str)
    + " -- Price: " + df['price'].astype(str)
    + " -- rating: " + df['rating'].astype(str)
)

In [12]:
# Preview the combined per-product text column.
df['text'].head()

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
2    Latte : Smooth and creamy, our latte combines ...
3    Chocolate Chip Biscotti : Crunchy and delightf...
4    Espresso shot : A bold shot of rich espresso, ...
Name: text, dtype: object

In [13]:
# Collect the per-product texts into a plain list; later cells append further documents to it.
texts = df['text'].tolist()

In [14]:
# Read the shop's "about us" text. The encoding is passed explicitly so the result
# does not depend on the platform's default encoding (assumes the file is UTF-8).
with open('products/Merry\'s_way_about_us.txt', encoding='utf-8') as f:
    Merry_way_about_section = f.read()

Merry_way_about_section = "Coffee shop Merry's Way about section: " + Merry_way_about_section
# Guard the append so re-running this cell does not add the same document twice.
if Merry_way_about_section not in texts:
    texts.append(Merry_way_about_section)

In [15]:
# Read the plain-text menu listing. The encoding is passed explicitly so the result
# does not depend on the platform's default encoding (assumes the file is UTF-8).
with open('products/menu_items_text.txt', encoding='utf-8') as f:
    menue_items_text = f.read()

menue_items_text = "Menu Items: " + menue_items_text
# Guard the append so re-running this cell does not add the same document twice.
if menue_items_text not in texts:
    texts.append(menue_items_text)

## Generate Embeddings

In [16]:
# Display every document that will be embedded (product rows plus the two appended text files).
texts

["Cappuccino : A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- rating: 4.7",
 "Jumbo Savory Scone : Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea. -- Ingredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt'] -- Price: 3.25 -- rating: 4.3",
 "Latte : Smooth and creamy, our latte combines rich espresso with velvety steamed milk, creating a perfect balance of flavor and texture. Enjoy it as a comforting treat any time of day, whether you're starting your morning or taking a midday break. -- Ingredients: ['Espre

In [17]:
# Embed all documents in a single encode() call.
output = model.encode(texts)

In [18]:
# Check the embedding matrix shape: (number of documents, embedding dimension).
output.shape

(20, 384)

## Push data to database

In [20]:
index_name = "coffeeshop"

# Create the index only when it does not exist yet, so this cell survives a
# Restart & Run All; creating an index whose name is already taken is rejected.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=output.shape[1],  # must equal the width of the embeddings being stored
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

# Show the index description whether it was just created or already present.
pc.describe_index(index_name)

{
    "name": "coffeeshop",
    "metric": "cosine",
    "host": "coffeeshop-q4mme6u.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [21]:
# Wait for the index to be ready.
# The notebook imports bind `time` to the function time.time, which has no
# `.sleep` attribute, so the standalone `sleep` function is used here.
while not pc.describe_index(index_name).status['ready']:
    sleep(1)

index = pc.Index(index_name)

# One record per document: the id is the text before the first ":", and the full
# text is kept as metadata so query results can return it.
vectors = [
    {
        "id": text.split(":")[0].strip(),
        "values": e.tolist(),  # plain Python floats instead of a numpy row
        "metadata": {'text': text},
    }
    for text, e in zip(texts, output)
]
index.upsert(
    vectors=vectors,
    namespace="ns1"
)

{'upserted_count': 20}

## Get closest documents

In [22]:
# Embed the test question with the same model used for the indexed documents.
# NOTE: this rebinds `output`, which previously held the document embeddings.
output = model.encode(["Is Cappuccino lactose-free?"])

In [23]:
# encode() was given a one-element list, so `output` holds a single row;
# the query vector is sent as that row's flat list of floats rather than a nested list.
query_vector = np.asarray(output)[0].tolist()

# Retrieve the three closest documents, returning their stored text but not the raw vectors.
results = index.query(
    namespace="ns1",
    vector=query_vector,
    top_k=3,
    include_values=False,
    include_metadata=True
)

print(results)

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- rating: 4.7"},
              'score': 0.632982075,
              'values': []},
             {'id': 'Sugar Free Vanilla syrup',
              'metadata': {'text': 'Sugar Free Vanilla syrup : Enjoy the sweet '
                                   'flavor of vanilla without the 