In [4]:
import os
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import pandas as pd
import numpy as np


In [5]:
load_dotenv()

APIKEY = os.getenv("PINECONE_API_KEY")

pc = Pinecone(api_key=APIKEY)


In [6]:
index_name = "live-ai"

pc.create_index(
    name=index_name,
    dimension=1024,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"  # Must be a valid region for Pinecone serverless
    )
)


PineconeApiException: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2025-01', 'X-Cloud-Trace-Context': '4420ef31b5b76e276eba859494510db7', 'Date': 'Sat, 22 Feb 2025 23:31:36 GMT', 'Server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}


In [7]:
index = pc.Index(index_name)

In [8]:
df = pd.read_csv("recipes_with_descriptions.csv")

In [9]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients,description
0,7354,Apple-Frangipane Galette,"['6 medium apples (3 pounds/1.5 kg)', 'Galette...",Preheat the oven to 375°F (190°C). Line a baki...,apple-frangipane-galette-363437,"['6 medium apples (3 pounds/1.5 kg)', 'Galette...",Apple-Frangipane Galette is a rustic yet elega...
1,1049,Eggplant Omelet (Tortang Talong),['2 large Asian eggplants (about 1/3 pound/155...,Preheat the broiler.\nLay the eggplants in a s...,eggplant-omelet-tortang-talong,['2 large Asian eggplants (about 1/3 pound/155...,"Eggplant Omelet (Tortang Talong) is a bold, sa..."
2,6989,Blue Cheese Dip,"['Kosher salt', '1 garlic clove, coarsely chop...","Sprinkle salt over garlic and chop, occasional...",blue-cheese-dip-366409,"['Kosher salt', '1 garlic clove, coarsely chop...","Blue Cheese Dip is a bold, tangy, and creamy c..."
3,243,Hawaij,"['2 1/4 teaspoons black peppercorns', '7 teasp...","Finely grind peppercorns and seeds in grinder,...",hawaij,"['2 1/4 teaspoons black peppercorns', '7 teasp...",Hawaij is a traditional Yemeni spice blend bur...
4,12825,Spiced Fritters with Maple-Vanilla Syrup,"['2 cups pure maple syrup', '1 cinnamon stick'...",Combine maple syrup and cinnamon stick in medi...,spiced-fritters-with-maple-vanilla-syrup-231315,"['2 cups pure maple syrup', '1 cinnamon stick'...",Spiced Fritters with Maple-Vanilla Syrup is a ...


In [10]:
data = []
for i, row in df.iterrows():
    vector_id=str(row["Unnamed: 0"]) if "Unamed: 0" in df.columns else str(i)
    metadata={
        "title": row["Title"],
        "ingredients": row["Ingredients"],
        "instructions": row["Instructions"],
        "image_name": row["Image_Name"],
        "cleaned_ingredients": row["Cleaned_Ingredients"],
        "description": row["description"]
    }
    data.append({
        "id": vector_id,
        "text": row["description"],
        "metadata": metadata
    })

In [11]:
embeddings = pc.inference.embed(
    model="multilingual-e5-large",
    inputs=[d["text"] for d in data],
    parameters={"input_type": "passage", "truncate": "END"}
)

In [12]:
vectors = []
for i, d in enumerate(data):
    vector_id = d["id"]
    vector_embedding = embeddings[i]["values"]
    metadata = d["metadata"]
    vectors.append((vector_id, vector_embedding, metadata))

upsert_response = index.upsert(vectors=vectors, namespace="live-ai-namespace")
print("Upsert response:", upsert_response)


Upsert response: {'upserted_count': 50}


In [None]:
# Making a query
def get_results(query):

    # Embed the query using the multilingual-e5-large model
    query_embedding = pc.inference.embed(
        model="multilingual-e5-large",
        inputs=[query],
        parameters={"input_type": "query"}
    )

    # Query the index using the embedding
    results = index.query(
        namespace="live-ai-namespace",
        vector=query_embedding[0].values,
        top_k=3,
        include_values=False,
        include_metadata=True
    )
    return results

print(get_results("I want a spicy food"))

NameError: name 'result' is not defined