In [7]:
import os
import pandas as pd
import numpy as np

In [8]:
data = pd.read_csv(
    "data/Food Ingredients and Recipe Dataset with Image Name Mapping.csv"
)
data.head()

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho..."


In [9]:
# Cleanup

# drop rows that have Title with less than length 1
data = data[data["Title"].str.len() > 1]

# check the types of the column Title
# data["Title"].apply(type).value_counts()

In [10]:
from langchain.schema import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Qdrant

# embedding config - using All MiniLM L6 v2
os.environ["OPENAI_API_KEY"] = "random-string"
embeddings = OpenAIEmbeddings(openai_api_base="http://localhost:8444/v1")

In [11]:
docs=[]

for index, row in data.iterrows():
    vector_embedding = embeddings.embed_query(row["Title"])
    docs.append(Document(
                page_content=row["Title"], metadata={"recipe": row["Instructions"], "image": f"{row['Image_Name']}.jpg"}
             ))
vectorstore = Qdrant.from_documents(
    docs,
    embeddings,
    url="http://localhost:6333",  # Qdrant gRPC API endpoint
    collection_name="new_recipe_collection",
)

100%|██████████| 1/1 [00:05<00:00,  5.38s/it]
100%|██████████| 1/1 [00:00<00:00,  3.54it/s]
100%|██████████| 1/1 [00:00<00:00,  3.52it/s]
100%|██████████| 1/1 [00:00<00:00,  4.68it/s]
100%|██████████| 1/1 [00:00<00:00,  3.13it/s]
100%|██████████| 1/1 [00:00<00:00,  4.06it/s]
100%|██████████| 1/1 [00:00<00:00,  4.21it/s]
100%|██████████| 1/1 [00:00<00:00,  4.27it/s]
100%|██████████| 1/1 [00:00<00:00,  4.24it/s]
100%|██████████| 1/1 [00:00<00:00,  4.07it/s]
100%|██████████| 1/1 [00:00<00:00,  4.68it/s]
100%|██████████| 1/1 [00:00<00:00,  4.32it/s]
100%|██████████| 1/1 [00:00<00:00,  4.09it/s]
100%|██████████| 1/1 [00:00<00:00,  4.47it/s]
100%|██████████| 1/1 [00:00<00:00,  4.12it/s]
100%|██████████| 1/1 [00:00<00:00,  4.62it/s]
100%|██████████| 1/1 [00:00<00:00,  4.80it/s]
100%|██████████| 1/1 [00:00<00:00,  3.79it/s]
100%|██████████| 1/1 [00:00<00:00,  4.45it/s]
100%|██████████| 1/1 [00:00<00:00,  4.71it/s]
100%|██████████| 1/1 [00:00<00:00,  4.28it/s]
100%|██████████| 1/1 [00:00<00:00,

UnexpectedResponse: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"result":null,"status":{"error":"Json deserialize error: expected value at line 1 column 547499"},"time":0.0}'

In [None]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.chat_models import ChatOpenAI

# Defining metadata fields
metadata_field_info = [
    AttributeInfo(
        name="recipe",
        description="The recipe of the dish",
        type="string",
    ),
    AttributeInfo(
        name="image",
        description="The file name of the image",
        type="string",
    ),
]
document_content_description = "Title of the recipe"

# Chat completion llm - Vicuna 7B
llm = ChatOpenAI(
    openai_api_base="http://localhost:8111/v1", max_tokens=128
)
retriever = SelfQueryRetriever.from_llm(
    llm, vectorstore, document_content_description, metadata_field_info, verbose=True
)

In [None]:
# This example only specifies a relevant query
retriever.get_relevant_documents("What are recipes with chicken, rice and beans?")