In [None]:
import pandas as pd
import ast
from collections import Counter

# Read the full recipe dataset from the mounted Drive folder.
file_path = "/content/drive/MyDrive/dataset/full_dataset.csv"
df = pd.read_csv(file_path)

# String cells in the ingredients column are parsed as Python literals;
# any cell that is not a string is kept exactly as it was read.
df["ingredients"] = df["ingredients"].map(
    lambda cell: cell if not isinstance(cell, str) else ast.literal_eval(cell)
)

# Tally how often each ingredient entry occurs across all recipes.
ingredient_counter = Counter()
for ingredients in df["ingredients"]:
    ingredient_counter.update(ingredients)

# Header and the 50 most frequent entries, one per line of output.
print("🔹 Top 50 most common ingredients:", ingredient_counter.most_common(50), sep="\n")

print(f"✅ Dataset contains {len(df)} recipes with {df['title'].nunique()} unique dish names.")


🔹 Top 50 most common ingredients:
[('2 eggs', 95855), ('1 tsp. vanilla', 85285), ('1/2 tsp. salt', 80645), ('1 tsp. salt', 80373), ('1 egg', 77876), ('1/2 teaspoon salt', 70412), ('1 c. sugar', 65050), ('1 teaspoon salt', 63534), ('1/4 teaspoon salt', 47646), ('2 c. sugar', 44173), ('salt and pepper', 44054), ('3 eggs', 43764), ('4 eggs', 42933), ('1/4 tsp. salt', 39618), ('salt', 39402), ('salt and pepper to taste', 34547), ('1/2 c. sugar', 33990), ('2 tablespoons olive oil', 32874), ('1 teaspoon vanilla extract', 32555), ('1 tablespoon olive oil', 30824), ('1 cup sugar', 28603), ('2 c. flour', 28130), ('1 c. milk', 27001), ('1 tsp. cinnamon', 24237), ('1/2 c. milk', 23178), ('1 teaspoon vanilla', 23052), ('2 tablespoons butter', 22780), ('1 can cream of mushroom soup', 22422), ('1/4 tsp. pepper', 22329), ('1 teaspoon baking soda', 21971), ('1 tsp. baking soda', 21285), ('pepper', 21183), ('1 tsp. soda', 20706), ('1 medium onion, chopped', 20668), ('1 tsp. baking powder', 20313), ('1/

In [None]:
# Rebuild the frequency table of ingredient entries over every recipe in `df`.
ingredient_counter = Counter(
    entry
    for ingredients in df["ingredients"]
    for entry in ingredients
)

def filter_recipe(ingredients, counts=None, min_occurrences=1, min_common=2, max_common=15):
    """Return True when a recipe has an acceptable number of "common" ingredients.

    An ingredient entry is "common" when it occurs at least ``min_occurrences``
    times in ``counts``.

    NOTE: with the defaults, ``counts`` is the notebook-level
    ``ingredient_counter``. That counter is built from the very recipes being
    filtered, so every entry of every recipe is present in it, and the default
    check reduces to "the recipe has between 2 and 15 ingredient entries".
    Raise ``min_occurrences`` to filter on real ingredient frequency.

    Args:
        ingredients: iterable of ingredient entries for one recipe.
        counts: mapping of entry -> occurrence count; defaults to the
            notebook-level ``ingredient_counter``.
        min_occurrences: occurrences needed for an entry to count as common.
        min_common: smallest accepted number of common entries (inclusive).
        max_common: largest accepted number of common entries (inclusive).

    Returns:
        bool: whether the number of common entries lies in
        ``[min_common, max_common]``.
    """
    if counts is None:
        counts = ingredient_counter
    common_count = sum(1 for ing in ingredients if counts.get(ing, 0) >= min_occurrences)
    return min_common <= common_count <= max_common

# Keep the recipes accepted by filter_recipe.
df_filtered = df.loc[df["ingredients"].map(filter_recipe)]

# Draw a fixed-seed sample of 50,000 recipes so the subset is repeatable.
df_sampled = df_filtered.sample(random_state=42, n=50000)

# Write the sample to the working directory without the index column.
df_sampled.to_csv(path_or_buf="filtered_recipenlg_50k.csv", index=False)

print(f"✅ Dataset reduced to {len(df_sampled)} rows and saved as filtered_recipenlg_50k.csv")


✅ Dataset reduced to 50000 rows and saved as filtered_recipenlg_50k.csv


In [None]:
# Reload the sampled recipes from Drive.
df = pd.read_csv("/content/drive/MyDrive/dataset/filtered_recipenlg_50k.csv")

# Add a 1-based sequential id as the first column, unless one is already there.
if not ("id" in df.columns):
    df.insert(loc=0, column="id", value=range(1, df.shape[0] + 1))

# Note: this writes to the working directory, not back to the Drive path read above.
df.to_csv(path_or_buf="filtered_recipenlg_50k.csv", index=False)

print("✅ 'id' column added and dataset saved!")

✅ 'id' column added and dataset saved!


In [None]:
import pandas as pd
import requests
import numpy as np
import time
import json
import os

file_path = "/content/filtered_recipenlg_50k.csv"
df = pd.read_csv(file_path)

JINA_API_URL = "https://api.jina.ai/v1/embeddings"

# The API token is read from the environment instead of being pasted into the
# notebook, so it cannot leak when the notebook is shared or committed.
JINA_API_KEY = os.environ.get("JINA_API_KEY")
if not JINA_API_KEY:
    raise RuntimeError(
        "JINA_API_KEY is not set. Define it first, e.g. "
        "os.environ['JINA_API_KEY'] = getpass.getpass()."
    )
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {JINA_API_KEY}"
}

# Text sent to the embedding model: title, ingredients and directions joined
# by single spaces. Missing cells are replaced by "" first; otherwise one NaN
# would turn the whole concatenation into NaN for that recipe.
df['recipe_text'] = (
    df['title'].fillna("") + " "
    + df['ingredients'].fillna("") + " "
    + df['directions'].fillna("")
)

# Checkpoint files written by the batching loop, relative to the working directory.
EMBEDDINGS_FILE = "recipe_embeddings.npy"
IDS_FILE = "recipe_ids.npy"
LOG_FILE = "processed_batches.txt"

if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(IDS_FILE):
    # Resume from the previous run.
    # allow_pickle=True: the loop saves whatever get_embeddings returned, which
    # may produce an object array that np.load refuses to read by default.
    # NOTE(review): unpickling executes arbitrary code; only load checkpoints
    # written by this notebook.
    embeddings_list = np.load(EMBEDDINGS_FILE, allow_pickle=True).tolist()
    recipe_ids = np.load(IDS_FILE).tolist()
    if os.path.exists(LOG_FILE):
        # One batch number per line; the file is closed as soon as it is read.
        with open(LOG_FILE) as log_file:
            processed_batches = set(map(int, log_file.read().split()))
    else:
        processed_batches = set()
else:
    # Fresh start: nothing embedded yet.
    embeddings_list = []
    recipe_ids = []
    processed_batches = set()


def get_embeddings(text_list):
    """Embed a batch of texts with the Jina embeddings API.

    Args:
        text_list: list of strings to embed in a single request.

    Returns:
        A list with one embedding vector (list of floats) per input text, in
        the order the API returned them, or None when the request fails or the
        API answers with a non-200 status. The vectors are taken from the
        "embedding" field of each response item, so the caller can stack them
        directly into a 2-D array.
    """
    data = {
        "model": "jina-clip-v2",
        "dimensions": 1024,
        "normalized": True,
        "embedding_type": "float",
        "input": [{"text": text} for text in text_list]
    }

    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.post(JINA_API_URL, headers=HEADERS, json=data, timeout=60)
    except requests.RequestException as exc:
        # Network failures follow the same contract as HTTP errors: report and return None.
        print(f"❌ Error: request failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code} - {response.text}")
        return None

    # Each item of "data" is a dict wrapping the vector; keep only the vector.
    return [item["embedding"] for item in json.loads(response.text)["data"]]


BATCH_SIZE = 100
MAX_BATCHES = 50

# Batches that failed during this run. They are never written to LOG_FILE, so
# running the cell again retries exactly these.
failed_batches = []

for i in range(0, BATCH_SIZE * MAX_BATCHES, BATCH_SIZE):
    # 1-based batch number; the range above already stops after MAX_BATCHES batches.
    batch_num = i // BATCH_SIZE + 1

    if batch_num in processed_batches:
        print(f"✅ Skipping batch {batch_num}, already processed.")
        continue

    # Positional slices: past the end of the dataset they are simply empty.
    batch_texts = df['recipe_text'].iloc[i:i+BATCH_SIZE].tolist()
    batch_ids = df['id'].iloc[i:i+BATCH_SIZE].tolist()

    if not batch_texts:
        # Fewer rows than BATCH_SIZE * MAX_BATCHES: nothing left to embed.
        print(f"✅ No recipes left for batch {batch_num}; stopping early.")
        break

    print(f"🔹 Processing batch {batch_num}...")

    batch_embeddings = get_embeddings(batch_texts)

    # Accept the batch only when every text got an embedding; otherwise the
    # ids and embeddings saved below would no longer line up.
    if batch_embeddings and len(batch_embeddings) == len(batch_ids):
        embeddings_list.extend(batch_embeddings)
        recipe_ids.extend(batch_ids)

        # Checkpoint after every batch so an interrupted run can resume.
        np.save(EMBEDDINGS_FILE, np.array(embeddings_list))
        np.save(IDS_FILE, np.array(recipe_ids))

        with open(LOG_FILE, "a") as f:
            f.write(f"{batch_num}\n")

        print(f"✅ Batch {batch_num} saved successfully!")
    else:
        failed_batches.append(batch_num)
        print(f"❌ Batch {batch_num} failed and was not saved.")

    # Pause between API calls.
    time.sleep(2)

if failed_batches:
    print(f"❌ Embedding finished, but these batches failed: {failed_batches}. Re-run this cell to retry them.")
else:
    print(f"✅ Embedding process complete for {MAX_BATCHES} batches!")


🔹 Processing batch 1...
✅ Batch 1 saved successfully!
🔹 Processing batch 2...
✅ Batch 2 saved successfully!
🔹 Processing batch 3...
✅ Batch 3 saved successfully!
🔹 Processing batch 4...
✅ Batch 4 saved successfully!
🔹 Processing batch 5...
✅ Batch 5 saved successfully!
🔹 Processing batch 6...
✅ Batch 6 saved successfully!
🔹 Processing batch 7...
✅ Batch 7 saved successfully!
🔹 Processing batch 8...
✅ Batch 8 saved successfully!
🔹 Processing batch 9...
✅ Batch 9 saved successfully!
🔹 Processing batch 10...
✅ Batch 10 saved successfully!
🔹 Processing batch 11...
✅ Batch 11 saved successfully!
🔹 Processing batch 12...
✅ Batch 12 saved successfully!
🔹 Processing batch 13...
✅ Batch 13 saved successfully!
🔹 Processing batch 14...
✅ Batch 14 saved successfully!
🔹 Processing batch 15...
✅ Batch 15 saved successfully!
🔹 Processing batch 16...
✅ Batch 16 saved successfully!
🔹 Processing batch 17...
✅ Batch 17 saved successfully!
🔹 Processing batch 18...
✅ Batch 18 saved successfully!
🔹 Processi

In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [None]:
import numpy as np

# Sanity check: read the embeddings checkpoint stored on Drive and report its shape.
embeddings = np.load(
    file="/content/drive/MyDrive/dataset/recipe_embeddings.npy",
    allow_pickle=True,
)

print("✅ Embeddings shape:", np.shape(embeddings))

✅ Embeddings shape: (5000, 1024)


In [None]:
import numpy as np

# Load the Drive checkpoint and stack its entries row-wise into one array.
embeddings = np.vstack(
    np.load("/content/drive/MyDrive/dataset/recipe_embeddings.npy", allow_pickle=True)
)

# Save the stacked array to the working directory.
np.save(file="recipe_embeddings.npy", arr=embeddings)

print("✅ Fixed Embeddings Shape:", np.shape(embeddings))


✅ Fixed Embeddings Shape: (5000, 1024)


In [None]:
import numpy as np

# Inspect the copy in the working directory: type and value of the first five entries.
embeddings = np.load(file="recipe_embeddings.npy", allow_pickle=True)

for i in range(0, 5):
    print(f"Entry {i}: Type -> {type(embeddings[i])}, Value -> {embeddings[i]}")


Entry 0: Type -> <class 'numpy.ndarray'>, Value -> [ 0.05629175  0.0982436  -0.00143876 ... -0.00510287 -0.01075493
  0.00813103]
Entry 1: Type -> <class 'numpy.ndarray'>, Value -> [ 0.17123969  0.13887526 -0.02934685 ...  0.00671081 -0.01772417
  0.03442037]
Entry 2: Type -> <class 'numpy.ndarray'>, Value -> [0.07313133 0.02419892 0.00527012 ... 0.00994329 0.00991657 0.00239227]
Entry 3: Type -> <class 'numpy.ndarray'>, Value -> [ 0.01256496  0.13852888 -0.00968193 ...  0.00173453  0.0113884
  0.00850088]
Entry 4: Type -> <class 'numpy.ndarray'>, Value -> [-0.03603871  0.08283365 -0.02122657 ...  0.00075825  0.00271362
  0.00188614]


In [None]:
import numpy as np

# Inspect the Drive checkpoint: type and value of the first five entries.
raw_embeddings = np.load(
    file="/content/drive/MyDrive/dataset/recipe_embeddings.npy",
    allow_pickle=True,
)

for i in range(0, 5):
    print(f"Entry {i}: Type -> {type(raw_embeddings[i])}, Value -> {raw_embeddings[i]}")


Entry 0: Type -> <class 'numpy.ndarray'>, Value -> [ 0.05629175  0.0982436  -0.00143876 ... -0.00510287 -0.01075493
  0.00813103]
Entry 1: Type -> <class 'numpy.ndarray'>, Value -> [ 0.17123969  0.13887526 -0.02934685 ...  0.00671081 -0.01772417
  0.03442037]
Entry 2: Type -> <class 'numpy.ndarray'>, Value -> [0.07313133 0.02419892 0.00527012 ... 0.00994329 0.00991657 0.00239227]
Entry 3: Type -> <class 'numpy.ndarray'>, Value -> [ 0.01256496  0.13852888 -0.00968193 ...  0.00173453  0.0113884
  0.00850088]
Entry 4: Type -> <class 'numpy.ndarray'>, Value -> [-0.03603871  0.08283365 -0.02122657 ...  0.00075825  0.00271362
  0.00188614]


In [None]:
import numpy as np

# Read the checkpoint from Drive exactly as it was stored.
raw_embeddings = np.load(
    file="/content/drive/MyDrive/dataset/recipe_embeddings.npy",
    allow_pickle=True,
)

# Stack the entries row-wise and keep a copy in the working directory.
fixed_embeddings = np.vstack(raw_embeddings)
np.save(file="recipe_embeddings.npy", arr=fixed_embeddings)

print("✅ Fixed Embeddings Shape:", np.shape(fixed_embeddings))

✅ Fixed Embeddings Shape: (5000, 1024)


In [None]:
# This cell does not load anything itself: it works on whatever `df` is
# currently defined in the kernel.
# Add a 1-based sequential id as the first column unless one already exists.
if not ("id" in df.columns):
    df.insert(loc=0, column="id", value=range(1, df.shape[0] + 1))

# Write the frame to Drive without the index column.
df.to_csv(path_or_buf="/content/drive/MyDrive/dataset/filtered_recipenlg_50k.csv", index=False)

print("✅ 'id' column added and dataset saved!")

✅ 'id' column added and dataset saved!


In [None]:
import faiss
import numpy as np
import pandas as pd
import requests
import json

# Recipe ids come from Drive, the embedding matrix from the working directory.
recipe_ids = np.load("/content/drive/MyDrive/dataset/recipe_ids.npy", allow_pickle=True)
embeddings = np.load("/content/recipe_embeddings.npy", allow_pickle=True)

# Keep only the recipes whose id has an embedding.
df = pd.read_csv("/content/drive/MyDrive/dataset/filtered_recipenlg_50k.csv")
df = df.loc[df["id"].isin(recipe_ids)]

# Put ids and embeddings into ascending-id order using one shared permutation,
# so row n of the index still corresponds to recipe_ids[n].
sorted_indices = recipe_ids.argsort()
embeddings = embeddings[sorted_indices]
recipe_ids = recipe_ids[sorted_indices]

# Flat L2 index over all embedding rows.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)


import os

JINA_API_URL = "https://api.jina.ai/v1/embeddings"

# The API token is read from the environment instead of being pasted into the
# notebook, so it cannot leak when the notebook is shared or committed.
JINA_API_KEY = os.environ.get("JINA_API_KEY")
if not JINA_API_KEY:
    raise RuntimeError(
        "JINA_API_KEY is not set. Define it first, e.g. "
        "os.environ['JINA_API_KEY'] = getpass.getpass()."
    )
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {JINA_API_KEY}"
}


def get_query_embedding(text):
    """Embed a single query string with the Jina embeddings API.

    Args:
        text: the query text.

    Returns:
        A 1-D float32 numpy array holding the embedding of ``text``, or None
        when the request fails or the API answers with a non-200 status.
    """
    data = {
        "model": "jina-clip-v2",
        "dimensions": 1024,
        "normalized": True,
        "embedding_type": "float",
        "input": [{"text": text}]
    }

    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.post(JINA_API_URL, headers=HEADERS, json=data, timeout=30)
    except requests.RequestException as exc:
        # Network failures follow the same contract as HTTP errors: report and return None.
        print(f"❌ Error: request failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code} - {response.text}")
        return None

    response_data = json.loads(response.text)

    # One input was sent, so the vector is in the first "data" item.
    return np.array(response_data["data"][0]["embedding"], dtype=np.float32)


def retrieve_similar_recipes(user_query, k=5):
    """Return the recipes whose embeddings are closest to the query.

    Args:
        user_query: free-text query (e.g. a list of ingredients).
        k: number of nearest neighbours requested from the index.

    Returns:
        A DataFrame with the columns "title", "ingredients" and "directions",
        ordered from most to least similar, or None when the query could not
        be embedded.
    """
    query_embedding = get_query_embedding(user_query)

    if query_embedding is None:
        print("❌ Error generating query embedding. Try again.")
        return None

    # The index expects a 2-D batch of queries; this is a batch of one.
    query_embedding = query_embedding.reshape(1, -1)
    distances, indices = index.search(query_embedding, k)

    # FAISS pads the result with -1 when it finds fewer than k neighbours.
    # A bare upper-bound check would let -1 through and pick the last recipe.
    matched_ids = [recipe_ids[i] for i in indices[0] if 0 <= i < len(recipe_ids)]

    # Rank of every matched id (0 = closest); the first occurrence wins.
    rank_by_id = {}
    for rank, recipe_id in enumerate(matched_ids):
        rank_by_id.setdefault(recipe_id, rank)

    results = df[df["id"].isin(matched_ids)]

    # isin() returns rows in DataFrame order; restore the similarity ranking.
    ranks = results["id"].map(rank_by_id).to_numpy()
    results = results.iloc[np.argsort(ranks, kind="stable")]

    return results[["title", "ingredients", "directions"]]

user_input = input("Enter ingredients: ")
similar_recipes = retrieve_similar_recipes(user_input, k=5)

# retrieve_similar_recipes returns None when the query could not be embedded;
# in that case there is nothing to list under the results header.
if similar_recipes is not None:
    print("🔹 Top Matching Recipes:")
    print(similar_recipes)
else:
    print("❌ No recipes retrieved. Try again.")


Enter ingredients: chicken. rice
🔹 Top Matching Recipes:
                           title  \
870   Chicken And Rice Casserole   
1124            Chicken And Rice   
1871      Baked Chicken And Rice   
2347            Chicken And Rice   
2503            Chicken And Rice   

                                            ingredients  \
870   ['4 c. cooked chicken', '1/4 c. onion', '2 Tbs...   
1124  ['chicken pieces', '1 c. long grain rice', '2 ...   
1871  ['whole chicken, cut up into pieces', '1 chopp...   
2347  ['1 1/3 c. Minute rice', '2 cans (10 3/4 oz.) ...   
2503  ['1 c. rice*', 'chicken pieces*', '1 can water...   

                                             directions  
870   ["Simmer the onion in butter.", "Add chicken; ...  
1124  ["There is no salt in this recipe.", "Scatter ...  
1871  ["Place chicken pieces in a greased baking dis...  
2347  ["Mix all", "ingredients except chicken and pa...  
2503  ["Grease 9 x 13-inch pan.", "Pour in rice and ...  


In [None]:
!pip install groq

Collecting groq
  Downloading groq-0.18.0-py3-none-any.whl.metadata (14 kB)
Downloading groq-0.18.0-py3-none-any.whl (121 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/121.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.9/121.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.18.0


In [None]:
from groq import Groq
import pandas as pd


# No key is written into the notebook: with api_key omitted, the Groq SDK
# reads it from the GROQ_API_KEY environment variable.
client = Groq()

def generate_recipe(user_query, retrieved_recipes):
    """Ask the chat model for a new recipe based on the query and retrieved recipes.

    Args:
        user_query: the ingredients text entered by the user.
        retrieved_recipes: DataFrame with "title", "ingredients" and
            "directions" columns; every row is quoted in the prompt.

    Returns:
        The full generated text, assembled from the streamed response chunks.
    """
    # One "Title / Ingredients / Directions" section per retrieved recipe.
    recipe_sections = []
    for _, row in retrieved_recipes.iterrows():
        recipe_sections.append(
            f"Title: {row['title']}\nIngredients: {row['ingredients']}\nDirections: {row['directions']}"
        )
    retrieved_text = "\n\n".join(recipe_sections)

    prompt = f"""
    The user wants a recipe with the following ingredients: {user_query}.

    Here are some similar recipes:
    {retrieved_text}

    Based on these, generate a **new recipe** that:
    - Uses the user-provided ingredients.
    - Follows a structured format: **Title, Ingredients, Directions**.
    - Can be adjusted for dietary needs if required.
    """

    chat_messages = [
        {"role": "system", "content": "You are an expert chef AI."},
        {"role": "user", "content": prompt},
    ]

    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=chat_messages,
        temperature=0.7,
        max_completion_tokens=1024,
        top_p=1,
        stream=True,
        stop=None,
    )

    # The response is streamed; collect each chunk's text, treating a missing delta as "".
    pieces = []
    for chunk in completion:
        pieces.append(chunk.choices[0].delta.content or "")

    return "".join(pieces)

# Ask for ingredients, then fetch the five closest recipes as context.
user_input = input("Enter ingredients: ")
retrieved_recipes = retrieve_similar_recipes(user_input, k=5)

# Retrieval returns None when it fails; only generate when there is context.
if retrieved_recipes is None:
    print("❌ No recipes retrieved. Try again.")
else:
    generated_recipe = generate_recipe(user_input, retrieved_recipes)
    print("🔹 AI-Generated Recipe:")
    print(generated_recipe)



Enter ingredients: chicken , rice
🔹 AI-Generated Recipe:
**Simple Chicken and Rice Bake**
Ingredients:
- 1 1/2 cups uncooked white or brown rice
- 2 lbs boneless, skinless chicken breast or thighs, cut into 1-inch pieces
- 2 cups chicken broth
- 1 tablespoon olive oil
- Salt and pepper to taste
- Optional: garlic powder, paprika, or other spices of your choice

Directions:
1. Preheat your oven to 375°F (190°C).
2. In a large mixing bowl, combine the rice, chicken broth, and a pinch of salt. Stir well to combine.
3. In a separate bowl, toss the chicken pieces with olive oil, salt, pepper, and any desired spices (like garlic powder or paprika) until the chicken is evenly coated.
4. In a 9x13-inch baking dish, create a layer of the rice mixture. You can use all of it or save some for the top, depending on your preference.
5. Add the chicken pieces on top of the rice layer. If you saved some rice, you can add it on top of the chicken.
6. Cover the baking dish with aluminum foil and bake fo