In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install unsloth torch pandas duckduckgo_search



In [3]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

def load_llama_model():
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit",  # or whichever you're using
        max_seq_length = 2048,
        dtype = torch.float16,  # ✅ explicitly set dtype
        load_in_4bit = True,
    )
    model.eval()
    return model, tokenizer


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
import os
import pandas as pd
import torch
from duckduckgo_search import DDGS



device = "cuda" if torch.cuda.is_available() else "cpu"
if device != "cuda":
    print("⚠️ No GPU found. This may be slow or cause issues with large models.")

model, tokenizer = load_llama_model()

def duckduckgo_density_search(ingredient: str) -> str:
    clean_ingredient = ingredient.strip()
    if clean_ingredient.startswith("[") and clean_ingredient.endswith("]"):
        clean_ingredient = clean_ingredient[1:-1].replace("'", "").strip()

    query = f"{clean_ingredient} density in g/ml OR {clean_ingredient} grams per cup"
    try:
        with DDGS() as ddgs:
            results = ddgs.text(query, max_results=3)
            for r in results:
                snippet = r.get("body", "")
                if "g" in snippet and any(unit in snippet for unit in ["ml", "cup", "grams"]):
                    return snippet
    except Exception as e:
        print(f"[❌] DuckDuckGo search failed: {e}")
    return "NOT_FOUND"

def process_ingredient_list(ingredient_text: str, recipe_steps: str) -> tuple[str, str]:
    """
    First prompt: process ingredients with recipe steps to generate:
      - Standard format (e.g., "2 cups flour, 1 tsp salt")
      - Metric conversion (e.g., "240g flour, 5g salt")
    If NEED_SEARCH is detected in the metric conversion, call DuckDuckGo and
    then use a secondary prompt with the density info.
    """
    prompt = f"""### Instruction:
Convert this list of ingredients into a standardized format with inferred quantities where possible, and convert them to metric. Return as two lines only.
Line 1: Standard format (e.g., "2 cups flour, 1 tsp salt")
Line 2: Metric format (e.g., "240g flour, 5g salt")
If any metric data is missing, mention "NEED_SEARCH" in its place.

### Example:
Input: "flour, salt"
Recipe Steps: "Mix ingredients and bake."
Output:
"1 cup flour, 1/2 tsp salt"
"240g flour, 2.5g salt"

### Ingredients:
{ingredient_text}

### Recipe Steps:
{recipe_steps}

### Response:
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    if "### Response:" in response:
        response = response.split("### Response:")[-1].strip()

    lines = response.split("\n")
    if len(lines) >= 2:
        standard_line = lines[0].strip()
        metric_line = lines[1].strip()
    else:
        standard_line, metric_line = response.strip(), ""

    # If metric conversion is incomplete, perform DuckDuckGo search and a secondary prompt.
    if "NEED_SEARCH" in metric_line:
        search_info = duckduckgo_density_search(ingredient_text)
        revised_prompt = f"""### Instruction:
Based on the following ingredients, recipe steps, and additional density information,
revise the metric conversion so that missing values are filled in.
Return as one line containing the complete metric conversion.
If density data is still missing for any ingredient, mention "NEED_SEARCH".

### Ingredients:
{ingredient_text}

### Recipe Steps:
{recipe_steps}

### Additional Density Information:
{search_info}

### Revised Metric Conversion:
"""
        revised_inputs = tokenizer(revised_prompt, return_tensors="pt").to(device)
        revised_outputs = model.generate(
            **revised_inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )
        revised_response = tokenizer.decode(revised_outputs[0], skip_special_tokens=True).strip()
        if revised_response:
            metric_line = revised_response

    return standard_line, metric_line

def main():
    input_csv = "/content/drive/MyDrive/demeter/batch-401to600.csv"
    output_csv = "/content/drive/MyDrive/demeter/opBatch-401to600.csv"

    if not os.path.exists(input_csv):
        print(f"[❌] CSV not found at {input_csv}")
        return

    df = pd.read_csv(input_csv)

    if 'ingredients' not in df.columns:
        print("[❌] 'ingredients' column missing.")
        return
    if 'recipe_steps' not in df.columns:
        df['recipe_steps'] = ""

    print("[🔁] Processing ingredients...")

    df[['standard_format', 'metric_conversion']] = df.apply(
        lambda row: pd.Series(process_ingredient_list(row['ingredients'], row['recipe_steps'])),
        axis=1
    )

    df.to_csv(output_csv, index=False)
    print(f"[✅] Output saved to {output_csv}")

if __name__ == "__main__":
    main()


==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
[🔁] Processing ingredients...
[❌] DuckDuckGo search failed: https://lite.duckduckgo.com/lite/ return None. params=None content=None data={'q': 'vanilla wafer crumbs, powdered sugar, cocoa powder, unsalted butter, cream cheese, sugar, eggs, sour cream, all-purpose flour, vanilla, salt, butterscotch chips, "hersheys semi-sweet chocolate chips", white chocolate chips, shortening density in g/ml OR vanilla wafer crumbs, powdered sugar, cocoa powder, unsalted butter, cream cheese, sugar, eggs, sour cream, all-purpose flour, vanilla, salt, but