In [None]:
# Upload the backend's requirements file to Colab.
from pathlib import Path

from google.colab import files

uploaded = files.upload()

# The install cell reads "requirements.txt" by name, so fail here with a clear
# message if no file with that exact name is present in the working directory
# (for example because a differently named file was uploaded).
if not Path("requirements.txt").is_file():
    raise FileNotFoundError(
        "requirements.txt was not found in the working directory; "
        f"uploaded files: {sorted(uploaded)}"
    )

In [None]:
!pip install -r requirements.txt

In [None]:
!pip install -q bitsandbytes accelerate transformers sentencepiece --quiet

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantization for Zephyr-7B-β: NF4 weights with double quantization,
# computing in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model_id = "HuggingFaceH4/zephyr-7b-beta"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# generate() needs a pad token id; fall back to EOS when the tokenizer has none
# (same fallback the server cell of this notebook applies).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.float16,
    # Do not execute code shipped in the model repository. The server cell of
    # this notebook loads the same checkpoint with trust_remote_code=False.
    trust_remote_code=False,
)

print("Zephyr-7B-β loaded — ready for recipes!")

In [None]:

def generate_recipe(ingredients: list[str]) -> str:
    """Generate one recipe from the given ingredients with the loaded model.

    Relies on the module-level ``tokenizer`` and ``model`` created by the
    model-loading cell.

    NOTE: the FastAPI server cell later in this notebook defines another
    ``generate_recipe``; running that cell replaces this definition.

    Args:
        ingredients: Ingredient names; they are joined with ", " and embedded
            in the system prompt.

    Returns:
        Only the text generated by the model (the prompt is excluded), with
        surrounding whitespace stripped.
    """
    ings = ", ".join(ingredients)
    system_prompt = (
        "You are a world-class creative chef. Generate ONE beautiful, detailed recipe "
        f"using ONLY these ingredients: {ings}\n"
        "Include:\n"
        "- A catchy, creative title\n"
        "- Full ingredients with realistic quantities\n"
        "- 5–8 numbered steps with clear instructions\n"
        "- Cooking time and servings\n"
        "Do NOT repeat. Do NOT add extra ingredients."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Generate the recipe now."},
    ]

    # Let the tokenizer render the chat markup from its own template instead of
    # hand-writing turn delimiters (the previous "</|system|>" / "</|user|>"
    # closers were literal text, not turn terminators).
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # generate() needs a pad token id; fall back to EOS if none is configured.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_id,
    )

    # The returned sequence starts with the prompt tokens; keep only what was
    # generated after them rather than splitting the decoded text on a marker.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

In [None]:
# print() renders the recipe's line breaks; a bare expression would display the
# string's escaped repr. 'bell pepper' fixes the 'bellpepper' typo sent to the model.
print(generate_recipe(['chicken', 'garlic', 'lemon', 'bell pepper', 'potato']))

In [None]:
import asyncio
import getpass
import json
import os
import threading

import nest_asyncio
import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from pyngrok import ngrok, conf
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Patch asyncio to allow nested event loops; presumably needed so uvicorn can
# run inside the notebook's already-running loop.
nest_asyncio.apply()

# ngrok token: never hardcode it in the notebook. Read it from the
# NGROK_AUTH_TOKEN environment variable, falling back to a non-echoing prompt.
_ngrok_token = os.environ.get("NGROK_AUTH_TOKEN", "").strip()
if not _ngrok_token:
    _ngrok_token = getpass.getpass("Enter your ngrok auth token: ").strip()
if _ngrok_token:
    conf.get_default().auth_token = _ngrok_token
else:
    print("WARNING: no ngrok auth token provided; opening the tunnel may fail.")

# FastAPI app setup
app = FastAPI(title="AI Chef Pro — Zephyr-7B")
# CORS is needed for frontend to connect from localhost.
# The API uses no cookies or auth headers, so credentials stay disabled:
# wildcard origins must not be combined with allow_credentials=True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

print("Loading Zephyr-7B-β in 4-bit (takes ~90 seconds on first run)...")

MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# NF4 4-bit weights with double quantization; float16 is used for compute.
quant_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Tokenizer first; reuse EOS as the padding token when none is defined.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Quantized model weights, placed automatically across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=False,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=quant_config,
)

print("ZEPHYR-7B-β LOADED")

# Pydantic models for request/response

# Request body accepted by POST /api/generate-recipe.
class RecipeRequest(BaseModel):
    # Ingredient names from the client; the endpoint passes this list to
    # generate_recipe, which joins it into the model prompt.
    ingredients: list[str]

# Response body returned by POST /api/generate-recipe.
class RecipeResponse(BaseModel):
    # Recipe text produced by generate_recipe.
    recipe: str
    # Defaults to "success"; the endpoint in this notebook never overrides it.
    status: str = "success"

def generate_recipe(ingredients: list[str]) -> str:
    """Generate one recipe from the given ingredients with the loaded model.

    Relies on the module-level ``tokenizer`` and ``model`` loaded in this cell.

    Args:
        ingredients: Ingredient names; they are joined with ", " and embedded
            in the system prompt.

    Returns:
        Only the text generated by the model (the prompt is excluded), with
        surrounding whitespace stripped.
    """
    ings = ", ".join(ingredients)

    system_prompt = (
        "You are a world-renowned creative chef. Create ONE stunning, "
        f"restaurant-quality recipe using ONLY these ingredients: {ings}\n"
        "\n"
        "Requirements:\n"
        "- Give a catchy, creative title\n"
        "- Include servings and total cooking time\n"
        "- List ingredients with realistic quantities\n"
        "- Write 5–8 clear, detailed numbered steps\n"
        "- Optional: add a plating suggestion\n"
        "- NEVER repeat yourself\n"
        "- NEVER add ingredients not listed"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Generate the recipe now."},
    ]

    # Let the tokenizer render the chat markup from its own template instead of
    # hand-writing turn delimiters (the previous "</|system|>" / "</|user|>"
    # closers were literal text, not turn terminators).
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # generate() needs a pad token id; fall back to EOS if none is configured.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    outputs = model.generate(
        **inputs,
        max_new_tokens=600,
        temperature=0.75,
        top_p=0.90,
        repetition_penalty=1.25,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_id,
    )

    # The returned sequence starts with the prompt tokens; keep only what was
    # generated after them rather than splitting the decoded text on a marker.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    recipe_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    return recipe_text

# API Endpoints
@app.get("/")
def root():
    # Landing route: fixed payload announcing the service and the model name.
    return dict(
        message="AI Chef Pro with Zephyr-7B is running!",
        model="Zephyr-7B-β",
    )

@app.get("/health")
def health():
    # Health route: always answers with the same static payload.
    health_payload = {"status": "healthy", "model": "Zephyr-7B-β"}
    return health_payload

# Generation used to run inline on the event loop, so requests were handled one
# at a time. It now runs in a worker thread; this lock keeps generation
# one-at-a-time so concurrent requests do not run the model simultaneously.
_GENERATION_LOCK = threading.Lock()


def _generate_recipe_serialized(ingredients: list[str]) -> str:
    """Call generate_recipe while holding the generation lock."""
    with _GENERATION_LOCK:
        return generate_recipe(ingredients)


@app.post("/api/generate-recipe", response_model=RecipeResponse)
async def create_recipe(request: RecipeRequest):
    """Generate a recipe from the ingredients in the request body.

    Raises:
        HTTPException: 400 when no non-blank ingredient is supplied,
            500 when recipe generation fails.
    """
    # basic validation: drop blank / whitespace-only entries before checking
    ingredients = [item.strip() for item in request.ingredients if item.strip()]
    if not ingredients:
        raise HTTPException(status_code=400, detail="Please provide at least one ingredient")

    try:
        # Generation is blocking and slow; run it off the event loop so other
        # routes (e.g. /health) stay responsive while a recipe is produced.
        recipe = await asyncio.to_thread(_generate_recipe_serialized, ingredients)
    except Exception as exc:
        # Log the cause server-side and return a structured JSON error.
        print(f"Recipe generation failed: {exc!r}")
        raise HTTPException(status_code=500, detail="Recipe generation failed") from exc

    return RecipeResponse(recipe=recipe)

# Start server + ngrok
if __name__ == "__main__":
    PORT = 8000

    print("\n" + "═"*85)
    print("STARTING AI CHEF PRO WITH ZEPHYR-7B-β")
    print("═"*85)

    # create ngrok tunnel so frontend can access from local machine
    tunnel = ngrok.connect(PORT)
    try:
        public_url = f"{tunnel.public_url}/api/generate-recipe"

        print(f"\nPUBLIC API URL → {public_url}")
        print("═"*85)

        # auto-copy URL to clipboard (best effort; only possible inside Colab)
        try:
            from google.colab import output
            # json.dumps yields a correctly quoted and escaped JS string literal.
            output.eval_js(f"navigator.clipboard.writeText({json.dumps(public_url)})")
            print("URL AUTO-COPIED TO CLIPBOARD! Just press Ctrl+V in your frontend!")
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt / SystemExit
            # still propagate.
            print("Copy the URL above manually")

        print("═"*85)
        print("AI CHEF PRO IS LIVE — TIME TO COOK!")
        print("═"*85)

        # run the server (blocks until the server stops or the cell is interrupted)
        uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")
    finally:
        # Close the tunnel so re-running this cell does not leave stale tunnels open.
        try:
            ngrok.disconnect(tunnel.public_url)
        except Exception as exc:
            print(f"Could not close the ngrok tunnel cleanly: {exc!r}")