# Gifty Production Worker: LLM Scoring

This notebook connects to the Gifty Internal API, fetches products that need scoring, scores each one with a 4-bit quantized LLM, and pushes the results back to the database through the API. Optimized for 2x T4 GPUs.

### Setup Guide
1. Set `API_BASE_URL` to your backend URL, without a trailing slash (e.g. `https://api.gifty.gift`).
2. Set `INTERNAL_TOKEN` to match the value in your backend settings.

In [None]:
# Install or upgrade (-U) the packages this notebook relies on, with quiet (-q) pip output.
!pip -q install -U transformers accelerate bitsandbytes sentencepiece pandas tqdm requests

In [None]:
import os

# Backend connection settings.
# Each value may be supplied through an environment variable of the same name,
# so the token does not have to be stored in the notebook; otherwise edit the
# fallback literals below.
# A trailing "/" is stripped because endpoint paths are appended to the base
# URL with their own leading "/" (f"{API_BASE_URL}/internal/...").
API_BASE_URL = os.environ.get("API_BASE_URL", "https://your-api-url.com").rstrip("/")  # Update this
INTERNAL_TOKEN = os.environ.get("INTERNAL_TOKEN", "default_internal_token")  # Update this

MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"
MODEL_VERSION = "v1.0" # Prompt version
MODEL_TAG = "qwen2.5-32b-4bit" # Hardware/quantization tag

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Quantization settings: 4-bit NF4 weights with double quantization and a
# float16 compute dtype.
_quant_settings = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
bnb = BitsAndBytesConfig(**_quant_settings)

# Fast tokenizer for the configured checkpoint.
tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# Load the quantized model with automatic device placement, then switch it
# to inference mode.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=bnb,
)
model.eval()

In [None]:
import torch.nn.functional as F

# System message: asks for a short rationale followed by a fixed-format verdict
# line ('Answer: GIFT' / 'Answer: NOT_GIFT').
SYSTEM = (
    "You are a strict giftability classifier.\n"
    "First, provide a very brief reasoning in Russian or English (max 2 sentences).\n"
    "Then conclude with 'Answer: GIFT' or 'Answer: NOT_GIFT'."
)


def build_prompt(title, category="", merchant="", price=None):
    """Render the chat-formatted classification prompt for one product.

    Args:
        title: Product title; interpolated verbatim into the user message.
        category: Product category; interpolated verbatim.
        merchant: Merchant name; interpolated verbatim.
        price: Product price; interpolated with ``str()`` formatting, so the
            default ``None`` appears as the text ``None``.

    Returns:
        The string produced by ``tok.apply_chat_template`` for the system and
        user messages, untokenized and with the generation prompt appended.
    """
    # Each entry is one line of the user message. The trailing space after
    # "NOT_GIFT." is part of the prompt text and is kept deliberately.
    user_lines = [
        "Decide if this product is a good gift item for most people.",
        "Utilitarian/chemical/spare parts -> NOT_GIFT. ",
        "Decor/gadgets/jewelry/toys -> GIFT.",
        "",
        "Product:",
        f"- title: {title}",
        f"- category: {category}",
        f"- merchant: {merchant}",
        f"- price: {price}",
        "Reasoning:",
    ]
    conversation = [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": "\n".join(user_lines)},
    ]
    return tok.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

@torch.no_grad()
def score_label(prompt_with_reasoning: str, label: str) -> float:
    """Return the summed log-probability of ``label`` following the prompt.

    The prompt and the prompt-plus-label are tokenized separately; the tokens
    of the combined sequence beyond the prompt's token count are treated as
    the label tokens, and their log-probabilities under the model are summed.

    Args:
        prompt_with_reasoning: Text that precedes the label.
        label: Continuation to score (e.g. ``" GIFT"``).

    Returns:
        Sum of the per-token log-probabilities of the label tokens, or ``0.0``
        when the label contributes no tokens.

    Raises:
        ValueError: If the prompt tokenizes to zero tokens, because there is
            then no position from which to predict the first label token.
    """
    enc_full = tok(prompt_with_reasoning + label, return_tensors="pt").to(model.device)
    # Only the prompt's token count is needed, so it stays on the CPU.
    prompt_len = tok(prompt_with_reasoning, return_tensors="pt")["input_ids"].shape[1]

    # NOTE(review): this assumes the tokenization of prompt+label begins with
    # exactly the prompt's tokens (no merge across the boundary) - confirm for
    # the tokenizer in use.
    label_ids = enc_full["input_ids"][0, prompt_len:]
    n_label = label_ids.shape[0]
    if n_label == 0:
        return 0.0
    if prompt_len == 0:
        raise ValueError("prompt_with_reasoning must produce at least one token")

    logits = model(**enc_full).logits

    # The logits at position i predict token i + 1, so the label tokens are
    # predicted by positions prompt_len - 1 .. prompt_len - 2 + n_label.
    first = prompt_len - 1
    # Cast to float32 so log-softmax is not computed in the model's half precision.
    log_probs = F.log_softmax(logits[0, first:first + n_label, :].float(), dim=-1)

    # Select every label token's log-prob in one gather instead of a Python
    # loop with a host sync per token.
    index = label_ids.to(log_probs.device).unsqueeze(1)
    return float(log_probs.gather(1, index).sum().item())

def process_one(item):
    """Generate a short rationale for one product and score its giftability.

    Args:
        item: Mapping describing the product. ``'title'`` is required;
            ``'category'``, ``'merchant'`` and ``'price'`` are optional and
            fall back to the defaults of ``build_prompt`` when absent.

    Returns:
        A ``(p, reasoning)`` tuple: ``p`` is the softmax weight of the
        ``" GIFT"`` label against ``" NOT_GIFT"``, rounded to two decimals,
        and ``reasoning`` is the generated text preceding any ``"Answer:"``
        marker, stripped of surrounding whitespace.

    Raises:
        KeyError: If ``item`` has no ``'title'``.
    """
    # Optional fields use .get so that a task without them does not raise.
    prompt = build_prompt(
        item['title'],
        item.get('category', ""),
        item.get('merchant', ""),
        item.get('price'),
    )
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    out = model.generate(**inputs, max_new_tokens=80, do_sample=False, pad_token_id=tok.eos_token_id)

    # Decode only the newly generated tokens, not the echoed prompt.
    n_prompt_tokens = len(inputs.input_ids[0])
    gen = tok.decode(out[0][n_prompt_tokens:], skip_special_tokens=True)

    # partition() yields the whole text when "Answer:" is absent, so no
    # separate membership test is needed.
    reasoning = gen.partition("Answer:")[0].strip()
    score_prompt = prompt + reasoning + "\nAnswer:"

    s_gift = score_label(score_prompt, " GIFT")
    s_not = score_label(score_prompt, " NOT_GIFT")
    p = float(torch.softmax(torch.tensor([s_not, s_gift]), dim=0)[1].item())

    # Rounding to two decimals already maps anything below 0.005 to 0.0, so
    # no additional clamp for tiny values is required.
    return round(p, 2), reasoning

In [None]:
import requests
import time
from tqdm import tqdm

headers = {"X-Internal-Token": INTERNAL_TOKEN}

# (connect, read) timeouts in seconds; without a timeout a stalled connection
# would block the worker indefinitely.
REQUEST_TIMEOUT = (10, 60)
RETRY_DELAY_S = 30   # pause after an error before polling again
IDLE_DELAY_S = 300   # pause when the backend returns no tasks

# Poll the backend forever: fetch a batch of tasks, score each item, submit the
# results. Errors are reported and followed by a pause, never re-raised, so the
# worker keeps running.
print("Starting worker loop...")
while True:
    try:
        # 1. Get tasks
        resp = requests.get(
            f"{API_BASE_URL}/internal/scoring/tasks",
            params={"limit": 20},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if resp.status_code != 200:
            print(f"Error fetching tasks: {resp.status_code}")
            time.sleep(RETRY_DELAY_S)
            continue

        tasks = resp.json()
        if not tasks:
            print("No more products to score. Waiting 5 minutes...")
            time.sleep(IDLE_DELAY_S)
            continue

        print(f"Processing batch of {len(tasks)} items...")
        results = []
        for t in tqdm(tasks):
            # Isolate failures per item so one bad task does not discard the
            # results already computed for the rest of the batch.
            try:
                p, reason = process_one(t)
                results.append({
                    "gift_id": t['gift_id'],
                    "llm_gift_score": p,
                    "llm_gift_reasoning": reason,
                    "llm_scoring_model": MODEL_TAG,
                    "llm_scoring_version": MODEL_VERSION
                })
            except Exception as e:
                print(f"Skipping item after scoring error: {e!r}")

        if not results:
            # Every item failed; pause instead of immediately refetching.
            print("No items in this batch could be scored.")
            time.sleep(RETRY_DELAY_S)
            continue

        # 2. Submit results
        s_resp = requests.post(
            f"{API_BASE_URL}/internal/scoring/submit",
            json={"results": results},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if s_resp.status_code == 200:
            print(f"Successfully updated {s_resp.json().get('updated')} items.")
        else:
            print(f"Failed to submit results: {s_resp.status_code}")
            # Back off so a persistently failing submit endpoint does not
            # trigger an immediate refetch-and-rescore cycle.
            time.sleep(RETRY_DELAY_S)

    except Exception as e:
        print(f"Unexpected error: {e}")
        time.sleep(RETRY_DELAY_S)