# Gifty Fast Scorer: Text-Based Giftability

This worker performs an initial pass through the catalog to determine basic **giftability** using only text data (`content_text`, falling back to the item's title and description when `content_text` is empty).

### Goal
Quickly filter the catalog and assign a 0.0 - 1.0 score to prioritize items for deep 10-D vision scoring later.


In [None]:
!pip -q install -U transformers accelerate bitsandbytes requests tqdm

In [None]:
import os
import json
import logging
import torch
import requests
import time
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

# Resolve the internal API token: prefer Kaggle's secret store, and fall back
# to the INTERNAL_API_TOKEN environment variable when that is unavailable
# (e.g. kaggle_secrets cannot be imported or the secret lookup fails).
try:
    from kaggle_secrets import UserSecretsClient
    INTERNAL_TOKEN = UserSecretsClient().get_secret("INTERNAL_API_TOKEN")
except Exception:
    # `except Exception` (not a bare `except`) so Ctrl-C / SystemExit raised
    # during the lookup still stop the notebook instead of being swallowed.
    INTERNAL_TOKEN = os.getenv("INTERNAL_API_TOKEN", "default_token")

# --- Configuration ---
API_BASE_URL = "https://api.giftyai.ru"
MODEL_ID = "Qwen/Qwen2-7B-Instruct" # Using text-only Qwen for speed
ENGINE_VERSION = "v1.0-fast-text-only"
# Number of tasks requested from the API per polling cycle.
BATCH_LIMIT = 20

logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger("FastScorer")

In [None]:
# Load the tokenizer and the causal LM named by MODEL_ID, then switch the
# model to evaluation mode before any scoring happens.
logger.info(f"Loading {MODEL_ID}...")

# Let the library choose both the weight dtype and the device placement.
_model_load_kwargs = {"torch_dtype": "auto", "device_map": "auto"}

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_model_load_kwargs)
model.eval()

logger.info("Fast Text Model Loaded.")

In [None]:
# System message sent with every scoring request (see process_one). It asks
# the model to answer with a single JSON object carrying `gift_score` and
# `reasoning`, which are the two keys the submit loop reads from the reply.
# NOTE(review): the `//` annotation inside the template below is not valid
# JSON; if the model echoes it in its answer, json.loads will reject the
# reply and the item is skipped -- confirm this does not happen in practice.
SYSTEM_PROMPT = """You are a professional gift curator.
Analyze the product details and determine if this item is a GOOD GIFT.

A good gift is:
1. Emotionally valuable or highly practical for a specific person.
2. Not a generic raw material (like raw wood) or industrial part.
3. Suitable for giving (packaging/intent).

OUTPUT FORMAT: Return ONLY a valid JSON object.
JSON Structure:
{
  "gift_score": 0.0-1.0,  // 1.0 = Perfect gift, 0.0 = Not a gift at all
  "reasoning": "Brief explanation (1 sentence)"
}
"""

def extract_json(raw_text):
    """Return the first JSON object embedded in ``raw_text``, or ``None``.

    The text is scanned for ``{`` characters; at each one a JSON decode is
    attempted, and the first object that parses is returned. Any text before
    or after that object (including stray braces) is ignored, so replies such
    as ``'Sure! {"gift_score": 0.8} Hope that helps {smile}'`` still parse.

    Args:
        raw_text: Model reply to search. Non-string values yield ``None``.

    Returns:
        The decoded ``dict``, or ``None`` when no JSON object can be decoded.
    """
    if not isinstance(raw_text, str):
        return None

    decoder = json.JSONDecoder()
    start = raw_text.find('{')
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so
            # trailing chatter after the object does not break parsing.
            parsed, _end = decoder.raw_decode(raw_text, start)
        except (ValueError, RecursionError):
            # Not a JSON object at this brace; try the next candidate.
            start = raw_text.find('{', start + 1)
            continue
        return parsed
    return None

def process_one(item):
    """Score one catalog item with the text-only model.

    Builds a chat prompt from the item's ``content_text`` (falling back to its
    title and description), runs greedy generation, and parses the reply.

    Args:
        item: Mapping describing one task. The keys read here are
            ``content_text``, ``title`` and ``description``.

    Returns:
        Whatever ``extract_json`` decodes from the model reply, or ``None``
        when the reply contains no parseable JSON object.
    """
    context = item.get('content_text')
    if not context:
        # Missing fields become empty strings so the literal text "None" is
        # never presented to the model as a product title/description.
        title = item.get('title') or ''
        description = item.get('description') or ''
        context = f"Title: {title}\nDescription: {description}"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Evaluate this product:\n{context}"}
    ]

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    prompt_len = model_inputs.input_ids.shape[1]

    with torch.no_grad():
        # Forward the whole encoding (input_ids and attention_mask) instead of
        # input_ids alone, so generate() does not have to guess the mask.
        generated_ids = model.generate(**model_inputs, max_new_tokens=150, do_sample=False)

    # Keep only the newly generated tokens; the output starts with the prompt.
    new_tokens = generated_ids[:, prompt_len:]
    response = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0]

    return extract_json(response)

In [None]:
# Main worker loop: poll the API for unscored items, score each one with
# process_one, and submit the batch of results. Runs until interrupted.
headers = {"X-Internal-Token": INTERNAL_TOKEN}
# Upper bound (seconds) for any single HTTP call, so a stalled connection
# raises and is retried instead of hanging the worker indefinitely.
REQUEST_TIMEOUT = 30

logger.info("Starting Fast Text Scoring Cycle...")

while True:
    try:
        # 1. Fetch tasks
        repo_resp = requests.get(
            f"{API_BASE_URL}/internal/scoring/tasks",
            params={"limit": BATCH_LIMIT},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if repo_resp.status_code != 200:
            logger.error(f"API Error: {repo_resp.text}")
            time.sleep(30)
            continue

        tasks = repo_resp.json()
        if not tasks:
            logger.info("All items scored. Sleeping...")
            time.sleep(3600)
            continue

        logger.info(f"Syncing {len(tasks)} items...")
        results = []
        for t in tasks:
            try:
                data = process_one(t)
                if not data:
                    # No parseable reply for this item; nothing to submit.
                    continue

                # The model may return a string or an out-of-range number;
                # coerce to float and clamp to the documented 0.0-1.0 range.
                try:
                    score = float(data.get('gift_score', 0.5))
                except (TypeError, ValueError):
                    score = 0.5
                score = min(1.0, max(0.0, score))

                results.append({
                    "gift_id": t['gift_id'],
                    "llm_gift_score": score,
                    "llm_gift_reasoning": data.get('reasoning', ''),
                    "llm_scoring_model": MODEL_ID,
                    "llm_scoring_version": ENGINE_VERSION
                })
            except Exception as e:
                # Look the id up defensively: a task without 'gift_id' must
                # not raise again here and abort the rest of the batch.
                gift_id = t.get('gift_id') if isinstance(t, dict) else None
                logger.error(f"Item {gift_id} failed: {e}")

        # 2. Submit
        if results:
            submit_resp = requests.post(
                f"{API_BASE_URL}/internal/scoring/submit",
                json={"results": results},
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            if submit_resp.ok:
                logger.info(f"Submitted {len(results)} items.")
            else:
                # Report the failure rather than claiming success, and back
                # off the same way as for a failed fetch.
                logger.error(f"Submit failed ({submit_resp.status_code}): {submit_resp.text}")
                time.sleep(30)

    except Exception as e:
        logger.error(f"Global Error: {e}")
        time.sleep(60)