In [None]:
import re
import math
from collections import Counter

def tokenize(text):
    """Split *text* into lowercase word tokens.

    The text is lowercased first; a token is then any maximal run of ASCII
    letters, digits, or apostrophes. Every other character separates tokens.

    Returns:
        A list of token strings in order of appearance.
    """
    lowered = text.lower()
    word_pattern = re.compile(r"[a-zA-Z0-9']+")
    return word_pattern.findall(lowered)

def cosine_similarity(a, b):
    """Return the cosine similarity between two bags of items.

    Both arguments are converted with ``Counter``, so each distinct item
    becomes one dimension weighted by its count.

    Returns:
        The dot product of the two count vectors divided by the product of
        their Euclidean norms, or 0.0 when either norm is zero.
    """
    left = Counter(a)
    right = Counter(b)

    # Only items present on the left can contribute to the dot product.
    overlap = sum(count * right.get(item, 0) for item, count in left.items())

    left_norm = math.sqrt(sum(count * count for count in left.values()))
    right_norm = math.sqrt(sum(count * count for count in right.values()))

    # A zero norm would make the ratio undefined.
    if not (left_norm and right_norm):
        return 0.0
    return overlap / (left_norm * right_norm)

def extract_keywords(text):
    """Return the set of distinct tokens in *text* that are not stopwords.

    Tokens come from ``tokenize``; the stopword list is the small fixed set
    defined below.
    """
    ignored = {"the","is","are","of","to","for","and","or","with","my","i","you"}
    distinct_tokens = set(tokenize(text))
    return distinct_tokens - ignored

def completeness_score(user_text, ai_text):
    """Return the fraction of the user's keywords that appear in the AI text.

    Keywords are taken from *user_text* via ``extract_keywords`` and compared
    against the set of tokens in *ai_text*.

    Returns:
        A float between 0.0 and 1.0, or 1.0 when *user_text* yields no
        keywords.
    """
    wanted = extract_keywords(user_text)
    available = set(tokenize(ai_text))

    # No keywords to cover counts as fully covered.
    if not wanted:
        return 1.0

    covered = wanted.intersection(available)
    return len(covered) / len(wanted)

def extract_entities(text):
    """Return capitalized words found in *text*, in order of appearance.

    A match is a whole word made only of ASCII letters, at least two
    characters long, whose first letter is uppercase. Duplicates are kept.
    """
    capitalized_word = re.compile(r"\b[A-Z][a-zA-Z]+\b")
    return [match.group() for match in capitalized_word.finditer(text)]

def extract_numbers(text):
    """Return the numeric literals found in *text*, in order of appearance.

    A number is a run of digits, optionally with comma-separated digit
    groups (``1,000``) and an optional fractional part (``3.14``). Matches
    are returned as the exact substrings, with no normalization.

    The fractional part keeps a decimal together as one value instead of
    splitting it at the point into two unrelated integers. A trailing
    sentence period (``2020.``) is not part of the match because the
    fractional part requires at least one digit after the point.
    """
    return re.findall(r"\b\d+(?:,\d+)*(?:\.\d+)?\b", text)

def hallucination_score(answer, context):
    """Estimate how much of *answer* is unsupported by *context*.

    Entities (via ``extract_entities``) and numbers (via ``extract_numbers``)
    are collected from both texts. For each kind, the support ratio is the
    fraction of the answer's distinct values that also occur in the context.

    Returns:
        ``1 - (0.6 * entity_support + 0.4 * number_support)``: 0.0 when
        everything in the answer is found in the context, 1.0 when nothing
        is.
    """
    ans_entities = set(extract_entities(answer))
    ctx_entities = set(extract_entities(context))

    ans_nums = set(extract_numbers(answer))
    ctx_nums = set(extract_numbers(context))

    # If the answer contains no entities (or no numbers) there is nothing of
    # that kind to hallucinate, so the category counts as fully supported.
    # Scoring it as 0 would report an answer with nothing to check as
    # entirely hallucinated. This matches completeness_score, which returns
    # 1.0 when there are no keywords to cover.
    if ans_entities:
        entity_match = len(ans_entities & ctx_entities) / len(ans_entities)
    else:
        entity_match = 1.0

    if ans_nums:
        number_match = len(ans_nums & ctx_nums) / len(ans_nums)
    else:
        number_match = 1.0

    return 1 - (0.6 * entity_match + 0.4 * number_match)

def estimate_tokens(text):
    """Return the number of tokens ``tokenize`` finds in *text*."""
    words = tokenize(text)
    return len(words)

def estimate_cost(tokens, price_per_token=0.000002):
    """Return the cost of *tokens* tokens at a flat per-token price.

    Args:
        tokens: Number of tokens to price.
        price_per_token: Price of one token; defaults to 0.000002.

    Returns:
        ``tokens`` multiplied by ``price_per_token``.
    """
    total = tokens * price_per_token
    return total
