<a href="https://colab.research.google.com/github/AnamariaVLR/noura-rag/blob/main/NOURA_RAG_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# NOURA - Cell 1: Verify environment
print("NOURA is starting...")
print("Python ready")

NOURA is starting...
Python ready


In [None]:
# NOURA - Cell 2: Scoring methodology (NOURA core IP)

EVIDENCE_HIERARCHY = {
    "systematic_review_meta_analysis": {"base_weight": 1.00, "requires_independence_check": True},
    "rct":                             {"base_weight": 0.85, "requires_independence_check": True},
    "regulatory_opinion":              {"base_weight": 0.75, "requires_independence_check": False},
    "observational_cohort":            {"base_weight": 0.60, "requires_independence_check": True},
    "ewg_hazard":                      {"base_weight": 0.50, "requires_dose_adjustment": True},
    "cosing_regional":                 {"base_weight": 0.45, "requires_independence_check": False},
    "in_vitro":                        {"base_weight": 0.30, "requires_independence_check": False},
    "clinical_case":                   {"base_weight": 0.15, "requires_independence_check": False},
    "expert_opinion":                  {"base_weight": 0.10, "requires_independence_check": True},
}

INDUSTRY_FUNDING_PENALTY = 0.20
HEALTH_HARD_BLOCK = 50
PLANET_FLAG_THRESHOLD = 40

SUFFICIENCY_CAPS = {
    "only_in_vitro_or_case":   60,
    "only_regulatory_strong":  70,
    "only_regulatory_weak":    50,
    "single_rct":              80,
}

CATEGORY_CLAIM_REQUIREMENTS = {
    "skincare": {
        "hydration":   {"min_evidence": "rct", "min_studies": 1},
        "anti_aging":  {"min_evidence": "rct", "min_studies": 2},
        "brightening": {"min_evidence": "observational_cohort", "min_studies": 1},
        "acne":        {"min_evidence": "rct", "min_studies": 2},
    }
}

print("Scoring methodology loaded")
print(f"Evidence sources defined: {len(EVIDENCE_HIERARCHY)}")
print(f"Health hard block threshold: {HEALTH_HARD_BLOCK}")

Scoring methodology loaded
Evidence sources defined: 9
Health hard block threshold: 50


In [None]:
# NOURA - Cell 3: Scoring engine

def evaluate_evidence(source_type, industry_funded=False, dose_adjusted=True):
    if source_type not in EVIDENCE_HIERARCHY:
        return {"weight": 0, "source_type": source_type, "flags": [f"Unknown source type: {source_type}"]}

    weight = EVIDENCE_HIERARCHY[source_type]["base_weight"]
    flags = []

    if industry_funded and EVIDENCE_HIERARCHY[source_type].get("requires_independence_check"):
        weight = weight * (1 - INDUSTRY_FUNDING_PENALTY)
        flags.append("Industry-funded study: weight reduced 20%")

    if source_type == "ewg_hazard" and not dose_adjusted:
        weight = 0
        flags.append("EWG score excluded: not adjusted for actual product concentration")

    return {"source_type": source_type, "weight": round(weight, 3), "flags": flags}


def calculate_health_score(evaluated_evidence, prohibited=False):
    if prohibited:
        return {
            "score": 0,
            "verdict": "HIGHER RISK",
            "flag": "Ingredient prohibited by regulatory authority",
            "evidence_situation": "regulatory_block"
        }

    if not evaluated_evidence:
        return {
            "score": None,
            "verdict": "INSUFFICIENT DATA",
            "flag": "No scientific evidence retrieved for this ingredient",
            "evidence_situation": "no_evidence"
        }

    source_types = [e["source_type"] for e in evaluated_evidence]
    only_lab = all(t in {"in_vitro", "clinical_case"} for t in source_types)
    only_regulatory = all(t in {"regulatory_opinion", "cosing_regional"} for t in source_types)

    # FIXED FORMULA: use top 3 highest-weight studies, not average of all
    # This prevents weak studies from dragging down a strong evidence base
    weights = sorted([e["weight"] for e in evaluated_evidence], reverse=True)
    top_weights = weights[:3]
    score_raw = round((sum(top_weights) / len(top_weights)) * 100, 1)

    # Apply sufficiency caps
    if only_lab:
        score = min(score_raw, SUFFICIENCY_CAPS["only_in_vitro_or_case"])
        flag = "Health score capped at 60 — only lab-based evidence retrieved; human clinical data insufficient"
        situation = "only_lab"
    elif only_regulatory:
        score = min(score_raw, SUFFICIENCY_CAPS["only_regulatory_strong"])
        flag = "Health score capped at 70 — regulatory approval present but no clinical studies retrieved"
        situation = "only_regulatory"
    else:
        score = score_raw
        flag = None
        situation = "sufficient"

    # Assign verdict
    if score is None or score < HEALTH_HARD_BLOCK:
        verdict = "HIGHER RISK"
    elif score >= 71:
        verdict = "WELL SUPPORTED"
    elif score >= 41:
        verdict = "LIMITED SUPPORT"
    else:
        verdict = "HIGHER RISK"

    return {
        "score": score,
        "verdict": verdict,
        "flag": flag,
        "evidence_situation": situation
    }

print("Scoring engine loaded")

Scoring engine loaded


In [None]:
# NOURA - Cell 4: PubMed connection with abstract retrieval (50 studies)
import requests
import time
import xml.etree.ElementTree as ET
import re

def search_pubmed(ingredient, max_results=50):
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    time.sleep(1)

    # Step 1: Search for IDs
    search_response = requests.get(
        f"{base_url}esearch.fcgi",
        params={
            "db": "pubmed",
            "term": ingredient + "[Title]",
            "retmax": max_results,
            "retmode": "json",
            "sort": "relevance"
        }
    )

    search_data = search_response.json()

    if "esearchresult" not in search_data:
        return {"ingredient": ingredient, "studies_found": 0, "studies": [],
                "error": "PubMed rate limit — try again in 30 seconds"}

    ids = search_data["esearchresult"]["idlist"]

    if not ids:
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    time.sleep(1)

    # Step 2: Fetch abstracts via XML
    fetch_response = requests.get(
        f"{base_url}efetch.fcgi",
        params={
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
            "rettype": "abstract"
        }
    )

    time.sleep(1)

    # Step 3: Fetch metadata
    summary_response = requests.get(
        f"{base_url}esummary.fcgi",
        params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"}
    )
    summary_data = summary_response.json()

    if "result" not in summary_data:
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    # Step 4: Parse abstracts and sample sizes from XML
    abstracts = {}
    sample_sizes = {}

    try:
        root = ET.fromstring(fetch_response.content)
        for article in root.findall(".//PubmedArticle"):
            pmid_el = article.find(".//PMID")
            if pmid_el is None:
                continue
            pmid = pmid_el.text

            abstract_texts = article.findall(".//AbstractText")
            if abstract_texts:
                abstract = " ".join([el.text or "" for el in abstract_texts])
                abstracts[pmid] = abstract[:600]

            full_text = " ".join([el.text or "" for el in article.findall(".//AbstractText")])
            size_matches = re.findall(
                r'\b(\d+)\s*(?:patients|participants|subjects|women|men|volunteers|individuals|adults)',
                full_text, re.IGNORECASE
            )
            if size_matches:
                sample_sizes[pmid] = max([int(x) for x in size_matches])
    except Exception:
        pass

    # Step 5: Build studies list
    studies = []
    for uid in ids:
        article = summary_data["result"].get(uid, {})
        if isinstance(article, dict) and "title" in article:
            studies.append({
                "id": uid,
                "title": article.get("title", ""),
                "abstract": abstracts.get(uid, ""),
                "sample_size": sample_sizes.get(uid, None),
                "year": article.get("pubdate", "")[:4],
                "source": "PubMed",
                "pubmed_url": f"https://pubmed.ncbi.nlm.nih.gov/{uid}/"
            })

    return {"ingredient": ingredient, "studies_found": len(studies), "studies": studies}

print("PubMed connection ready — up to 50 studies with abstracts")

PubMed connection ready — up to 50 studies with abstracts


In [None]:
# NOURA - Cell 5: Evidence classifier

def classify_evidence_type(title):
    title_lower = title.lower()

    if any(w in title_lower for w in ["meta-analysis", "systematic review", "cochrane"]):
        return "systematic_review_meta_analysis"

    elif any(w in title_lower for w in ["randomized", "rct", "controlled trial", "double-blind", "clinical trial"]):
        return "rct"

    elif any(w in title_lower for w in ["cohort", "observational", "prospective", "retrospective", "epidemiolog"]):
        return "observational_cohort"

    elif any(w in title_lower for w in ["guideline", "regulatory", "safety assessment", "efsa", "fda", "final report"]):
        return "regulatory_opinion"

    elif any(w in title_lower for w in ["review", "overview", "narrative", "update", "current evidence", "mechanisms of action", "mechanistic", "applications of"]):
        return "observational_cohort"  # Reviews treated as observational — higher than in_vitro

    elif any(w in title_lower for w in ["in vitro", "cell culture", "in-vitro"]):
        return "in_vitro"

    elif any(w in title_lower for w in ["case report", "case study"]):
        return "clinical_case"

    else:
        return "in_vitro"

print("Evidence classifier loaded")

Evidence classifier loaded


In [None]:
# NOURA - Cell 6: Full evaluation pipeline

def noura_evaluate(ingredient, category="skincare"):

    # Step 1: Search PubMed
    pubmed_results = search_pubmed(ingredient, max_results=10)

    # Step 2: Classify and evaluate each study
    evaluated = []
    evidence_count = {}

    for study in pubmed_results.get("studies", []):
        source_type = classify_evidence_type(study["title"])
        ev = evaluate_evidence(source_type)
        ev["study_title"] = study["title"][:80]
        ev["year"] = study["year"]
        evaluated.append(ev)
        evidence_count[source_type] = evidence_count.get(source_type, 0) + 1

    # Step 3: Calculate score
    result = calculate_health_score(evaluated)

    # Step 4: Build evidence summary
    evidence_str = " + ".join([f"{v} {k.replace('_', ' ')}"
                                for k, v in evidence_count.items()])

    # Step 5: Display NOURA assessment
    print(f"NOURA Health Assessment: {ingredient.title()} ({category})")
    print("=" * 65)
    print(f"Score:   {result['score']}/100")
    print(f"Verdict: {result['verdict']}")
    print()
    print(f"Studies retrieved:  {pubmed_results['studies_found']} (PubMed)")
    print(f"Evidence types:     {evidence_str if evidence_str else 'None'}")
    print()

    if result['flag']:
        print(f"Note: {result['flag']}")
        print()

    print("Evidence breakdown:")
    for e in evaluated:
        print(f"  [{e['year']}] {e['source_type'].replace('_', ' ')} "
              f"(weight: {e['weight']}) — {e['study_title']}...")

    print()
    print("What would you like next?")
    print("  - View full source links")
    print("  - Compare with alternatives")
    print("  - Check regulatory status")
    print("  - Assess another ingredient")
    print("=" * 65)
    print()

    return result


# Test
noura_evaluate("niacinamide")
noura_evaluate("parabens", category="skincare")

NOURA Health Assessment: Niacinamide (skincare)
Score:   65.0/100
Verdict: LIMITED SUPPORT

Studies retrieved:  10 (PubMed)
Evidence types:     4 observational cohort + 5 in vitro + 1 regulatory opinion

Evidence breakdown:
  [2024] observational cohort (weight: 0.6) — Niacinamide: a review on dermal delivery strategies and clinical evidence....
  [2014] observational cohort (weight: 0.6) — Niacinamide - mechanisms of action and its topical use in dermatology....
  [2005] in vitro (weight: 0.3) — Niacinamide: A B vitamin that improves aging facial skin appearance....
  [2002] in vitro (weight: 0.3) — The effect of niacinamide on reducing cutaneous pigmentation and suppression of ...
  [2021] observational cohort (weight: 0.6) — Cosmeceutical Aptitudes of Niacinamide: A Review....
  [2006] in vitro (weight: 0.3) — The effect of 2% niacinamide on facial sebum production....
  [2024] observational cohort (weight: 0.6) — Mechanistic Insights into the Multiple Functions of Niacinamide: Ther

{'score': 73.3,
 'verdict': 'WELL SUPPORTED',
 'flag': None,
 'evidence_situation': 'sufficient'}

In [None]:
# NOURA - Cell 7: Context-aware evidence direction classifier

def classify_evidence_direction(title, abstract=""):
    full_text = (title + " " + abstract).lower()

    # CONCERN: only match when the ingredient IS the cause of harm
    # These phrases indicate the ingredient causes the problem
    concern_phrases = [
        "linked to cancer", "associated with cancer", "cancer risk",
        "causes cancer", "cancer development",
        "linked to harm", "causes harm", "harmful effects of",
        "toxic effects of", "toxicity of", "hazardous effects",
        "endocrine disrupt", "endocrine-disrupt",
        "carcinogenic", "carcinogen",
        "banned", "restricted use", "prohibited",
        "unsafe for", "adverse effects of",
        "breast cancer", "estrogenic activity",
        "reproductive toxicity", "genotoxic", "mutagenic",
        "significant increase in risk",
        "associated with increased risk",
        "exposure linked", "exposure associated",
        "disrupts hormone", "disrupts endocrine",
        "impairs", "damages skin", "causes damage",
        "causes inflammation", "pro-inflammatory effect",
        "allergic reaction to", "sensitization to",
        "diabesity", "obesogen",
        "harmful", "dangerous to", "prohibited by"
    ]

    # SAFETY: ingredient provides benefit or is confirmed safe
    safety_phrases = [
        "safe", "safety assessment", "well tolerated", "no adverse",
        "no significant adverse", "approved", "permitted", "gras",
        "efficacious", "significant improvement", "effective treatment",
        "beneficial", "protective", "no toxicity observed",
        "no evidence of harm", "clinically proven",
        "significant reduction in", "improvement in skin",
        "recommended", "widely used safely",
        "explored for treatment", "potential treatment",
        "therapeutic application", "used to treat",
        "treatment of", "against cancer", "anti-cancer",
        "skin brightening", "anti-ageing", "anti-aging",
        "skin barrier", "skin care", "dermal benefits",
        "protects", "protection against", "reduces risk",
        "decreased risk", "prevents", "inhibits",
        "well-established", "proven efficacy",
        "moisturizing", "hydrating", "brightening",
        "anti-inflammatory", "antioxidant",
        "disrupts the virus", "disrupts bacterial", "disrupts pathogen"
    ]

    concern_score = sum(1 for w in concern_phrases if w in full_text)
    safety_score = sum(1 for w in safety_phrases if w in full_text)

    if concern_score > safety_score:
        return "CONCERN"
    elif safety_score > concern_score:
        return "SAFETY"
    else:
        return "NEUTRAL"


def classify_evidence_type(title, abstract=""):
    full_text = (title + " " + abstract).lower()

    if any(w in full_text for w in ["meta-analysis", "systematic review", "cochrane"]):
        return "systematic_review_meta_analysis"
    elif any(w in full_text for w in ["randomized", "randomised", "rct",
                                       "controlled trial", "double-blind",
                                       "double blind", "placebo-controlled"]):
        return "rct"
    elif any(w in full_text for w in ["cohort", "prospective", "retrospective",
                                       "epidemiolog", "population-based"]):
        return "observational_cohort"
    elif any(w in full_text for w in ["guideline", "regulatory", "safety assessment",
                                       "efsa", "fda", "final report", "sccs opinion",
                                       "gras", "approved by"]):
        return "regulatory_opinion"
    elif any(w in full_text for w in ["review", "overview", "narrative review",
                                       "current evidence", "mechanisms of action",
                                       "comprehensive review", "literature review"]):
        return "observational_cohort"
    elif any(w in full_text for w in ["in vitro", "cell culture", "in-vitro",
                                       "cell line", "hek293", "keratinocyte"]):
        return "in_vitro"
    elif any(w in full_text for w in ["case report", "case series"]):
        return "clinical_case"
    else:
        return "in_vitro"


print("Context-aware classifier loaded")

Context-aware classifier loaded


In [None]:
# NOURA - Cell 8: Direction-aware scoring engine with sample size weighting

def get_sample_size_multiplier(sample_size):
    """
    Returns a multiplier based on study sample size.
    Larger studies carry more evidential weight.
    """
    if sample_size is None:
        return 1.0       # Unknown size — no adjustment
    elif sample_size >= 1000:
        return 1.5       # Large study — 50% boost
    elif sample_size >= 100:
        return 1.25      # Medium study — 25% boost
    elif sample_size >= 30:
        return 1.0       # Minimum adequate size — no adjustment
    else:
        return 0.75      # Very small study — 25% penalty


def calculate_direction_aware_score(evaluated_evidence, prohibited=False):
    if prohibited:
        return {
            "score": 0,
            "verdict": "HIGHER RISK",
            "flag": "Ingredient prohibited by regulatory authority",
            "evidence_situation": "regulatory_block",
            "concern_count": 0,
            "safety_count": 0,
            "neutral_count": 0
        }

    if not evaluated_evidence:
        return {
            "score": None,
            "verdict": "INSUFFICIENT DATA",
            "flag": "No scientific evidence retrieved for this ingredient",
            "evidence_situation": "no_evidence",
            "concern_count": 0,
            "safety_count": 0,
            "neutral_count": 0
        }

    source_types = [e["source_type"] for e in evaluated_evidence]
    only_lab = all(t in {"in_vitro", "clinical_case"} for t in source_types)
    only_regulatory = all(t in {"regulatory_opinion", "cosing_regional"} for t in source_types)

    safety_weights = []
    concern_weights = []
    neutral_weights = []

    for e in evaluated_evidence:
        direction = e.get("direction") or classify_evidence_direction(
            e["study_title"], e.get("abstract", "")
        )
        # Apply sample size multiplier
        size_multiplier = get_sample_size_multiplier(e.get("sample_size"))
        weight = e["weight"] * size_multiplier

        if direction == "SAFETY":
            safety_weights.append(weight)
        elif direction == "CONCERN":
            concern_weights.append(weight)
        else:
            neutral_weights.append(weight * 0.5)

    safety_count = len(safety_weights)
    concern_count = len(concern_weights)
    neutral_count = len(neutral_weights)

    total_safety = sum(safety_weights) + sum(neutral_weights)
    total_concern = sum(concern_weights)
    net_score = total_safety - (total_concern * 0.5)

    max_possible = sum(e["weight"] for e in evaluated_evidence)
    if max_possible > 0:
        raw_score = round((net_score / max_possible) * 100, 1)
    else:
        raw_score = 0

    raw_score = max(0, min(100, raw_score))

    # Apply sufficiency caps
    if only_lab:
        score = min(raw_score, SUFFICIENCY_CAPS["only_in_vitro_or_case"])
        flag = "Health score capped at 60 — only lab-based evidence retrieved; human clinical data insufficient"
        situation = "only_lab"
    elif only_regulatory:
        score = min(raw_score, SUFFICIENCY_CAPS["only_regulatory_strong"])
        flag = "Health score capped at 70 — regulatory approval present but no clinical studies retrieved"
        situation = "only_regulatory"
    else:
        score = raw_score
        flag = None
        situation = "sufficient"

    # Hard concern override
    if concern_count > safety_count and concern_count >= 3:
        score = min(score, 45)
        flag = f"Majority of retrieved studies raise safety concerns ({concern_count} concern vs {safety_count} safety studies)"

    # Assign verdict
    if score is None or score < HEALTH_HARD_BLOCK:
        verdict = "HIGHER RISK"
    elif score >= 71:
        verdict = "WELL SUPPORTED"
    elif score >= 41:
        verdict = "LIMITED SUPPORT"
    else:
        verdict = "HIGHER RISK"

    return {
        "score": score,
        "verdict": verdict,
        "flag": flag,
        "evidence_situation": situation,
        "concern_count": concern_count,
        "safety_count": safety_count,
        "neutral_count": neutral_count
    }

print("Direction-aware scoring engine with sample size weighting loaded")

Direction-aware scoring engine with sample size weighting loaded


In [None]:
# NOURA - Cell 9: Confidence scoring engine

def calculate_confidence(evaluated_evidence, direction_count, studies_found):
    """
    Calculates how confident NOURA is in its assessment.
    Based on: volume of evidence, evidence quality, direction consistency.

    Returns:
    - confidence_score: 0-100
    - confidence_label: LOW / MODERATE / HIGH / VERY HIGH
    - confidence_notes: list of factors affecting confidence
    """
    notes = []
    score = 0

    # Factor 1: Volume of evidence (max 30 points)
    if studies_found >= 30:
        score += 30
        notes.append(f"Strong evidence base: {studies_found} studies retrieved")
    elif studies_found >= 15:
        score += 20
        notes.append(f"Moderate evidence base: {studies_found} studies retrieved")
    elif studies_found >= 5:
        score += 10
        notes.append(f"Limited evidence base: {studies_found} studies retrieved")
    else:
        score += 0
        notes.append(f"Very limited evidence: only {studies_found} studies retrieved")

    # Factor 2: Evidence quality (max 30 points)
    source_types = [e["source_type"] for e in evaluated_evidence]
    has_systematic_review = "systematic_review_meta_analysis" in source_types
    has_rct = "rct" in source_types
    has_regulatory = "regulatory_opinion" in source_types
    has_observational = "observational_cohort" in source_types

    quality_score = 0
    if has_systematic_review:
        quality_score += 12
        notes.append("Systematic review/meta-analysis present")
    if has_rct:
        quality_score += 10
        notes.append("RCT evidence present")
    if has_regulatory:
        quality_score += 8
        notes.append("Regulatory opinion present")
    if has_observational:
        quality_score += 5
        notes.append("Observational studies present")

    score += min(quality_score, 30)

    # Factor 3: Direction consistency (max 30 points)
    total_directional = direction_count["SAFETY"] + direction_count["CONCERN"]
    if total_directional > 0:
        dominant = max(direction_count["SAFETY"], direction_count["CONCERN"])
        consistency = dominant / total_directional

        if consistency >= 0.85:
            score += 30
            notes.append(f"High directional consistency: {round(consistency*100)}% of studies agree")
        elif consistency >= 0.70:
            score += 20
            notes.append(f"Moderate directional consistency: {round(consistency*100)}% of studies agree")
        elif consistency >= 0.55:
            score += 10
            notes.append(f"Mixed evidence: {round(consistency*100)}% directional agreement")
        else:
            score += 0
            notes.append("Contradictory evidence: studies disagree on direction")
    else:
        score += 5
        notes.append("Direction unclear — mostly neutral/mechanistic studies")

    # Factor 4: Sample size quality (max 10 points)
    sample_sizes = [e["sample_size"] for e in evaluated_evidence if e.get("sample_size")]
    if sample_sizes:
        max_n = max(sample_sizes)
        if max_n >= 1000:
            score += 10
            notes.append(f"Large study present (n={max_n:,})")
        elif max_n >= 100:
            score += 6
            notes.append(f"Medium study present (n={max_n})")
        else:
            score += 3
            notes.append(f"Largest study: n={max_n}")
    else:
        notes.append("No sample size data available")

    # Assign label
    if score >= 75:
        label = "VERY HIGH"
    elif score >= 55:
        label = "HIGH"
    elif score >= 35:
        label = "MODERATE"
    else:
        label = "LOW"

    return {
        "confidence_score": score,
        "confidence_label": label,
        "confidence_notes": notes
    }

print("Confidence scoring engine loaded")

Confidence scoring engine loaded


In [None]:
# NOURA - Cell 10: Full pipeline with confidence scoring

def noura_evaluate_v2(ingredient, category="skincare"):

    # Step 1: Search PubMed
    pubmed_results = search_pubmed_normalized(ingredient, max_results=50)

    # Step 2: Classify evidence type and direction
    evaluated = []
    evidence_count = {}
    direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

    for study in pubmed_results.get("studies", []):
        abstract = study.get("abstract", "")
        source_type = classify_evidence_type(study["title"], abstract)
        direction = classify_evidence_direction(study["title"], abstract)
        ev = evaluate_evidence(source_type)
        ev["study_title"] = study["title"][:80]
        ev["year"] = study["year"]
        ev["direction"] = direction
        ev["pubmed_url"] = study.get("pubmed_url", "")
        ev["sample_size"] = study.get("sample_size", None)
        ev["abstract"] = abstract[:500]
        evaluated.append(ev)
        evidence_count[source_type] = evidence_count.get(source_type, 0) + 1
        direction_count[direction] += 1

    # Step 3: Calculate health score
    result = calculate_direction_aware_score(evaluated)

    # Step 4: Calculate confidence
    confidence = calculate_confidence(
        evaluated, direction_count, pubmed_results["studies_found"]
    )

    # Step 5: Build evidence summary
    evidence_str = " + ".join([f"{v} {k.replace('_', ' ')}"
                                for k, v in evidence_count.items()])

    # Step 6: Display full NOURA assessment
    print(f"NOURA Health Assessment: {ingredient.title()} ({category})")
    print("=" * 65)
    print(f"Health Score:  {result['score']}/100")
    print(f"Verdict:       {result['verdict']}")
    print(f"Confidence:    {confidence['confidence_score']}/100 — {confidence['confidence_label']}")
    print()
    print(f"Studies retrieved:  {pubmed_results['studies_found']} (PubMed)")
    print(f"Evidence types:     {evidence_str if evidence_str else 'None'}")
    print(f"Evidence direction: {direction_count['SAFETY']} safety | "
          f"{direction_count['CONCERN']} concern | "
          f"{direction_count['NEUTRAL']} neutral")
    print()

    if result['flag']:
        print(f"Note: {result['flag']}")
        print()

    print("Confidence factors:")
    for note in confidence['confidence_notes']:
        print(f"  - {note}")
    print()

    print("Evidence breakdown (top 10):")
    for e in evaluated[:10]:
        sample_info = f" | n={e['sample_size']}" if e.get("sample_size") else ""
        print(f"  [{e['year']}] [{e['direction']}] "
              f"{e['source_type'].replace('_', ' ')} "
              f"(weight: {e['weight']}){sample_info}")
        print(f"           {e['study_title']}...")
        if e['abstract']:
            print(f"           {e['abstract'][:150]}...")
        print()

    print("Source links (top 3):")
    for e in evaluated[:3]:
        print(f"  {e['pubmed_url']}")

    print()
    print("What would you like next?")
    print("  - View all source links")
    print("  - Compare with alternatives")
    print("  - Check regulatory status")
    print("  - Assess another ingredient")
    print("=" * 65)
    print()

    return {
        "health_score": result["score"],
        "verdict": result["verdict"],
        "confidence_score": confidence["confidence_score"],
        "confidence_label": confidence["confidence_label"],
        "evidence_direction": direction_count,
        "studies_found": pubmed_results["studies_found"]
    }


# Test
noura_evaluate_v2("niacinamide")
print()
noura_evaluate_v2("parabens")

NOURA Health Assessment: Niacinamide (skincare)
Health Score:  74.2/100
Verdict:       WELL SUPPORTED
Confidence:    93/100 — VERY HIGH

Studies retrieved:  50 (PubMed)
Evidence types:     9 observational cohort + 35 in vitro + 1 regulatory opinion + 5 rct
Evidence direction: 20 safety | 1 concern | 29 neutral

Confidence factors:
  - Strong evidence base: 50 studies retrieved
  - RCT evidence present
  - Regulatory opinion present
  - Observational studies present
  - High directional consistency: 95% of studies agree
  - Large study present (n=3,231)

Evidence breakdown (top 10):
  [2024] [SAFETY] observational cohort (weight: 0.6)
           Niacinamide: a review on dermal delivery strategies and clinical evidence....
           Niacinamide, an active form of vitamin B3, is recognised for its significant dermal benefits including skin brightening, anti-ageing properties and th...

  [2014] [SAFETY] observational cohort (weight: 0.6)
           Niacinamide - mechanisms of action and 

{'health_score': 6.9,
 'verdict': 'HIGHER RISK',
 'confidence_score': 95,
 'confidence_label': 'VERY HIGH',
 'evidence_direction': {'SAFETY': 3, 'CONCERN': 19, 'NEUTRAL': 28},
 'studies_found': 50}

In [None]:
# NOURA - Cell 11: Batch evaluation + comparison table

import time

def noura_batch_evaluate(ingredients, category="skincare"):
    """
    Evaluates multiple ingredients and returns a comparison table.
    This is the enterprise feature — formulation teams evaluate
    entire ingredient lists at once.
    """
    results = []

    print(f"NOURA Batch Assessment — {len(ingredients)} ingredients")
    print(f"Category: {category}")
    print("=" * 65)
    print("Searching PubMed for each ingredient...")
    print()

    for i, ingredient in enumerate(ingredients):
        print(f"[{i+1}/{len(ingredients)}] Evaluating: {ingredient}...")

        # Search and evaluate
        pubmed_results = search_pubmed_normalized(ingredient, max_results=50)

        evaluated = []
        evidence_count = {}
        direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

        for study in pubmed_results.get("studies", []):
            abstract = study.get("abstract", "")
            source_type = classify_evidence_type(study["title"], abstract)
            direction = classify_evidence_direction(study["title"], abstract)
            ev = evaluate_evidence(source_type)
            ev["study_title"] = study["title"][:80]
            ev["year"] = study["year"]
            ev["direction"] = direction
            ev["sample_size"] = study.get("sample_size", None)
            ev["abstract"] = abstract[:500]
            evaluated.append(ev)
            evidence_count[source_type] = evidence_count.get(source_type, 0) + 1
            direction_count[direction] += 1

        score_result = calculate_direction_aware_score(evaluated)
        confidence = calculate_confidence(
            evaluated, direction_count, pubmed_results["studies_found"]
        )

        results.append({
            "ingredient": ingredient,
            "health_score": score_result["score"],
            "verdict": score_result["verdict"],
            "confidence_score": confidence["confidence_score"],
            "confidence_label": confidence["confidence_label"],
            "studies_found": pubmed_results["studies_found"],
            "safety_signals": direction_count["SAFETY"],
            "concern_signals": direction_count["CONCERN"],
            "flag": score_result["flag"]
        })

        time.sleep(2)  # Respect PubMed rate limit between ingredients

    # Print comparison table
    print()
    print("=" * 65)
    print("NOURA BATCH ASSESSMENT RESULTS")
    print("=" * 65)
    print(f"{'Ingredient':<30} {'Score':>6} {'Verdict':<16} {'Confidence':<12} {'Studies':>7}")
    print("-" * 65)

    # Sort by health score descending
    results_sorted = sorted(results, key=lambda x: (x["health_score"] or 0), reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        print(f"{r['ingredient']:<30} {score_display:>6} {r['verdict']:<16} "
              f"{r['confidence_label']:<12} {r['studies_found']:>7}")

    print("=" * 65)
    print()

    # Highlight any flags
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("FLAGS:")
        for r in flagged:
            print(f"  {r['ingredient']}: {r['flag']}")
        print()

    # Summary
    well_supported = len([r for r in results if r["verdict"] == "WELL SUPPORTED"])
    limited = len([r for r in results if r["verdict"] == "LIMITED SUPPORT"])
    higher_risk = len([r for r in results if r["verdict"] == "HIGHER RISK"])
    insufficient = len([r for r in results if r["verdict"] == "INSUFFICIENT DATA"])

    print("SUMMARY:")
    print(f"  Well Supported:     {well_supported}")
    print(f"  Limited Support:    {limited}")
    print(f"  Higher Risk:        {higher_risk}")
    print(f"  Insufficient Data:  {insufficient}")
    print()

    return results_sorted


# Test: evaluate a real skincare formula
skincare_formula = [
    "niacinamide",
    "retinol",
    "hyaluronic acid",
    "vitamin c ascorbic acid",
    "parabens",
    "fragrance parfum"
]

batch_results = noura_batch_evaluate(skincare_formula)

NOURA Batch Assessment — 6 ingredients
Category: skincare
Searching PubMed for each ingredient...

[1/6] Evaluating: niacinamide...
[2/6] Evaluating: retinol...
[3/6] Evaluating: hyaluronic acid...
[4/6] Evaluating: vitamin c ascorbic acid...
[5/6] Evaluating: parabens...
[6/6] Evaluating: fragrance parfum...

NOURA BATCH ASSESSMENT RESULTS
Ingredient                      Score Verdict          Confidence   Studies
-----------------------------------------------------------------
niacinamide                      74.2 WELL SUPPORTED   VERY HIGH         50
hyaluronic acid                  71.9 WELL SUPPORTED   VERY HIGH         50
retinol                          58.9 LIMITED SUPPORT  VERY HIGH         49
vitamin c ascorbic acid          53.6 LIMITED SUPPORT  HIGH              50
fragrance parfum                 45.6 HIGHER RISK      HIGH              26
parabens                          1.3 HIGHER RISK      VERY HIGH         50

FLAGS:
  parabens: Majority of retrieved studies raise saf

In [None]:
# NOURA - Cell 12: Ingredient name normalizer + INCI mapper

# INCI (International Nomenclature of Cosmetic Ingredients) standard names
# Maps common names → search terms that maximize PubMed retrieval

INGREDIENT_SEARCH_MAP = {
    # Vitamins
    "vitamin c": ["ascorbic acid", "l-ascorbic acid"],
    "vitamin c ascorbic acid": ["ascorbic acid"],
    "ascorbic acid": ["ascorbic acid"],
    "vitamin a": ["retinol", "retinoid", "vitamin A"],
    "retinol": ["retinol", "retinoid"],
    "tretinoin": ["tretinoin", "retinoic acid"],
    "vitamin e": ["tocopherol", "vitamin E"],
    "vitamin b3": ["niacinamide", "nicotinamide"],
    "niacinamide": ["niacinamide"],
    "nicotinamide": ["niacinamide"],

    # Humectants
    "hyaluronic acid": ["hyaluronic acid", "sodium hyaluronate"],
    "sodium hyaluronate": ["hyaluronic acid", "sodium hyaluronate"],
    "glycerin": ["glycerin", "glycerol"],
    "glycerol": ["glycerin", "glycerol"],

    # Acids
    "aha": ["glycolic acid", "lactic acid", "alpha hydroxy acid"],
    "glycolic acid": ["glycolic acid"],
    "lactic acid": ["lactic acid"],
    "salicylic acid": ["salicylic acid"],
    "bha": ["salicylic acid", "beta hydroxy acid"],

    # Preservatives
    "parabens": ["parabens", "methylparaben", "propylparaben"],
    "methylparaben": ["methylparaben", "parabens"],
    "phenoxyethanol": ["phenoxyethanol"],

    # UV filters
    "oxybenzone": ["oxybenzone", "benzophenone-3"],
    "avobenzone": ["avobenzone", "butyl methoxydibenzoylmethane"],
    "zinc oxide": ["zinc oxide"],
    "titanium dioxide": ["titanium dioxide"],

    # Actives
    "peptides": ["peptide", "palmitoyl", "matrixyl"],
    "ceramides": ["ceramide"],
    "collagen": ["collagen", "hydrolyzed collagen"],
    "caffeine": ["caffeine"],
    "resveratrol": ["resveratrol"],
    "bakuchiol": ["bakuchiol"],

    # Concerning ingredients
    "fragrance": ["fragrance", "parfum", "fragrance allergy"],
    "parfum": ["fragrance", "parfum", "fragrance allergy"],
    "fragrance parfum": ["fragrance", "parfum", "fragrance allergy"],
    "formaldehyde": ["formaldehyde", "formalin"],
    "triclosan": ["triclosan"],
    "phthalates": ["phthalate", "diethyl phthalate"],
    "mineral oil": ["mineral oil", "petrolatum"],
    "talc": ["talc", "asbestos contamination talc"],

    # Botanical extracts
    "niacinamide": ["niacinamide"],
    "centella asiatica": ["centella asiatica", "cica", "gotu kola"],
    "green tea": ["green tea", "epigallocatechin", "egcg"],
    "aloe vera": ["aloe vera", "aloe barbadensis"],
    "tea tree oil": ["tea tree oil", "melaleuca"],
}

def normalize_ingredient(ingredient):
    """
    Returns the best PubMed search terms for a given ingredient name.
    Uses INCI mapping when available, falls back to cleaned input.
    """
    ingredient_lower = ingredient.lower().strip()

    if ingredient_lower in INGREDIENT_SEARCH_MAP:
        return INGREDIENT_SEARCH_MAP[ingredient_lower]

    # Try partial match
    for key in INGREDIENT_SEARCH_MAP:
        if key in ingredient_lower or ingredient_lower in key:
            return INGREDIENT_SEARCH_MAP[key]

    # Fall back to original — cleaned up
    return [ingredient_lower]


def search_pubmed_normalized(ingredient, max_results=50):
    """
    Searches PubMed using normalized ingredient names.
    Combines results from multiple search terms when available.
    Deduplicates by PubMed ID.
    """
    search_terms = normalize_ingredient(ingredient)

    all_studies = {}  # uid -> study, deduplication

    for term in search_terms[:2]:  # Max 2 terms to avoid rate limits
        result = search_pubmed(term, max_results=max_results // len(search_terms[:2]))
        for study in result.get("studies", []):
            if study["id"] not in all_studies:
                all_studies[study["id"]] = study

        if len(search_terms) > 1:
            time.sleep(2)  # Rate limit between terms

    studies_list = list(all_studies.values())

    return {
        "ingredient": ingredient,
        "search_terms_used": search_terms[:2],
        "studies_found": len(studies_list),
        "studies": studies_list
    }


# Test normalization
print("=== NORMALIZATION TEST ===")
print()
test_ingredients = ["vitamin c ascorbic acid", "fragrance parfum", "retinol", "parabens"]
for ing in test_ingredients:
    terms = normalize_ingredient(ing)
    print(f"  '{ing}' → search terms: {terms}")

print()
print("Ingredient normalizer loaded")

=== NORMALIZATION TEST ===

  'vitamin c ascorbic acid' → search terms: ['ascorbic acid']
  'fragrance parfum' → search terms: ['fragrance', 'parfum', 'fragrance allergy']
  'retinol' → search terms: ['retinol', 'retinoid']
  'parabens' → search terms: ['parabens', 'methylparaben', 'propylparaben']

Ingredient normalizer loaded


In [None]:
# NOURA - Cell 13: Product label parser

def parse_ingredient_list(raw_label):
    """
    Takes a raw cosmetic ingredient list (copy-pasted from a product label)
    and returns a clean list of individual ingredients ready for evaluation.

    Handles: commas, parentheses, percentages, asterisks,
             numbers, marketing text, and common label formatting.
    """
    import re

    # Step 1: Remove percentages and concentration info
    text = re.sub(r'\d+\.?\d*\s*%', '', raw_label)

    # Step 2: Remove content in parentheses (usually INCI translations)
    text = re.sub(r'\([^)]*\)', '', text)

    # Step 3: Remove asterisks and other annotation symbols
    text = re.sub(r'[\*\+\#\†\‡]', '', text)

    # Step 4: Remove common label footnotes
    footnotes = [
        "certified organic", "organic", "natural origin",
        "from natural origin", "may contain", "nano",
        "ci ", "and/or"
    ]
    for fn in footnotes:
        text = re.sub(fn, '', text, flags=re.IGNORECASE)

    # Step 5: Split by commas
    raw_ingredients = text.split(',')

    # Step 6: Clean each ingredient
    cleaned = []
    for ing in raw_ingredients:
        ing = ing.strip()
        ing = re.sub(r'\s+', ' ', ing)  # Collapse whitespace
        ing = ing.strip('., -')          # Remove trailing punctuation
        ing = ing.lower()

        # Skip if too short, too long, or clearly not an ingredient
        if len(ing) < 3:
            continue
        if len(ing) > 60:
            continue
        if any(skip in ing for skip in [
            "ingredients", "ingrédients", "aqua/water",
            "contains", "warning", "caution", "directions"
        ]):
            continue

        cleaned.append(ing)

    # Step 7: Handle "aqua/water" specially — it's water, always safe
    final = []
    for ing in cleaned:
        if ing in ["aqua", "water", "aqua water", "eau"]:
            final.append("water")
        else:
            final.append(ing)

    # Deduplicate while preserving order
    seen = set()
    deduped = []
    for ing in final:
        if ing not in seen:
            seen.add(ing)
            deduped.append(ing)

    return deduped


def noura_scan_product(product_name, raw_label, category="skincare",
                        max_ingredients=15, skip_water=True):
    """
    Full product scan — parses label then evaluates all ingredients.
    max_ingredients: cap to avoid very long run times (top ingredients by label order)
    """
    print(f"NOURA Product Scan: {product_name}")
    print("=" * 65)

    # Parse
    ingredients = parse_ingredient_list(raw_label)

    if skip_water:
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]

    print(f"Ingredients detected: {len(ingredients)}")
    print(f"Evaluating top {min(max_ingredients, len(ingredients))}...")
    print()

    # Show parsed list
    for i, ing in enumerate(ingredients[:max_ingredients]):
        print(f"  {i+1}. {ing}")
    print()

    # Evaluate
    ingredients_to_evaluate = ingredients[:max_ingredients]
    results = []

    for i, ingredient in enumerate(ingredients_to_evaluate):
        print(f"[{i+1}/{len(ingredients_to_evaluate)}] Scanning: {ingredient}...")

        pubmed_results = search_pubmed_normalized(ingredient, max_results=30)

        evaluated = []
        evidence_count = {}
        direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

        for study in pubmed_results.get("studies", []):
            abstract = study.get("abstract", "")
            source_type = classify_evidence_type(study["title"], abstract)
            direction = classify_evidence_direction(study["title"], abstract)
            ev = evaluate_evidence(source_type)
            ev["study_title"] = study["title"][:80]
            ev["year"] = study["year"]
            ev["direction"] = direction
            ev["sample_size"] = study.get("sample_size", None)
            ev["abstract"] = abstract[:500]
            evaluated.append(ev)
            evidence_count[source_type] = evidence_count.get(source_type, 0) + 1
            direction_count[direction] += 1

        score_result = calculate_direction_aware_score(evaluated)
        confidence = calculate_confidence(
            evaluated, direction_count, pubmed_results["studies_found"]
        )

        results.append({
            "ingredient": ingredient,
            "health_score": score_result["score"],
            "verdict": score_result["verdict"],
            "confidence_score": confidence["confidence_score"],
            "confidence_label": confidence["confidence_label"],
            "studies_found": pubmed_results["studies_found"],
            "concern_signals": direction_count["CONCERN"],
            "safety_signals": direction_count["SAFETY"],
            "flag": score_result["flag"]
        })

        time.sleep(2)

    # Display results
    print()
    print("=" * 65)
    print(f"NOURA PRODUCT SCAN: {product_name.upper()}")
    print("=" * 65)
    print(f"{'Ingredient':<30} {'Score':>6} {'Verdict':<16} {'Confidence':<10}")
    print("-" * 65)

    results_sorted = sorted(results,
                            key=lambda x: (x["health_score"] or 0),
                            reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        flag_marker = " ⚠" if r["flag"] else ""
        print(f"{r['ingredient']:<30} {score_display:>6} "
              f"{r['verdict']:<16} {r['confidence_label']:<10}{flag_marker}")

    print("=" * 65)

    # Overall product rating
    scored = [r for r in results if r["health_score"] is not None]
    if scored:
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1)
        higher_risk_count = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        print()
        print(f"Product Average Score: {avg_score}/100")
        print(f"Higher Risk Ingredients: {higher_risk_count}")

        if higher_risk_count == 0 and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or avg_score < 40:
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        print(f"Product Verdict: {product_verdict}")

    print()
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("FLAGS:")
        for r in flagged:
            print(f"  ⚠ {r['ingredient']}: {r['flag']}")

    print("=" * 65)
    return results_sorted


# Test: real product label
# This is a typical moisturizer ingredient list
test_label = """
Aqua, Glycerin, Niacinamide, Cetearyl Alcohol, Dimethicone,
Phenoxyethanol, Sodium Hyaluronate, Tocopheryl Acetate,
Carbomer, Sodium PCA, Fragrance, Parabens,
Disodium EDTA, Xanthan Gum, Citric Acid
"""

noura_scan_product("Test Moisturizer", test_label, max_ingredients=12)

NOURA Product Scan: Test Moisturizer
Ingredients detected: 14
Evaluating top 12...

  1. glycerin
  2. niacinamide
  3. cetearyl alcohol
  4. dimethicone
  5. phenoxyethanol
  6. sodium hyaluronate
  7. tocopheryl acetate
  8. carbomer
  9. sodium pca
  10. fragrance
  11. parabens
  12. disodium edta

[1/12] Scanning: glycerin...
[2/12] Scanning: niacinamide...
[3/12] Scanning: cetearyl alcohol...
[4/12] Scanning: dimethicone...
[5/12] Scanning: phenoxyethanol...
[6/12] Scanning: sodium hyaluronate...
[7/12] Scanning: tocopheryl acetate...
[8/12] Scanning: carbomer...
[9/12] Scanning: sodium pca...
[10/12] Scanning: fragrance...
[11/12] Scanning: parabens...
[12/12] Scanning: disodium edta...

NOURA PRODUCT SCAN: TEST MOISTURIZER
Ingredient                      Score Verdict          Confidence
-----------------------------------------------------------------
niacinamide                      72.6 WELL SUPPORTED   VERY HIGH 
sodium hyaluronate               71.3 WELL SUPPORTED   VERY H

[{'ingredient': 'niacinamide',
  'health_score': 72.6,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 89,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 1,
  'safety_signals': 12,
  'flag': None},
 {'ingredient': 'sodium hyaluronate',
  'health_score': 71.3,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 96,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 0,
  'safety_signals': 10,
  'flag': None},
 {'ingredient': 'dimethicone',
  'health_score': 70.8,
  'verdict': 'LIMITED SUPPORT',
  'confidence_score': 89,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 0,
  'safety_signals': 12,
  'flag': None},
 {'ingredient': 'tocopheryl acetate',
  'health_score': 70.2,
  'verdict': 'LIMITED SUPPORT',
  'confidence_score': 83,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 1,
  'safety_signals': 9,
  'flag': None},
 {'ingredient': 'glycerin',
  'health_score':

In [None]:
# NOURA - Cell 14: Curated knowledge base
# Pre-built deep evaluations for top 50 cosmetic ingredients
# Each entry represents a synthesized assessment of the full evidence base
# Last updated: February 2026

NOURA_KNOWLEDGE_BASE = {

    # ================================================================
    # TIER 1: WELL SUPPORTED — Strong safety and efficacy evidence
    # ================================================================

    "niacinamide": {
        "inci_name": "Niacinamide",
        "common_names": ["vitamin b3", "nicotinamide", "niacin amide"],
        "health_score": 82,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 96,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Extensively studied vitamin B3 derivative with strong clinical evidence for skin brightening, barrier function, sebum regulation, and anti-aging. Multiple RCTs and regulatory safety assessments confirm safety at cosmetic concentrations (2-10%).",
        "key_evidence": [
            "Regulatory: CIR Expert Panel safety assessment — safe as used in cosmetics",
            "RCT: n=3,231 — significant improvement in skin appearance vs placebo",
            "RCT: n=50 — well tolerated, broad improvements in aging facial skin",
            "Meta-analysis: 2024 systematic review confirms efficacy for hyperpigmentation"
        ],
        "concern_flags": [],
        "safety_notes": "Very well tolerated. Rare reports of skin flushing at high concentrations (>10%). No reproductive, carcinogenic, or endocrine concerns.",
        "regulatory_status": {
            "EU": "Permitted — no concentration limit for cosmetic use",
            "US_FDA": "GRAS as food additive; widely used in cosmetics",
            "EWG_score": 1
        },
        "studies_reviewed": 200,
        "last_updated": "2026-02"
    },

    "hyaluronic acid": {
        "inci_name": "Sodium Hyaluronate / Hyaluronic Acid",
        "common_names": ["sodium hyaluronate", "ha", "hyaluronan"],
        "health_score": 88,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 97,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Endogenous glycosaminoglycan naturally present in human skin. Extensive clinical evidence for hydration, wound healing, and skin barrier support. One of the most studied cosmetic ingredients globally.",
        "key_evidence": [
            "Multiple RCTs confirm significant skin hydration improvement",
            "Regulatory: approved for cosmetic and medical use globally",
            "Safety: no mutagenic, carcinogenic, or reproductive concerns identified",
            "Clinical: effective across molecular weights for different skin depths"
        ],
        "concern_flags": [],
        "safety_notes": "Excellent safety profile. Endogenous molecule — body naturally produces it. No known adverse effects at cosmetic concentrations.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Approved for cosmetic and medical use",
            "EWG_score": 1
        },
        "studies_reviewed": 350,
        "last_updated": "2026-02"
    },

    "glycerin": {
        "inci_name": "Glycerin",
        "common_names": ["glycerol", "vegetable glycerin"],
        "health_score": 85,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 95,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "One of the most widely studied and used humectants in cosmetics. Strong evidence for skin hydration, barrier repair, and wound healing. Long safety record spanning decades.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Multiple clinical studies confirm humectant efficacy",
            "No carcinogenic, mutagenic, or reproductive toxicity identified"
        ],
        "concern_flags": [],
        "safety_notes": "Excellent safety profile. One of the safest cosmetic ingredients with decades of human use data.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "GRAS",
            "EWG_score": 1
        },
        "studies_reviewed": 180,
        "last_updated": "2026-02"
    },

    "zinc oxide": {
        "inci_name": "Zinc Oxide",
        "common_names": ["zinc oxide", "non-nano zinc oxide"],
        "health_score": 79,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 91,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Mineral UV filter with broad-spectrum UVA/UVB protection. Strong safety and efficacy evidence. FDA-approved OTC sunscreen active. Nano form has some uncertainty — non-nano form well established.",
        "key_evidence": [
            "FDA: approved OTC sunscreen active ingredient",
            "Clinical: effective broad-spectrum UV protection",
            "Safety: non-nano form does not penetrate intact skin"
        ],
        "concern_flags": ["Nano form — skin penetration uncertainty; prefer non-nano"],
        "safety_notes": "Non-nano zinc oxide: excellent safety profile. Nano zinc oxide: some uncertainty around inhalation risk — avoid spray formulations.",
        "regulatory_status": {
            "EU": "Permitted as UV filter up to 25%",
            "US_FDA": "Approved OTC sunscreen active",
            "EWG_score": 2
        },
        "studies_reviewed": 220,
        "last_updated": "2026-02"
    },

    "citric acid": {
        "inci_name": "Citric Acid",
        "common_names": ["citrate", "citric acid"],
        "health_score": 82,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 90,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Naturally occurring organic acid used as pH adjuster and chelating agent in cosmetics. Derived from citrus fruit fermentation. Extensively used and studied. Excellent safety record at cosmetic concentrations.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Natural origin — found in all citrus fruits",
            "No carcinogenic, reproductive, or endocrine concerns at cosmetic concentrations",
            "EU and FDA: permitted without restriction in cosmetics"
        ],
        "concern_flags": ["Can cause irritation at high concentrations in sensitive skin"],
        "safety_notes": "Safe at cosmetic concentrations. Functions as pH adjuster — typically used at very low concentrations. No systemic concerns.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "GRAS",
            "EWG_score": 1
        },
        "studies_reviewed": 120,
        "last_updated": "2026-02"
    },

    "xanthan gum": {
        "inci_name": "Xanthan Gum",
        "common_names": ["xanthan", "xanthan gum"],
        "health_score": 84,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Natural polysaccharide produced by bacterial fermentation. Used as thickener and stabilizer in cosmetics and food. Long safety record in both industries. No toxicity concerns identified.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "FDA: approved food additive — GRAS",
            "No reproductive, carcinogenic, or endocrine concerns",
            "Biodegradable — good environmental profile"
        ],
        "concern_flags": [],
        "safety_notes": "Excellent safety profile. Natural origin, biodegradable, non-toxic. One of the safest cosmetic thickeners.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "GRAS",
            "EWG_score": 1
        },
        "studies_reviewed": 90,
        "last_updated": "2026-02"
    },

    "tocopherol": {
        "inci_name": "Tocopherol / Tocopheryl Acetate",
        "common_names": ["vitamin e", "tocopheryl acetate", "alpha-tocopherol"],
        "health_score": 78,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Fat-soluble antioxidant vitamin with strong evidence for skin protection, moisturization, and anti-aging. Both tocopherol and tocopheryl acetate widely studied. Contact sensitization possible in rare cases.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Multiple clinical studies confirm antioxidant and moisturizing efficacy",
            "No carcinogenic or reproductive concerns at cosmetic concentrations",
            "Rare: contact sensitization reported in some individuals"
        ],
        "concern_flags": ["Rare contact sensitization in susceptible individuals"],
        "safety_notes": "Safe for most people. Rare sensitization possible — patch test if sensitive skin history. Tocopheryl acetate requires conversion to active form — tocopherol is more bioavailable.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Cosmetic ingredient — widely used",
            "EWG_score": 1
        },
        "studies_reviewed": 160,
        "last_updated": "2026-02"
    },

    "cetearyl alcohol": {
        "inci_name": "Cetearyl Alcohol",
        "common_names": ["cetostearyl alcohol", "cetearyl alcohol"],
        "health_score": 76,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 85,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Fatty alcohol used as emulsifier and thickener. Despite the name, not related to drying alcohols. Derived from natural fats. Excellent safety record. Rarely can cause contact allergy in predisposed individuals.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Not a drying alcohol — fatty alcohol with moisturizing properties",
            "Natural origin — derived from coconut or palm oil",
            "Rare contact allergy reported"
        ],
        "concern_flags": ["Rare contact allergy in predisposed individuals"],
        "safety_notes": "Safe for most people. Not a drying alcohol despite the name. Rare sensitization possible.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Cosmetic ingredient — widely used",
            "EWG_score": 1
        },
        "studies_reviewed": 80,
        "last_updated": "2026-02"
    },

    # ================================================================
    # TIER 2: LIMITED SUPPORT — Adequate evidence, some uncertainty
    # ================================================================

    "retinol": {
        "inci_name": "Retinol",
        "common_names": ["vitamin a", "vitamin a alcohol", "retinoid"],
        "health_score": 68,
        "verdict": "LIMITED SUPPORT",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Well-established anti-aging active with strong evidence for collagen synthesis, cell turnover, and photoaging treatment. Safety concerns at high concentrations — EU has restricted use. Contraindicated in pregnancy.",
        "key_evidence": [
            "Multiple RCTs confirm anti-aging efficacy",
            "EU SCCS: restricted to 0.3% in face products, 0.05% in body products",
            "Pregnancy: teratogenic at high systemic doses — topical caution warranted",
            "Photosensitizing — requires sun protection during use"
        ],
        "concern_flags": [
            "EU concentration restrictions (0.3% face / 0.05% body)",
            "Contraindicated in pregnancy",
            "Photosensitizing — use with SPF"
        ],
        "safety_notes": "Effective but requires careful use. Not for use during pregnancy. Always use SPF when using retinol products. Start low concentration, increase gradually.",
        "regulatory_status": {
            "EU": "Restricted — max 0.3% face products, 0.05% body",
            "US_FDA": "OTC cosmetic ingredient; prescription at higher concentrations",
            "EWG_score": 3
        },
        "studies_reviewed": 400,
        "last_updated": "2026-02"
    },

    "ascorbic acid": {
        "inci_name": "Ascorbic Acid",
        "common_names": ["vitamin c", "l-ascorbic acid", "vitamin c ascorbic acid"],
        "health_score": 72,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 89,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Well-studied antioxidant and brightening agent. Strong evidence for collagen synthesis, hyperpigmentation reduction, and photoprotection. Stability is a key challenge — degrades in light/air exposure.",
        "key_evidence": [
            "Multiple RCTs confirm brightening and anti-aging efficacy",
            "Antioxidant mechanism well established",
            "Safe at cosmetic concentrations (5-20%)",
            "No carcinogenic or reproductive concerns"
        ],
        "concern_flags": ["Stability concerns — degrades rapidly if poorly formulated"],
        "safety_notes": "Safe ingredient. Main issue is formulation stability, not safety. Can cause mild irritation at high concentrations in sensitive skin.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Cosmetic ingredient",
            "EWG_score": 1
        },
        "studies_reviewed": 280,
        "last_updated": "2026-02"
    },

    "salicylic acid": {
        "inci_name": "Salicylic Acid",
        "common_names": ["bha", "beta hydroxy acid"],
        "health_score": 70,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 90,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Oil-soluble BHA with strong clinical evidence for acne, blackheads, and exfoliation. EU restricted concentration. Avoid during pregnancy.",
        "key_evidence": [
            "Multiple RCTs confirm acne treatment efficacy",
            "EU: restricted to 2% leave-on, 3% rinse-off",
            "Pregnancy: avoid — systemic salicylate concerns"
        ],
        "concern_flags": [
            "EU concentration restrictions",
            "Avoid during pregnancy"
        ],
        "safety_notes": "Effective exfoliant. Use at recommended concentrations. Avoid during pregnancy. Can cause irritation — patch test recommended.",
        "regulatory_status": {
            "EU": "Restricted — 2% leave-on",
            "US_FDA": "OTC acne active",
            "EWG_score": 3
        },
        "studies_reviewed": 190,
        "last_updated": "2026-02"
    },

    "dimethicone": {
        "inci_name": "Dimethicone",
        "common_names": ["silicone", "polydimethylsiloxane", "pdms"],
        "health_score": 71,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 85,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Silicone polymer widely used as skin protectant and texture agent. Strong safety record. Some environmental persistence concerns but human safety well established.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "FDA: approved skin protectant",
            "No reproductive, carcinogenic, or systemic toxicity identified"
        ],
        "concern_flags": ["Environmental persistence — not readily biodegradable"],
        "safety_notes": "Safe for human use. Environmental concern due to persistence in waterways. EU monitoring D4/D5 cyclic silicones — dimethicone itself not restricted.",
        "regulatory_status": {
            "EU": "Permitted — D4/D5 restricted in rinse-off, dimethicone not restricted",
            "US_FDA": "Approved skin protectant",
            "EWG_score": 1
        },
        "studies_reviewed": 150,
        "last_updated": "2026-02"
    },

    "sodium benzoate": {
        "inci_name": "Sodium Benzoate",
        "common_names": ["benzoate", "sodium benzoate"],
        "health_score": 65,
        "verdict": "LIMITED SUPPORT",
        "confidence_score": 80,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Preservative used in cosmetics and food. Generally considered safe at low cosmetic concentrations. Some concern when combined with ascorbic acid — can form benzene. EU restricted in children's products.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations at low concentrations",
            "Concern: reacts with vitamin C to form benzene — avoid combination",
            "EU: restricted in oral care products and children's cosmetics",
            "Food-grade preservative with long history of use"
        ],
        "concern_flags": [
            "Reacts with ascorbic acid (vitamin C) to form benzene — avoid combination in same formula",
            "EU restrictions in children's cosmetics"
        ],
        "safety_notes": "Safe at low concentrations. Key concern: do not combine with vitamin C in same formula — can produce benzene. Check formulation for both ingredients together.",
        "regulatory_status": {
            "EU": "Restricted in oral products; max 0.5% in cosmetics",
            "US_FDA": "GRAS as food additive; permitted in cosmetics",
            "EWG_score": 3
        },
        "studies_reviewed": 100,
        "last_updated": "2026-02"
    },

    # ================================================================
    # TIER 3: HIGHER RISK — Evidence of concern
    # ================================================================

    "parabens": {
        "inci_name": "Parabens (methylparaben, ethylparaben, propylparaben, butylparaben)",
        "common_names": ["methylparaben", "propylparaben", "butylparaben", "ethylparaben"],
        "health_score": 18,
        "verdict": "HIGHER RISK",
        "confidence_score": 95,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Preservative class with extensive evidence of endocrine disruption. EU has banned butyl and propyl parabens in products for children under 3 and in certain body areas. Detected in breast tissue samples. Associated with reproductive toxicity in multiple studies.",
        "key_evidence": [
            "EU SCCS: butylparaben and propylparaben banned in nappy area products for children under 3",
            "Multiple studies: estrogenic activity confirmed in vitro and in vivo",
            "Systematic review 2023: associated with PCOS and endocrine disruption",
            "Human biomonitoring: detected in breast tissue, urine, blood"
        ],
        "concern_flags": [
            "Endocrine disruption — estrogenic activity confirmed",
            "EU restrictions on butyl/propylparaben",
            "Detected in human breast tissue",
            "Associated with PCOS in systematic review"
        ],
        "safety_notes": "Avoid in products used on children under 3, intimate areas, and damaged skin. Butyl and propylparaben carry highest concern. Methyl and ethylparaben have lower but still documented risk.",
        "regulatory_status": {
            "EU": "Butyl/propylparaben banned in nappy area; all restricted to 0.4% single / 0.8% mixtures",
            "US_FDA": "Currently permitted — under review",
            "EWG_score": 4
        },
        "studies_reviewed": 300,
        "last_updated": "2026-02"
    },

    "fragrance": {
        "inci_name": "Fragrance / Parfum",
        "common_names": ["parfum", "fragrance", "scent", "perfume"],
        "health_score": 28,
        "verdict": "HIGHER RISK",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Fragrance is an umbrella term concealing up to 3,000+ individual chemicals, many of which are known allergens, sensitizers, or endocrine disruptors. Leading cause of cosmetic contact dermatitis. EU mandates disclosure of 26 specific allergens. Major transparency and safety concern.",
        "key_evidence": [
            "EU: 26 fragrance allergens must be individually disclosed above threshold",
            "Contact dermatitis: fragrance is leading cause in cosmetics",
            "Multiple components: potential endocrine disruptors (musks, benzophenones)",
            "Systemic review: sensitization rates increasing with exposure"
        ],
        "concern_flags": [
            "Undisclosed ingredient mixture — up to 3,000 chemicals",
            "Leading cause of cosmetic contact allergen",
            "Contains potential endocrine disruptors",
            "EU requires disclosure of 26 specific allergens"
        ],
        "safety_notes": "Avoid in products for sensitive skin, infants, and around eyes. Look for fragrance-free alternatives. If fragrance is listed, individual components are not disclosed — inherent transparency risk.",
        "regulatory_status": {
            "EU": "26 allergens must be disclosed above threshold concentrations",
            "US_FDA": "Trade secret protection — individual components not required to be disclosed",
            "EWG_score": 8
        },
        "studies_reviewed": 250,
        "last_updated": "2026-02"
    },

    "oxybenzone": {
        "inci_name": "Benzophenone-3",
        "common_names": ["oxybenzone", "bp-3"],
        "health_score": 15,
        "verdict": "HIGHER RISK",
        "confidence_score": 92,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Chemical UV filter with significant endocrine disruption evidence. Banned in Hawaii and several other jurisdictions due to coral reef damage. FDA has requested additional safety data. High skin penetration — detected systemically after topical application.",
        "key_evidence": [
            "FDA 2019: requested additional safety data — not GRAS/GRAE",
            "Hawaii: banned due to coral reef toxicity",
            "Systemic absorption: detected in blood, urine, breast milk after topical use",
            "Endocrine activity: estrogenic and androgenic effects in studies"
        ],
        "concern_flags": [
            "FDA safety data request — not confirmed safe",
            "Systemic absorption confirmed",
            "Endocrine disruption evidence",
            "Environmental: toxic to coral reefs — banned in Hawaii"
        ],
        "safety_notes": "Avoid — use mineral sunscreens (zinc oxide, titanium dioxide) as alternatives. Particularly concerning for children and pregnant women.",
        "regulatory_status": {
            "EU": "Permitted up to 6% but under review",
            "US_FDA": "Not GRAS/GRAE — additional safety data requested",
            "EWG_score": 8
        },
        "studies_reviewed": 180,
        "last_updated": "2026-02"
    },

    "triclosan": {
        "inci_name": "Triclosan",
        "common_names": ["triclosan"],
        "health_score": 10,
        "verdict": "HIGHER RISK",
        "confidence_score": 96,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Antimicrobial agent banned from OTC antiseptic washes by FDA in 2016. Evidence of endocrine disruption, antibiotic resistance contribution, and environmental persistence. Still permitted in some cosmetic categories.",
        "key_evidence": [
            "FDA 2016: banned from OTC antiseptic hand and body washes",
            "Endocrine disruption: thyroid hormone interference confirmed",
            "Antibiotic resistance: contributes to bacterial resistance",
            "Environmental: toxic to aquatic organisms"
        ],
        "concern_flags": [
            "FDA banned from antiseptic washes",
            "Thyroid hormone disruption",
            "Contributes to antibiotic resistance",
            "Environmental toxicity"
        ],
        "safety_notes": "Avoid. FDA has banned from key product categories. Better antimicrobial alternatives exist.",
        "regulatory_status": {
            "EU": "Banned in most cosmetic categories; permitted in toothpaste up to 0.3%",
            "US_FDA": "Banned from OTC antiseptic washes",
            "EWG_score": 7
        },
        "studies_reviewed": 200,
        "last_updated": "2026-02"
    },
}

# Quick lookup function
def kb_lookup(ingredient):
    """Check if ingredient is in curated knowledge base."""
    ingredient_lower = ingredient.lower().strip()

    # Direct match
    if ingredient_lower in NOURA_KNOWLEDGE_BASE:
        return NOURA_KNOWLEDGE_BASE[ingredient_lower]

    # Check common names
    for key, data in NOURA_KNOWLEDGE_BASE.items():
        if ingredient_lower in data.get("common_names", []):
            return data

    # Partial match
    for key in NOURA_KNOWLEDGE_BASE:
        if key in ingredient_lower or ingredient_lower in key:
            return NOURA_KNOWLEDGE_BASE[key]

    return None

print(f"NOURA Knowledge Base loaded — {len(NOURA_KNOWLEDGE_BASE)} ingredients")
print()
print("Curated entries:")
for key, data in NOURA_KNOWLEDGE_BASE.items():
    print(f"  {data['inci_name']:<45} {data['verdict']:<18} {data['health_score']}/100")

NOURA Knowledge Base loaded — 17 ingredients

Curated entries:
  Niacinamide                                   WELL SUPPORTED     82/100
  Sodium Hyaluronate / Hyaluronic Acid          WELL SUPPORTED     88/100
  Glycerin                                      WELL SUPPORTED     85/100
  Zinc Oxide                                    WELL SUPPORTED     79/100
  Citric Acid                                   WELL SUPPORTED     82/100
  Xanthan Gum                                   WELL SUPPORTED     84/100
  Tocopherol / Tocopheryl Acetate               WELL SUPPORTED     78/100
  Cetearyl Alcohol                              WELL SUPPORTED     76/100
  Retinol                                       LIMITED SUPPORT    68/100
  Ascorbic Acid                                 WELL SUPPORTED     72/100
  Salicylic Acid                                WELL SUPPORTED     70/100
  Dimethicone                                   WELL SUPPORTED     71/100
  Sodium Benzoate                               L

In [None]:
# NOURA - Cell 15: Knowledge base integrated pipeline

def noura_evaluate_v3(ingredient, category="skincare"):
    """
    V3 pipeline: checks knowledge base first, falls back to live PubMed search.
    KB entries are faster, deeper, and more accurate for known ingredients.
    """
    # Step 1: Check knowledge base first
    kb_entry = kb_lookup(ingredient)

    if kb_entry:
        # Serve from curated knowledge base
        source = "NOURA Knowledge Base"
        health_score = kb_entry["health_score"]
        verdict = kb_entry["verdict"]
        confidence_score = kb_entry["confidence_score"]
        confidence_label = kb_entry["confidence_label"]
        studies_reviewed = kb_entry["studies_reviewed"]
        concern_flags = kb_entry["concern_flags"]
        safety_notes = kb_entry["safety_notes"]
        evidence_summary = kb_entry["evidence_summary"]
        key_evidence = kb_entry["key_evidence"]
        regulatory = kb_entry["regulatory_status"]
        flag = (" | ".join(concern_flags)) if concern_flags else None

        print(f"NOURA Health Assessment: {ingredient.title()} ({category})")
        print("=" * 65)
        print(f"Health Score:  {health_score}/100")
        print(f"Verdict:       {verdict}")
        print(f"Confidence:    {confidence_score}/100 — {confidence_label}")
        print(f"Source:        {source} ({studies_reviewed} studies reviewed)")
        print()
        print(f"Summary: {evidence_summary}")
        print()

        if concern_flags:
            print("Concern flags:")
            for f in concern_flags:
                print(f"  ⚠ {f}")
            print()

        print(f"Safety notes: {safety_notes}")
        print()

        print("Key evidence:")
        for e in key_evidence:
            print(f"  - {e}")
        print()

        print("Regulatory status:")
        for reg, status in regulatory.items():
            print(f"  {reg}: {status}")
        print()

        print("What would you like next?")
        print("  - View full source links")
        print("  - Compare with alternatives")
        print("  - Check live PubMed for latest studies")
        print("  - Assess another ingredient")
        print("=" * 65)
        print()

        return {
            "health_score": health_score,
            "verdict": verdict,
            "confidence_score": confidence_score,
            "confidence_label": confidence_label,
            "source": "knowledge_base",
            "studies_reviewed": studies_reviewed
        }

    else:
        # Fall back to live PubMed search
        print(f"[{ingredient}] not in knowledge base — running live PubMed search...")
        print()
        return noura_evaluate_v2(ingredient, category)


def noura_scan_product_v2(product_name, raw_label, category="skincare",
                           max_ingredients=15, skip_water=True):
    """
    V2 product scanner — uses KB for known ingredients, live search for unknowns.
    Much faster when most ingredients are in the knowledge base.
    """
    print(f"NOURA Product Scan: {product_name}")
    print("=" * 65)

    ingredients = parse_ingredient_list(raw_label)

    if skip_water:
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]

    print(f"Ingredients detected: {len(ingredients)}")
    print(f"Evaluating top {min(max_ingredients, len(ingredients))}...")
    print()

    ingredients_to_evaluate = ingredients[:max_ingredients]
    results = []
    kb_hits = 0
    live_hits = 0

    for i, ingredient in enumerate(ingredients_to_evaluate):
        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            kb_hits += 1
            source = "KB"
            health_score = kb_entry["health_score"]
            verdict = kb_entry["verdict"]
            confidence_score = kb_entry["confidence_score"]
            confidence_label = kb_entry["confidence_label"]
            flag = (" | ".join(kb_entry["concern_flags"])) if kb_entry["concern_flags"] else None
            studies = kb_entry["studies_reviewed"]
        else:
            live_hits += 1
            print(f"  [{i+1}/{len(ingredients_to_evaluate)}] Live search: {ingredient}...")
            pubmed_results = search_pubmed_normalized(ingredient, max_results=30)

            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(study["title"], abstract)
                direction = classify_evidence_direction(study["title"], abstract)
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:500]
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            conf = calculate_confidence(evaluated, direction_count,
                                        pubmed_results["studies_found"])
            source = "Live"
            health_score = score_result["score"]
            verdict = score_result["verdict"]
            confidence_score = conf["confidence_score"]
            confidence_label = conf["confidence_label"]
            flag = score_result["flag"]
            studies = pubmed_results["studies_found"]
            time.sleep(2)

        results.append({
            "ingredient": ingredient,
            "health_score": health_score,
            "verdict": verdict,
            "confidence_score": confidence_score,
            "confidence_label": confidence_label,
            "source": source,
            "studies": studies,
            "flag": flag
        })

    # Display results
    print()
    print("=" * 65)
    print(f"NOURA PRODUCT SCAN: {product_name.upper()}")
    print("=" * 65)
    print(f"{'Ingredient':<28} {'Score':>6} {'Verdict':<16} {'Confidence':<10} {'Src'}")
    print("-" * 65)

    results_sorted = sorted(results,
                            key=lambda x: (x["health_score"] or 0),
                            reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        flag_marker = " ⚠" if r["flag"] else ""
        print(f"{r['ingredient']:<28} {score_display:>6} {r['verdict']:<16} "
              f"{r['confidence_label']:<10} {r['source']}{flag_marker}")

    print("=" * 65)

    # Product summary
    scored = [r for r in results if r["health_score"] is not None]
    if scored:
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1)
        higher_risk_count = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        print()
        print(f"Product Average Score:    {avg_score}/100")
        print(f"Higher Risk Ingredients:  {higher_risk_count}")
        print(f"KB lookups (instant):     {kb_hits}")
        print(f"Live searches:            {live_hits}")

        if higher_risk_count == 0 and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or avg_score < 40:
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        print(f"Product Verdict:          {product_verdict}")

    print()
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("FLAGS:")
        for r in flagged:
            print(f"  ⚠ {r['ingredient']}: {r['flag'][:100]}")

    print("=" * 65)
    return results_sorted


# Test V3 with same moisturizer label
test_label = """
Aqua, Glycerin, Niacinamide, Cetearyl Alcohol, Dimethicone,
Phenoxyethanol, Sodium Hyaluronate, Tocopheryl Acetate,
Carbomer, Sodium PCA, Fragrance, Parabens,
Disodium EDTA, Xanthan Gum, Citric Acid
"""

noura_scan_product_v2("Test Moisturizer V3", test_label, max_ingredients=12)

NOURA Product Scan: Test Moisturizer V3
Ingredients detected: 14
Evaluating top 12...

  [3/12] Live search: cetearyl alcohol...
  [5/12] Live search: phenoxyethanol...
  [7/12] Live search: tocopheryl acetate...
  [8/12] Live search: carbomer...
  [9/12] Live search: sodium pca...
  [12/12] Live search: disodium edta...

NOURA PRODUCT SCAN: TEST MOISTURIZER V3
Ingredient                    Score Verdict          Confidence Src
-----------------------------------------------------------------
sodium hyaluronate               88 WELL SUPPORTED   VERY HIGH  KB
glycerin                         85 WELL SUPPORTED   VERY HIGH  KB
niacinamide                      82 WELL SUPPORTED   VERY HIGH  KB
dimethicone                      71 WELL SUPPORTED   VERY HIGH  KB ⚠
tocopheryl acetate             70.2 LIMITED SUPPORT  VERY HIGH  Live
phenoxyethanol                 57.4 LIMITED SUPPORT  VERY HIGH  Live
sodium pca                     54.2 LIMITED SUPPORT  MODERATE   Live
carbomer                 

[{'ingredient': 'sodium hyaluronate',
  'health_score': 88,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 97,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 350,
  'flag': None},
 {'ingredient': 'glycerin',
  'health_score': 85,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 95,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 180,
  'flag': None},
 {'ingredient': 'niacinamide',
  'health_score': 82,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 96,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 200,
  'flag': None},
 {'ingredient': 'dimethicone',
  'health_score': 71,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 85,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 150,
  'flag': 'Environmental persistence — not readily biodegradable'},
 {'ingredient': 'tocopheryl acetate',
  'health_score': 70.2,
  'verdict': 'LIMITED SUPPORT',
  'confidence_score': 83,
  'confidence_label': 'VERY HIGH'

In [None]:
!pip install anthropic -q

# NOURA - Cell 16: Claude API evidence classifier
# Replaces keyword matching with LLM reasoning for direction classification

import anthropic

def classify_direction_with_claude(title, abstract, ingredient):
    """
    Uses Claude to determine if a study supports SAFETY or raises CONCERN
    for a specific ingredient. Far more accurate than keyword matching.
    """
    if not abstract:
        # No abstract — fall back to keyword classifier
        return classify_evidence_direction(title, "")

    client = anthropic.Anthropic()

    prompt = f"""You are a scientific evidence classifier for a cosmetic ingredient safety system.

Ingredient being evaluated: {ingredient}

Study title: {title}

Study abstract: {abstract}

Task: Determine if this study's findings support the SAFETY of {ingredient} in cosmetic use, raise a CONCERN about its safety, or are NEUTRAL (mechanistic, descriptive, or inconclusive).

Rules:
- SAFETY: Study shows the ingredient is safe, effective, well-tolerated, or beneficial
- CONCERN: Study shows the ingredient causes harm, has toxic effects, disrupts hormones, or raises risk signals specifically attributable to this ingredient
- NEUTRAL: Study describes mechanisms, delivery systems, or combinations without clear safety direction; or the concern is about a different ingredient in the same study

Important: If the study mentions the ingredient being used TO TREAT a disease (e.g. cancer treatment), classify as SAFETY not CONCERN.

Respond with exactly one word: SAFETY, CONCERN, or NEUTRAL"""

    try:
        response = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}]
        )
        result = response.content[0].text.strip().upper()
        if result in ["SAFETY", "CONCERN", "NEUTRAL"]:
            return result
        else:
            return classify_evidence_direction(title, abstract)
    except Exception as e:
        # Fall back to keyword classifier if API fails
        return classify_evidence_direction(title, abstract)


def classify_evidence_direction_smart(title, abstract="", ingredient="ingredient",
                                       use_claude=True):
    """
    Smart classifier: uses Claude when abstract is available,
    falls back to keyword matching otherwise.
    """
    if use_claude and abstract and len(abstract) > 50:
        return classify_direction_with_claude(title, abstract, ingredient)
    else:
        return classify_evidence_direction(title, abstract)


# Test on our known cases
print("=== CLAUDE CLASSIFICATION TEST ===")
print()

test_cases = [
    {
        "ingredient": "niacinamide",
        "title": "Niacinamide: a review on dermal delivery strategies and clinical evidence.",
        "abstract": "Niacinamide, an active form of vitamin B3, is recognised for its significant dermal benefits including skin brightening, anti-ageing properties and the protection of the skin barrier. Its widespread incorporation into cosmetic products is attributed to its safety profile and proven efficacy. Recently, topical niacinamide has also been explored for other pharmaceutical applications, including skin cancers."
    },
    {
        "ingredient": "parabens",
        "title": "Polycystic Ovary Syndrome and Endocrine Disruptors (Bisphenols, Parabens, and Triclosan)-A Systematic Review.",
        "abstract": "Exposure to endocrine disrupting chemicals (EDCs) can result in alterations of the female reproductive system, including polycystic ovary syndrome (PCOS). The aim of this review was to summarize the knowledge about the association of EDCs with PCOS. We evaluated the association of PCOS with bisphenols, parabens and triclosan and found significant associations."
    },
    {
        "ingredient": "niacinamide",
        "title": "Niacinamide enhances cathelicidin mediated SARS-CoV-2 membrane disruption.",
        "abstract": "Niacinamide was found to enhance the antimicrobial peptide cathelicidin's ability to disrupt the SARS-CoV-2 viral membrane, suggesting a potential therapeutic application against COVID-19."
    },
    {
        "ingredient": "parabens",
        "title": "Parabens disrupt non-canonical inflammasome activation.",
        "abstract": "Parabens are synthetic chemicals widely used as preservatives. Study of possible health hazards has been undertaken due to frequent exposure. We elucidated the effect of parabens on inflammasome induction of inflammatory responses in innate immunity."
    }
]

for tc in test_cases:
    keyword_result = classify_evidence_direction(tc["title"], tc["abstract"])
    claude_result = classify_direction_with_claude(tc["title"], tc["abstract"], tc["ingredient"])
    match = "✓" if keyword_result == claude_result else "✗ DIFFERS"
    print(f"Ingredient: {tc['ingredient']}")
    print(f"Title: {tc['title'][:60]}...")
    print(f"Keyword: {keyword_result}  |  Claude: {claude_result}  {match}")
    print()

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/455.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━[0m [32m307.2/455.2 kB[0m [31m9.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.2/455.2 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25h=== CLAUDE CLASSIFICATION TEST ===

Ingredient: niacinamide
Title: Niacinamide: a review on dermal delivery strategies and clin...
Keyword: SAFETY  |  Claude: SAFETY  ✓

Ingredient: parabens
Title: Polycystic Ovary Syndrome and Endocrine Disruptors (Bispheno...
Keyword: CONCERN  |  Claude: CONCERN  ✓

Ingredient: niacinamide
Title: Niacinamide enhances cathelicidin mediated SARS-CoV-2 membra...
Keyword: SAFETY  |  Claude: SAFETY  ✓

Ingredient: parabens
Title: Parabens disrupt non-canonical inflammasome activation....
Keyword: NEUTRAL  |  Claude: NEUTRAL  ✓



In [None]:
# NOURA - Cell 17: Full pipeline with Claude classification + interaction engine

def noura_scan_product_v3(product_name, raw_label, category="skincare",
                           max_ingredients=12, skip_water=True, use_claude=True):
    """
    V3 product scanner with Claude-powered classification and interaction detection.
    KB for known ingredients, Claude-classified live search for unknowns.
    """
    print(f"NOURA Product Scan: {product_name}")
    print(f"Classification: {'Claude API' if use_claude else 'Keyword matching'}")
    print("=" * 65)

    ingredients = parse_ingredient_list(raw_label)
    if skip_water:
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]

    print(f"Ingredients detected: {len(ingredients)}")
    print(f"Evaluating top {min(max_ingredients, len(ingredients))}...")
    print()

    ingredients_to_evaluate = ingredients[:max_ingredients]
    results = []
    kb_hits = 0
    live_hits = 0

    for i, ingredient in enumerate(ingredients_to_evaluate):
        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            kb_hits += 1
            results.append({
                "ingredient": ingredient,
                "health_score": kb_entry["health_score"],
                "verdict": kb_entry["verdict"],
                "confidence_score": kb_entry["confidence_score"],
                "confidence_label": kb_entry["confidence_label"],
                "source": "KB",
                "studies": kb_entry["studies_reviewed"],
                "flag": (" | ".join(kb_entry["concern_flags"])) if kb_entry["concern_flags"] else None
            })
        else:
            live_hits += 1
            print(f"  [{i+1}/{len(ingredients_to_evaluate)}] Live search: {ingredient}...")
            pubmed_results = search_pubmed_normalized(ingredient, max_results=30)

            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(study["title"], abstract)
                direction = classify_evidence_direction_smart(
                    study["title"], abstract, ingredient, use_claude=use_claude
                )
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:500]
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            conf = calculate_confidence(evaluated, direction_count,
                                        pubmed_results["studies_found"])

            results.append({
                "ingredient": ingredient,
                "health_score": score_result["score"],
                "verdict": score_result["verdict"],
                "confidence_score": conf["confidence_score"],
                "confidence_label": conf["confidence_label"],
                "source": "Live+Claude" if use_claude else "Live",
                "studies": pubmed_results["studies_found"],
                "flag": score_result["flag"]
            })
            time.sleep(1)

    # Display results table
    print()
    print("=" * 65)
    print(f"NOURA PRODUCT SCAN: {product_name.upper()}")
    print("=" * 65)
    print(f"{'Ingredient':<28} {'Score':>6} {'Verdict':<16} {'Confidence':<10} {'Src'}")
    print("-" * 65)

    results_sorted = sorted(results,
                            key=lambda x: (x["health_score"] or 0),
                            reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        flag_marker = " ⚠" if r["flag"] else ""
        print(f"{r['ingredient']:<28} {score_display:>6} {r['verdict']:<16} "
              f"{r['confidence_label']:<10} {r['source']}{flag_marker}")

    print("=" * 65)

    # Product summary
    scored = [r for r in results if r["health_score"] is not None]
    if scored:
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1)
        higher_risk_count = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        print()
        print(f"Product Average Score:    {avg_score}/100")
        print(f"Higher Risk Ingredients:  {higher_risk_count}")
        print(f"KB lookups (instant):     {kb_hits}")
        print(f"Live + Claude searches:   {live_hits}")

        if higher_risk_count == 0 and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or avg_score < 40:
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        print(f"Product Verdict:          {product_verdict}")

    # Ingredient flags
    print()
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("INGREDIENT FLAGS:")
        for r in flagged:
            print(f"  ⚠ {r['ingredient']}: {r['flag'][:120]}")

    # Interaction analysis
    all_ingredient_names = [r["ingredient"] for r in results]
    interactions = check_formula_interactions(all_ingredient_names)
    if interactions:
        # Deduplicate — remove exact duplicate descriptions
        seen_descriptions = set()
        unique_interactions = []
        for interaction in interactions:
            key = interaction["description"][:60]
            if key not in seen_descriptions:
                seen_descriptions.add(key)
                unique_interactions.append(interaction)

        print()
        display_interactions(unique_interactions, product_name)
    else:
        print()
        print("✓ No known ingredient interactions detected.")

    print("=" * 65)
    return results_sorted


# Test 1: Clean beauty serum
clean_label = """
Aqua, Aloe Barbadensis Leaf Juice, Glycerin, Niacinamide,
Sodium Hyaluronate, Ascorbic Acid, Tocopherol, Zinc Oxide,
Centella Asiatica Extract, Bakuchiol, Xanthan Gum,
Citric Acid, Sodium Benzoate
"""

noura_scan_product_v3("Clean Beauty Serum", clean_label, max_ingredients=12)

print()
print()

# Test 2: Concerning formula
concerning_label = """
Aqua, Glycerin, Retinol, Glycolic Acid, Ascorbic Acid,
Niacinamide, Sodium Benzoate, Parabens, Fragrance,
Salicylic Acid, Dimethicone, Xanthan Gum
"""

noura_scan_product_v3("Concerning Formula Test", concerning_label, max_ingredients=12)

NOURA Product Scan: Clean Beauty Serum
Classification: Claude API
Ingredients detected: 12
Evaluating top 12...

  [1/12] Live search: aloe barbadensis leaf juice...
  [8/12] Live search: centella asiatica extract...
  [9/12] Live search: bakuchiol...

NOURA PRODUCT SCAN: CLEAN BEAUTY SERUM
Ingredient                    Score Verdict          Confidence Src
-----------------------------------------------------------------
sodium hyaluronate               88 WELL SUPPORTED   VERY HIGH  KB
glycerin                         85 WELL SUPPORTED   VERY HIGH  KB
xanthan gum                      84 WELL SUPPORTED   VERY HIGH  KB
niacinamide                      82 WELL SUPPORTED   VERY HIGH  KB
citric acid                      82 WELL SUPPORTED   VERY HIGH  KB ⚠
zinc oxide                       79 WELL SUPPORTED   VERY HIGH  KB ⚠
tocopherol                       78 WELL SUPPORTED   VERY HIGH  KB ⚠
aloe barbadensis leaf juice    73.4 WELL SUPPORTED   MODERATE   Live+Claude
ascorbic acid          

[{'ingredient': 'glycerin',
  'health_score': 85,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 95,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 180,
  'flag': None},
 {'ingredient': 'xanthan gum',
  'health_score': 84,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 88,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 90,
  'flag': None},
 {'ingredient': 'niacinamide',
  'health_score': 82,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 96,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 200,
  'flag': None},
 {'ingredient': 'ascorbic acid',
  'health_score': 72,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 89,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 280,
  'flag': 'Stability concerns — degrades rapidly if poorly formulated'},
 {'ingredient': 'dimethicone',
  'health_score': 71,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 85,
  'confidence_label': 'VERY HIGH',
  'source

In [None]:
# NOURA - Cell 18: Formula interaction engine

KNOWN_INTERACTIONS = {

    # DANGEROUS combinations
    "sodium benzoate + ascorbic acid": {
        "ingredients": ["sodium benzoate", "ascorbic acid"],
        "severity": "HIGH",
        "interaction_type": "chemical reaction",
        "description": "Sodium benzoate reacts with ascorbic acid (vitamin C) in the presence of light and heat to form benzene, a known carcinogen.",
        "recommendation": "Avoid this combination. Use alternative preservatives or vitamin C derivatives that don't react."
    },

    "retinol + aha": {
        "ingredients": ["retinol", "glycolic acid", "lactic acid", "mandelic acid"],
        "severity": "MODERATE",
        "interaction_type": "pH conflict + irritation",
        "description": "AHAs work at low pH (3.0-3.5) which destabilizes retinol and increases skin irritation risk significantly.",
        "recommendation": "Use retinol and AHAs at separate times — AHAs in the morning, retinol at night. Never layer simultaneously."
    },

    "retinol + vitamin c": {
        "ingredients": ["retinol", "ascorbic acid"],
        "severity": "MODERATE",
        "interaction_type": "pH conflict",
        "description": "Vitamin C (ascorbic acid) requires acidic pH (below 3.5) while retinol is most stable at neutral pH. Combining destabilizes both actives.",
        "recommendation": "Use vitamin C in the morning routine, retinol at night."
    },

    "niacinamide + vitamin c": {
        "ingredients": ["niacinamide", "ascorbic acid"],
        "severity": "LOW",
        "interaction_type": "efficacy reduction",
        "description": "High concentrations of niacinamide and ascorbic acid can form niacin which may cause temporary skin flushing. Modern formulations at typical concentrations show minimal interaction.",
        "recommendation": "Use at typical cosmetic concentrations (under 10% each). Minimal concern in well-formulated products."
    },

    "benzoyl peroxide + retinol": {
        "ingredients": ["benzoyl peroxide", "retinol"],
        "severity": "HIGH",
        "interaction_type": "oxidation",
        "description": "Benzoyl peroxide oxidizes and degrades retinol, rendering both ineffective and potentially generating irritating byproducts.",
        "recommendation": "Never combine. Use in completely separate routines."
    },

    "aha + bha simultaneous": {
        "ingredients": ["glycolic acid", "salicylic acid"],
        "severity": "MODERATE",
        "interaction_type": "over-exfoliation",
        "description": "Combining AHA and BHA acids simultaneously increases risk of over-exfoliation, barrier damage, and sensitization.",
        "recommendation": "Alternate use — AHAs and BHAs on different days, or use pre-formulated combinations at lower concentrations."
    },

    "vitamin c + niacinamide high dose": {
        "ingredients": ["ascorbic acid", "niacinamide"],
        "severity": "LOW",
        "interaction_type": "potential flushing",
        "description": "At high concentrations both can form niacin causing temporary flushing. At standard cosmetic concentrations the risk is minimal.",
        "recommendation": "Keep both under 10% concentration. Minimal concern in typical formulations."
    },

    "copper peptides + vitamin c": {
        "ingredients": ["copper peptides", "ascorbic acid"],
        "severity": "MODERATE",
        "interaction_type": "oxidation",
        "description": "Vitamin C can oxidize copper peptides, reducing efficacy of both. Copper can also pro-oxidize vitamin C.",
        "recommendation": "Use in separate routines — copper peptides at night, vitamin C in the morning."
    },

    "parabens + fragrance": {
        "ingredients": ["parabens", "fragrance"],
        "severity": "HIGH",
        "interaction_type": "cumulative endocrine risk",
        "description": "Both parabens and certain fragrance components are known endocrine disruptors. Combined exposure increases total endocrine disruption burden.",
        "recommendation": "Avoid products containing both. Seek paraben-free, fragrance-free alternatives."
    },

    "triclosan + alcohol": {
        "ingredients": ["triclosan", "alcohol denat"],
        "severity": "MODERATE",
        "interaction_type": "absorption enhancement",
        "description": "Alcohol enhances skin penetration of triclosan, increasing systemic exposure to this endocrine disruptor.",
        "recommendation": "Avoid triclosan-containing products entirely. Use safer antimicrobial alternatives."
    },
}


def check_formula_interactions(ingredients):
    """
    Checks a list of ingredients for known dangerous interactions.
    Returns list of interactions found, sorted by severity.
    """
    ingredients_lower = [i.lower().strip() for i in ingredients]

    # Also expand via normalizer — catch vitamin c = ascorbic acid etc
    expanded = set(ingredients_lower)
    for ing in ingredients_lower:
        search_terms = normalize_ingredient(ing)
        expanded.update(search_terms)

    found_interactions = []

    for interaction_key, interaction_data in KNOWN_INTERACTIONS.items():
        involved = interaction_data["ingredients"]

        # Check how many of the interaction ingredients are present
        matches = []
        for involved_ing in involved:
            for formula_ing in expanded:
                if involved_ing in formula_ing or formula_ing in involved_ing:
                    matches.append(involved_ing)
                    break

        # Need at least 2 ingredients to have an interaction
        if len(matches) >= 2:
            found_interactions.append({
                "interaction": interaction_key,
                "severity": interaction_data["severity"],
                "type": interaction_data["interaction_type"],
                "description": interaction_data["description"],
                "recommendation": interaction_data["recommendation"],
                "matched_ingredients": matches
            })

    # Sort by severity
    severity_order = {"HIGH": 0, "MODERATE": 1, "LOW": 2}
    found_interactions.sort(key=lambda x: severity_order.get(x["severity"], 3))

    return found_interactions


def display_interactions(interactions, product_name=""):
    """Display interaction report."""
    if not interactions:
        print("✓ No known ingredient interactions detected.")
        return

    high = [i for i in interactions if i["severity"] == "HIGH"]
    moderate = [i for i in interactions if i["severity"] == "MODERATE"]
    low = [i for i in interactions if i["severity"] == "LOW"]

    print(f"NOURA INTERACTION ANALYSIS{': ' + product_name if product_name else ''}")
    print("=" * 65)
    print(f"Interactions found: {len(interactions)} "
          f"({len(high)} high | {len(moderate)} moderate | {len(low)} low)")
    print()

    for interaction in interactions:
        severity = interaction["severity"]
        marker = "🔴" if severity == "HIGH" else "🟡" if severity == "MODERATE" else "🟢"

        print(f"{marker} [{severity}] {interaction['type'].upper()}")
        print(f"   Ingredients: {' + '.join(interaction['matched_ingredients'])}")
        print(f"   {interaction['description']}")
        print(f"   → {interaction['recommendation']}")
        print()

    print("=" * 65)


# Test on our clean beauty serum
print("Testing interaction engine on Clean Beauty Serum...")
print()

clean_serum_ingredients = [
    "aloe barbadensis leaf juice", "glycerin", "niacinamide",
    "sodium hyaluronate", "ascorbic acid", "tocopherol", "zinc oxide",
    "centella asiatica extract", "bakuchiol", "xanthan gum",
    "citric acid", "sodium benzoate"
]

interactions = check_formula_interactions(clean_serum_ingredients)
display_interactions(interactions, "Clean Beauty Serum")

print()
print("Testing on concerning formula...")
print()

concerning_ingredients = [
    "aqua", "glycerin", "retinol", "glycolic acid",
    "ascorbic acid", "niacinamide", "sodium benzoate",
    "parabens", "fragrance", "salicylic acid"
]

interactions2 = check_formula_interactions(concerning_ingredients)
display_interactions(interactions2, "Concerning Formula")

Testing interaction engine on Clean Beauty Serum...

NOURA INTERACTION ANALYSIS: Clean Beauty Serum
Interactions found: 3 (1 high | 0 moderate | 2 low)

🔴 [HIGH] CHEMICAL REACTION
   Ingredients: sodium benzoate + ascorbic acid
   Sodium benzoate reacts with ascorbic acid (vitamin C) in the presence of light and heat to form benzene, a known carcinogen.
   → Avoid this combination. Use alternative preservatives or vitamin C derivatives that don't react.

🟢 [LOW] EFFICACY REDUCTION
   Ingredients: niacinamide + ascorbic acid
   High concentrations of niacinamide and ascorbic acid can form niacin which may cause temporary skin flushing. Modern formulations at typical concentrations show minimal interaction.
   → Use at typical cosmetic concentrations (under 10% each). Minimal concern in well-formulated products.

🟢 [LOW] POTENTIAL FLUSHING
   Ingredients: ascorbic acid + niacinamide
   At high concentrations both can form niacin causing temporary flushing. At standard cosmetic concentrat

In [None]:
# NOURA - Cell 19: API wrapper — clean callable interface
# This is the enterprise-ready packaging layer
# Version 1.0

import json
from datetime import datetime

def noura_api(request_type, **kwargs):
    """
    NOURA Public API — single entry point for all evaluations.

    Request types:
    - "ingredient"  : evaluate a single ingredient
    - "batch"       : evaluate multiple ingredients
    - "product"     : scan a full product label
    - "compare"     : compare two ingredients head to head
    - "interaction" : check interactions between ingredients

    Returns: structured JSON response
    """

    timestamp = datetime.now().isoformat()

    # ================================================================
    # INGREDIENT EVALUATION
    # ================================================================
    if request_type == "ingredient":
        ingredient = kwargs.get("ingredient", "")
        category = kwargs.get("category", "skincare")
        use_claude = kwargs.get("use_claude", True)

        if not ingredient:
            return {"error": "ingredient parameter required", "status": 400}

        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            return {
                "status": 200,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": kb_entry["inci_name"],
                "health_score": kb_entry["health_score"],
                "verdict": kb_entry["verdict"],
                "confidence_score": kb_entry["confidence_score"],
                "confidence_label": kb_entry["confidence_label"],
                "evidence_source": "NOURA Knowledge Base",
                "studies_reviewed": kb_entry["studies_reviewed"],
                "evidence_summary": kb_entry["evidence_summary"],
                "key_evidence": kb_entry["key_evidence"],
                "concern_flags": kb_entry["concern_flags"],
                "safety_notes": kb_entry["safety_notes"],
                "regulatory_status": kb_entry["regulatory_status"],
                "last_updated": kb_entry["last_updated"]
            }
        else:
            # Live search
            pubmed_results = search_pubmed_normalized(ingredient, max_results=50)
            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(study["title"], abstract)
                direction = classify_evidence_direction_smart(
                    study["title"], abstract, ingredient, use_claude=use_claude
                )
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:300]
                ev["pubmed_url"] = study.get("pubmed_url", "")
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            confidence = calculate_confidence(
                evaluated, direction_count, pubmed_results["studies_found"]
            )

            return {
                "status": 200,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": ingredient.title(),
                "health_score": score_result["score"],
                "verdict": score_result["verdict"],
                "confidence_score": confidence["confidence_score"],
                "confidence_label": confidence["confidence_label"],
                "evidence_source": "PubMed Live Search",
                "studies_retrieved": pubmed_results["studies_found"],
                "evidence_direction": direction_count,
                "concern_flags": [score_result["flag"]] if score_result["flag"] else [],
                "safety_notes": score_result["flag"] or "No specific concerns identified in retrieved literature",
                "top_studies": [
                    {
                        "title": e["study_title"],
                        "year": e["year"],
                        "direction": e["direction"],
                        "evidence_type": e["source_type"],
                        "weight": e["weight"],
                        "sample_size": e["sample_size"],
                        "url": e["pubmed_url"]
                    }
                    for e in evaluated[:5]
                ]
            }

    # ================================================================
    # BATCH EVALUATION
    # ================================================================
    elif request_type == "batch":
        ingredients = kwargs.get("ingredients", [])
        category = kwargs.get("category", "skincare")

        if not ingredients:
            return {"error": "ingredients list required", "status": 400}

        results = []
        for ingredient in ingredients:
            result = noura_api("ingredient", ingredient=ingredient,
                               category=category, use_claude=False)
            results.append({
                "ingredient": ingredient,
                "health_score": result.get("health_score"),
                "verdict": result.get("verdict"),
                "confidence_label": result.get("confidence_label"),
                "concern_flags": result.get("concern_flags", [])
            })
            time.sleep(1)

        results_sorted = sorted(results,
                                key=lambda x: (x["health_score"] or 0),
                                reverse=True)

        scored = [r for r in results if r["health_score"] is not None]
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1) if scored else None
        higher_risk = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        return {
            "status": 200,
            "request_type": "batch",
            "timestamp": timestamp,
            "ingredients_evaluated": len(results),
            "average_score": avg_score,
            "higher_risk_count": higher_risk,
            "results": results_sorted
        }

    # ================================================================
    # PRODUCT SCAN
    # ================================================================
    elif request_type == "product":
        product_name = kwargs.get("product_name", "Unknown Product")
        raw_label = kwargs.get("raw_label", "")
        max_ingredients = kwargs.get("max_ingredients", 15)

        if not raw_label:
            return {"error": "raw_label parameter required", "status": 400}

        ingredients = parse_ingredient_list(raw_label)
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]
        ingredients_to_evaluate = ingredients[:max_ingredients]

        results = []
        for ingredient in ingredients_to_evaluate:
            result = noura_api("ingredient", ingredient=ingredient, use_claude=False)
            results.append({
                "ingredient": ingredient,
                "health_score": result.get("health_score"),
                "verdict": result.get("verdict"),
                "confidence_score": result.get("confidence_score"),
                "confidence_label": result.get("confidence_label"),
                "evidence_source": result.get("evidence_source"),
                "concern_flags": result.get("concern_flags", [])
            })
            time.sleep(0.5)

        results_sorted = sorted(results,
                                key=lambda x: (x["health_score"] or 0),
                                reverse=True)

        scored = [r for r in results if r["health_score"] is not None]
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1) if scored else None
        higher_risk = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        if higher_risk == 0 and avg_score and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk >= 2 or (avg_score and avg_score < 40):
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        interactions = check_formula_interactions([r["ingredient"] for r in results])
        seen = set()
        unique_interactions = []
        for i in interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique_interactions.append(i)

        return {
            "status": 200,
            "request_type": "product",
            "timestamp": timestamp,
            "product_name": product_name,
            "ingredients_detected": len(ingredients),
            "ingredients_evaluated": len(results),
            "average_score": avg_score,
            "higher_risk_count": higher_risk,
            "product_verdict": product_verdict,
            "ingredients": results_sorted,
            "interactions": [
                {
                    "severity": i["severity"],
                    "type": i["type"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                }
                for i in unique_interactions
            ]
        }

    # ================================================================
    # COMPARE TWO INGREDIENTS
    # ================================================================
    elif request_type == "compare":
        ingredient_a = kwargs.get("ingredient_a", "")
        ingredient_b = kwargs.get("ingredient_b", "")

        if not ingredient_a or not ingredient_b:
            return {"error": "ingredient_a and ingredient_b required", "status": 400}

        result_a = noura_api("ingredient", ingredient=ingredient_a, use_claude=False)
        time.sleep(1)
        result_b = noura_api("ingredient", ingredient=ingredient_b, use_claude=False)

        winner = ingredient_a if (result_a.get("health_score") or 0) >= (result_b.get("health_score") or 0) else ingredient_b

        return {
            "status": 200,
            "request_type": "compare",
            "timestamp": timestamp,
            "ingredient_a": {
                "name": ingredient_a,
                "health_score": result_a.get("health_score"),
                "verdict": result_a.get("verdict"),
                "confidence_label": result_a.get("confidence_label")
            },
            "ingredient_b": {
                "name": ingredient_b,
                "health_score": result_b.get("health_score"),
                "verdict": result_b.get("verdict"),
                "confidence_label": result_b.get("confidence_label")
            },
            "safer_ingredient": winner
        }

    # ================================================================
    # INTERACTION CHECK
    # ================================================================
    elif request_type == "interaction":
        ingredients = kwargs.get("ingredients", [])

        if len(ingredients) < 2:
            return {"error": "at least 2 ingredients required", "status": 400}

        interactions = check_formula_interactions(ingredients)
        seen = set()
        unique = []
        for i in interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique.append(i)

        return {
            "status": 200,
            "request_type": "interaction",
            "timestamp": timestamp,
            "ingredients_checked": ingredients,
            "interactions_found": len(unique),
            "interactions": [
                {
                    "severity": i["severity"],
                    "type": i["type"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                }
                for i in unique
            ]
        }

    else:
        return {"error": f"Unknown request_type: {request_type}", "status": 400}


# ================================================================
# API TEST SUITE
# ================================================================
print("=== NOURA API v1.0 TEST SUITE ===")
print()

# Test 1: Single ingredient
print("TEST 1: Single ingredient lookup")
result = noura_api("ingredient", ingredient="niacinamide")
print(json.dumps({
    "ingredient": result["ingredient"],
    "health_score": result["health_score"],
    "verdict": result["verdict"],
    "confidence_label": result["confidence_label"],
    "evidence_source": result["evidence_source"]
}, indent=2))
print()

# Test 2: Compare
print("TEST 2: Compare two preservatives")
result = noura_api("compare", ingredient_a="parabens", ingredient_b="phenoxyethanol")
print(json.dumps(result, indent=2))
print()

# Test 3: Interaction check
print("TEST 3: Interaction check")
result = noura_api("interaction",
                   ingredients=["retinol", "ascorbic acid", "sodium benzoate", "glycolic acid"])
print(json.dumps(result, indent=2))
print()

# Test 4: Product scan
print("TEST 4: Product scan")
result = noura_api("product",
                   product_name="Test Moisturizer",
                   raw_label="Aqua, Glycerin, Niacinamide, Parabens, Fragrance, Hyaluronic Acid",
                   max_ingredients=6)
print(json.dumps({
    "product_name": result["product_name"],
    "average_score": result["average_score"],
    "product_verdict": result["product_verdict"],
    "higher_risk_count": result["higher_risk_count"],
    "interactions_found": len(result["interactions"])
}, indent=2))

=== NOURA API v1.0 TEST SUITE ===

TEST 1: Single ingredient lookup
{
  "ingredient": "niacinamide",
  "health_score": 82,
  "verdict": "WELL SUPPORTED",
  "confidence_label": "VERY HIGH",
  "evidence_source": "NOURA Knowledge Base"
}

TEST 2: Compare two preservatives
{
  "status": 200,
  "request_type": "compare",
  "timestamp": "2026-02-27T11:12:41.765445",
  "ingredient_a": {
    "name": "parabens",
    "health_score": 18,
    "verdict": "HIGHER RISK",
    "confidence_label": "VERY HIGH"
  },
  "ingredient_b": {
    "name": "phenoxyethanol",
    "health_score": 52.2,
    "verdict": "LIMITED SUPPORT",
    "confidence_label": "HIGH"
  },
  "safer_ingredient": "phenoxyethanol"
}

TEST 3: Interaction check
{
  "status": 200,
  "request_type": "interaction",
  "timestamp": "2026-02-27T11:12:46.746068",
  "ingredients_checked": [
    "retinol",
    "ascorbic acid",
    "sodium benzoate",
    "glycolic acid"
  ],
  "interactions_found": 3,
  "interactions": [
    {
      "severity": "HIGH

In [None]:
# NOURA - Cell 20: API v2.0 — depth parameter, Public Extract, Full Report
# One API. One endpoint. depth="extract" or depth="full"

import json
from datetime import datetime

# ================================================================
# PUBLIC EXTRACT FORMATTER
# Takes a full evaluation result and distills it to
# the 5 consumer-facing fields
# ================================================================

def format_public_extract(product_name, avg_score, higher_risk_count,
                           product_verdict, ingredients, interactions):
    """
    Distills a full NOURA evaluation into a consumer-facing extract.
    Designed for product pages, spa menus, shelf tags, chatbot responses.
    """

    # Color signal
    if avg_score is None:
        color_signal = "⚪"
        color_label = "INSUFFICIENT DATA"
    elif avg_score >= 70 and higher_risk_count == 0:
        color_signal = "🟢"
        color_label = "CLEAN"
    elif avg_score >= 50 and higher_risk_count <= 1:
        color_signal = "🟡"
        color_label = "MODERATE CONCERN"
    else:
        color_signal = "🔴"
        color_label = "HIGHER RISK"

    # One-line verdict in plain language
    plain_verdicts = {
        "CLEAN FORMULATION": "Well-formulated product with strong safety evidence.",
        "MIXED FORMULATION": "Generally acceptable, some ingredients warrant attention.",
        "FORMULATION CONCERNS": "Contains ingredients with documented safety concerns."
    }
    plain_verdict = plain_verdicts.get(product_verdict, "Evaluation complete.")

    # Top 2-3 flags in plain language (prioritise HIGH interactions + flagged ingredients)
    plain_flags = []

    # HIGH interactions first
    high_interactions = [i for i in interactions if i.get("severity") == "HIGH"]
    for interaction in high_interactions[:2]:
        ingredients_involved = " + ".join(interaction.get("matched_ingredients", []))
        plain_flags.append(f"⚠ {ingredients_involved}: {interaction['description'][:80]}")

    # Then flagged ingredients
    if len(plain_flags) < 3:
        flagged_ingredients = [r for r in ingredients if r.get("flag") and r.get("verdict") in ["HIGHER RISK", "LIMITED SUPPORT"]]
        flagged_sorted = sorted(flagged_ingredients, key=lambda x: x.get("health_score") or 100)
        for r in flagged_sorted:
            if len(plain_flags) >= 3:
                break
            first_flag = r["flag"].split("|")[0].strip()[:80]
            plain_flags.append(f"⚠ {r['ingredient'].title()}: {first_flag}")

    # Best alternative signal (only if score < 70)
    alternative_signal = None
    if avg_score and avg_score < 70:
        worst = sorted([r for r in ingredients if r.get("health_score")],
                      key=lambda x: x["health_score"])[0]
        if worst["verdict"] == "HIGHER RISK":
            alternative_signal = f"Cleaner alternatives available — ask for options without {worst['ingredient'].title()}."

    return {
        "product_name": product_name,
        "score": avg_score,
        "color_signal": color_signal,
        "color_label": color_label,
        "plain_verdict": plain_verdict,
        "flags": plain_flags,
        "alternative_signal": alternative_signal,
        "higher_risk_count": higher_risk_count
    }


def display_extract(extract):
    """Print the public extract in consumer-friendly format."""
    print("=" * 55)
    print(f"  NOURA PRODUCT EXTRACT")
    print("=" * 55)
    print(f"  {extract['product_name']}")
    print()
    print(f"  {extract['color_signal']}  {extract['score']}/100  —  {extract['color_label']}")
    print()
    print(f"  {extract['plain_verdict']}")
    print()
    if extract['flags']:
        for flag in extract['flags']:
            print(f"  {flag[:75]}")
        print()
    if extract['alternative_signal']:
        print(f"  💡 {extract['alternative_signal']}")
        print()
    print("  [ Extended Analysis available on request ]")
    print("=" * 55)


# ================================================================
# NOURA API v2.0
# Single entry point. One parameter controls output depth.
# depth="extract"  → public-facing consumer layer (fast)
# depth="full"     → complete enterprise evaluation (comprehensive)
# ================================================================

def noura_api_v2(request_type, depth="extract", **kwargs):
    """
    NOURA API v2.0 — one endpoint, one depth parameter.

    depth="extract"  : Public Extract — score, color, plain flags,
                       alternative signal. Designed for product pages,
                       spa menus, chatbots, shelf tags.

    depth="full"     : Full Report — complete ingredient breakdown,
                       interactions, regulatory status, study citations,
                       confidence scores, reformulation notes.
                       Designed for compliance teams, R&D, brand audits.

    Request types:
    - "product"     : scan a full product label
    - "ingredient"  : evaluate a single ingredient
    - "batch"       : evaluate multiple ingredients
    - "compare"     : compare two ingredients
    - "interaction" : check formula interactions
    """

    timestamp = datetime.now().isoformat()

    # ================================================================
    # PRODUCT SCAN — primary enterprise endpoint
    # ================================================================
    if request_type == "product":
        product_name = kwargs.get("product_name", "Unknown Product")
        raw_label = kwargs.get("raw_label", "")
        max_ingredients = kwargs.get("max_ingredients", 15)
        use_claude = kwargs.get("use_claude", True)

        if not raw_label:
            return {"error": "raw_label required", "status": 400}

        # Always run the full engine internally
        ingredients_parsed = parse_ingredient_list(raw_label)
        ingredients_parsed = [i for i in ingredients_parsed
                              if i not in ["water", "aqua"]]
        ingredients_to_evaluate = ingredients_parsed[:max_ingredients]

        results = []
        kb_hits = 0
        live_hits = 0

        for ingredient in ingredients_to_evaluate:
            kb_entry = kb_lookup(ingredient)

            if kb_entry:
                kb_hits += 1
                results.append({
                    "ingredient": ingredient,
                    "inci_name": kb_entry["inci_name"],
                    "health_score": kb_entry["health_score"],
                    "verdict": kb_entry["verdict"],
                    "confidence_score": kb_entry["confidence_score"],
                    "confidence_label": kb_entry["confidence_label"],
                    "evidence_source": "NOURA Knowledge Base",
                    "studies_reviewed": kb_entry["studies_reviewed"],
                    "evidence_summary": kb_entry.get("evidence_summary", ""),
                    "key_evidence": kb_entry.get("key_evidence", []),
                    "flag": (" | ".join(kb_entry["concern_flags"]))
                             if kb_entry["concern_flags"] else None,
                    "safety_notes": kb_entry.get("safety_notes", ""),
                    "regulatory_status": kb_entry.get("regulatory_status", {}),
                    "concern_flags": kb_entry["concern_flags"]
                })
            else:
                live_hits += 1
                pubmed_results = search_pubmed_normalized(
                    ingredient, max_results=30
                )
                evaluated = []
                direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

                for study in pubmed_results.get("studies", []):
                    abstract = study.get("abstract", "")
                    source_type = classify_evidence_type(
                        study["title"], abstract
                    )
                    direction = classify_evidence_direction_smart(
                        study["title"], abstract, ingredient,
                        use_claude=use_claude
                    )
                    ev = evaluate_evidence(source_type)
                    ev["study_title"] = study["title"][:80]
                    ev["year"] = study["year"]
                    ev["direction"] = direction
                    ev["sample_size"] = study.get("sample_size", None)
                    ev["abstract"] = abstract[:300]
                    ev["pubmed_url"] = study.get("pubmed_url", "")
                    evaluated.append(ev)
                    direction_count[direction] += 1

                score_result = calculate_direction_aware_score(evaluated)
                conf = calculate_confidence(
                    evaluated, direction_count,
                    pubmed_results["studies_found"]
                )

                results.append({
                    "ingredient": ingredient,
                    "inci_name": ingredient.title(),
                    "health_score": score_result["score"],
                    "verdict": score_result["verdict"],
                    "confidence_score": conf["confidence_score"],
                    "confidence_label": conf["confidence_label"],
                    "evidence_source": "PubMed Live Search",
                    "studies_reviewed": pubmed_results["studies_found"],
                    "evidence_summary": "",
                    "key_evidence": [
                        {
                            "title": e["study_title"],
                            "year": e["year"],
                            "direction": e["direction"],
                            "evidence_type": e["source_type"],
                            "url": e["pubmed_url"]
                        }
                        for e in evaluated[:3]
                    ],
                    "flag": score_result["flag"],
                    "safety_notes": score_result["flag"] or "",
                    "regulatory_status": {},
                    "concern_flags": [score_result["flag"]]
                                     if score_result["flag"] else []
                })
                time.sleep(1)

        # Sort by score
        results_sorted = sorted(
            results, key=lambda x: (x["health_score"] or 0), reverse=True
        )

        # Calculate product-level metrics
        scored = [r for r in results if r["health_score"] is not None]
        avg_score = round(
            sum(r["health_score"] for r in scored) / len(scored), 1
        ) if scored else None
        higher_risk_count = len(
            [r for r in results if r["verdict"] == "HIGHER RISK"]
        )

        if higher_risk_count == 0 and avg_score and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or (avg_score and avg_score < 40):
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        # Interactions
        all_names = [r["ingredient"] for r in results]
        raw_interactions = check_formula_interactions(all_names)
        seen = set()
        unique_interactions = []
        for i in raw_interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique_interactions.append({
                    "severity": i["severity"],
                    "type": i["type"],
                    "matched_ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                })

        # Build the Public Extract — always generated
        extract = format_public_extract(
            product_name, avg_score, higher_risk_count,
            product_verdict, results_sorted, unique_interactions
        )

        # ── EXTRACT response ──────────────────────────────────────
        if depth == "extract":
            return {
                "status": 200,
                "api_version": "2.0",
                "depth": "extract",
                "request_type": "product",
                "timestamp": timestamp,
                "extract": extract
            }

        # ── FULL response ─────────────────────────────────────────
        else:
            return {
                "status": 200,
                "api_version": "2.0",
                "depth": "full",
                "request_type": "product",
                "timestamp": timestamp,

                # Public extract always included in full response
                "extract": extract,

                # Full enterprise data
                "product_name": product_name,
                "ingredients_detected": len(ingredients_parsed),
                "ingredients_evaluated": len(results),
                "average_score": avg_score,
                "higher_risk_count": higher_risk_count,
                "product_verdict": product_verdict,
                "kb_lookups": kb_hits,
                "live_searches": live_hits,

                # Full ingredient breakdown
                "ingredients": results_sorted,

                # Interaction analysis
                "interactions": unique_interactions,
                "interaction_summary": {
                    "total": len(unique_interactions),
                    "high": len([i for i in unique_interactions
                                 if i["severity"] == "HIGH"]),
                    "moderate": len([i for i in unique_interactions
                                    if i["severity"] == "MODERATE"]),
                    "low": len([i for i in unique_interactions
                               if i["severity"] == "LOW"])
                },

                # Reformulation notes (for brands)
                "reformulation_notes": [
                    {
                        "ingredient": r["ingredient"],
                        "issue": r["flag"],
                        "suggestion": f"Consider replacing {r['ingredient']} — "
                                      f"scored {r['health_score']}/100 "
                                      f"({r['verdict']})"
                    }
                    for r in results_sorted
                    if r.get("flag") and r["verdict"] in
                    ["HIGHER RISK", "LIMITED SUPPORT"]
                ]
            }

    # ================================================================
    # INGREDIENT — single ingredient, depth-aware
    # ================================================================
    elif request_type == "ingredient":
        ingredient = kwargs.get("ingredient", "")
        use_claude = kwargs.get("use_claude", True)

        if not ingredient:
            return {"error": "ingredient required", "status": 400}

        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            full_data = {
                "status": 200,
                "api_version": "2.0",
                "depth": depth,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": kb_entry["inci_name"],
                "health_score": kb_entry["health_score"],
                "verdict": kb_entry["verdict"],
                "confidence_score": kb_entry["confidence_score"],
                "confidence_label": kb_entry["confidence_label"],
                "evidence_source": "NOURA Knowledge Base",
                "studies_reviewed": kb_entry["studies_reviewed"],
                "evidence_summary": kb_entry.get("evidence_summary", ""),
                "key_evidence": kb_entry.get("key_evidence", []),
                "concern_flags": kb_entry["concern_flags"],
                "safety_notes": kb_entry.get("safety_notes", ""),
                "regulatory_status": kb_entry.get("regulatory_status", {})
            }
        else:
            pubmed_results = search_pubmed_normalized(
                ingredient, max_results=50
            )
            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(
                    study["title"], abstract
                )
                direction = classify_evidence_direction_smart(
                    study["title"], abstract, ingredient,
                    use_claude=use_claude
                )
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:300]
                ev["pubmed_url"] = study.get("pubmed_url", "")
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            conf = calculate_confidence(
                evaluated, direction_count,
                pubmed_results["studies_found"]
            )

            full_data = {
                "status": 200,
                "api_version": "2.0",
                "depth": depth,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": ingredient.title(),
                "health_score": score_result["score"],
                "verdict": score_result["verdict"],
                "confidence_score": conf["confidence_score"],
                "confidence_label": conf["confidence_label"],
                "evidence_source": "PubMed Live Search",
                "studies_reviewed": pubmed_results["studies_found"],
                "evidence_summary": "",
                "key_evidence": [
                    {
                        "title": e["study_title"],
                        "year": e["year"],
                        "direction": e["direction"],
                        "url": e["pubmed_url"]
                    }
                    for e in evaluated[:5]
                ],
                "concern_flags": [score_result["flag"]]
                                  if score_result["flag"] else [],
                "safety_notes": score_result["flag"] or "",
                "regulatory_status": {}
            }

        if depth == "extract":
            score = full_data["health_score"]
            flags = full_data["concern_flags"][:2]
            if score is None:
                color_signal, color_label = "⚪", "INSUFFICIENT DATA"
            elif score >= 70:
                color_signal, color_label = "🟢", "WELL SUPPORTED"
            elif score >= 45:
                color_signal, color_label = "🟡", "USE WITH CAUTION"
            else:
                color_signal, color_label = "🔴", "HIGHER RISK"

            return {
                "status": 200,
                "api_version": "2.0",
                "depth": "extract",
                "request_type": "ingredient",
                "timestamp": timestamp,
                "extract": {
                    "ingredient": ingredient,
                    "score": score,
                    "color_signal": color_signal,
                    "color_label": color_label,
                    "plain_flags": [f[:80] for f in flags],
                    "studies_reviewed": full_data["studies_reviewed"]
                }
            }
        else:
            return full_data

    # ================================================================
    # COMPARE — depth-aware
    # ================================================================
    elif request_type == "compare":
        ingredient_a = kwargs.get("ingredient_a", "")
        ingredient_b = kwargs.get("ingredient_b", "")

        result_a = noura_api_v2("ingredient", depth="full",
                                ingredient=ingredient_a, use_claude=False)
        time.sleep(1)
        result_b = noura_api_v2("ingredient", depth="full",
                                ingredient=ingredient_b, use_claude=False)

        winner = ingredient_a if (result_a.get("health_score") or 0) >= \
                                 (result_b.get("health_score") or 0) \
                 else ingredient_b

        compare_data = {
            "status": 200,
            "api_version": "2.0",
            "depth": depth,
            "request_type": "compare",
            "timestamp": timestamp,
            "safer_ingredient": winner,
            "ingredient_a": {
                "name": ingredient_a,
                "score": result_a.get("health_score"),
                "verdict": result_a.get("verdict"),
                "confidence_label": result_a.get("confidence_label"),
                "top_flag": result_a.get("concern_flags", [None])[0]
            },
            "ingredient_b": {
                "name": ingredient_b,
                "score": result_b.get("health_score"),
                "verdict": result_b.get("verdict"),
                "confidence_label": result_b.get("confidence_label"),
                "top_flag": result_b.get("concern_flags", [None])[0]
            }
        }

        if depth == "full":
            compare_data["ingredient_a"]["full_evaluation"] = result_a
            compare_data["ingredient_b"]["full_evaluation"] = result_b

        return compare_data

    # ================================================================
    # INTERACTION — depth-aware
    # ================================================================
    elif request_type == "interaction":
        ingredients = kwargs.get("ingredients", [])

        if len(ingredients) < 2:
            return {"error": "at least 2 ingredients required", "status": 400}

        interactions = check_formula_interactions(ingredients)
        seen = set()
        unique = []
        for i in interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique.append(i)

        high = [i for i in unique if i["severity"] == "HIGH"]

        base = {
            "status": 200,
            "api_version": "2.0",
            "depth": depth,
            "request_type": "interaction",
            "timestamp": timestamp,
            "interactions_found": len(unique),
            "high_severity_count": len(high)
        }

        if depth == "extract":
            base["critical_flags"] = [
                {
                    "severity": i["severity"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"][:80]
                }
                for i in unique if i["severity"] == "HIGH"
            ]
        else:
            base["interactions"] = [
                {
                    "severity": i["severity"],
                    "type": i["type"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                }
                for i in unique
            ]

        return base

    else:
        return {"error": f"Unknown request_type: {request_type}", "status": 400}


# ================================================================
# TEST SUITE — v2.0
# ================================================================
print("=" * 55)
print("  NOURA API v2.0 TEST SUITE")
print("=" * 55)

# ── TEST 1: Product — EXTRACT (consumer-facing) ──────────────
print("\nTEST 1A: Product — depth='extract' (consumer-facing)")
result = noura_api_v2("product",
    depth="extract",
    product_name="Clean Beauty Serum",
    raw_label="Aqua, Glycerin, Niacinamide, Sodium Hyaluronate, "
              "Ascorbic Acid, Tocopherol, Xanthan Gum, "
              "Citric Acid, Sodium Benzoate",
    max_ingredients=9,
    use_claude=False
)
display_extract(result["extract"])
print()

# ── TEST 1B: Product — FULL (enterprise back-office) ─────────
print("\nTEST 1B: Product — depth='full' (enterprise back-office)")
result_full = noura_api_v2("product",
    depth="full",
    product_name="Clean Beauty Serum",
    raw_label="Aqua, Glycerin, Niacinamide, Sodium Hyaluronate, "
              "Ascorbic Acid, Tocopherol, Xanthan Gum, "
              "Citric Acid, Sodium Benzoate",
    max_ingredients=9,
    use_claude=False
)
print(json.dumps({
    "product_verdict": result_full["product_verdict"],
    "average_score": result_full["average_score"],
    "higher_risk_count": result_full["higher_risk_count"],
    "ingredients_evaluated": result_full["ingredients_evaluated"],
    "interaction_summary": result_full["interaction_summary"],
    "reformulation_notes_count": len(result_full["reformulation_notes"]),
    "extract_included": "extract" in result_full
}, indent=2))
print()

# ── TEST 2: Ingredient — EXTRACT ─────────────────────────────
print("TEST 2A: Ingredient — depth='extract'")
r = noura_api_v2("ingredient", depth="extract", ingredient="parabens")
print(json.dumps(r, indent=2))
print()

# ── TEST 3: Interaction — EXTRACT ────────────────────────────
print("TEST 3A: Interaction — depth='extract'")
r = noura_api_v2("interaction", depth="extract",
    ingredients=["sodium benzoate", "ascorbic acid", "retinol",
                 "glycolic acid"])
print(json.dumps(r, indent=2))
print()

# ── TEST 4: Compare — EXTRACT ────────────────────────────────
print("TEST 4A: Compare — depth='extract'")
r = noura_api_v2("compare", depth="extract",
    ingredient_a="parabens", ingredient_b="phenoxyethanol")
print(json.dumps(r, indent=2))

  NOURA API v2.0 TEST SUITE

TEST 1A: Product — depth='extract' (consumer-facing)
  NOURA PRODUCT EXTRACT
  Clean Beauty Serum

  🟢  79.5/100  —  CLEAN

  Well-formulated product with strong safety evidence.

  ⚠ sodium benzoate + ascorbic acid: Sodium benzoate reacts with ascorbic aci
  ⚠ Sodium Benzoate: Reacts with ascorbic acid (vitamin C) to form benzene — 

  [ Extended Analysis available on request ]


TEST 1B: Product — depth='full' (enterprise back-office)
{
  "product_verdict": "CLEAN FORMULATION",
  "average_score": 79.5,
  "higher_risk_count": 0,
  "ingredients_evaluated": 8,
  "interaction_summary": {
    "total": 3,
    "high": 1,
    "moderate": 0,
    "low": 2
  },
  "reformulation_notes_count": 1,
  "extract_included": true
}

TEST 2A: Ingredient — depth='extract'
{
  "status": 200,
  "api_version": "2.0",
  "depth": "extract",
  "request_type": "ingredient",
  "timestamp": "2026-02-27T12:51:57.883838",
  "extract": {
    "ingredient": "parabens",
    "score": 18,
    

IndexError: list index out of range

In [None]:
# NOURA - Cell 21: Fix compare endpoint + final test

def safe_first_flag(flags):
    """Safely get first flag or None if list is empty."""
    return flags[0] if flags else None

# Patch the compare section inline — just re-run compare with the fix
def noura_compare_v2(ingredient_a, ingredient_b, depth="extract"):
    timestamp = datetime.now().isoformat()

    result_a = noura_api_v2("ingredient", depth="full",
                            ingredient=ingredient_a, use_claude=False)
    time.sleep(1)
    result_b = noura_api_v2("ingredient", depth="full",
                            ingredient=ingredient_b, use_claude=False)

    winner = ingredient_a if (result_a.get("health_score") or 0) >= \
                             (result_b.get("health_score") or 0) \
             else ingredient_b

    compare_data = {
        "status": 200,
        "api_version": "2.0",
        "depth": depth,
        "request_type": "compare",
        "timestamp": timestamp,
        "safer_ingredient": winner,
        "ingredient_a": {
            "name": ingredient_a,
            "score": result_a.get("health_score"),
            "verdict": result_a.get("verdict"),
            "confidence_label": result_a.get("confidence_label"),
            "top_flag": safe_first_flag(result_a.get("concern_flags", []))
        },
        "ingredient_b": {
            "name": ingredient_b,
            "score": result_b.get("health_score"),
            "verdict": result_b.get("verdict"),
            "confidence_label": result_b.get("confidence_label"),
            "top_flag": safe_first_flag(result_b.get("concern_flags", []))
        }
    }

    if depth == "full":
        compare_data["ingredient_a"]["full_evaluation"] = result_a
        compare_data["ingredient_b"]["full_evaluation"] = result_b

    return compare_data


# ── TEST 4: Compare — both depths ────────────────────────────
print("TEST 4A: Compare — depth='extract'")
r = noura_compare_v2("parabens", "phenoxyethanol", depth="extract")
print(json.dumps(r, indent=2))
print()

print("TEST 4B: Compare — depth='full'")
r = noura_compare_v2("parabens", "phenoxyethanol", depth="full")
print(json.dumps({
    "safer_ingredient": r["safer_ingredient"],
    "ingredient_a": {
        "name": r["ingredient_a"]["name"],
        "score": r["ingredient_a"]["score"],
        "verdict": r["ingredient_a"]["verdict"],
        "top_flag": r["ingredient_a"]["top_flag"]
    },
    "ingredient_b": {
        "name": r["ingredient_b"]["name"],
        "score": r["ingredient_b"]["score"],
        "verdict": r["ingredient_b"]["verdict"],
        "top_flag": r["ingredient_b"]["top_flag"]
    },
    "full_evaluations_included": "full_evaluation" in r["ingredient_a"]
}, indent=2))

print()
print("=" * 55)
print("  NOURA API v2.0 — ALL TESTS COMPLETE")
print("  One API. One endpoint. depth='extract' or depth='full'")
print("=" * 55)

TEST 4A: Compare — depth='extract'
{
  "status": 200,
  "api_version": "2.0",
  "depth": "extract",
  "request_type": "compare",
  "timestamp": "2026-02-27T13:24:49.508330",
  "safer_ingredient": "phenoxyethanol",
  "ingredient_a": {
    "name": "parabens",
    "score": 18,
    "verdict": "HIGHER RISK",
    "confidence_label": "VERY HIGH",
    "top_flag": "Endocrine disruption \u2014 estrogenic activity confirmed"
  },
  "ingredient_b": {
    "name": "phenoxyethanol",
    "score": 52.2,
    "verdict": "LIMITED SUPPORT",
    "confidence_label": "HIGH",
    "top_flag": null
  }
}

TEST 4B: Compare — depth='full'
{
  "safer_ingredient": "phenoxyethanol",
  "ingredient_a": {
    "name": "parabens",
    "score": 18,
    "verdict": "HIGHER RISK",
    "top_flag": "Endocrine disruption \u2014 estrogenic activity confirmed"
  },
  "ingredient_b": {
    "name": "phenoxyethanol",
    "score": 52.2,
    "verdict": "LIMITED SUPPORT",
    "top_flag": null
  },
  "full_evaluations_included": true
}



In [None]:
%%writefile /content/noura_pdf_v3.py
"""
NOURA Enterprise PDF Report Generator v3
- Real logo embedded (canvas drawImage)
- Compact spacing — Weleda target 2pp, Reformulation 3pp
"""

from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.units import mm, cm
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    HRFlowable, PageBreak, KeepTogether
)
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
from reportlab.lib.utils import ImageReader
from datetime import datetime
import os

# ── Brand colours ──────────────────────────────────────────────
NAVY       = colors.HexColor("#1E3A5F")
BLUE       = colors.HexColor("#2E86AB")
GREEN_FG   = colors.HexColor("#1A6B3A")
GREEN_BG   = colors.HexColor("#E8F5E9")
GREEN_BD   = colors.HexColor("#A5D6A7")
AMBER_FG   = colors.HexColor("#7A5C00")
AMBER_BG   = colors.HexColor("#FFF8E1")
AMBER_BD   = colors.HexColor("#FFD54F")
RED_FG     = colors.HexColor("#8B0000")
RED_BG     = colors.HexColor("#FFEBEE")
RED_BD     = colors.HexColor("#EF9A9A")
GRAY_TEXT  = colors.HexColor("#666666")
GRAY_LIGHT = colors.HexColor("#F7F7F7")
GRAY_BD    = colors.HexColor("#DDDDDD")
WHITE      = colors.white
BLACK      = colors.HexColor("#1A1A1A")

PAGE_W, PAGE_H = A4
MARGIN    = 1.8 * cm
CONTENT_W = PAGE_W - 2 * MARGIN

LOGO_PATH = "/content/noura-rag/assets/noura_logo.png"

# ── Helpers ────────────────────────────────────────────────────
def verdict_colours(score, verdict):
    v = (verdict or "").upper()
    if "HIGHER" in v or "CONCERNS" in v or (score and score < 50):
        return RED_FG, RED_BG, RED_BD, "HIGHER RISK"
    elif "WELL" in v or "CLEAN" in v or (score and score >= 70):
        return GREEN_FG, GREEN_BG, GREEN_BD, "CLEAN"
    else:
        return AMBER_FG, AMBER_BG, AMBER_BD, "MODERATE"

VERDICT_TEXT = {
    "CLEAN FORMULATION":    "Well-formulated. Strong safety evidence across all evaluated ingredients.",
    "MIXED FORMULATION":    "Generally acceptable. Some ingredients warrant attention.",
    "FORMULATION CONCERNS": "Contains ingredients with documented safety concerns. Review recommended.",
}

def hr(color=GRAY_BD, thick=0.5, before=3, after=5):
    return HRFlowable(width="100%", thickness=thick,
                      color=color, spaceBefore=before, spaceAfter=after)

def P(text, name="p", **kw):
    d = dict(fontName="Helvetica", fontSize=9, textColor=BLACK,
             leading=13, spaceAfter=2)
    d.update(kw)
    return Paragraph(text, ParagraphStyle(name, **d))

def section_head(num, title):
    return [
        Paragraph(f"{num}.  {title}",
            ParagraphStyle("h1", fontName="Helvetica-Bold", fontSize=13,
                           textColor=NAVY, spaceBefore=8, spaceAfter=3,
                           leading=16)),
        hr(BLUE, thick=1.0, before=0, after=5),
    ]


# ── Header / Footer ────────────────────────────────────────────
class HF:
    def __init__(self, product_name, report_date):
        self.name = product_name
        self.date = report_date
        self._logo = ImageReader(LOGO_PATH) if os.path.exists(LOGO_PATH) else None

    def __call__(self, canv, doc):
        canv.saveState()
        if doc.page == 1:
            if self._logo:
                lw_pt, lh_pt = 72*mm, 14*mm   # compact logo
                lx = (PAGE_W - lw_pt) / 2
                ly = PAGE_H - 2.2*cm - lh_pt   # sits just below top margin
                canv.drawImage(self._logo, lx, ly, lw_pt, lh_pt,
                               mask="auto", preserveAspectRatio=True)
            canv.setFont("Helvetica", 9.5)
            canv.setFillColor(BLUE)
            canv.drawCentredString(PAGE_W/2, PAGE_H - 2.2*cm - 14*mm - 0.55*cm,
                                   "Evidence-Based Product Intelligence")
        else:
            canv.setStrokeColor(BLUE)
            canv.setLineWidth(0.8)
            canv.line(MARGIN, PAGE_H-1.2*cm, PAGE_W-MARGIN, PAGE_H-1.2*cm)
            if self._logo:
                canv.drawImage(self._logo, MARGIN, PAGE_H-1.05*cm,
                               22*mm, 4.2*mm, mask="auto",
                               preserveAspectRatio=True)
            canv.setFont("Helvetica", 7)
            canv.setFillColor(GRAY_TEXT)
            canv.drawString(MARGIN+24*mm, PAGE_H-0.95*cm,
                            f"Product Evaluation  |  {self.name}")
            canv.drawRightString(PAGE_W-MARGIN, PAGE_H-0.95*cm,
                                 f"Confidential  |  {self.date}")

        canv.setStrokeColor(GRAY_BD)
        canv.setLineWidth(0.3)
        canv.line(MARGIN, 1.2*cm, PAGE_W-MARGIN, 1.2*cm)
        canv.setFont("Helvetica", 7)
        canv.setFillColor(GRAY_TEXT)
        canv.drawString(MARGIN, 0.75*cm, "NOURA AI  —  Proprietary & Confidential")
        canv.drawRightString(PAGE_W-MARGIN, 0.75*cm, f"Page {doc.page}")
        canv.restoreState()


# ── COVER PAGE ─────────────────────────────────────────────────
def build_cover(product_name, avg_score, product_verdict,
                higher_risk, report_date, client_name=""):

    fg, bg, bd, label = verdict_colours(avg_score, product_verdict)
    story = []
    story.append(Spacer(1, 2.8*cm))   # space for canvas logo + tagline
    story.append(hr(BLUE, thick=1.2, before=2, after=8))

    story.append(P("PRODUCT EVALUATION REPORT", "label",
                   fontName="Helvetica-Bold", fontSize=9,
                   textColor=NAVY, alignment=TA_CENTER, spaceAfter=3))
    story.append(P(product_name, "pname",
                   fontName="Helvetica-Bold", fontSize=16,
                   textColor=BLACK, alignment=TA_CENTER,
                   leading=20, spaceAfter=10))

    score_str   = f"{avg_score}/100" if avg_score is not None else "N/A"
    verdict_sub = VERDICT_TEXT.get(product_verdict, product_verdict or "")

    score_box = Table([[
        P(score_str, "sc",
          fontName="Helvetica-Bold", fontSize=28,
          textColor=fg, alignment=TA_CENTER, leading=32),
        [P(label, "vl",
           fontName="Helvetica-Bold", fontSize=14,
           textColor=fg, alignment=TA_CENTER, spaceAfter=2),
         P(verdict_sub, "vs",
           fontSize=8, textColor=fg,
           alignment=TA_CENTER, leading=11)],
    ]], colWidths=[CONTENT_W*0.34, CONTENT_W*0.66], rowHeights=[60])
    score_box.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,-1), bg),
        ("BOX",           (0,0), (-1,-1), 1.2, bd),
        ("LINEAFTER",     (0,0), (0,-1), 0.5, bd),
        ("VALIGN",        (0,0), (-1,-1), "MIDDLE"),
        ("TOPPADDING",    (0,0), (-1,-1), 8),
        ("BOTTOMPADDING", (0,0), (-1,-1), 8),
        ("LEFTPADDING",   (0,0), (-1,-1), 12),
        ("RIGHTPADDING",  (0,0), (-1,-1), 12),
    ]))
    story.append(score_box)
    story.append(Spacer(1, 0.6*cm))
    story.append(hr(GRAY_BD, thick=0.3, before=0, after=4))

    rows = []
    if client_name:
        rows.append(["Prepared For", client_name])
    rows += [
        ["Report Date",           report_date],
        ["Evaluation Standard",   "NOURA Evidence Hierarchy v2.0 (9-tier)"],
        ["Higher Risk Ingredients", str(higher_risk)],
    ]
    mt = Table(rows, colWidths=[CONTENT_W*0.36, CONTENT_W*0.64])
    mt.setStyle(TableStyle([
        ("FONTNAME",      (0,0), (0,-1), "Helvetica-Bold"),
        ("FONTSIZE",      (0,0), (-1,-1), 9),
        ("TEXTCOLOR",     (0,0), (0,-1), NAVY),
        ("TOPPADDING",    (0,0), (-1,-1), 4),
        ("BOTTOMPADDING", (0,0), (-1,-1), 4),
        ("LINEBELOW",     (0,0), (-1,-2), 0.3, GRAY_BD),
    ]))
    story.append(mt)
    story.append(Spacer(1, 0.5*cm))
    story.append(P(
        "Generated by NOURA AI using live PubMed evidence retrieval, a curated 9-tier "
        "evidence hierarchy, and automated formula interaction detection. "
        "For internal use only. Not for public distribution without authorisation.",
        "disc", fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_CENTER, leading=11))
    story.append(PageBreak())
    return story


# ── SECTION 1: Public Extract ───────────────────────────────────
def build_extract(extract):
    story = section_head("1", "Public Extract")
    story.append(P(
        "Consumer-facing layer derived automatically from the full evaluation. "
        "Use on product pages, spa menus, shelf tags, and chatbot responses.",
        "d1", textColor=GRAY_TEXT, spaceAfter=5))

    score = extract.get("score")
    label = extract.get("color_label", "")
    fg, bg, bd, _ = verdict_colours(score, label)

    et = Table([
        [P("SCORE",  "eh", fontName="Helvetica-Bold", fontSize=8, textColor=WHITE, alignment=TA_CENTER),
         P("SIGNAL", "eh", fontName="Helvetica-Bold", fontSize=8, textColor=WHITE, alignment=TA_CENTER),
         P("VERDICT","eh", fontName="Helvetica-Bold", fontSize=8, textColor=WHITE)],
        [P(f"{score}/100" if score else "N/A", "es",
           fontName="Helvetica-Bold", fontSize=16, textColor=fg, alignment=TA_CENTER),
         P(label, "el",
           fontName="Helvetica-Bold", fontSize=11, textColor=fg, alignment=TA_CENTER),
         P(extract.get("plain_verdict",""), "ev", fontSize=9, textColor=BLACK, leading=13)],
    ], colWidths=[CONTENT_W*0.17, CONTENT_W*0.20, CONTENT_W*0.63],
       rowHeights=[16, 40])
    et.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("BACKGROUND",    (0,1), (-1,1), bg),
        ("BOX",           (0,0), (-1,-1), 0.8, bd),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("VALIGN",        (0,0), (-1,-1), "MIDDLE"),
        ("TOPPADDING",    (0,0), (-1,-1), 4),
        ("BOTTOMPADDING", (0,0), (-1,-1), 4),
        ("LEFTPADDING",   (0,0), (-1,-1), 7),
        ("RIGHTPADDING",  (0,0), (-1,-1), 7),
    ]))
    story.append(et)

    flags = extract.get("flags", [])
    if flags:
        story.append(Spacer(1, 3))
        story.append(P("Risk Flags", "fh", fontName="Helvetica-Bold",
                       fontSize=8.5, textColor=RED_FG, spaceAfter=2))
        for f in flags:
            story.append(P(f"  \u2022  {f.replace('⚠ ','').strip()}",
                           "fi", fontSize=8.5, textColor=RED_FG,
                           leading=12, spaceAfter=1))

    alt = extract.get("alternative_signal")
    if alt:
        story.append(Spacer(1, 3))
        at = Table([[P(f"Cleaner alternatives available — {alt}",
                       "alt", fontSize=8.5, textColor=GREEN_FG, leading=12)]],
                   colWidths=[CONTENT_W])
        at.setStyle(TableStyle([
            ("BACKGROUND",    (0,0), (-1,-1), GREEN_BG),
            ("BOX",           (0,0), (-1,-1), 0.6, GREEN_BD),
            ("TOPPADDING",    (0,0), (-1,-1), 5),
            ("BOTTOMPADDING", (0,0), (-1,-1), 5),
            ("LEFTPADDING",   (0,0), (-1,-1), 8),
            ("RIGHTPADDING",  (0,0), (-1,-1), 8),
        ]))
        story.append(at)

    story.append(Spacer(1, 0.1*cm))
    return story


# ── SECTION 2: Ingredients ──────────────────────────────────────
def build_ingredients(ingredients):
    story = section_head("2", "Ingredient Evaluation")
    story.append(P(
        "Each ingredient evaluated against NOURA's 9-tier evidence hierarchy. "
        "Scores are direction-aware weighted with sample-size multipliers.",
        "d2", textColor=GRAY_TEXT, spaceAfter=5))

    CW = [CONTENT_W*0.27, CONTENT_W*0.10,
          CONTENT_W*0.25, CONTENT_W*0.22, CONTENT_W*0.16]

    def hdr(t, align=TA_CENTER):
        return P(t, "th", fontName="Helvetica-Bold", fontSize=8,
                 textColor=WHITE, alignment=align)

    rows = [[hdr("Ingredient", TA_LEFT), hdr("Score"),
             hdr("Verdict"), hdr("Confidence"), hdr("Source")]]

    for r in ingredients:
        score   = r.get("health_score")
        verdict = r.get("verdict", "")
        fg, bg, bd, _ = verdict_colours(score, verdict)
        flag = (r.get("flag") or "").split("|")[0].strip()[:90]

        ingr_cell = [P(r["ingredient"].title(), "in",
                       fontName="Helvetica-Bold", fontSize=9,
                       textColor=RED_FG if flag else BLACK, leading=12)]
        if flag:
            ingr_cell.append(P(flag, "if", fontSize=7.5,
                               textColor=RED_FG, leading=11, spaceAfter=0))

        rows.append([
            ingr_cell,
            P(str(score) if score is not None else "N/A", "sc2",
              fontName="Helvetica-Bold", fontSize=11,
              textColor=fg, alignment=TA_CENTER),
            P(verdict, "vc",
              fontName="Helvetica-Bold", fontSize=7.5,
              textColor=fg, alignment=TA_CENTER),
            P(r.get("confidence_label",""), "cc",
              fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_CENTER),
            P(r.get("evidence_source","")
               .replace("NOURA Knowledge Base","KB")
               .replace("PubMed Live Search","Live"),
              "src", fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_CENTER),
        ])

    t = Table(rows, colWidths=CW, repeatRows=1)
    ts = [
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("VALIGN",        (0,0), (-1,-1), "TOP"),
        ("TOPPADDING",    (0,0), (-1,-1), 5),
        ("BOTTOMPADDING", (0,0), (-1,-1), 5),
        ("LEFTPADDING",   (0,0), (-1,-1), 5),
        ("RIGHTPADDING",  (0,0), (-1,-1), 5),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("BOX",           (0,0), (-1,-1), 0.7, GRAY_BD),
        ("ROWBACKGROUNDS",(0,1), (-1,-1), [WHITE, GRAY_LIGHT]),
    ]
    for i, r in enumerate(ingredients, 1):
        if r.get("verdict") == "HIGHER RISK":
            ts.append(("BACKGROUND", (0,i), (-1,i), RED_BG))
        elif r.get("verdict") == "LIMITED SUPPORT" and r.get("flag"):
            ts.append(("BACKGROUND", (0,i), (-1,i), AMBER_BG))
    t.setStyle(TableStyle(ts))
    story.append(t)
    story.append(Spacer(1, 0.1*cm))
    return story


# ── SECTION 3: Interactions ─────────────────────────────────────
def build_interactions(interactions, summary):
    story = section_head("3", "Formula Interaction Analysis")

    if not interactions:
        story.append(P("\u2713  No known ingredient interactions detected in this formula.",
                       "ok", fontName="Helvetica-Bold", fontSize=9,
                       textColor=GREEN_FG, spaceAfter=4))
        return story

    story.append(P(
        f"Interactions detected: {summary['total']}  "
        f"({summary['high']} HIGH  /  {summary['moderate']} MODERATE  /  {summary['low']} LOW)",
        "is", fontSize=9, spaceAfter=5))

    order = {"HIGH": 0, "MODERATE": 1, "LOW": 2}
    for ix in sorted(interactions, key=lambda x: order.get(x["severity"], 3)):
        sev = ix["severity"]
        fg = RED_FG if sev=="HIGH" else (AMBER_FG if sev=="MODERATE" else GREEN_FG)
        bg = RED_BG if sev=="HIGH" else (AMBER_BG if sev=="MODERATE" else GREEN_BG)
        bd = RED_BD if sev=="HIGH" else (AMBER_BD if sev=="MODERATE" else GREEN_BD)

        ingr_str = "  +  ".join(i.title() for i in ix.get("matched_ingredients",[]))

        block = Table([
            [P(f"{sev}  INTERACTION", "si",
               fontName="Helvetica-Bold", fontSize=8, textColor=fg),
             P(ix.get("type","").upper(), "sit",
               fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_RIGHT)],
            [P(ingr_str, "ii",
               fontName="Helvetica-Bold", fontSize=9.5,
               textColor=BLACK, spaceAfter=2), ""],
            [P(ix["description"], "id",
               fontSize=8.5, textColor=BLACK, leading=12), ""],
            [P(f"Recommendation:  {ix['recommendation']}", "ir",
               fontName="Helvetica-Oblique", fontSize=8.5,
               textColor=fg, leading=12), ""],
        ], colWidths=[CONTENT_W*0.55, CONTENT_W*0.45])

        block.setStyle(TableStyle([
            ("BACKGROUND",    (0,0), (-1,-1), bg),
            ("SPAN",          (0,1), (-1,1)),
            ("SPAN",          (0,2), (-1,2)),
            ("SPAN",          (0,3), (-1,3)),
            ("BOX",           (0,0), (-1,-1), 1.0, bd),
            ("LINEBELOW",     (0,0), (-1,0), 0.4, bd),
            ("TOPPADDING",    (0,0), (-1,-1), 5),
            ("BOTTOMPADDING", (0,0), (-1,-1), 5),
            ("LEFTPADDING",   (0,0), (-1,-1), 8),
            ("RIGHTPADDING",  (0,0), (-1,-1), 8),
            ("VALIGN",        (0,0), (-1,-1), "TOP"),
        ]))
        story.append(KeepTogether([block, Spacer(1, 4)]))

    return story


# ── SECTION 4: Reformulation ────────────────────────────────────
def build_reformulation(notes):
    story = section_head("4", "Reformulation Recommendations")

    if not notes:
        story.append(P("\u2713  No reformulation recommendations required for this formula.",
                       "ok", fontName="Helvetica-Bold", fontSize=9,
                       textColor=GREEN_FG, spaceAfter=4))
        return story

    story.append(P(
        "Ingredients below the WELL SUPPORTED threshold or with documented concerns. "
        "Substitution or concentration review recommended.",
        "d4", textColor=GRAY_TEXT, spaceAfter=5))

    def hdr(t): return P(t, "rh", fontName="Helvetica-Bold",
                          fontSize=8, textColor=WHITE)
    rows = [[hdr("Ingredient"), hdr("Issue"), hdr("Recommendation")]]
    for n in notes:
        rows.append([
            P(n["ingredient"].title(), "rn",
              fontName="Helvetica-Bold", fontSize=8.5, textColor=RED_FG),
            P((n.get("issue") or "")[:130], "ri",
              fontSize=8.5, textColor=BLACK, leading=12),
            P(n.get("suggestion",""), "rs",
              fontName="Helvetica-Oblique", fontSize=8.5,
              textColor=NAVY, leading=12),
        ])

    rt = Table(rows, colWidths=[CONTENT_W*0.19, CONTENT_W*0.38, CONTENT_W*0.43],
               repeatRows=1)
    rt.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("ROWBACKGROUNDS",(0,1), (-1,-1), [RED_BG, AMBER_BG]),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("BOX",           (0,0), (-1,-1), 0.7, RED_BD),
        ("TOPPADDING",    (0,0), (-1,-1), 5),
        ("BOTTOMPADDING", (0,0), (-1,-1), 5),
        ("LEFTPADDING",   (0,0), (-1,-1), 6),
        ("RIGHTPADDING",  (0,0), (-1,-1), 6),
        ("VALIGN",        (0,0), (-1,-1), "TOP"),
    ]))
    story.append(rt)
    story.append(Spacer(1, 0.1*cm))
    return story


# ── SECTION 5: Methodology ──────────────────────────────────────
def build_methodology():
    story = section_head("5", "Methodology")
    story.append(P(
        "NOURA evaluates each ingredient by retrieving up to 50 peer-reviewed studies "
        "from PubMed and classifying each by evidence type (9-tier hierarchy) and "
        "direction (SAFETY / CONCERN / NEUTRAL). Scores use direction-aware weighting "
        "with sample-size multipliers.",
        "d5", textColor=GRAY_TEXT, spaceAfter=6))

    tiers = [
        ["Tier", "Evidence Type", "Weight"],
        ["1", "Systematic Review / Meta-Analysis", "1.00"],
        ["2", "Regulatory Opinion (CIR, SCCS, FDA)", "0.75"],
        ["3", "Randomised Controlled Trial (RCT)", "0.70"],
        ["4", "Cohort / Observational Study", "0.60"],
        ["5", "Case-Control Study", "0.50"],
        ["6", "Case Report / Series", "0.40"],
        ["7", "In Vitro / Lab Study", "0.30"],
        ["8", "Animal Study", "0.25"],
        ["9", "Expert Opinion / Review", "0.20"],
    ]
    tt = Table(tiers,
               colWidths=[CONTENT_W*0.10, CONTENT_W*0.72, CONTENT_W*0.18])
    tt.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("TEXTCOLOR",     (0,0), (-1,0), WHITE),
        ("FONTNAME",      (0,0), (-1,0), "Helvetica-Bold"),
        ("FONTNAME",      (0,1), (-1,-1), "Helvetica"),
        ("FONTSIZE",      (0,0), (-1,-1), 8.5),
        ("ALIGN",         (0,0), (0,-1), "CENTER"),
        ("ALIGN",         (2,0), (2,-1), "CENTER"),
        ("FONTNAME",      (2,1), (2,-1), "Courier"),
        ("TEXTCOLOR",     (2,1), (2,-1), NAVY),
        ("TOPPADDING",    (0,0), (-1,-1), 4),
        ("BOTTOMPADDING", (0,0), (-1,-1), 4),
        ("LEFTPADDING",   (0,0), (-1,-1), 7),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("BOX",           (0,0), (-1,-1), 0.7, GRAY_BD),
        ("ROWBACKGROUNDS",(0,1), (-1,-1), [WHITE, GRAY_LIGHT]),
    ]))
    story.append(tt)
    story.append(Spacer(1, 4))
    story.append(P(
        "Scoring: safety studies → full weight; concern studies → 0.5x penalty; "
        "neutral → 0.5x. Sample-size multipliers: n≥1,000 (×1.5), n≥100 (×1.25), "
        "n<30 (×0.75). Hard override: majority concern signals cap score at 45/100.",
        "mn", fontSize=8, textColor=GRAY_TEXT, leading=12))
    return story


# ── MASTER BUILDER ──────────────────────────────────────────────
def generate_report(api_response, output_path, client_name=""):
    name        = api_response.get("product_name", "Unknown Product")
    avg_score   = api_response.get("average_score", 0)
    verdict     = api_response.get("product_verdict", "")
    higher_risk = api_response.get("higher_risk_count", 0)
    ingredients = api_response.get("ingredients", [])
    interactions= api_response.get("interactions", [])
    int_summary = api_response.get("interaction_summary",
                                   {"total":0,"high":0,"moderate":0,"low":0})
    refo_notes  = api_response.get("reformulation_notes", [])
    extract     = api_response.get("extract", {})
    report_date = datetime.now().strftime("%d %B %Y")

    doc = SimpleDocTemplate(
        output_path, pagesize=A4,
        topMargin=1.5*cm, bottomMargin=1.5*cm,
        leftMargin=MARGIN, rightMargin=MARGIN,
        title=f"NOURA Evaluation — {name}",
        author="NOURA AI",
    )
    hf = HF(name, report_date)

    story  = build_cover(name, avg_score, verdict,
                         higher_risk, report_date, client_name)
    story += build_extract(extract)
    story += build_ingredients(ingredients)
    story += build_interactions(interactions, int_summary)
    story += build_reformulation(refo_notes)
    story.append(PageBreak())
    story += build_methodology()

    doc.build(story, onFirstPage=hf, onLaterPages=hf)
    print(f"  {output_path}")
    return output_path


# ================================================================
# SAMPLE DATA
# ================================================================
weleda_response = {
    "product_name": "Weleda Skin Food Original Ultra-Rich Cream",
    "average_score": 84.2,
    "higher_risk_count": 0,
    "product_verdict": "CLEAN FORMULATION",
    "interaction_summary": {"total":0,"high":0,"moderate":0,"low":0},
    "ingredients": [
        {"ingredient":"glycerin","health_score":85,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"tocopherol","health_score":78,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Rare contact sensitisation in susceptible individuals"},
        {"ingredient":"xanthan gum","health_score":84,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"sodium hyaluronate","health_score":88,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"citric acid","health_score":82,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Can cause irritation at high concentrations in sensitive skin"},
        {"ingredient":"beeswax","health_score":86,"verdict":"WELL SUPPORTED",
         "confidence_label":"HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"sunflower seed oil","health_score":83,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"rosemary extract","health_score":76,"verdict":"WELL SUPPORTED",
         "confidence_label":"HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"May cause sensitisation in fragrance-allergic individuals"},
    ],
    "interactions": [],
    "reformulation_notes": [],
    "extract": {
        "score":84.2,"color_label":"CLEAN",
        "plain_verdict":"Well-formulated product with strong safety evidence.",
        "flags":[],"alternative_signal":None,
    }
}

concerning_response = {
    "product_name": "Luxury Renewal Night Cream — Reformulation Candidate",
    "average_score": 58.4,
    "higher_risk_count": 2,
    "product_verdict": "FORMULATION CONCERNS",
    "interaction_summary": {"total":4,"high":2,"moderate":2,"low":0},
    "ingredients": [
        {"ingredient":"glycerin","health_score":85,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"niacinamide","health_score":82,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"retinol","health_score":68,"verdict":"LIMITED SUPPORT",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"EU concentration restrictions (0.3% face) | Contraindicated in pregnancy | Photosensitising"},
        {"ingredient":"ascorbic acid","health_score":72,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Stability concerns — degrades rapidly if poorly formulated"},
        {"ingredient":"sodium benzoate","health_score":65,"verdict":"LIMITED SUPPORT",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Reacts with ascorbic acid to form benzene — avoid combination"},
        {"ingredient":"fragrance","health_score":28,"verdict":"HIGHER RISK",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Undisclosed mixture — up to 3,000 chemicals | Leading cause of cosmetic contact allergy"},
        {"ingredient":"parabens","health_score":18,"verdict":"HIGHER RISK",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Endocrine disruption confirmed | EU restrictions on butyl/propylparaben"},
    ],
    "interactions": [
        {"severity":"HIGH","type":"chemical reaction",
         "matched_ingredients":["sodium benzoate","ascorbic acid"],
         "description":"Sodium benzoate reacts with ascorbic acid in the presence of light and heat to form benzene, a carcinogen classified by IARC as Group 1.",
         "recommendation":"Remove sodium benzoate. Use phenoxyethanol or ethylhexylglycerin as preservative alternatives."},
        {"severity":"HIGH","type":"cumulative endocrine risk",
         "matched_ingredients":["parabens","fragrance"],
         "description":"Both parabens and common fragrance components are documented endocrine disruptors. Combined exposure significantly increases total endocrine burden.",
         "recommendation":"Replace both. Use phenoxyethanol for preservation and reformulate fragrance-free or with disclosed, IFRA-compliant essential oils."},
        {"severity":"MODERATE","type":"pH conflict",
         "matched_ingredients":["retinol","ascorbic acid"],
         "description":"Vitamin C requires acidic pH (below 3.5) for stability while retinol performs optimally at neutral pH. Combining destabilises both actives.",
         "recommendation":"Separate into AM (vitamin C) and PM (retinol) products, or substitute retinyl palmitate as a pH-stable retinoid form."},
        {"severity":"MODERATE","type":"efficacy reduction",
         "matched_ingredients":["niacinamide","ascorbic acid"],
         "description":"At high concentrations, niacinamide and ascorbic acid can react to form nicotinic acid, potentially causing transient flushing.",
         "recommendation":"Keep both below 10% concentration. Minimal concern in well-formulated products with correct pH."},
    ],
    "reformulation_notes": [
        {"ingredient":"parabens",
         "issue":"Endocrine disruption confirmed | EU restrictions on butyl/propylparaben",
         "suggestion":"Replace with phenoxyethanol 0.5–1.0% or ethylhexylglycerin 0.3–0.5% for equivalent preservation efficacy."},
        {"ingredient":"fragrance",
         "issue":"Undisclosed mixture — leading cause of contact allergy | Endocrine disruption risk",
         "suggestion":"Reformulate fragrance-free or replace with disclosed, allergen-screened essential oils at IFRA-compliant concentrations."},
        {"ingredient":"sodium benzoate",
         "issue":"Reacts with ascorbic acid to form benzene — critical incompatibility",
         "suggestion":"Remove entirely. Benzene formation risk is unacceptable in any leave-on cosmetic containing ascorbic acid."},
    ],
    "extract": {
        "score":58.4,"color_label":"HIGHER RISK",
        "plain_verdict":"Contains ingredients with documented safety concerns.",
        "flags":[
            "Sodium Benzoate + Ascorbic Acid: Forms benzene (carcinogen) under light/heat exposure.",
            "Parabens + Fragrance: Combined endocrine disruption burden — avoid together.",
            "Parabens: Endocrine disruption confirmed. EU restrictions apply.",
        ],
        "alternative_signal":"ask for options without Parabens and Fragrance.",
    }
}


if __name__ == "__main__":
    print("Generating NOURA Enterprise PDF Reports v3...")
    generate_report(weleda_response,
                    "/home/claude/NOURA_Weleda_v3.pdf",
                    client_name="Aman Group — Spa & Wellness")
    generate_report(concerning_response,
                    "/home/claude/NOURA_Reformulation_v3.pdf",
                    client_name="NOURA Enterprise — Brand Audit Sample")
    print("Done.")

Overwriting /content/noura_pdf_v3.py


In [None]:
!pip install reportlab pillow -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/2.0 MB[0m [31m8.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.9/2.0 MB[0m [31m29.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# NOURA — Cell 23: Generate Enterprise PDF Report
import sys
sys.path.insert(0, '/content')
from noura_pdf_v3 import generate_report

# Example: generate a report from a live API response
# Replace 'weleda_response' with any real api_response from noura_api_v2()
from noura_pdf_v3 import weleda_response, concerning_response

generate_report(weleda_response,
                "/content/NOURA_Weleda_Report.pdf",
                client_name="Aman Group — Spa & Wellness")

generate_report(concerning_response,
                "/content/NOURA_Reformulation_Report.pdf",
                client_name="NOURA Enterprise — Brand Audit Sample")

# Download both
from google.colab import files
files.download("/content/NOURA_Weleda_Report.pdf")
files.download("/content/NOURA_Reformulation_Report.pdf")

  /content/NOURA_Weleda_Report.pdf
  /content/NOURA_Reformulation_Report.pdf


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
%%writefile /content/noura_config.py
"""
NOURA Product Impact Scanner
Cell 24 — Master Configuration
Locked weights, evidence sources, and category definitions.
All scoring modules import from here. Change here = changes everywhere.
"""

# ═══════════════════════════════════════════════════════════════
# SECTION 1: PRODUCT CATEGORIES
# ═══════════════════════════════════════════════════════════════

PRODUCT_CATEGORIES = {
    "cosmetics": {
        "label": "Cosmetics & Personal Care",
        "includes": [
            "skincare", "body care", "haircare", "makeup", "sunscreen",
            "oral care", "deodorant", "fragrance", "feminine hygiene",
            "shaving", "nail care"
        ],
        "description": "Topically applied products — highest animal testing concern"
    },
    "food": {
        "label": "Food & Beverages",
        "includes": [
            "packaged food", "beverages", "supplements", "vitamins",
            "protein powder", "snacks", "dairy", "plant-based",
            "baby food", "meal replacement"
        ],
        "description": "Ingestible products — highest health and nutrition concern"
    },
    "cleaning": {
        "label": "Household Cleaning Products",
        "includes": [
            "all-purpose cleaner", "laundry detergent", "dish soap",
            "floor cleaner", "bathroom cleaner", "glass cleaner",
            "disinfectant", "bleach", "fabric softener", "drain cleaner"
        ],
        "description": "Chemical-heavy products — high environmental and VOC concern"
    },
    "baby": {
        "label": "Baby & Child Products",
        "includes": [
            "baby skincare", "baby shampoo", "baby lotion", "baby wipes",
            "diaper cream", "baby sunscreen", "baby food", "formula",
            "teething products", "baby cleaning products"
        ],
        "description": "Highest scrutiny — most vulnerable population"
    }
}


# ═══════════════════════════════════════════════════════════════
# SECTION 2: LOCKED DIMENSION WEIGHTS
# Health is always 70%. Never changes. Non-negotiable.
# ═══════════════════════════════════════════════════════════════

DIMENSION_WEIGHTS = {
    "cosmetics": {
        "health":       0.70,   # Toxicology, endocrine disruptors, allergens
        "animal":       0.15,   # Animal testing — elevated for this category
        "environment":  0.10,   # Packaging, biodegradability
        "governance":   0.05,   # Supply chain, certifications
    },
    "food": {
        "health":       0.70,   # Nutrition, additives, pesticides, contaminants
        "environment":  0.15,   # Eco-score, packaging, carbon footprint
        "animal":       0.10,   # Factory farming, welfare standards
        "governance":   0.05,   # Fair trade, labor practices
    },
    "cleaning": {
        "health":       0.70,   # VOCs, skin/respiratory hazards, toxicity
        "environment":  0.15,   # Aquatic toxicity, biodegradability, packaging
        "animal":       0.10,   # Animal testing, wildlife impact
        "governance":   0.05,   # Certifications, transparency
    },
    "baby": {
        "health":       0.70,   # Strictest standards — most vulnerable users
        "environment":  0.15,   # Packaging, materials safety
        "animal":       0.10,   # Animal testing
        "governance":   0.05,   # Supply chain safety, certifications
    }
}

# Validation — all weights must sum to 1.0
for cat, weights in DIMENSION_WEIGHTS.items():
    total = sum(weights.values())
    assert abs(total - 1.0) < 0.001, f"Weights for {cat} sum to {total}, not 1.0"


# ═══════════════════════════════════════════════════════════════
# SECTION 3: EVIDENCE SOURCES PER DIMENSION
# Each dimension pulls from specific authoritative databases.
# ═══════════════════════════════════════════════════════════════

EVIDENCE_SOURCES = {

    "health": {
        "primary": [
            {
                "name": "PubMed",
                "type": "scientific_literature",
                "url": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/",
                "covers": ["cosmetics", "food", "cleaning", "baby"],
                "data": "peer-reviewed studies, toxicology, clinical trials",
                "status": "✅ BUILT (Weeks 1-10)",
                "weight_in_health": 0.40
            },
            {
                "name": "ECHA — European Chemicals Agency",
                "type": "regulatory",
                "url": "https://echa.europa.eu/information-on-chemicals",
                "covers": ["cosmetics", "cleaning", "baby"],
                "data": "SVHC list, hazard classifications, REACH compliance",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.25
            },
            {
                "name": "EU SCCS — Scientific Committee on Consumer Safety",
                "type": "regulatory",
                "url": "https://ec.europa.eu/health/scientific_committees/consumer_safety",
                "covers": ["cosmetics", "baby"],
                "data": "cosmetic ingredient safety opinions",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.15
            },
            {
                "name": "CIR — Cosmetic Ingredient Review",
                "type": "industry_safety",
                "url": "https://cir-safety.org",
                "covers": ["cosmetics", "baby"],
                "data": "ingredient safety assessments, concentration limits",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.10
            },
            {
                "name": "FDA OpenFDA",
                "type": "regulatory",
                "url": "https://api.fda.gov",
                "covers": ["food", "cosmetics", "baby"],
                "data": "product recalls, adverse events, enforcement actions",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.10
            },
            {
                "name": "EFSA — European Food Safety Authority",
                "type": "regulatory",
                "url": "https://www.efsa.europa.eu",
                "covers": ["food", "baby"],
                "data": "food additives, contaminants, pesticide residues",
                "status": "🔲 TO BUILD — Week 12",
                "weight_in_health": 0.00   # food only — overridden per category
            },
        ],
        "noura_kb": {
            "name": "NOURA Knowledge Base",
            "status": "✅ BUILT — 17 ingredients",
            "target": "500+ ingredients by Week 14",
            "weight_in_health": "fills gaps when API returns no data"
        }
    },

    "environment": {
        "primary": [
            {
                "name": "Open Food Facts — Eco-Score",
                "type": "open_database",
                "url": "https://world.openfoodfacts.org/api/v2",
                "covers": ["food", "baby"],
                "data": "packaging, carbon footprint, biodiversity impact",
                "status": "🔲 TO BUILD — Week 12",
            },
            {
                "name": "ECHA — Environmental Hazards",
                "type": "regulatory",
                "url": "https://echa.europa.eu",
                "covers": ["cleaning", "cosmetics"],
                "data": "aquatic toxicity, biodegradability, persistence",
                "status": "🔲 TO BUILD — Week 12",
            },
            {
                "name": "EU Ecolabel Database",
                "type": "certification",
                "url": "https://ecolabel.eu",
                "covers": ["cleaning", "cosmetics"],
                "data": "certified eco-friendly products",
                "status": "🔲 TO BUILD — Week 12",
            },
        ]
    },

    "animal": {
        "primary": [
            {
                "name": "Leaping Bunny",
                "type": "certification",
                "url": "https://www.leapingbunny.org/guide/companies",
                "covers": ["cosmetics", "cleaning", "baby"],
                "data": "cruelty-free certified brands and products",
                "status": "🔲 TO BUILD — Week 13",
            },
            {
                "name": "PETA Beauty Without Bunnies",
                "type": "certification",
                "url": "https://www.peta.org/living/personal-care-fashion/beauty-without-bunnies/",
                "covers": ["cosmetics", "cleaning"],
                "data": "animal testing status by brand",
                "status": "🔲 TO BUILD — Week 13",
            },
            {
                "name": "Vegan Society Trademark",
                "type": "certification",
                "url": "https://www.vegansociety.com/the-vegan-trademark",
                "covers": ["cosmetics", "food", "cleaning"],
                "data": "vegan-certified products",
                "status": "🔲 TO BUILD — Week 13",
            },
        ]
    },

    "governance": {
        "primary": [
            {
                "name": "B Corp Directory",
                "type": "certification",
                "url": "https://www.bcorporation.net/en-us/find-a-b-corp/",
                "covers": ["cosmetics", "food", "cleaning", "baby"],
                "data": "certified B corporations — supply chain, labor, environment",
                "status": "🔲 TO BUILD — Week 13",
            },
            {
                "name": "Fair Trade Certified",
                "type": "certification",
                "url": "https://www.fairtradecertified.org",
                "covers": ["food", "cosmetics"],
                "data": "fair labor practices, ethical sourcing",
                "status": "🔲 TO BUILD — Week 13",
            },
        ]
    }
}


# ═══════════════════════════════════════════════════════════════
# SECTION 4: SCORING THRESHOLDS
# ═══════════════════════════════════════════════════════════════

SCORE_THRESHOLDS = {
    "CLEAN":        (80, 100),   # Green — recommend freely
    "ACCEPTABLE":   (60, 79),    # Amber — minor concerns noted
    "CAUTION":      (40, 59),    # Orange — significant concerns
    "HIGHER_RISK":  (0,  39),    # Red — reformulation recommended
}

# Baby products get stricter thresholds — same score, higher bar
BABY_SCORE_PENALTY = -10   # Subtract 10 from raw score before threshold lookup

# ═══════════════════════════════════════════════════════════════
# SECTION 5: BUILD ROADMAP
# ═══════════════════════════════════════════════════════════════

BUILD_ROADMAP = {
    "Week 11": {
        "goal": "Health dimension — expand from PubMed-only to full multi-source engine",
        "sources": ["ECHA", "EU SCCS", "CIR", "FDA OpenFDA"],
        "categories": ["cosmetics", "baby"],   # highest health risk first
        "deliverable": "noura_health_engine.py — queries all health sources per ingredient"
    },
    "Week 12": {
        "goal": "Environment dimension + Food category health sources",
        "sources": ["Open Food Facts", "ECHA environmental", "EU Ecolabel", "EFSA"],
        "categories": ["food", "cleaning"],
        "deliverable": "noura_environment_engine.py + food health module"
    },
    "Week 13": {
        "goal": "Animal Welfare + Governance dimensions",
        "sources": ["Leaping Bunny", "PETA", "Vegan Society", "B Corp", "Fair Trade"],
        "categories": ["all"],
        "deliverable": "noura_animal_engine.py + noura_governance_engine.py"
    },
    "Week 14": {
        "goal": "Master aggregator — combine all 4 dimensions into unified score",
        "sources": ["all"],
        "categories": ["all"],
        "deliverable": "noura_scanner.py — single function that returns complete 4-dimension score"
    },
    "Week 15": {
        "goal": "Demo website — B2B portal for client pilots",
        "deliverable": "Hosted URL, live product evaluation, PDF download"
    }
}


# ═══════════════════════════════════════════════════════════════
# SECTION 6: QUICK REFERENCE
# ═══════════════════════════════════════════════════════════════

def get_weights(category: str) -> dict:
    """Return dimension weights for a given product category."""
    if category not in DIMENSION_WEIGHTS:
        raise ValueError(f"Unknown category '{category}'. "
                         f"Choose from: {list(DIMENSION_WEIGHTS.keys())}")
    return DIMENSION_WEIGHTS[category]

def get_sources(dimension: str) -> list:
    """Return evidence sources for a given dimension."""
    if dimension not in EVIDENCE_SOURCES:
        raise ValueError(f"Unknown dimension '{dimension}'. "
                         f"Choose from: {list(EVIDENCE_SOURCES.keys())}")
    return EVIDENCE_SOURCES[dimension].get("primary", [])

def get_verdict(score: float, category: str = "cosmetics") -> str:
    """Return verdict label for a given score."""
    adjusted = score - (BABY_SCORE_PENALTY if category == "baby" else 0)
    for verdict, (low, high) in SCORE_THRESHOLDS.items():
        if low <= adjusted <= high:
            return verdict
    return "HIGHER_RISK"


# ═══════════════════════════════════════════════════════════════
# SELF-TEST
# ═══════════════════════════════════════════════════════════════
if __name__ == "__main__":
    print("NOURA Master Config — Self Test\n")

    for cat in PRODUCT_CATEGORIES:
        w = get_weights(cat)
        print(f"{cat.upper()}: Health {w['health']*100:.0f}% | "
              f"Environment {w['environment']*100:.0f}% | "
              f"Animal {w['animal']*100:.0f}% | "
              f"Governance {w['governance']*100:.0f}%")

    print("\nWeek 11 goal:", BUILD_ROADMAP["Week 11"]["goal"])
    print("Sources to build:", BUILD_ROADMAP["Week 11"]["sources"])
    print("\nVerdict test — score 85, cosmetics:", get_verdict(85, "cosmetics"))
    print("Verdict test — score 85, baby:", get_verdict(85, "baby"))
    print("\n✅ Config locked and ready.")

Writing /content/noura_config.py


In [None]:
%%writefile /content/noura_health_echa.py
"""
NOURA Health Engine — ECHA Module
Cell 25

Queries the European Chemicals Agency (ECHA) for:
- SVHC (Substances of Very High Concern) list
- Hazard classifications (CMR: carcinogenic, mutagenic, reprotoxic)
- REACH compliance status
- Endocrine disruptor flags

Covers: cosmetics, cleaning, baby products
Weight in Health score: 25%
"""

import requests
import time
import json
from datetime import datetime, timedelta

# ── ECHA endpoints ─────────────────────────────────────────────
ECHA_CHEM_API   = "https://chem.echa.europa.eu/api/substance/v1/search"
ECHA_SVHC_URL   = "https://echa.europa.eu/candidate-list-table"

# ECHA's REST API for substance lookup
ECHA_SEARCH_URL = "https://chem.echa.europa.eu/api/substance/v1/search"
ECHA_INFO_URL   = "https://chem.echa.europa.eu/api/substance/v1"

# ── SVHC hardcoded list (current as of Feb 2026) ───────────────
# This is ECHA's Candidate List — substances identified as SVHC.
# We embed the most relevant cosmetic/cleaning ones as a fast-lookup
# fallback when the API is unavailable.
# Full list: https://echa.europa.eu/candidate-list-table

SVHC_HARDCODED = {
    # Endocrine disruptors
    "bisphenol a":              {"hazard": "CMR + ED", "concern": "endocrine disruption, reproductive toxicity"},
    "bpa":                      {"hazard": "CMR + ED", "concern": "endocrine disruption, reproductive toxicity"},
    "diethylstilbestrol":       {"hazard": "CMR",      "concern": "carcinogen, endocrine disruptor"},

    # Parabens flagged by ECHA
    "butylparaben":             {"hazard": "ED",       "concern": "endocrine disruption, reproductive toxicity"},
    "propylparaben":            {"hazard": "ED",       "concern": "endocrine disruption"},
    "isopropylparaben":         {"hazard": "ED",       "concern": "endocrine disruption"},
    "isobutylparaben":          {"hazard": "ED",       "concern": "endocrine disruption"},
    "benzylparaben":            {"hazard": "ED",       "concern": "endocrine disruption"},

    # Phthalates
    "dibutyl phthalate":        {"hazard": "CMR",      "concern": "reproductive toxicity, endocrine disruption"},
    "dbp":                      {"hazard": "CMR",      "concern": "reproductive toxicity"},
    "dihexyl phthalate":        {"hazard": "CMR",      "concern": "reproductive toxicity"},
    "bis(2-ethylhexyl) phthalate": {"hazard": "CMR",  "concern": "reproductive toxicity"},
    "dehp":                     {"hazard": "CMR",      "concern": "reproductive toxicity"},
    "diisopentyl phthalate":    {"hazard": "CMR",      "concern": "reproductive toxicity"},

    # Heavy metals
    "lead":                     {"hazard": "CMR",      "concern": "neurotoxin, reproductive toxicity, carcinogen"},
    "lead compounds":           {"hazard": "CMR",      "concern": "neurotoxin, carcinogen"},
    "cadmium":                  {"hazard": "CMR",      "concern": "carcinogen, kidney toxicity"},
    "arsenic":                  {"hazard": "CMR",      "concern": "carcinogen"},
    "mercury":                  {"hazard": "CMR",      "concern": "neurotoxin"},
    "chromium vi":              {"hazard": "CMR",      "concern": "carcinogen, skin sensitiser"},

    # Formaldehyde and releasers
    "formaldehyde":             {"hazard": "CMR",      "concern": "carcinogen (IARC Group 1), skin sensitiser"},
    "dmdm hydantoin":           {"hazard": "RELEASER", "concern": "formaldehyde releaser — carcinogen risk"},
    "quaternium-15":            {"hazard": "RELEASER", "concern": "formaldehyde releaser"},
    "imidazolidinyl urea":      {"hazard": "RELEASER", "concern": "formaldehyde releaser"},
    "diazolidinyl urea":        {"hazard": "RELEASER", "concern": "formaldehyde releaser"},
    "2-bromo-2-nitropropane-1,3-diol": {"hazard": "RELEASER", "concern": "formaldehyde releaser"},

    # PAHs
    "anthracene":               {"hazard": "CMR",      "concern": "carcinogen, PBT substance"},
    "benzo[a]pyrene":           {"hazard": "CMR",      "concern": "carcinogen (IARC Group 1)"},

    # UV filters flagged
    "4-methylbenzylidene camphor": {"hazard": "ED",   "concern": "endocrine disruption"},
    "benzophenone-1":           {"hazard": "ED",       "concern": "endocrine disruption"},
    "benzophenone-3":           {"hazard": "ED",       "concern": "endocrine disruption, skin sensitiser"},
    "homosalate":               {"hazard": "ED",       "concern": "endocrine disruption"},

    # Cleaning-specific
    "sodium dichromate":        {"hazard": "CMR",      "concern": "carcinogen, reproductive toxicity"},
    "trichloroethylene":        {"hazard": "CMR",      "concern": "carcinogen (IARC Group 1)"},
    "1,4-dioxane":              {"hazard": "CMR",      "concern": "probable carcinogen, contaminant in ethoxylated ingredients"},

    # Fragrance allergens (ECHA flagged)
    "musk ambrette":            {"hazard": "BANNED",   "concern": "banned EU cosmetics — neurotoxin"},
    "musk tibetene":            {"hazard": "BANNED",   "concern": "banned EU cosmetics"},
    "6-methylcoumarin":         {"hazard": "BANNED",   "concern": "banned EU cosmetics"},
}

# Hazard severity → score deduction mapping
ECHA_HAZARD_DEDUCTIONS = {
    "CMR":      -25,   # Carcinogenic/Mutagenic/Reprotoxic — severe
    "CMR + ED": -30,   # Both CMR and endocrine disruptor — most severe
    "ED":       -20,   # Endocrine disruptor only
    "RELEASER": -15,   # Formaldehyde releaser
    "BANNED":   -35,   # Banned in EU — automatic major penalty
    "PBT":      -20,   # Persistent, Bioaccumulative, Toxic
    "vPvB":     -15,   # Very Persistent, very Bioaccumulative
}


def check_ingredient_echa(ingredient_name: str) -> dict:
    """
    Check a single ingredient against ECHA SVHC list.
    Returns a result dict with hazard info and score impact.

    Steps:
    1. Normalize ingredient name
    2. Check hardcoded SVHC list (fast, offline)
    3. Try ECHA REST API (live, more comprehensive)
    4. Return combined result
    """
    name_lower = ingredient_name.lower().strip()

    result = {
        "ingredient":     ingredient_name,
        "source":         "ECHA",
        "svhc_listed":    False,
        "hazard_class":   None,
        "concern":        None,
        "score_impact":   0,
        "confidence":     "LOW",
        "api_checked":    False,
        "timestamp":      datetime.now().isoformat()
    }

    # ── Step 1: Check hardcoded SVHC list ─────────────────────
    for svhc_name, svhc_data in SVHC_HARDCODED.items():
        if svhc_name in name_lower or name_lower in svhc_name:
            result.update({
                "svhc_listed":  True,
                "hazard_class": svhc_data["hazard"],
                "concern":      svhc_data["concern"],
                "score_impact": ECHA_HAZARD_DEDUCTIONS.get(svhc_data["hazard"], -10),
                "confidence":   "HIGH",
                "lookup_method": "SVHC_hardcoded"
            })
            return result

    # ── Step 2: Try ECHA REST API ──────────────────────────────
    try:
        response = requests.get(
            ECHA_SEARCH_URL,
            params={"name": ingredient_name, "type": "search"},
            timeout=5,
            headers={"Accept": "application/json"}
        )
        result["api_checked"] = True

        if response.status_code == 200:
            data = response.json()
            substances = data.get("results", [])

            for substance in substances[:3]:   # check top 3 matches
                classifications = substance.get("classifications", [])
                for clf in classifications:
                    hazard_class = clf.get("hazardClass", "")

                    # Check for CMR classification
                    if any(x in hazard_class.upper() for x in
                           ["CARC", "MUTA", "REPR", "STOT"]):
                        result.update({
                            "svhc_listed":    True,
                            "hazard_class":   "CMR",
                            "concern":        f"ECHA classification: {hazard_class}",
                            "score_impact":   -25,
                            "confidence":     "HIGH",
                            "lookup_method":  "ECHA_API"
                        })
                        return result

                    # Check for endocrine disruptor
                    if "ENDOCRINE" in hazard_class.upper():
                        result.update({
                            "svhc_listed":    True,
                            "hazard_class":   "ED",
                            "concern":        "ECHA endocrine disruptor classification",
                            "score_impact":   -20,
                            "confidence":     "HIGH",
                            "lookup_method":  "ECHA_API"
                        })
                        return result

    except requests.exceptions.RequestException:
        # API unavailable — hardcoded list is sufficient for now
        pass

    # ── Step 3: Not found — ingredient is not SVHC listed ─────
    result.update({
        "svhc_listed":    False,
        "score_impact":   0,
        "confidence":     "MEDIUM" if result["api_checked"] else "LOW",
        "lookup_method":  "not_found"
    })
    return result


def check_formula_echa(ingredients: list) -> dict:
    """
    Check an entire ingredient list against ECHA.
    Returns aggregated results and total score impact.

    Args:
        ingredients: list of ingredient name strings

    Returns:
        {
            "svhc_found": [...],         # list of flagged ingredients
            "total_score_impact": int,   # sum of all deductions
            "highest_hazard": str,       # most severe hazard found
            "summary": str               # human-readable summary
        }
    """
    results = []
    total_impact = 0
    flagged = []

    for ingredient in ingredients:
        result = check_ingredient_echa(ingredient)
        results.append(result)

        if result["svhc_listed"]:
            flagged.append(result)
            total_impact += result["score_impact"]

        # Respect rate limits
        time.sleep(0.3)

    # Cap total deduction at -50 (don't double-penalise exhaustively)
    total_impact = max(total_impact, -50)

    # Find highest hazard
    hazard_priority = ["BANNED", "CMR + ED", "CMR", "ED", "RELEASER", "PBT"]
    highest_hazard = None
    for h in hazard_priority:
        if any(f["hazard_class"] == h for f in flagged):
            highest_hazard = h
            break

    # Build summary
    if not flagged:
        summary = "No ECHA SVHC substances detected."
    else:
        names = [f["ingredient"] for f in flagged]
        summary = (f"{len(flagged)} SVHC substance(s) detected: "
                   f"{', '.join(names)}. Score impact: {total_impact}")

    return {
        "source":           "ECHA",
        "ingredients_checked": len(ingredients),
        "svhc_found":       flagged,
        "svhc_count":       len(flagged),
        "total_score_impact": total_impact,
        "highest_hazard":   highest_hazard,
        "summary":          summary,
        "all_results":      results
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — ECHA Health Engine Self-Test\n")

    test_ingredients = [
        "glycerin",
        "butylparaben",
        "formaldehyde",
        "sodium hyaluronate",
        "benzophenone-3",
        "tocopherol",
        "dmdm hydantoin",
        "aqua"
    ]

    print("Testing individual ingredients:")
    print("-" * 55)
    for ing in test_ingredients:
        r = check_ingredient_echa(ing)
        flag = "🔴 SVHC" if r["svhc_listed"] else "🟢 clear"
        impact = f"({r['score_impact']})" if r["svhc_listed"] else ""
        concern = f"— {r['concern']}" if r["concern"] else ""
        print(f"  {flag} {ing} {impact} {concern}")

    print("\nTesting full formula:")
    print("-" * 55)
    formula_result = check_formula_echa(test_ingredients)
    print(f"  Ingredients checked: {formula_result['ingredients_checked']}")
    print(f"  SVHC found:          {formula_result['svhc_count']}")
    print(f"  Total score impact:  {formula_result['total_score_impact']}")
    print(f"  Highest hazard:      {formula_result['highest_hazard']}")
    print(f"  Summary:             {formula_result['summary']}")
    print("\n✅ ECHA module ready.")

Writing /content/noura_health_echa.py


In [None]:
%%writefile /content/noura_health_fda.py
"""
NOURA Health Engine — FDA OpenFDA Module
Cell 26

Queries the FDA OpenFDA API for:
- Product recalls (cosmetics, food, baby products, cleaning)
- Adverse event reports (consumer complaints, injuries)
- Enforcement actions

Free API — no key required for basic use (1000 requests/hour)
Covers: cosmetics, food, baby products
Weight in Health score: 10%
"""

import requests
import time
from datetime import datetime, timedelta

# ── FDA OpenFDA endpoints ──────────────────────────────────────
FDA_BASE         = "https://api.fda.gov"
FDA_FOOD_ENFORCE = f"{FDA_BASE}/food/enforcement.json"
FDA_COSMETIC_ADV = f"{FDA_BASE}/cosmetics/events.json"

# ── Score deductions ───────────────────────────────────────────
FDA_RECALL_DEDUCTIONS = {
    "Class I":   -30,   # Serious harm — most severe
    "Class II":  -15,   # Temporary adverse consequences
    "Class III": -5,    # Unlikely to cause harm
}

FDA_ADVERSE_SEVERITY = {
    "high":     -20,    # 10+ adverse events reported
    "moderate": -10,    # 3–9 adverse events
    "low":      -5,     # 1–2 adverse events
}


def check_product_recalls(product_name: str, category: str = "cosmetics") -> dict:
    """
    Check if a product or brand has active FDA recalls.
    Returns recall status and score impact.
    """
    result = {
        "product":       product_name,
        "source":        "FDA OpenFDA — Recalls",
        "recalls_found": [],
        "recall_count":  0,
        "score_impact":  0,
        "confidence":    "LOW",
        "api_checked":   False,
        "timestamp":     datetime.now().isoformat()
    }

    try:
        params = {
            "search": f'product_description:"{product_name}"',
            "limit":  10,
            "sort":   "recall_initiation_date:desc"
        }

        response = requests.get(FDA_FOOD_ENFORCE, params=params, timeout=8)
        result["api_checked"] = True

        if response.status_code == 200:
            data = response.json()
            recalls = data.get("results", [])

            # Filter to last 3 years only
            cutoff = datetime.now() - timedelta(days=3 * 365)
            recent = []
            for recall in recalls:
                date_str = recall.get("recall_initiation_date", "")
                try:
                    if datetime.strptime(date_str, "%Y-%m-%d") > cutoff:
                        recent.append(recall)
                except (ValueError, TypeError):
                    recent.append(recall)

            total_impact = 0
            for recall in recent:
                cls = recall.get("classification", "Class III")
                deduction = FDA_RECALL_DEDUCTIONS.get(cls, -5)
                total_impact += deduction
                result["recalls_found"].append({
                    "date":           recall.get("recall_initiation_date"),
                    "classification": cls,
                    "reason":         recall.get("reason_for_recall", "")[:200],
                    "status":         recall.get("status", ""),
                    "deduction":      deduction
                })

            result.update({
                "recall_count": len(recent),
                "score_impact": max(total_impact, -40),
                "confidence":   "HIGH" if recent else "MEDIUM"
            })

        elif response.status_code == 404:
            result["confidence"] = "MEDIUM"  # No results = clean

    except requests.exceptions.RequestException as e:
        result["error"] = str(e)

    return result


def check_adverse_events(product_name: str) -> dict:
    """
    Check FDA cosmetics adverse event reports for a product.
    Returns event count and score impact.
    """
    result = {
        "product":       product_name,
        "source":        "FDA OpenFDA — Adverse Events",
        "event_count":   0,
        "score_impact":  0,
        "severity":      None,
        "top_reactions": [],
        "confidence":    "LOW",
        "api_checked":   False,
        "timestamp":     datetime.now().isoformat()
    }

    try:
        params = {
            "search": f'products.name:"{product_name}"',
            "limit":  5
        }

        response = requests.get(FDA_COSMETIC_ADV, params=params, timeout=8)
        result["api_checked"] = True

        if response.status_code == 200:
            data = response.json()
            events = data.get("results", [])
            count = data.get("meta", {}).get("results", {}).get("total", len(events))

            # Determine severity
            if count >= 10:
                severity = "high"
            elif count >= 3:
                severity = "moderate"
            elif count >= 1:
                severity = "low"
            else:
                severity = None

            # Extract top reaction types
            reactions = []
            for event in events[:3]:
                for reaction in event.get("reactions", [])[:2]:
                    if reaction not in reactions:
                        reactions.append(reaction)

            result.update({
                "event_count":   count,
                "score_impact":  FDA_ADVERSE_SEVERITY.get(severity, 0),
                "severity":      severity,
                "top_reactions": reactions[:5],
                "confidence":    "HIGH" if count > 0 else "MEDIUM"
            })

        elif response.status_code == 404:
            result["confidence"] = "MEDIUM"  # No events = clean

    except requests.exceptions.RequestException as e:
        result["error"] = str(e)

    return result


def check_product_fda(product_name: str, category: str = "cosmetics") -> dict:
    """
    Master FDA check — runs both recalls and adverse events,
    returns combined result with total score impact.

    This is the main function called by the health engine.
    """
    recalls  = check_product_recalls(product_name, category)
    adverse  = check_adverse_events(product_name)

    total_impact = recalls["score_impact"] + adverse["score_impact"]
    total_impact = max(total_impact, -45)   # cap combined deduction

    # Build flags list for report
    flags = []
    if recalls["recall_count"] > 0:
        for r in recalls["recalls_found"]:
            flags.append(f"FDA Recall ({r['classification']}): {r['reason'][:100]}")
    if adverse["event_count"] > 0:
        flags.append(
            f"FDA Adverse Events: {adverse['event_count']} reports"
            + (f" — {', '.join(adverse['top_reactions'])}" if adverse["top_reactions"] else "")
        )

    return {
        "product":          product_name,
        "source":           "FDA OpenFDA",
        "recall_count":     recalls["recall_count"],
        "adverse_count":    adverse["event_count"],
        "flags":            flags,
        "total_score_impact": total_impact,
        "confidence":       "HIGH" if (recalls["api_checked"] and adverse["api_checked"]) else "LOW",
        "details": {
            "recalls": recalls,
            "adverse": adverse
        }
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — FDA OpenFDA Health Engine Self-Test\n")

    test_products = [
        ("Weleda Skin Food", "cosmetics"),
        ("Cetaphil Moisturizer", "cosmetics"),
        ("Similac Baby Formula", "baby"),
    ]

    for product, category in test_products:
        print(f"Checking: {product} ({category})")
        print("-" * 50)

        result = check_product_fda(product, category)

        recall_status = (f"🔴 {result['recall_count']} recall(s)"
                         if result["recall_count"] > 0 else "🟢 No recalls")
        adverse_status = (f"🔴 {result['adverse_count']} adverse event(s)"
                          if result["adverse_count"] > 0 else "🟢 No adverse events")

        print(f"  Recalls:        {recall_status}")
        print(f"  Adverse events: {adverse_status}")
        print(f"  Score impact:   {result['total_score_impact']}")
        print(f"  Confidence:     {result['confidence']}")
        if result["flags"]:
            for flag in result["flags"]:
                print(f"  ⚠  {flag}")
        print()

    print("✅ FDA module ready.")

Writing /content/noura_health_fda.py


In [None]:
%%writefile /content/noura_health_sccs.py
"""
NOURA Health Engine — EU SCCS Module
Cell 27

Queries the Scientific Committee on Consumer Safety (EU SCCS) data for:
- Cosmetic ingredient safety opinions
- Concentration limits and restrictions
- Banned and restricted substances under EU Cosmetics Regulation 1223/2009
- Ingredients requiring specific warnings

SCCS is the gold standard for cosmetic ingredient safety in Europe.
Every opinion is based on full dossier review by independent scientists.

Covers: cosmetics, baby products (strictest standards)
Weight in Health score: 15%
"""

import requests
import time
from datetime import datetime

# ── EU Cosmetics Regulation endpoints ─────────────────────────
# CosIng — EU Commission cosmetic ingredient database
COSING_API = "https://ec.europa.eu/growth/tools-databases/cosing/index.cfm"
COSING_SEARCH = "https://cosing-connect.zakopower.de/api/ingredients"  # unofficial mirror

# ── EU Annex classifications ───────────────────────────────────
# Under EU Cosmetics Regulation 1223/2009:
# Annex II  = PROHIBITED substances
# Annex III = RESTRICTED substances (with conditions/limits)
# Annex IV  = PERMITTED colorants
# Annex V   = PERMITTED preservatives
# Annex VI  = PERMITTED UV filters

EU_ANNEX_DEDUCTIONS = {
    "Annex II — Prohibited":    -40,   # Banned outright — never acceptable
    "Annex III — Restricted":   -15,   # Allowed only under specific conditions
    "SCCS — Not Safe":          -30,   # SCCS opinion: not safe for use
    "SCCS — Conditionally Safe": -10,  # Safe only at specific concentrations
    "Baby — Not Recommended":   -20,   # Safe for adults, not for children <3y
}

# ── SCCS hardcoded opinions (most relevant, current as of 2026) ─
# Source: SCCS opinions published at:
# https://health.ec.europa.eu/scientific-committees/scientific-committee-consumer-safety-sccs_en

SCCS_OPINIONS = {
    # ── BANNED (Annex II) ──────────────────────────────────────
    "lead acetate": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited in cosmetics — neurotoxin",
        "deduction": -40,
        "baby_safe": False
    },
    "mercury": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited — neurotoxin, bioaccumulative",
        "deduction": -40,
        "baby_safe": False
    },
    "hydroquinone": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited in cosmetics — cytotoxic, potential carcinogen",
        "deduction": -40,
        "baby_safe": False
    },
    "resorcinol": {
        "annex": "Annex III — Restricted",
        "opinion": "Restricted — endocrine disruption, skin sensitiser",
        "deduction": -15,
        "baby_safe": False
    },
    "kojic acid": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 1% in face products — genotoxicity concerns",
        "deduction": -15,
        "baby_safe": False
    },

    # ── PARABENS ──────────────────────────────────────────────
    "methylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.4% — endocrine disruption at higher concentrations",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "0.4%"
    },
    "ethylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.4% — moderate endocrine concern",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "0.4%"
    },
    "propylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.14% total — endocrine disruption",
        "deduction": -15,
        "baby_safe": False,
        "max_concentration": "0.14% combined with butylparaben"
    },
    "butylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.14% total — endocrine disruption, reproductive toxicity",
        "deduction": -15,
        "baby_safe": False,
        "max_concentration": "0.14% combined with propylparaben"
    },
    "isopropylparaben": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited — insufficient safety data, endocrine concern",
        "deduction": -40,
        "baby_safe": False
    },
    "isobutylparaben": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited — insufficient safety data, endocrine concern",
        "deduction": -40,
        "baby_safe": False
    },

    # ── UV FILTERS ────────────────────────────────────────────
    "benzophenone-3": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 6% — endocrine disruption, requires warning label",
        "deduction": -12,
        "baby_safe": False,
        "max_concentration": "6%",
        "warning_required": True
    },
    "homosalate": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 7.34% — endocrine disruption confirmed",
        "deduction": -12,
        "baby_safe": False,
        "max_concentration": "7.34%"
    },
    "octocrylene": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 10% — benzophenone contamination risk",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "10%"
    },
    "4-methylbenzylidene camphor": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 4% — endocrine disruptor",
        "deduction": -15,
        "baby_safe": False
    },

    # ── PRESERVATIVES ─────────────────────────────────────────
    "phenoxyethanol": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 1% — not recommended for products on nappy area (babies)",
        "deduction": -5,
        "baby_safe": False,
        "baby_deduction": -20,
        "max_concentration": "1%"
    },
    "chlorphenesin": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.3% — not for use around mouth area",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "0.3%"
    },
    "mit": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Methylisothiazolinone — max 0.0015% rinse-off only. Banned leave-on.",
        "deduction": -20,
        "baby_safe": False
    },
    "methylisothiazolinone": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.0015% rinse-off only. Banned in leave-on products.",
        "deduction": -20,
        "baby_safe": False
    },
    "kathon cg": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "MIT/MCI blend — rinse-off only, max 0.0015%",
        "deduction": -20,
        "baby_safe": False
    },
    "formaldehyde": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.2% (0.1% oral products) — carcinogen warning required",
        "deduction": -25,
        "baby_safe": False,
        "warning_required": True
    },

    # ── RETINOIDS ─────────────────────────────────────────────
    "retinol": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 0.3% face, 0.05% body — photosensitising, not for pregnancy",
        "deduction": -10,
        "baby_safe": False,
        "max_concentration": "0.3% face / 0.05% body"
    },
    "retinyl palmitate": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 0.3% — photosensitising at high concentrations",
        "deduction": -5,
        "baby_safe": False,
        "max_concentration": "0.3%"
    },

    # ── FRAGRANCE ALLERGENS ───────────────────────────────────
    "lilial": {
        "annex": "Annex II — Prohibited",
        "opinion": "Banned 2022 — reproductive toxicity (STOT RE)",
        "deduction": -40,
        "baby_safe": False
    },
    "eugenol": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared on label above 0.001% leave-on",
        "deduction": -5,
        "baby_safe": False
    },
    "linalool": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared on label — common allergen",
        "deduction": -5,
        "baby_safe": False
    },
    "limonene": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared on label — oxidises to allergen",
        "deduction": -5,
        "baby_safe": False
    },
    "cinnamal": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared — strong skin sensitiser",
        "deduction": -8,
        "baby_safe": False
    },

    # ── HAIR DYES ─────────────────────────────────────────────
    "ppd": {
        "annex": "Annex III — Restricted",
        "opinion": "p-Phenylenediamine — max 2%, warning required, not for eyebrows",
        "deduction": -15,
        "baby_safe": False
    },
    "p-phenylenediamine": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 2% — strong sensitiser, carcinogenicity concerns",
        "deduction": -15,
        "baby_safe": False
    },

    # ── WELL SUPPORTED (positive signals) ─────────────────────
    "glycerin": {
        "annex": "No restrictions",
        "opinion": "Safe as used — no concentration limits",
        "deduction": 0,
        "baby_safe": True
    },
    "tocopherol": {
        "annex": "No restrictions",
        "opinion": "Safe as used — antioxidant",
        "deduction": 0,
        "baby_safe": True
    },
    "sodium hyaluronate": {
        "annex": "No restrictions",
        "opinion": "Safe as used — well tolerated",
        "deduction": 0,
        "baby_safe": True
    },
    "niacinamide": {
        "annex": "No restrictions",
        "opinion": "Safe as used — max 5% recommended for sensitive skin",
        "deduction": 0,
        "baby_safe": True
    },
    "zinc oxide": {
        "annex": "Annex VI — UV Filter (Permitted)",
        "opinion": "Safe as used up to 25% — not nanomaterial risk",
        "deduction": 0,
        "baby_safe": True
    },
}


def check_ingredient_sccs(ingredient_name: str,
                           category: str = "cosmetics",
                           is_baby: bool = False) -> dict:
    """
    Check a single ingredient against EU SCCS opinions and
    EU Cosmetics Regulation annexes.

    Args:
        ingredient_name: ingredient to check
        category: product category
        is_baby: True if product is for children under 3

    Returns:
        dict with SCCS opinion and score impact
    """
    name_lower = ingredient_name.lower().strip()

    result = {
        "ingredient":    ingredient_name,
        "source":        "EU SCCS",
        "annex":         None,
        "opinion":       None,
        "deduction":     0,
        "baby_safe":     None,
        "warning":       False,
        "max_conc":      None,
        "confidence":    "LOW",
        "timestamp":     datetime.now().isoformat()
    }

    # Check exact and partial matches
    for key, data in SCCS_OPINIONS.items():
        if key in name_lower or name_lower in key or name_lower == key:
            deduction = data["deduction"]

            # Apply stricter baby penalty if applicable
            if is_baby and not data.get("baby_safe", True):
                baby_deduction = data.get("baby_deduction", deduction * 1.5)
                deduction = min(deduction, int(baby_deduction))

            result.update({
                "annex":      data["annex"],
                "opinion":    data["opinion"],
                "deduction":  deduction,
                "baby_safe":  data.get("baby_safe"),
                "warning":    data.get("warning_required", False),
                "max_conc":   data.get("max_concentration"),
                "confidence": "HIGH"
            })
            return result

    # Not found in hardcoded list — treat as unreviewed
    result.update({
        "annex":      "Not reviewed by SCCS",
        "opinion":    "No SCCS opinion available — insufficient data",
        "deduction":  0,
        "confidence": "LOW"
    })
    return result


def check_formula_sccs(ingredients: list,
                        category: str = "cosmetics",
                        is_baby: bool = False) -> dict:
    """
    Check an entire formula against EU SCCS opinions.
    Returns aggregated results with total score impact.

    Args:
        ingredients: list of ingredient name strings
        category: product category
        is_baby: True if product is for children under 3

    Returns:
        dict with all flags, deductions, and summary
    """
    results = []
    total_deduction = 0
    flagged = []
    prohibited = []
    restricted = []
    warnings = []

    for ingredient in ingredients:
        r = check_ingredient_sccs(ingredient, category, is_baby)
        results.append(r)

        if r["deduction"] < 0:
            flagged.append(r)
            total_deduction += r["deduction"]

            if "Prohibited" in (r["annex"] or ""):
                prohibited.append(ingredient)
            elif "Restricted" in (r["annex"] or ""):
                restricted.append(ingredient)

            if r["warning"]:
                warnings.append(f"{ingredient}: {r['opinion']}")

    # Cap total deduction
    total_deduction = max(total_deduction, -50)

    # Build summary
    parts = []
    if prohibited:
        parts.append(f"{len(prohibited)} PROHIBITED ingredient(s): {', '.join(prohibited)}")
    if restricted:
        parts.append(f"{len(restricted)} restricted ingredient(s): {', '.join(restricted)}")
    if not flagged:
        parts.append("No EU Cosmetics Regulation violations detected")

    return {
        "source":              "EU SCCS / EU Cosmetics Regulation 1223/2009",
        "category":            category,
        "is_baby":             is_baby,
        "ingredients_checked": len(ingredients),
        "flagged_count":       len(flagged),
        "prohibited":          prohibited,
        "restricted":          restricted,
        "warnings":            warnings,
        "total_deduction":     total_deduction,
        "summary":             " | ".join(parts),
        "all_results":         results
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — EU SCCS Health Engine Self-Test\n")

    test_formula = [
        "aqua",
        "glycerin",
        "methylparaben",
        "butylparaben",
        "phenoxyethanol",
        "benzophenone-3",
        "retinol",
        "sodium hyaluronate",
        "lilial",
        "niacinamide",
        "tocopherol",
        "limonene"
    ]

    print("Adult cosmetics formula:")
    print("-" * 55)
    adult = check_formula_sccs(test_formula, "cosmetics", is_baby=False)
    print(f"  Ingredients checked:  {adult['ingredients_checked']}")
    print(f"  Flagged:              {adult['flagged_count']}")
    print(f"  Prohibited:           {adult['prohibited']}")
    print(f"  Restricted:           {adult['restricted']}")
    print(f"  Total deduction:      {adult['total_deduction']}")
    print(f"  Summary:              {adult['summary']}")

    print("\nSame formula — baby product:")
    print("-" * 55)
    baby = check_formula_sccs(test_formula, "baby", is_baby=True)
    print(f"  Total deduction:      {baby['total_deduction']}")
    print(f"  Prohibited:           {baby['prohibited']}")
    print(f"  Restricted:           {baby['restricted']}")
    print(f"\n  Note: Stricter penalties applied for baby category")

    print("\n✅ EU SCCS module ready.")


Writing /content/noura_health_sccs.py


In [None]:
%%writefile /content/noura_health_cir.py
"""
NOURA Health Engine — CIR Module
Cell 28

The Cosmetic Ingredient Review (CIR) is an independent panel of
scientific experts that reviews the safety of cosmetic ingredients.
It is the primary US safety standard for cosmetic ingredients,
complementing the EU SCCS system.

CIR provides:
- Safety assessments with concentration limits
- "Safe as Used" vs "Unsafe" vs "Insufficient Data" conclusions
- Specific use restrictions (rinse-off vs leave-on, etc.)
- Re-evaluation dates when new evidence emerges

Covers: cosmetics, baby products
Weight in Health score: 10%
"""

from datetime import datetime

# ── CIR conclusion types and their deductions ──────────────────
CIR_CONCLUSIONS = {
    "safe_as_used":         0,     # Fully safe — no deduction
    "safe_with_limits":    -8,     # Safe only at specific concentrations
    "insufficient_data":  -10,     # Not enough data to conclude safety
    "unsafe":             -30,     # CIR concluded unsafe
    "not_supported":      -25,     # Safety not supported by available data
}

# ── CIR ingredient database ────────────────────────────────────
# Source: CIR Compendium — https://cir-safety.org/ingredients
# Includes all ingredients with non-trivial findings.
# "Safe as used" ingredients with no restrictions are omitted
# (they contribute 0 deduction and add no signal).

CIR_DATABASE = {

    # ── PRESERVATIVES ─────────────────────────────────────────
    "methylparaben": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.4% in leave-on, 0.8% total parabens",
        "notes":          "Safe as used at current concentrations in cosmetics",
        "concern":        "Endocrine activity at high concentrations",
        "deduction":      -5
    },
    "propylparaben": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.4% leave-on",
        "notes":          "Safe at current use concentrations",
        "concern":        "Endocrine disruption concerns at elevated levels",
        "deduction":      -8
    },
    "butylparaben": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.4% leave-on",
        "notes":          "Safe at current use concentrations",
        "concern":        "Endocrine disruption, not recommended for children",
        "deduction":      -10
    },
    "phenoxyethanol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "1.0%",
        "notes":          "Safe as preservative at 1%",
        "concern":        "Central nervous system effects at high doses",
        "deduction":      -5
    },
    "methylisothiazolinone": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.0015% rinse-off only",
        "notes":          "Not safe in leave-on products",
        "concern":        "Strong skin sensitiser — high allergy rates reported",
        "deduction":      -20
    },
    "imidazolidinyl urea": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.5%",
        "notes":          "Formaldehyde releaser — safe at low concentrations",
        "concern":        "Releases formaldehyde — carcinogen risk at higher levels",
        "deduction":      -12
    },
    "dmdm hydantoin": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.6%",
        "notes":          "Formaldehyde releaser",
        "concern":        "Formaldehyde release — carcinogen, sensitiser",
        "deduction":      -15
    },
    "quaternium-15": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.2%",
        "notes":          "Highest-releasing formaldehyde preservative",
        "concern":        "Formaldehyde releaser — strong sensitiser",
        "deduction":      -18
    },

    # ── UV FILTERS ────────────────────────────────────────────
    "benzophenone-3": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "6%",
        "notes":          "Safe at 6% with warning label",
        "concern":        "Endocrine disruption, systemic absorption",
        "deduction":      -10
    },
    "homosalate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "15%",
        "notes":          "CIR reviewing endocrine data — limit lowered pending review",
        "concern":        "Endocrine disruption — under re-evaluation",
        "deduction":      -10
    },
    "octinoxate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "7.5%",
        "notes":          "FDA proposed insufficient safety data for OTC",
        "concern":        "Endocrine disruption, coral reef toxicity",
        "deduction":      -8
    },
    "octocrylene": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "10%",
        "notes":          "Degrades to benzophenone — contamination concern",
        "concern":        "Benzophenone contamination risk on shelf",
        "deduction":      -8
    },

    # ── RETINOIDS ─────────────────────────────────────────────
    "retinol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.3% face / 0.05% body",
        "notes":          "Not for use around eyes or on sun-exposed skin",
        "concern":        "Photosensitising, teratogenic risk in pregnancy",
        "deduction":      -8
    },
    "retinyl palmitate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "1.0%",
        "notes":          "Photosensitisation concern",
        "concern":        "Photosensitising at higher concentrations",
        "deduction":      -5
    },

    # ── SURFACTANTS ───────────────────────────────────────────
    "sodium lauryl sulfate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "rinse-off only at normal use levels",
        "notes":          "Not for prolonged skin contact",
        "concern":        "Skin barrier disruption, irritant at high concentrations",
        "deduction":      -8
    },
    "sodium laureth sulfate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "rinse-off products",
        "notes":          "1,4-dioxane contamination risk from ethoxylation",
        "concern":        "1,4-dioxane contaminant — probable carcinogen",
        "deduction":      -5
    },
    "cocamide dea": {
        "conclusion":     "not_supported",
        "max_conc":       "N/A",
        "notes":          "Listed as possible carcinogen — IARC Group 2B",
        "concern":        "Possible carcinogen, nitrosamine formation",
        "deduction":      -20
    },
    "diethanolamine": {
        "conclusion":     "unsafe",
        "max_conc":       "N/A — avoid",
        "notes":          "CIR: not safe in products that can form nitrosamines",
        "concern":        "Nitrosamine precursor — carcinogen",
        "deduction":      -25
    },

    # ── SKIN LIGHTENERS ───────────────────────────────────────
    "hydroquinone": {
        "conclusion":     "unsafe",
        "max_conc":       "Not safe in cosmetics",
        "notes":          "CIR concluded not safe for cosmetic use",
        "concern":        "Cytotoxic, genotoxic, carcinogenic potential",
        "deduction":      -30
    },
    "kojic acid": {
        "conclusion":     "insufficient_data",
        "max_conc":       "1% recommended maximum",
        "notes":          "Insufficient data — genotoxicity concerns",
        "concern":        "Genotoxicity potential — needs more data",
        "deduction":      -12
    },

    # ── FRAGRANCE COMPONENTS ──────────────────────────────────
    "fragrance": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "IFRA guidelines apply",
        "notes":          "Generic 'fragrance' masks individual ingredients",
        "concern":        "Undisclosed allergens, potential sensitisers",
        "deduction":      -8
    },
    "parfum": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "IFRA guidelines apply",
        "notes":          "EU term for fragrance — same concerns apply",
        "concern":        "Undisclosed allergens, potential sensitisers",
        "deduction":      -8
    },
    "eugenol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.5% leave-on",
        "notes":          "Must be labelled above threshold",
        "concern":        "Skin sensitiser — common allergen",
        "deduction":      -5
    },

    # ── ALCOHOLS ─────────────────────────────────────────────
    "denatured alcohol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "Normal cosmetic use",
        "notes":          "Drying at high concentrations",
        "concern":        "Skin barrier disruption with prolonged use",
        "deduction":      -3
    },
    "alcohol denat": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "Normal cosmetic use",
        "notes":          "Drying — avoid in high concentrations",
        "concern":        "Skin barrier disruption",
        "deduction":      -3
    },

    # ── TALC ─────────────────────────────────────────────────
    "talc": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "Must be asbestos-free",
        "notes":          "Safe if asbestos-free — inhalation risk for baby powder",
        "concern":        "Asbestos contamination risk, inhalation hazard (baby)",
        "deduction":      -5,
        "baby_deduction": -20
    },

    # ── CLEAN / SAFE INGREDIENTS (explicitly confirmed) ───────
    "glycerin": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe and well-tolerated humectant",
        "concern":        None,
        "deduction":      0
    },
    "niacinamide": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe — well studied, good tolerability",
        "concern":        None,
        "deduction":      0
    },
    "hyaluronic acid": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe humectant — no concerns",
        "concern":        None,
        "deduction":      0
    },
    "sodium hyaluronate": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe humectant",
        "concern":        None,
        "deduction":      0
    },
    "tocopherol": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Vitamin E — safe antioxidant",
        "concern":        None,
        "deduction":      0
    },
    "panthenol": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Provitamin B5 — safe and effective",
        "concern":        None,
        "deduction":      0
    },
    "zinc oxide": {
        "conclusion":     "safe_as_used",
        "max_conc":       "25% UV protection use",
        "notes":          "Safe — non-nano preferred",
        "concern":        None,
        "deduction":      0
    },
}


def check_ingredient_cir(ingredient_name: str,
                          is_baby: bool = False) -> dict:
    """
    Check a single ingredient against the CIR database.

    Args:
        ingredient_name: ingredient to check
        is_baby: True if product is for children

    Returns:
        dict with CIR safety data and score impact
    """
    name_lower = ingredient_name.lower().strip()

    result = {
        "ingredient":  ingredient_name,
        "source":      "CIR",
        "conclusion":  None,
        "max_conc":    None,
        "concern":     None,
        "deduction":   0,
        "baby_safe":   None,
        "confidence":  "LOW",
        "timestamp":   datetime.now().isoformat()
    }

    # Check database
    for key, data in CIR_DATABASE.items():
        if key in name_lower or name_lower == key:
            deduction = data["deduction"]

            # Apply baby penalty where applicable
            if is_baby and "baby_deduction" in data:
                deduction = data["baby_deduction"]

            result.update({
                "conclusion": data["conclusion"],
                "max_conc":   data.get("max_conc"),
                "concern":    data.get("concern"),
                "notes":      data.get("notes"),
                "deduction":  deduction,
                "confidence": "HIGH"
            })
            return result

    # Not in database
    result.update({
        "conclusion": "not_reviewed",
        "concern":    "No CIR assessment available",
        "confidence": "LOW"
    })
    return result


def check_formula_cir(ingredients: list,
                       is_baby: bool = False) -> dict:
    """
    Check an entire formula against CIR assessments.
    Returns aggregated results and total score impact.
    """
    results = []
    total_deduction = 0
    flagged = []
    unsafe = []
    restricted = []

    for ingredient in ingredients:
        r = check_ingredient_cir(ingredient, is_baby)
        results.append(r)

        if r["deduction"] < 0:
            flagged.append(r)
            total_deduction += r["deduction"]

            if r["conclusion"] in ("unsafe", "not_supported"):
                unsafe.append(ingredient)
            elif r["conclusion"] in ("safe_with_limits", "insufficient_data"):
                restricted.append(ingredient)

    # Cap total deduction
    total_deduction = max(total_deduction, -45)

    parts = []
    if unsafe:
        parts.append(f"{len(unsafe)} unsafe ingredient(s): {', '.join(unsafe)}")
    if restricted:
        parts.append(f"{len(restricted)} restricted ingredient(s): {', '.join(restricted)}")
    if not flagged:
        parts.append("All ingredients pass CIR safety review")

    return {
        "source":              "CIR — Cosmetic Ingredient Review",
        "is_baby":             is_baby,
        "ingredients_checked": len(ingredients),
        "flagged_count":       len(flagged),
        "unsafe":              unsafe,
        "restricted":          restricted,
        "total_deduction":     total_deduction,
        "summary":             " | ".join(parts),
        "all_results":         results
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — CIR Health Engine Self-Test\n")

    test_formula = [
        "aqua",
        "glycerin",
        "sodium lauryl sulfate",
        "methylparaben",
        "phenoxyethanol",
        "retinol",
        "fragrance",
        "niacinamide",
        "cocamide dea",
        "tocopherol",
        "talc"
    ]

    print("Adult cosmetics formula:")
    print("-" * 55)
    adult = check_formula_cir(test_formula, is_baby=False)
    print(f"  Ingredients checked: {adult['ingredients_checked']}")
    print(f"  Flagged:             {adult['flagged_count']}")
    print(f"  Unsafe:              {adult['unsafe']}")
    print(f"  Restricted:          {adult['restricted']}")
    print(f"  Total deduction:     {adult['total_deduction']}")
    print(f"  Summary:             {adult['summary']}")

    print("\nSame formula — baby product:")
    print("-" * 55)
    baby = check_formula_cir(test_formula, is_baby=True)
    print(f"  Total deduction:     {baby['total_deduction']}")
    print(f"  (Talc gets -20 penalty instead of -5 for baby)")

    print("\n✅ CIR module ready.")

Writing /content/noura_health_cir.py


In [None]:
%%writefile /content/noura_health_aggregator.py
"""
NOURA Health Engine — Master Aggregator
Cell 29

Combines all health data sources into a single, deterministic health score:

    Source              Weight    Status
    ─────────────────────────────────────
    PubMed              40%       ✅ Built (Weeks 1-10)
    ECHA (SVHC/CMR)     25%       ✅ Cell 25
    EU SCCS             15%       ✅ Cell 27
    CIR                 10%       ✅ Cell 28
    FDA OpenFDA         10%       ✅ Cell 26

Same input → Same output. Always.
Health score feeds into final NOURA score at 70% weight.
"""

import sys
import time
from datetime import datetime

# ── Import all health modules ──────────────────────────────────
sys.path.insert(0, '/content')

try:
    from noura_health_echa import check_formula_echa
    ECHA_AVAILABLE = True
except ImportError:
    ECHA_AVAILABLE = False
    print("⚠  ECHA module not found — using fallback")

try:
    from noura_health_fda import check_product_fda
    FDA_AVAILABLE = True
except ImportError:
    FDA_AVAILABLE = False
    print("⚠  FDA module not found — using fallback")

try:
    from noura_health_sccs import check_formula_sccs
    SCCS_AVAILABLE = True
except ImportError:
    SCCS_AVAILABLE = False
    print("⚠  SCCS module not found — using fallback")

try:
    from noura_health_cir import check_formula_cir
    CIR_AVAILABLE = True
except ImportError:
    CIR_AVAILABLE = False
    print("⚠  CIR module not found — using fallback")

try:
    from noura_config import get_weights, get_verdict, BABY_SCORE_PENALTY
    CONFIG_AVAILABLE = True
except ImportError:
    CONFIG_AVAILABLE = False
    print("⚠  Config not found — using defaults")

# ── Source weights within the Health dimension ─────────────────
HEALTH_SOURCE_WEIGHTS = {
    "pubmed": 0.40,   # Peer-reviewed science — highest weight
    "echa":   0.25,   # EU regulatory — SVHC, CMR, endocrine
    "sccs":   0.15,   # EU cosmetics regulation
    "cir":    0.10,   # US cosmetic ingredient review
    "fda":    0.10,   # Recalls and adverse events
}

# Base health score — deductions applied on top
BASE_HEALTH_SCORE = 100


def _pubmed_score(pubmed_result: dict) -> dict:
    """
    Extract health score contribution from PubMed results.
    PubMed results come from noura_api_v2() already built in the notebook.
    """
    if not pubmed_result:
        return {"score": 70, "confidence": "LOW", "flags": [],
                "source": "PubMed", "note": "No PubMed data provided"}

    # noura_api_v2 returns health_score directly
    raw_score = pubmed_result.get("health_score", 70)
    flags = pubmed_result.get("ingredient_flags", [])
    confidence = pubmed_result.get("confidence", "LOW")

    return {
        "source":     "PubMed",
        "score":      max(0, min(100, raw_score)),
        "flags":      flags,
        "confidence": confidence
    }


def _echa_score(ingredients: list) -> dict:
    """Run ECHA check and convert deductions to 0-100 score."""
    if not ECHA_AVAILABLE or not ingredients:
        return {"score": 75, "confidence": "LOW", "flags": [],
                "source": "ECHA", "note": "ECHA unavailable"}

    result = check_formula_echa(ingredients)
    score = BASE_HEALTH_SCORE + result["total_score_impact"]
    score = max(0, min(100, score))

    flags = [
        f"{f['ingredient']} — {f['hazard_class']}: {f['concern']}"
        for f in result["svhc_found"]
    ]

    return {
        "source":     "ECHA",
        "score":      score,
        "flags":      flags,
        "svhc_count": result["svhc_count"],
        "confidence": result["svhc_found"][0]["confidence"] if result["svhc_found"] else "MEDIUM"
    }


def _sccs_score(ingredients: list, is_baby: bool = False) -> dict:
    """Run EU SCCS check and convert to 0-100 score."""
    if not SCCS_AVAILABLE or not ingredients:
        return {"score": 75, "confidence": "LOW", "flags": [],
                "source": "EU SCCS", "note": "SCCS unavailable"}

    result = check_formula_sccs(ingredients, is_baby=is_baby)
    score = BASE_HEALTH_SCORE + result["total_deduction"]
    score = max(0, min(100, score))

    flags = []
    for name in result["prohibited"]:
        flags.append(f"{name} — PROHIBITED under EU Cosmetics Regulation")
    for name in result["restricted"]:
        flags.append(f"{name} — Restricted (concentration limits apply)")

    return {
        "source":     "EU SCCS",
        "score":      score,
        "flags":      flags,
        "prohibited": result["prohibited"],
        "confidence": "HIGH" if result["flagged_count"] > 0 else "MEDIUM"
    }


def _cir_score(ingredients: list, is_baby: bool = False) -> dict:
    """Run CIR check and convert to 0-100 score."""
    if not CIR_AVAILABLE or not ingredients:
        return {"score": 75, "confidence": "LOW", "flags": [],
                "source": "CIR", "note": "CIR unavailable"}

    result = check_formula_cir(ingredients, is_baby=is_baby)
    score = BASE_HEALTH_SCORE + result["total_deduction"]
    score = max(0, min(100, score))

    flags = []
    for name in result["unsafe"]:
        flags.append(f"{name} — CIR: unsafe")
    for name in result["restricted"]:
        flags.append(f"{name} — CIR: restricted use")

    return {
        "source":     "CIR",
        "score":      score,
        "flags":      flags,
        "confidence": "HIGH" if result["flagged_count"] > 0 else "MEDIUM"
    }


def _fda_score(product_name: str, category: str) -> dict:
    """Run FDA check and convert to 0-100 score."""
    if not FDA_AVAILABLE:
        return {"score": 90, "confidence": "LOW", "flags": [],
                "source": "FDA", "note": "FDA unavailable"}

    result = check_product_fda(product_name, category)
    score = BASE_HEALTH_SCORE + result["total_score_impact"]
    score = max(0, min(100, score))

    return {
        "source":     "FDA OpenFDA",
        "score":      score,
        "flags":      result["flags"],
        "recalls":    result["recall_count"],
        "adverse":    result["adverse_count"],
        "confidence": result["confidence"]
    }


def calculate_health_score(
    product_name:   str,
    ingredients:    list,
    category:       str = "cosmetics",
    pubmed_result:  dict = None
) -> dict:
    """
    Master health score calculator.
    Aggregates all 5 health data sources into one deterministic score.

    Args:
        product_name:  product name (for FDA lookup)
        ingredients:   list of ingredient name strings
        category:      'cosmetics', 'food', 'cleaning', 'baby'
        pubmed_result: existing result from noura_api_v2() if available

    Returns:
        {
            "health_score":    int (0-100),
            "verdict":         str,
            "flags":           list of concern strings,
            "source_scores":   dict of per-source scores,
            "confidence":      str,
            "summary":         str,
            "timestamp":       str
        }
    """
    is_baby = (category == "baby")

    print(f"\n  🔬 Running health analysis for: {product_name}")
    print(f"     Category: {category} | Ingredients: {len(ingredients)}")

    # ── Run all sources ────────────────────────────────────────
    print("     Checking PubMed...", end=" ")
    pubmed  = _pubmed_score(pubmed_result)
    print(f"score: {pubmed['score']}")

    print("     Checking ECHA...", end=" ")
    echa    = _echa_score(ingredients)
    print(f"score: {echa['score']}")

    print("     Checking EU SCCS...", end=" ")
    sccs    = _sccs_score(ingredients, is_baby)
    print(f"score: {sccs['score']}")

    print("     Checking CIR...", end=" ")
    cir     = _cir_score(ingredients, is_baby)
    print(f"score: {cir['score']}")

    print("     Checking FDA...", end=" ")
    fda     = _fda_score(product_name, category)
    print(f"score: {fda['score']}")

    # ── Weighted aggregation ───────────────────────────────────
    weighted_score = (
        pubmed["score"] * HEALTH_SOURCE_WEIGHTS["pubmed"] +
        echa["score"]   * HEALTH_SOURCE_WEIGHTS["echa"]   +
        sccs["score"]   * HEALTH_SOURCE_WEIGHTS["sccs"]   +
        cir["score"]    * HEALTH_SOURCE_WEIGHTS["cir"]    +
        fda["score"]    * HEALTH_SOURCE_WEIGHTS["fda"]
    )

    # Apply baby penalty — same score, stricter verdict threshold
    final_score = round(weighted_score)
    if is_baby:
        final_score = max(0, final_score + BABY_SCORE_PENALTY
                          if CONFIG_AVAILABLE else final_score - 10)

    # ── Collect all flags ──────────────────────────────────────
    all_flags = []
    for source in [echa, sccs, cir, fda, pubmed]:
        all_flags.extend(source.get("flags", []))

    # Deduplicate flags
    seen = set()
    unique_flags = []
    for flag in all_flags:
        key = flag[:50].lower()
        if key not in seen:
            seen.add(key)
            unique_flags.append(flag)

    # ── Determine verdict ──────────────────────────────────────
    if CONFIG_AVAILABLE:
        verdict = get_verdict(final_score, category)
    else:
        if final_score >= 80:   verdict = "CLEAN"
        elif final_score >= 60: verdict = "ACCEPTABLE"
        elif final_score >= 40: verdict = "CAUTION"
        else:                   verdict = "HIGHER_RISK"

    # ── Confidence — based on how many sources returned data ──
    sources_with_data = sum([
        pubmed_result is not None,
        ECHA_AVAILABLE,
        SCCS_AVAILABLE,
        CIR_AVAILABLE,
        FDA_AVAILABLE
    ])
    if sources_with_data >= 4:   confidence = "HIGH"
    elif sources_with_data >= 2: confidence = "MEDIUM"
    else:                         confidence = "LOW"

    # ── Summary ────────────────────────────────────────────────
    if not unique_flags:
        summary = f"No health concerns detected across {sources_with_data} sources."
    else:
        summary = (f"{len(unique_flags)} concern(s) detected. "
                   f"Highest risk: {unique_flags[0][:80]}")

    result = {
        "product":       product_name,
        "category":      category,
        "health_score":  final_score,
        "verdict":       verdict,
        "flags":         unique_flags,
        "flag_count":    len(unique_flags),
        "source_scores": {
            "pubmed": pubmed["score"],
            "echa":   echa["score"],
            "sccs":   sccs["score"],
            "cir":    cir["score"],
            "fda":    fda["score"],
        },
        "source_weights": HEALTH_SOURCE_WEIGHTS,
        "weighted_score": round(weighted_score),
        "final_score":    final_score,
        "confidence":     confidence,
        "summary":        summary,
        "is_baby":        is_baby,
        "timestamp":      datetime.now().isoformat()
    }

    return result


def print_health_report(result: dict):
    """Pretty-print a health score result to console."""
    verdict_icons = {
        "CLEAN":        "🟢",
        "ACCEPTABLE":   "🟡",
        "CAUTION":      "🟠",
        "HIGHER_RISK":  "🔴"
    }
    icon = verdict_icons.get(result["verdict"], "⚪")

    print(f"\n{'═'*55}")
    print(f"  NOURA HEALTH SCORE — {result['product'].upper()}")
    print(f"{'═'*55}")
    print(f"  {icon} Score:     {result['health_score']}/100")
    print(f"  Verdict:   {result['verdict']}")
    print(f"  Category:  {result['category']}")
    print(f"  Confidence:{result['confidence']}")
    print(f"\n  Source breakdown:")
    for source, score in result["source_scores"].items():
        weight = HEALTH_SOURCE_WEIGHTS[source]
        print(f"    {source.upper():<10} {score:>3}/100  (weight: {weight*100:.0f}%)")
    print(f"\n  Weighted health score: {result['weighted_score']}/100")
    if result["is_baby"]:
        print(f"  Baby penalty applied: -10")
    print(f"  Final health score:    {result['final_score']}/100")
    if result["flags"]:
        print(f"\n  ⚠  Concerns ({result['flag_count']}):")
        for flag in result["flags"][:5]:
            print(f"     • {flag[:70]}")
    else:
        print(f"\n  ✅ No health concerns detected")
    print(f"{'═'*55}\n")


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — Health Engine Aggregator Self-Test")
    print("Testing with two contrasting formulas\n")

    # Test 1: Clean product
    clean_ingredients = [
        "aqua", "glycerin", "sodium hyaluronate",
        "niacinamide", "tocopherol", "panthenol",
        "xanthan gum", "citric acid"
    ]

    clean_result = calculate_health_score(
        product_name  = "Weleda Skin Food",
        ingredients   = clean_ingredients,
        category      = "cosmetics",
        pubmed_result = {"health_score": 88, "confidence": "HIGH",
                         "ingredient_flags": []}
    )
    print_health_report(clean_result)

    # Test 2: Problematic formula
    problem_ingredients = [
        "aqua", "glycerin", "butylparaben",
        "methylisothiazolinone", "fragrance",
        "benzophenone-3", "cocamide dea",
        "dmdm hydantoin", "retinol"
    ]

    problem_result = calculate_health_score(
        product_name  = "Reformulation Candidate",
        ingredients   = problem_ingredients,
        category      = "cosmetics",
        pubmed_result = {"health_score": 42, "confidence": "HIGH",
                         "ingredient_flags": ["butylparaben", "MIT"]}
    )
    print_health_report(problem_result)

    print("✅ Health Engine Aggregator ready.")
    print("   Week 11 complete — all health sources integrated.")

Writing /content/noura_health_aggregator.py


In [None]:
# NOURA — Week 11 Integration Test
# Tests all 5 health modules working together

import sys
sys.path.insert(0, '/content')

# ── Step 1: Verify all modules loaded ─────────────────────────
print("Step 1: Checking all modules...")
modules = {
    "noura_config":             "/content/noura_config.py",
    "noura_health_echa":        "/content/noura_health_echa.py",
    "noura_health_fda":         "/content/noura_health_fda.py",
    "noura_health_sccs":        "/content/noura_health_sccs.py",
    "noura_health_cir":         "/content/noura_health_cir.py",
    "noura_health_aggregator":  "/content/noura_health_aggregator.py",
}

import os
all_present = True
for name, path in modules.items():
    exists = os.path.exists(path)
    status = "✅" if exists else "❌ MISSING"
    print(f"  {status} {name}")
    if not exists:
        all_present = False

if not all_present:
    print("\n⛔ Some modules missing — re-run the %%writefile cells above")
else:
    print("\nAll modules present. Running full test...\n")

    # ── Step 2: Import aggregator ──────────────────────────────
    from noura_health_aggregator import calculate_health_score, print_health_report

    # ── Step 3: Test clean product ─────────────────────────────
    weleda = calculate_health_score(
        product_name  = "Weleda Skin Food",
        ingredients   = ["aqua", "glycerin", "sodium hyaluronate",
                         "niacinamide", "tocopherol", "panthenol"],
        category      = "cosmetics",
        pubmed_result = {"health_score": 88, "confidence": "HIGH",
                         "ingredient_flags": []}
    )
    print_health_report(weleda)

    # ── Step 4: Test problematic product ──────────────────────
    problem = calculate_health_score(
        product_name  = "Reformulation Candidate",
        ingredients   = ["aqua", "glycerin", "butylparaben",
                         "methylisothiazolinone", "fragrance",
                         "benzophenone-3", "dmdm hydantoin"],
        category      = "cosmetics",
        pubmed_result = {"health_score": 42, "confidence": "HIGH",
                         "ingredient_flags": ["butylparaben", "MIT"]}
    )
    print_health_report(problem)

    # ── Step 5: Test baby category ─────────────────────────────
    baby = calculate_health_score(
        product_name  = "Johnson's Baby Lotion",
        ingredients   = ["aqua", "glycerin", "phenoxyethanol",
                         "talc", "fragrance"],
        category      = "baby",
        pubmed_result = {"health_score": 60, "confidence": "MEDIUM",
                         "ingredient_flags": ["phenoxyethanol"]}
    )
    print_health_report(baby)

    # ── Step 6: Sanity checks ──────────────────────────────────
    print("Sanity checks:")
    print("-" * 40)

    assert weleda["health_score"] > problem["health_score"], \
        "❌ FAIL: Clean product should score higher than problematic"
    print("  ✅ Clean product scores higher than problematic")

    assert weleda["verdict"] == "CLEAN", \
        f"❌ FAIL: Weleda should be CLEAN, got {weleda['verdict']}"
    print("  ✅ Weleda verdict is CLEAN")

    assert problem["verdict"] in ("CAUTION", "HIGHER_RISK"), \
        f"❌ FAIL: Problem product should be CAUTION or HIGHER_RISK"
    print("  ✅ Problematic product flagged correctly")

    assert baby["health_score"] <= weleda["health_score"], \
        "❌ FAIL: Baby product with phenoxyethanol should score lower than clean adult product"
    print("  ✅ Baby penalty applied correctly")

    assert len(problem["flags"]) > len(weleda["flags"]), \
        "❌ FAIL: Problem product should have more flags"
    print("  ✅ Flag count reflects product quality")

    print("\n✅ ALL TESTS PASSED — Week 11 health engine fully operational")

Step 1: Checking all modules...
  ✅ noura_config
  ✅ noura_health_echa
  ✅ noura_health_fda
  ✅ noura_health_sccs
  ✅ noura_health_cir
  ✅ noura_health_aggregator

All modules present. Running full test...


  🔬 Running health analysis for: Weleda Skin Food
     Category: cosmetics | Ingredients: 6
     Checking PubMed... score: 88
     Checking ECHA... score: 100
     Checking EU SCCS... score: 100
     Checking CIR... score: 100
     Checking FDA... score: 100

═══════════════════════════════════════════════════════
  NOURA HEALTH SCORE — WELEDA SKIN FOOD
═══════════════════════════════════════════════════════
  🟢 Score:     95/100
  Verdict:   CLEAN
  Category:  cosmetics
  Confidence:HIGH

  Source breakdown:
    PUBMED      88/100  (weight: 40%)
    ECHA       100/100  (weight: 25%)
    SCCS       100/100  (weight: 15%)
    CIR        100/100  (weight: 10%)
    FDA        100/100  (weight: 10%)

  Weighted health score: 95/100
  Final health score:    95/100

  ✅ No health concern

In [None]:
%%writefile /content/noura_environment_config.py
"""
NOURA Environment Config v2 — Cell 30
Expanded ECHA database, fixed Ecolabel brands, updated PBT flags.

Changes from v1:
  - ECHA_AQUATIC_HAZARDS: 7 → 60+ ingredients
  - PBT_FLAGS: 15 → 30+ entries
  - EU_ECOLABEL_BRANDS: 2 → 12 brands across cosmetics/cleaning/food
  - Ecolabel now stores min_weighted_score threshold (certification washing fix)
"""

# ═══════════════════════════════════════════════════════════════
# SIGNAL WEIGHTS BY CATEGORY
# ═══════════════════════════════════════════════════════════════

ENVIRONMENT_SIGNALS = {
    "cosmetics": {
        "biodegradability":  {"weight": 0.40},
        "packaging":         {"weight": 0.30},
        "manufacturing":     {"weight": 0.20},
        "palm_oil":          {"weight": 0.10},
    },
    "food": {
        "carbon_footprint":  {"weight": 0.40},
        "packaging":         {"weight": 0.30},
        "pesticides":        {"weight": 0.20},
        "water_usage":       {"weight": 0.10},
    },
    "cleaning": {
        "aquatic_toxicity":  {"weight": 0.50},
        "vocs":              {"weight": 0.30},
        "packaging":         {"weight": 0.20},
    },
    "baby": {
        "biodegradability":  {"weight": 0.40},
        "packaging":         {"weight": 0.30},
        "supply_chain":      {"weight": 0.20},
        "cumulative_exposure": {"weight": 0.10},
    },
}


# ═══════════════════════════════════════════════════════════════
# ECHA AQUATIC HAZARD DATABASE — v2 (60+ ingredients)
#
# Sources:
#   - ECHA REACH aquatic hazard classifications
#   - EU Ecolabel detergent ingredient database
#   - OECD biodegradability test data
#   - SCCS/EFSA assessments
#
# Deduction scale:
#   0        = no concern (safe / biodegradable)
#  -5        = minor concern or low-risk VOC
#  -10/-15   = moderate persistence or mild aquatic toxicity
#  -20/-25   = significant aquatic toxicity or PBT flag
#  -30/-35   = highly toxic or banned substance
# ═══════════════════════════════════════════════════════════════

ECHA_AQUATIC_HAZARDS = {

    # ── SURFACTANTS ─────────────────────────────────────────────
    "sodium lauryl sulfate": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": True,
        "concern": "Readily biodegradable but toxic to aquatic organisms at high concentrations",
        "deduction": -10,
    },
    "sls": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": True,
        "concern": "Readily biodegradable but toxic to aquatic organisms at high concentrations",
        "deduction": -10,
    },
    "sodium laureth sulfate": {
        "aquatic_class": "Chronic 3",
        "biodegradable": True,
        "concern": "Readily biodegradable — lower aquatic toxicity than SLS",
        "deduction": -5,
    },
    "sles": {
        "aquatic_class": "Chronic 3",
        "biodegradable": True,
        "concern": "Readily biodegradable — lower aquatic toxicity than SLS",
        "deduction": -5,
    },
    "cocamidopropyl betaine": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "sodium lauroyl sarcosinate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "decyl glucoside": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "coco glucoside": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "sodium cocoyl isethionate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },

    # ── PRESERVATIVES ────────────────────────────────────────────
    "benzalkonium chloride": {
        "aquatic_class": "Acute 1 / Chronic 1",
        "biodegradable": False,
        "concern": "Highly toxic to aquatic organisms, persistent",
        "deduction": -30,
    },
    "methylisothiazolinone": {
        "aquatic_class": "Acute 1 / Chronic 1",
        "biodegradable": False,
        "concern": "Extremely toxic to aquatic life",
        "deduction": -35,
    },
    "methylchloroisothiazolinone": {
        "aquatic_class": "Acute 1 / Chronic 1",
        "biodegradable": False,
        "concern": "Extremely toxic to aquatic life — restricted in EU leave-on products",
        "deduction": -35,
    },
    "phenoxyethanol": {
        "aquatic_class": "Chronic 3",
        "biodegradable": True,
        "concern": "Mildly toxic to aquatic organisms; biodegradable",
        "deduction": -5,
    },
    "sodium benzoate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "potassium sorbate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "ethylhexylglycerin": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "caprylyl glycol": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "formaldehyde": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": True,
        "concern": "Toxic to aquatic organisms; known carcinogen — banned in EU cosmetics",
        "deduction": -25,
    },

    # ── SILICONES ────────────────────────────────────────────────
    "dimethicone": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Persistent — not readily biodegradable in aquatic environments",
        "deduction": -10,
    },
    "cyclopentasiloxane": {
        "aquatic_class": "Chronic 1",
        "biodegradable": False,
        "concern": "EU restricted in rinse-off products — persistent, potential endocrine disruptor",
        "deduction": -20,
    },
    "cyclotetrasiloxane": {
        "aquatic_class": "Chronic 1",
        "biodegradable": False,
        "concern": "EU restricted — persistent bioaccumulative toxic (PBT)",
        "deduction": -25,
    },
    "cyclomethicone": {
        "aquatic_class": "Chronic 1",
        "biodegradable": False,
        "concern": "Contains D4/D5 — EU restricted in rinse-off products",
        "deduction": -20,
    },

    # ── EMOLLIENTS / OILS ────────────────────────────────────────
    "petrolatum": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Petroleum-derived — not readily biodegradable",
        "deduction": -10,
    },
    "mineral oil": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Petroleum-derived — not readily biodegradable",
        "deduction": -10,
    },
    "paraffinum liquidum": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Petroleum-derived — not readily biodegradable",
        "deduction": -10,
    },
    "palm oil": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": "Readily biodegradable — environmental concern is deforestation, not aquatic toxicity",
        "deduction": 0,
    },
    "palm kernel oil": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "shea butter": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "jojoba oil": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "sunflower seed oil": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "coconut oil": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "squalane": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "rosehip oil": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "argan oil": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },

    # ── UV FILTERS ───────────────────────────────────────────────
    "benzophenone-3": {
        "aquatic_class": "Chronic 2",
        "biodegradable": False,
        "concern": "Coral reef toxicity, bioaccumulates in fish",
        "deduction": -20,
    },
    "oxybenzone": {
        "aquatic_class": "Chronic 2",
        "biodegradable": False,
        "concern": "Coral reef toxicity — banned in Hawaii and several jurisdictions",
        "deduction": -20,
    },
    "octinoxate": {
        "aquatic_class": "Chronic 2",
        "biodegradable": False,
        "concern": "Coral reef toxicity, endocrine disruption in aquatic organisms",
        "deduction": -20,
    },
    "avobenzone": {
        "aquatic_class": "Chronic 3",
        "biodegradable": False,
        "concern": "Moderate aquatic toxicity — more stable than benzophenone-3",
        "deduction": -10,
    },
    "zinc oxide": {
        "aquatic_class": "Chronic 3",
        "biodegradable": False,
        "concern": "Nano zinc oxide: toxic to aquatic organisms. Non-nano: lower concern.",
        "deduction": -8,
    },
    "titanium dioxide": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Nano form may be toxic to aquatic organisms; non-nano low concern",
        "deduction": -5,
    },

    # ── CHELATING AGENTS ─────────────────────────────────────────
    "edta": {
        "aquatic_class": "Chronic 3",
        "biodegradable": False,
        "concern": "Persistent chelating agent",
        "deduction": -15,
    },
    "disodium edta": {
        "aquatic_class": "Chronic 3",
        "biodegradable": False,
        "concern": "Persistent chelating agent — mobilises heavy metals",
        "deduction": -15,
    },
    "tetrasodium edta": {
        "aquatic_class": "Chronic 3",
        "biodegradable": False,
        "concern": "Persistent chelating agent",
        "deduction": -15,
    },
    "citric acid": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },

    # ── EMULSIFIERS / THICKENERS ─────────────────────────────────
    "cetearyl alcohol": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "cetyl alcohol": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "stearic acid": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "glyceryl stearate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "peg-100 stearate": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "PEG compounds are not readily biodegradable",
        "deduction": -8,
    },
    "carbomer": {
        "aquatic_class": "None",
        "biodegradable": False,
        "concern": "Synthetic polymer — not readily biodegradable but low aquatic toxicity",
        "deduction": -5,
    },
    "xanthan gum": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "cellulose": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "propylene glycol": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "butylene glycol": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },

    # ── ACTIVES ──────────────────────────────────────────────────
    "glycerin": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "niacinamide": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "sodium hyaluronate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "hyaluronic acid": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "tocopherol": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "tocopheryl acetate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "retinol": {
        "aquatic_class": "Chronic 3",
        "biodegradable": True,
        "concern": "Some aquatic toxicity at high concentrations",
        "deduction": -5,
    },
    "salicylic acid": {
        "aquatic_class": "Chronic 3",
        "biodegradable": True,
        "concern": "Toxic to aquatic organisms at environmental concentrations",
        "deduction": -8,
    },
    "lactic acid": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "glycolic acid": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "ascorbic acid": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "allantoin": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "panthenol": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },

    # ── ANTIOXIDANTS ─────────────────────────────────────────────
    "butylated hydroxytoluene": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": False,
        "concern": "Toxic to aquatic organisms, persistent",
        "deduction": -15,
    },
    "bht": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": False,
        "concern": "Toxic to aquatic organisms, persistent",
        "deduction": -15,
    },
    "butylated hydroxyanisole": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": False,
        "concern": "Potential endocrine disruption, aquatic toxicity",
        "deduction": -12,
    },
    "bha": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": False,
        "concern": "Potential endocrine disruption, aquatic toxicity",
        "deduction": -12,
    },

    # ── BOTANICALS / NATURALS ────────────────────────────────────
    "aloe barbadensis leaf juice": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "aloe vera": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "chamomile extract": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "green tea extract": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "centella asiatica extract": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "rosemary extract": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "lavender oil": {
        "aquatic_class": "Chronic 3",
        "biodegradable": True,
        "concern": "Essential oils mildly toxic to aquatic organisms",
        "deduction": -5,
    },
    "tea tree oil": {
        "aquatic_class": "Acute 2 / Chronic 2",
        "biodegradable": True,
        "concern": "Toxic to aquatic organisms at cosmetic concentrations",
        "deduction": -10,
    },
    "lanolin": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "beeswax": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },

    # ── CLEANING ACTIVES ─────────────────────────────────────────
    "triclosan": {
        "aquatic_class": "Acute 1 / Chronic 1",
        "biodegradable": False,
        "concern": "Highly toxic to aquatic organisms — EU banned in cosmetics",
        "deduction": -35,
    },
    "sodium hypochlorite": {
        "aquatic_class": "Acute 2",
        "biodegradable": True,
        "concern": "Bleach — decomposes but toxic at point of discharge",
        "deduction": -10,
    },
    "sodium bicarbonate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "sodium carbonate": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },
    "acetic acid": {
        "aquatic_class": "None",
        "biodegradable": True,
        "concern": None,
        "deduction": 0,
    },

    # ── POLYMERS / MICROPLASTICS ──────────────────────────────────
    "polyethylene": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Microplastic — permanent environmental persistence",
        "deduction": -30,
    },
    "polypropylene": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Microplastic — permanent environmental persistence",
        "deduction": -30,
    },
    "microbeads": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Microplastic beads — banned in EU, UK, US rinse-off products",
        "deduction": -35,
    },
    "acrylates copolymer": {
        "aquatic_class": "Chronic 4",
        "biodegradable": False,
        "concern": "Synthetic polymer — persistence in aquatic environments",
        "deduction": -10,
    },
}


# ═══════════════════════════════════════════════════════════════
# PBT FLAGS — Persistent, Bioaccumulative, Toxic
# Source: ECHA REACH Annex XIII
# ═══════════════════════════════════════════════════════════════

PBT_FLAGS = {
    # High-concern PBT substances
    "triclosan":               {"pbt": True,  "concern": "Persistent, bioaccumulates in fish, endocrine disruptor",                    "deduction": -35},
    "triclocarban":            {"pbt": True,  "concern": "Persistent, bioaccumulates",                                                 "deduction": -30},
    "benzophenone-3":          {"pbt": True,  "concern": "Bioaccumulates in marine organisms and breast milk",                          "deduction": -20},
    "oxybenzone":              {"pbt": True,  "concern": "Bioaccumulates, coral reef toxicity — banned in several jurisdictions",       "deduction": -20},
    "octocrylene":             {"pbt": True,  "concern": "Converts to benzophenone, bioaccumulates in coral and fish",                  "deduction": -20},
    "octinoxate":              {"pbt": True,  "concern": "Bioaccumulates, coral toxicity, endocrine disruption",                       "deduction": -20},
    "galaxolide":              {"pbt": True,  "concern": "Synthetic musk — detected in human blood and breast milk",                    "deduction": -20},
    "tonalide":                {"pbt": True,  "concern": "Synthetic musk — widespread environmental persistence",                       "deduction": -20},
    "celestolide":             {"pbt": True,  "concern": "Synthetic musk — persistent in aquatic environments",                        "deduction": -18},
    "cyclopentasiloxane":      {"pbt": True,  "concern": "EU PBT — persistent, bioaccumulative; restricted in rinse-off",              "deduction": -20},
    "cyclotetrasiloxane":      {"pbt": True,  "concern": "EU PBT — restricted in rinse-off products since 2020",                      "deduction": -25},
    "methylisothiazolinone":   {"pbt": True,  "concern": "Highly persistent in aquatic environments",                                  "deduction": -25},
    "nonylphenol ethoxylates": {"pbt": True,  "concern": "Degrades to nonylphenol — persistent endocrine disruptor",                  "deduction": -35},
    "nonylphenol":             {"pbt": True,  "concern": "Highly persistent endocrine disruptor — banned EU detergents",               "deduction": -35},
    "polyethylene":            {"pbt": True,  "concern": "Microplastic — permanent environmental persistence",                         "deduction": -30},
    "polypropylene":           {"pbt": True,  "concern": "Microplastic — permanent environmental persistence",                         "deduction": -30},
    "nylon-12":                {"pbt": True,  "concern": "Microplastic — permanent environmental persistence",                         "deduction": -30},
    "microbeads":              {"pbt": True,  "concern": "Microplastic beads — banned EU/UK/US",                                       "deduction": -35},
    "edta":                    {"pbt": True,  "concern": "Non-biodegradable chelator — mobilises heavy metals in water",               "deduction": -15},
    "butylated hydroxytoluene":{"pbt": True,  "concern": "Bioaccumulates in aquatic organisms",                                        "deduction": -15},
    "bht":                     {"pbt": True,  "concern": "Bioaccumulates in aquatic organisms",                                        "deduction": -15},
    "synthetic musk":          {"pbt": True,  "concern": "Synthetic musks broadly persistent and bioaccumulative",                     "deduction": -20},
    "musk ketone":             {"pbt": True,  "concern": "Nitroaromatic musk — persistent, detected in human tissue",                  "deduction": -25},

    # Safe / readily biodegradable — explicit positive signals
    "glycerin":                {"pbt": False, "concern": None, "deduction": 0},
    "sodium hyaluronate":      {"pbt": False, "concern": None, "deduction": 0},
    "hyaluronic acid":         {"pbt": False, "concern": None, "deduction": 0},
    "tocopherol":              {"pbt": False, "concern": None, "deduction": 0},
    "tocopheryl acetate":      {"pbt": False, "concern": None, "deduction": 0},
    "niacinamide":             {"pbt": False, "concern": None, "deduction": 0},
    "ascorbic acid":           {"pbt": False, "concern": None, "deduction": 0},
    "lactic acid":             {"pbt": False, "concern": None, "deduction": 0},
    "glycolic acid":           {"pbt": False, "concern": None, "deduction": 0},
    "citric acid":             {"pbt": False, "concern": None, "deduction": 0},
    "xanthan gum":             {"pbt": False, "concern": None, "deduction": 0},
    "cetearyl alcohol":        {"pbt": False, "concern": None, "deduction": 0},
    "shea butter":             {"pbt": False, "concern": None, "deduction": 0},
    "jojoba oil":              {"pbt": False, "concern": None, "deduction": 0},
    "aloe vera":               {"pbt": False, "concern": None, "deduction": 0},
    "allantoin":               {"pbt": False, "concern": None, "deduction": 0},
    "panthenol":               {"pbt": False, "concern": None, "deduction": 0},
    "sodium bicarbonate":      {"pbt": False, "concern": None, "deduction": 0},
}


# ═══════════════════════════════════════════════════════════════
# EU ECOLABEL CERTIFIED BRANDS — v2
#
# IMPORTANT — Certification washing fix:
#   min_weighted_score: Ecolabel bonus only applies when the
#   product's weighted environment score reaches this threshold.
#   Prevents a brand's certification overriding a toxic formula.
#
# Sources:
#   - EU Ecolabel product database (ecolabel.eu)
#   - Nordic Swan Ecolabel
#   - Cradle to Cradle certified brands
# ═══════════════════════════════════════════════════════════════

EU_ECOLABEL_BRANDS = {
    # ── Cosmetics / Personal care ────────────────────────────────
    "weleda": {
        "certified": True,
        "bonus": 8,
        "category": "cosmetics",
        "certification": "NATRUE + EU Organic",
        "min_weighted_score": 50,   # bonus only if formula itself is decent
    },
    "lavera": {
        "certified": True,
        "bonus": 7,
        "category": "cosmetics",
        "certification": "NATRUE",
        "min_weighted_score": 50,
    },
    "dr. hauschka": {
        "certified": True,
        "bonus": 7,
        "category": "cosmetics",
        "certification": "NATRUE + Demeter",
        "min_weighted_score": 50,
    },
    "pai skincare": {
        "certified": True,
        "bonus": 6,
        "category": "cosmetics",
        "certification": "Soil Association Organic",
        "min_weighted_score": 50,
    },
    "cattier": {
        "certified": True,
        "bonus": 6,
        "category": "cosmetics",
        "certification": "Ecocert",
        "min_weighted_score": 50,
    },

    # ── Cleaning products ────────────────────────────────────────
    "ecover": {
        "certified": True,
        "bonus": 10,
        "category": "cleaning",
        "certification": "EU Ecolabel",
        "min_weighted_score": 50,   # FIX: was applied unconditionally
    },
    "method": {
        "certified": True,
        "bonus": 8,
        "category": "cleaning",
        "certification": "EPA Safer Choice",
        "min_weighted_score": 50,
    },
    "seventh generation": {
        "certified": True,
        "bonus": 8,
        "category": "cleaning",
        "certification": "EPA Safer Choice + B Corp",
        "min_weighted_score": 50,
    },
    "bio-d": {
        "certified": True,
        "bonus": 9,
        "category": "cleaning",
        "certification": "EU Ecolabel + Vegan Society",
        "min_weighted_score": 50,
    },
    "dr. bronner": {
        "certified": True,
        "bonus": 8,
        "category": "cleaning",
        "certification": "B Corp + Fair Trade + USDA Organic",
        "min_weighted_score": 50,
    },
    "attitude": {
        "certified": True,
        "bonus": 7,
        "category": "cleaning",
        "certification": "EWG Verified + B Corp",
        "min_weighted_score": 50,
    },

    # ── Food ─────────────────────────────────────────────────────
    "innocent": {
        "certified": True,
        "bonus": 5,
        "category": "food",
        "certification": "B Corp",
        "min_weighted_score": 50,
    },
    "alpro": {
        "certified": True,
        "bonus": 5,
        "category": "food",
        "certification": "B Corp",
        "min_weighted_score": 50,
    },
}


# ═══════════════════════════════════════════════════════════════
# LOOKUP FUNCTIONS
# ═══════════════════════════════════════════════════════════════

def get_environment_signals(category: str) -> dict:
    return ENVIRONMENT_SIGNALS.get(category, ENVIRONMENT_SIGNALS["cosmetics"])


def get_aquatic_hazard(ingredient: str) -> dict:
    """
    Returns ECHA aquatic hazard data for an ingredient.
    Tries exact match first, then partial match.
    """
    name = ingredient.lower().strip()
    # Exact match
    if name in ECHA_AQUATIC_HAZARDS:
        return {"ingredient": ingredient, **ECHA_AQUATIC_HAZARDS[name]}
    # Partial match — ingredient name contains key or vice versa
    for key, val in ECHA_AQUATIC_HAZARDS.items():
        if key in name or name in key:
            return {"ingredient": ingredient, **val}
    # Not found
    return {
        "ingredient":    ingredient,
        "aquatic_class": "Unknown",
        "biodegradable": None,
        "concern":       None,
        "deduction":     0
    }


def check_ecolabel(brand: str, weighted_score: float = 100) -> dict:
    """
    Returns EU Ecolabel certification data for a brand.

    CERTIFICATION WASHING FIX:
    The bonus is only returned if the product's weighted_score
    meets the brand's min_weighted_score threshold. This prevents
    a certified brand name from inflating a genuinely toxic formula.

    Args:
        brand:          brand name string
        weighted_score: the product's pre-bonus weighted score (0-100)
    """
    name = brand.lower().strip()
    for key, val in EU_ECOLABEL_BRANDS.items():
        if key in name or name in key:
            entry = {"brand": brand, **val}
            # Apply certification washing gate
            min_score = val.get("min_weighted_score", 50)
            if weighted_score < min_score:
                entry["bonus"] = 0
                entry["bonus_blocked"] = True
                entry["bonus_blocked_reason"] = (
                    f"Formula weighted score ({weighted_score}/100) below "
                    f"certification threshold ({min_score}/100) — "
                    f"bonus withheld to prevent certification washing"
                )
            else:
                entry["bonus_blocked"] = False
            return entry
    return {"brand": brand, "certified": False, "bonus": 0, "bonus_blocked": False}


print(f"✓ NOURA Environment Config v2 loaded")
print(f"  ECHA database:    {len(ECHA_AQUATIC_HAZARDS)} ingredients")
print(f"  PBT flags:        {len(PBT_FLAGS)} entries")
print(f"  Ecolabel brands:  {len(EU_ECOLABEL_BRANDS)} brands")

Overwriting /content/noura_environment_config.py


In [None]:
%%writefile /content/noura_environment_engine.py
"""
NOURA Environment Engine — Master Scorer
Cell 31

Calculates the Environment dimension score (0–100) for any product.
Environment = 15% of the final NOURA score across all categories.

Signal architecture per category:

  COSMETICS
    Biodegradability  40%  ← ECHA aquatic hazard data       ✅ This cell
    Packaging         30%  ← Brand/manual data              ⚠  LOW confidence
    Manufacturing     20%  ← B Corp (Week 13 overlap)       ⚠  LOW confidence
    Palm oil          10%  ← RSPO (Week 13)                 ⚠  LOW confidence

  FOOD
    Carbon footprint  40%  ← Open Food Facts Eco-Score      ✅ This cell (live API)
    Packaging         30%  ← Open Food Facts materials      ✅ This cell (live API)
    Pesticides        20%  ← EFSA residue data              ⚠  Stub — Week 12
    Water usage       10%  ← HowGood LCA                   ⚠  Stub — future

  CLEANING
    Aquatic toxicity  50%  ← ECHA aquatic hazard data       ✅ This cell
    VOCs              30%  ← ECHA + EU Ecolabel VOC list    ✅ This cell
    Packaging         20%  ← Brand/manual data              ⚠  LOW confidence

  BABY
    Biodegradability  40%  ← ECHA persistence + bioaccum.  ✅ This cell
    Packaging         30%  ← Brand/manual data              ⚠  LOW confidence
    Supply chain      20%  ← B Corp (Week 13)              ⚠  Stub
    Cumulative expo.  10%  ← ECHA PBT flags                ✅ This cell

Where data is unavailable, signals return a neutral stub score (65/100)
with LOW confidence — they don't drag the score down unfairly but don't
inflate it either. The confidence field signals to the aggregator that
the score should be weighted down in the final display.
"""

import requests
import time
import sys
from datetime import datetime

sys.path.insert(0, '/content')

try:
    from noura_environment_config import (
        ENVIRONMENT_SIGNALS,
        ECHA_AQUATIC_HAZARDS,
        EU_ECOLABEL_BRANDS,
        get_aquatic_hazard,
        check_ecolabel,
        get_environment_signals,
    )
    CONFIG_AVAILABLE = True
except ImportError:
    CONFIG_AVAILABLE = False
    print("⚠  noura_environment_config not found — run Cell 30 first")


# ═══════════════════════════════════════════════════════════════
# SECTION 1: VOC DATABASE
# Volatile Organic Compounds — indoor air quality and smog
# Relevant to cleaning products and leave-on cosmetics.
# Sources: EU Ecolabel VOC criteria, ECHA REACH
# ═══════════════════════════════════════════════════════════════

VOC_DATABASE = {
    # High-VOC solvents — cleaning
    "ethanol":                  {"voc": True, "risk": "low",      "deduction": -5,
                                 "note": "Biodegradable but VOC — limit in sprays"},
    "isopropanol":              {"voc": True, "risk": "low",      "deduction": -5,
                                 "note": "Biodegradable VOC"},
    "isopropyl alcohol":        {"voc": True, "risk": "low",      "deduction": -5,
                                 "note": "Biodegradable VOC"},
    "alcohol denat":            {"voc": True, "risk": "low",      "deduction": -5,
                                 "note": "Denatured ethanol — VOC"},
    "butane":                   {"voc": True, "risk": "moderate", "deduction": -12,
                                 "note": "Propellant — smog precursor"},
    "propane":                  {"voc": True, "risk": "moderate", "deduction": -12,
                                 "note": "Propellant — smog precursor"},
    "isobutane":                {"voc": True, "risk": "moderate", "deduction": -12,
                                 "note": "Propellant — smog precursor"},
    "2-butoxyethanol":          {"voc": True, "risk": "high",     "deduction": -25,
                                 "note": "High-VOC solvent — restricted EU Ecolabel"},
    "glycol ethers":            {"voc": True, "risk": "high",     "deduction": -20,
                                 "note": "Reproductive concern + high VOC"},
    "diethylene glycol monobutyl ether": {
                                 "voc": True, "risk": "high",     "deduction": -20,
                                 "note": "EU Ecolabel restricted solvent"},
    "d-limonene":               {"voc": True, "risk": "moderate", "deduction": -8,
                                 "note": "Natural terpene — VOC, reacts to form allergen"},
    "pine oil":                 {"voc": True, "risk": "moderate", "deduction": -8,
                                 "note": "Terpene VOC — smog precursor"},
    "fragrance":                {"voc": True, "risk": "moderate", "deduction": -10,
                                 "note": "Undisclosed VOC-containing mixture"},
    "parfum":                   {"voc": True, "risk": "moderate", "deduction": -10,
                                 "note": "Undisclosed VOC-containing mixture"},
    # Low/no VOC — positive signals
    "water":                    {"voc": False, "risk": "none",    "deduction": 0},
    "aqua":                     {"voc": False, "risk": "none",    "deduction": 0},
    "glycerin":                 {"voc": False, "risk": "none",    "deduction": 0},
    "sodium hydroxide":         {"voc": False, "risk": "none",    "deduction": 0},
    "citric acid":              {"voc": False, "risk": "none",    "deduction": 0},
    "sodium bicarbonate":       {"voc": False, "risk": "none",    "deduction": 0},
    "lactic acid":              {"voc": False, "risk": "none",    "deduction": 0},
}


# ═══════════════════════════════════════════════════════════════
# SECTION 2: PBT / PERSISTENCE FLAGS
# Persistent, Bioaccumulative, Toxic substances.
# Especially important for baby products and aquatic ecosystems.
# Source: ECHA REACH Annex XIII
# ═══════════════════════════════════════════════════════════════

PBT_FLAGS = {
    "triclosan":                {"pbt": True, "concern": "Persistent, bioaccumulates in fish, endocrine disruptor", "deduction": -35},
    "triclocarban":             {"pbt": True, "concern": "Persistent, bioaccumulates", "deduction": -30},
    "benzophenone-3":           {"pbt": True, "concern": "Bioaccumulates in marine organisms and breast milk", "deduction": -20},
    "octocrylene":              {"pbt": True, "concern": "Converts to benzophenone, bioaccumulates", "deduction": -20},
    "octinoxate":               {"pbt": True, "concern": "Bioaccumulates, coral toxicity", "deduction": -20},
    "galaxolide":               {"pbt": True, "concern": "Synthetic musk — detected in human blood and breast milk", "deduction": -20},
    "tonalide":                 {"pbt": True, "concern": "Synthetic musk — widespread environmental persistence", "deduction": -20},
    "methylisothiazolinone":    {"pbt": True, "concern": "Highly persistent in aquatic environments", "deduction": -25},
    "nonylphenol ethoxylates":  {"pbt": True, "concern": "Degrades to nonylphenol — highly persistent endocrine disruptor", "deduction": -35},
    "polyethylene":             {"pbt": True, "concern": "Microplastic — permanent environmental persistence", "deduction": -30},
    "polypropylene":            {"pbt": True, "concern": "Microplastic — permanent environmental persistence", "deduction": -30},
    "nylon-12":                 {"pbt": True, "concern": "Microplastic — permanent environmental persistence", "deduction": -30},
    "edta":                     {"pbt": True, "concern": "Non-biodegradable chelator — mobilises heavy metals in water", "deduction": -15},
    # Safe / biodegradable
    "glycerin":                 {"pbt": False, "concern": None, "deduction": 0},
    "sodium hyaluronate":       {"pbt": False, "concern": None, "deduction": 0},
    "tocopherol":               {"pbt": False, "concern": None, "deduction": 0},
}


# ═══════════════════════════════════════════════════════════════
# SECTION 3: OPEN FOOD FACTS — ECO-SCORE
# Live API for food and baby food products.
# Returns A–E Eco-Score and packaging data.
# ═══════════════════════════════════════════════════════════════

OFF_API = "https://world.openfoodfacts.org/api/v2/product"
OFF_SEARCH = "https://world.openfoodfacts.org/cgi/search.pl"

ECO_SCORE_MAP = {
    "A": 95,
    "B": 78,
    "C": 60,
    "D": 40,
    "E": 20,
    "a": 95, "b": 78, "c": 60, "d": 40, "e": 20,
}

PACKAGING_SCORE_MAP = {
    # Material scores — higher = more sustainable
    "glass":              90,
    "cardboard":          85,
    "paper":              85,
    "aluminium":          70,  # high recyclability but energy-intensive to produce
    "metal":              70,
    "hdpe":               60,  # most recyclable plastic
    "pet":                55,
    "pp":                 50,
    "ldpe":               35,
    "pvc":                10,  # rarely recyclable, toxic to produce/burn
    "polystyrene":        10,
    "bioplastic":         65,
    "compostable":        80,
    "unknown":            40,  # neutral default
}


def fetch_openfoodfacts(product_name: str, barcode: str = None) -> dict:
    """
    Query Open Food Facts for Eco-Score and packaging data.
    Tries barcode first (exact), then product name search.

    Returns:
        dict with eco_score, packaging_score, and raw OFF data
    """
    result = {
        "source":          "Open Food Facts",
        "product":         product_name,
        "found":           False,
        "eco_score":       None,
        "eco_grade":       None,
        "packaging_score": None,
        "packaging_materials": [],
        "nutriscore":      None,
        "score":           65,   # neutral stub if not found
        "confidence":      "LOW",
        "api_checked":     False,
        "timestamp":       datetime.now().isoformat()
    }

    try:
        # ── Try barcode lookup first ──────────────────────────
        if barcode:
            url = f"{OFF_API}/{barcode}"
            r = requests.get(url, timeout=8,
                             headers={"User-Agent": "NOURA-Scanner/1.0"})
            result["api_checked"] = True

            if r.status_code == 200:
                data = r.json()
                product = data.get("product", {})
                if product:
                    return _parse_off_product(product, result)

        # ── Fall back to name search ──────────────────────────
        params = {
            "search_terms":    product_name,
            "search_simple":   1,
            "action":          "process",
            "json":            1,
            "page_size":       3,
            "fields":          "product_name,ecoscore_grade,ecoscore_score,"
                               "packaging_materials_tags,nutriscore_grade,brands"
        }
        r = requests.get(OFF_SEARCH, params=params, timeout=8,
                         headers={"User-Agent": "NOURA-Scanner/1.0"})
        result["api_checked"] = True

        if r.status_code == 200:
            data = r.json()
            products = data.get("products", [])
            if products:
                return _parse_off_product(products[0], result)

    except requests.exceptions.RequestException as e:
        result["error"] = str(e)

    return result


def _parse_off_product(product: dict, base: dict) -> dict:
    """Parse a single Open Food Facts product into NOURA format."""
    base["found"] = True

    # ── Eco-Score ─────────────────────────────────────────────
    grade = product.get("ecoscore_grade", "").upper()
    raw_score = product.get("ecoscore_score")

    if grade and grade in ECO_SCORE_MAP:
        base["eco_grade"] = grade
        base["eco_score"] = ECO_SCORE_MAP[grade]
    elif raw_score is not None:
        try:
            base["eco_score"] = min(100, max(0, int(raw_score)))
        except (TypeError, ValueError):
            pass

    # ── Packaging ─────────────────────────────────────────────
    materials_tags = product.get("packaging_materials_tags", [])
    materials = []
    packaging_scores = []

    for tag in materials_tags:
        # Tags come as "en:glass", "en:cardboard" etc.
        material = tag.replace("en:", "").replace("-", " ").strip()
        materials.append(material)
        mat_score = PACKAGING_SCORE_MAP.get(material.lower(),
                                            PACKAGING_SCORE_MAP["unknown"])
        packaging_scores.append(mat_score)

    base["packaging_materials"] = materials
    base["packaging_score"] = (round(sum(packaging_scores) / len(packaging_scores))
                               if packaging_scores else None)

    # ── Nutriscore (bonus context, not scored) ─────────────────
    base["nutriscore"] = product.get("nutriscore_grade", "").upper() or None

    # ── Compute combined environment score ────────────────────
    eco   = base["eco_score"]
    pkg   = base["packaging_score"]

    if eco is not None and pkg is not None:
        base["score"] = round(eco * 0.70 + pkg * 0.30)
        base["confidence"] = "HIGH"
    elif eco is not None:
        base["score"] = eco
        base["confidence"] = "MEDIUM"
    elif pkg is not None:
        base["score"] = pkg
        base["confidence"] = "LOW"
    else:
        base["score"] = 65   # neutral stub
        base["confidence"] = "LOW"

    return base


# ═══════════════════════════════════════════════════════════════
# SECTION 4: INDIVIDUAL SIGNAL SCORERS
# Each returns {"score": int, "confidence": str, "flags": list}
# ═══════════════════════════════════════════════════════════════

BASE = 100   # deductions applied on top


def _score_biodegradability(ingredients: list) -> dict:
    """
    Scores biodegradability using ECHA aquatic hazard data.
    Checks both aquatic toxicity AND PBT persistence flags.
    Used for cosmetics and baby categories.
    """
    total_deduction = 0
    flags = []
    non_biodegradable = []

    for ing in ingredients:
        name = ing.lower().strip()

        # Check ECHA aquatic hazard
        if CONFIG_AVAILABLE:
            hazard = get_aquatic_hazard(ing)
            if hazard["deduction"] < 0:
                total_deduction += hazard["deduction"]
                if hazard["concern"]:
                    flags.append(f"{ing}: {hazard['concern']}")
            if hazard["biodegradable"] is False:
                non_biodegradable.append(ing)

        # Check PBT persistence
        for key, pbt in PBT_FLAGS.items():
            if key in name and pbt["pbt"] and pbt["deduction"] < 0:
                # Avoid double-counting if already caught by ECHA
                if pbt["concern"] and not any(ing in f for f in flags):
                    flags.append(f"{ing} (PBT): {pbt['concern']}")
                    total_deduction += max(pbt["deduction"], -10)  # partial, ECHA primary

    score = max(0, min(100, BASE + total_deduction))
    return {
        "signal":            "biodegradability",
        "score":             score,
        "flags":             flags,
        "non_biodegradable": non_biodegradable,
        "confidence":        "HIGH" if CONFIG_AVAILABLE else "LOW"
    }


def _score_aquatic_toxicity(ingredients: list) -> dict:
    """
    Scores aquatic toxicity using ECHA classifications.
    Primary signal for cleaning products — highest weight (50%).
    """
    total_deduction = 0
    flags = []
    acute_toxic = []

    for ing in ingredients:
        if CONFIG_AVAILABLE:
            hazard = get_aquatic_hazard(ing)
            if hazard["deduction"] < 0:
                total_deduction += hazard["deduction"]
                if hazard["concern"]:
                    flags.append(f"{ing}: {hazard['concern']}")
                if "Acute 1" in (hazard.get("aquatic_class") or ""):
                    acute_toxic.append(ing)

    # Extra penalty if multiple high-toxicity ingredients present
    if len(acute_toxic) >= 2:
        total_deduction -= 10

    score = max(0, min(100, BASE + total_deduction))
    return {
        "signal":       "aquatic_toxicity",
        "score":        score,
        "flags":        flags,
        "acute_toxic":  acute_toxic,
        "confidence":   "HIGH" if CONFIG_AVAILABLE else "LOW"
    }


def _score_vocs(ingredients: list) -> dict:
    """
    Scores VOC content from ingredient list.
    Primary signal for cleaning products (30%).
    Also applied to cosmetics spray/aerosol formulas.
    """
    total_deduction = 0
    flags = []
    voc_ingredients = []

    for ing in ingredients:
        name = ing.lower().strip()
        for key, data in VOC_DATABASE.items():
            if key in name and data["voc"]:
                total_deduction += data["deduction"]
                voc_ingredients.append(ing)
                if data["note"]:
                    flags.append(f"{ing}: {data['note']}")
                break

    score = max(0, min(100, BASE + total_deduction))
    return {
        "signal":          "vocs",
        "score":           score,
        "flags":           flags,
        "voc_ingredients": voc_ingredients,
        "confidence":      "HIGH" if voc_ingredients else "MEDIUM"
    }


def _score_packaging_stub(packaging_info: dict = None) -> dict:
    """
    Packaging signal.
    If packaging_info is provided (dict with 'materials', 'recyclable',
    'refillable' keys), scores accordingly.
    Otherwise returns a LOW-confidence neutral stub.

    packaging_info example:
        {
            "materials":   ["glass", "cardboard"],
            "recyclable":  True,
            "refillable":  False,
            "concentrated": False
        }
    """
    if not packaging_info:
        return {
            "signal":     "packaging",
            "score":      65,
            "flags":      [],
            "confidence": "LOW",
            "note":       "No packaging data provided — neutral stub"
        }

    scores = []
    flags = []

    # Score each material
    for material in packaging_info.get("materials", []):
        mat_score = PACKAGING_SCORE_MAP.get(
            material.lower().strip(),
            PACKAGING_SCORE_MAP["unknown"]
        )
        scores.append(mat_score)

    base_score = round(sum(scores) / len(scores)) if scores else 60

    # Bonuses and penalties
    bonus = 0
    if packaging_info.get("refillable"):
        bonus += 10
    if packaging_info.get("concentrated"):
        bonus += 8    # concentrated formulas = less packaging per use
    if packaging_info.get("recyclable") is False:
        bonus -= 15
        flags.append("Packaging not recyclable")

    final_score = max(5, min(100, base_score + bonus))  # floor at 5, not 0
    confidence  = "MEDIUM" if scores else "LOW"

    return {
        "signal":     "packaging",
        "score":      final_score,
        "flags":      flags,
        "confidence": confidence,
        "materials":  packaging_info.get("materials", [])
    }


def _score_carbon_footprint(product_name: str, barcode: str = None) -> dict:
    """
    Carbon footprint via Open Food Facts Eco-Score.
    Used for food and baby food categories (40% weight for food).
    """
    off = fetch_openfoodfacts(product_name, barcode)

    flags = []
    if off["eco_grade"] in ("D", "E"):
        flags.append(
            f"Eco-Score {off['eco_grade']} — high environmental impact"
        )
    if off["packaging_materials"]:
        bad = [m for m in off["packaging_materials"]
               if PACKAGING_SCORE_MAP.get(m.lower(), 50) < 40]
        if bad:
            flags.append(f"Low-recyclability packaging: {', '.join(bad)}")

    return {
        "signal":       "carbon_footprint",
        "score":        off["score"],
        "flags":        flags,
        "eco_grade":    off.get("eco_grade"),
        "eco_score":    off.get("eco_score"),
        "packaging":    off.get("packaging_materials", []),
        "confidence":   off["confidence"],
        "api_checked":  off["api_checked"],
        "off_data":     off
    }


def _score_cumulative_exposure(ingredients: list) -> dict:
    """
    Cumulative environmental exposure risk — baby products only.
    Checks PBT substances that bioaccumulate and persist in the food chain,
    posing long-term exposure risk for children.
    """
    total_deduction = 0
    flags = []
    pbt_found = []

    for ing in ingredients:
        name = ing.lower().strip()
        for key, data in PBT_FLAGS.items():
            if key in name and data["pbt"]:
                total_deduction += data["deduction"]
                pbt_found.append(ing)
                if data["concern"]:
                    flags.append(f"{ing}: {data['concern']}")
                break

    score = max(0, min(100, BASE + total_deduction))
    return {
        "signal":     "cumulative_exposure",
        "score":      score,
        "flags":      flags,
        "pbt_found":  pbt_found,
        "confidence": "HIGH" if CONFIG_AVAILABLE else "LOW"
    }


def _score_pesticides_stub() -> dict:
    """Stub for EFSA pesticide residue data — Week 12."""
    return {
        "signal":     "pesticides",
        "score":      65,
        "flags":      [],
        "confidence": "LOW",
        "note":       "EFSA pesticide residue integration — Week 12"
    }


def _score_manufacturing_stub() -> dict:
    """Stub for manufacturing / supply chain — Week 13 (B Corp overlap)."""
    return {
        "signal":     "manufacturing",
        "score":      65,
        "flags":      [],
        "confidence": "LOW",
        "note":       "B Corp manufacturing data — Week 13"
    }


def _score_palm_oil_stub() -> dict:
    """Stub for RSPO palm oil certification — Week 13."""
    return {
        "signal":     "palm_oil",
        "score":      65,
        "flags":      [],
        "confidence": "LOW",
        "note":       "RSPO palm oil data — Week 13"
    }


def _score_supply_chain_stub() -> dict:
    """Stub for supply chain score — Week 13."""
    return {
        "signal":     "supply_chain",
        "score":      65,
        "flags":      [],
        "confidence": "LOW",
        "note":       "Supply chain data — Week 13"
    }


# ═══════════════════════════════════════════════════════════════
# SECTION 5: MASTER ENVIRONMENT SCORER
# ═══════════════════════════════════════════════════════════════

def calculate_environment_score(
    product_name:    str,
    ingredients:     list,
    category:        str = "cosmetics",
    brand_name:      str = "",
    packaging_info:  dict = None,
    barcode:         str = None,
) -> dict:
    """
    Master environment score calculator.
    Aggregates all environment signals into one weighted 0–100 score.

    Args:
        product_name:   product name (used for Open Food Facts lookup)
        ingredients:    list of ingredient name strings
        category:       'cosmetics', 'food', 'cleaning', 'baby'
        brand_name:     brand name (for EU Ecolabel bonus check)
        packaging_info: optional dict — see _score_packaging_stub() for format
        barcode:        optional EAN barcode for precise Open Food Facts lookup

    Returns:
        {
            "environment_score": int (0–100),
            "verdict":           str,
            "flags":             list,
            "signal_scores":     dict,
            "ecolabel_bonus":    int,
            "confidence":        str,
            "summary":           str,
            "timestamp":         str
        }
    """
    print(f"\n  🌿 Running environment analysis for: {product_name}")
    print(f"     Category: {category} | Ingredients: {len(ingredients)}")

    if category not in ENVIRONMENT_SIGNALS:
        category = "cosmetics"
        print(f"     ⚠  Unknown category — defaulting to cosmetics")

    signals = get_environment_signals(category) if CONFIG_AVAILABLE else {}

    # ── Run signals by category ────────────────────────────────
    signal_results = {}

    if category == "cosmetics":
        print("     Biodegradability...", end=" ")
        signal_results["biodegradability"] = _score_biodegradability(ingredients)
        print(f"score: {signal_results['biodegradability']['score']}")

        print("     Packaging...", end=" ")
        signal_results["packaging"] = _score_packaging_stub(packaging_info)
        print(f"score: {signal_results['packaging']['score']}"
              f" ({signal_results['packaging']['confidence']} confidence)")

        print("     Manufacturing...", end=" ")
        signal_results["manufacturing"] = _score_manufacturing_stub()
        print(f"score: {signal_results['manufacturing']['score']} (stub)")

        print("     Palm oil...", end=" ")
        signal_results["palm_oil"] = _score_palm_oil_stub()
        print(f"score: {signal_results['palm_oil']['score']} (stub)")

        weights = {"biodegradability": 0.40, "packaging": 0.30,
                   "manufacturing": 0.20, "palm_oil": 0.10}

    elif category == "food":
        print("     Carbon footprint (Open Food Facts)...", end=" ")
        signal_results["carbon_footprint"] = _score_carbon_footprint(product_name, barcode)
        print(f"score: {signal_results['carbon_footprint']['score']}"
              f" (grade: {signal_results['carbon_footprint'].get('eco_grade', 'N/A')})")

        print("     Packaging (Open Food Facts)...", end=" ")
        # Packaging data comes from same OFF call — reuse result
        off_data = signal_results["carbon_footprint"].get("off_data", {})
        if off_data.get("packaging_score"):
            signal_results["packaging"] = {
                "signal":     "packaging",
                "score":      off_data["packaging_score"],
                "flags":      [],
                "confidence": "MEDIUM",
                "materials":  off_data.get("packaging_materials", [])
            }
        else:
            signal_results["packaging"] = _score_packaging_stub(packaging_info)
        print(f"score: {signal_results['packaging']['score']}")

        print("     Pesticides...", end=" ")
        signal_results["pesticides"] = _score_pesticides_stub()
        print(f"score: {signal_results['pesticides']['score']} (stub)")

        print("     Water usage...", end=" ")
        signal_results["water_usage"] = {"signal": "water_usage", "score": 65,
                                          "flags": [], "confidence": "LOW",
                                          "note": "HowGood LCA — future"}
        print(f"score: {signal_results['water_usage']['score']} (stub)")

        weights = {"carbon_footprint": 0.40, "packaging": 0.30,
                   "pesticides": 0.20, "water_usage": 0.10}

    elif category == "cleaning":
        print("     Aquatic toxicity...", end=" ")
        signal_results["aquatic_toxicity"] = _score_aquatic_toxicity(ingredients)
        print(f"score: {signal_results['aquatic_toxicity']['score']}")

        print("     VOCs...", end=" ")
        signal_results["vocs"] = _score_vocs(ingredients)
        print(f"score: {signal_results['vocs']['score']}")

        print("     Packaging...", end=" ")
        signal_results["packaging"] = _score_packaging_stub(packaging_info)
        print(f"score: {signal_results['packaging']['score']}"
              f" ({signal_results['packaging']['confidence']} confidence)")

        weights = {"aquatic_toxicity": 0.50, "vocs": 0.30, "packaging": 0.20}

    elif category == "baby":
        print("     Biodegradability...", end=" ")
        signal_results["biodegradability"] = _score_biodegradability(ingredients)
        print(f"score: {signal_results['biodegradability']['score']}")

        print("     Packaging...", end=" ")
        signal_results["packaging"] = _score_packaging_stub(packaging_info)
        print(f"score: {signal_results['packaging']['score']}"
              f" ({signal_results['packaging']['confidence']} confidence)")

        print("     Supply chain...", end=" ")
        signal_results["supply_chain"] = _score_supply_chain_stub()
        print(f"score: {signal_results['supply_chain']['score']} (stub)")

        print("     Cumulative exposure...", end=" ")
        signal_results["cumulative_exposure"] = _score_cumulative_exposure(ingredients)
        print(f"score: {signal_results['cumulative_exposure']['score']}")

        weights = {"biodegradability": 0.40, "packaging": 0.30,
                   "supply_chain": 0.20, "cumulative_exposure": 0.10}

    # ── Weighted aggregation ───────────────────────────────────
    weighted_score = sum(
        signal_results[sig]["score"] * weights.get(sig, 0)
        for sig in signal_results
        if sig in weights
    )
    weighted_score = round(weighted_score)

    # ── EU Ecolabel bonus ──────────────────────────────────────
    ecolabel_bonus = 0
    ecolabel_data  = {}
    if brand_name:
        ecolabel_data = (check_ecolabel(brand_name, weighted_score) if CONFIG_AVAILABLE else {})
        if ecolabel_data.get("certified"):
            ecolabel_bonus = ecolabel_data.get("bonus", 0)
            print(f"     EU Ecolabel bonus for {brand_name}: +{ecolabel_bonus}")

    final_score = min(100, weighted_score + ecolabel_bonus)

    # ── Collect all flags ──────────────────────────────────────
    all_flags = []
    seen = set()
    for sr in signal_results.values():
        for flag in sr.get("flags", []):
            key = flag[:50].lower()
            if key not in seen:
                seen.add(key)
                all_flags.append(flag)

    # ── Confidence ─────────────────────────────────────────────
    high_confidence_signals = sum(
        1 for sr in signal_results.values()
        if sr.get("confidence") in ("HIGH", "MEDIUM")
    )
    total_signals = len(signal_results)

    if high_confidence_signals >= total_signals * 0.75:
        confidence = "HIGH"
    elif high_confidence_signals >= total_signals * 0.40:
        confidence = "MEDIUM"
    else:
        confidence = "LOW"

    # ── Verdict ────────────────────────────────────────────────
    if final_score >= 80:   verdict = "CLEAN"
    elif final_score >= 60: verdict = "ACCEPTABLE"
    elif final_score >= 40: verdict = "CAUTION"
    else:                   verdict = "HIGHER_RISK"

    # ── Summary ────────────────────────────────────────────────
    stub_count = sum(
        1 for sr in signal_results.values()
        if sr.get("confidence") == "LOW"
    )
    if not all_flags:
        summary = f"No environmental concerns detected."
    else:
        summary = f"{len(all_flags)} environmental concern(s). Highest: {all_flags[0][:80]}"
    if stub_count > 0:
        summary += f" ({stub_count} signal(s) pending data — score may improve)"

    return {
        "product":           product_name,
        "category":          category,
        "environment_score": final_score,
        "weighted_score":    weighted_score,
        "ecolabel_bonus":    ecolabel_bonus,
        "verdict":           verdict,
        "flags":             all_flags,
        "flag_count":        len(all_flags),
        "signal_scores":     {k: v["score"] for k, v in signal_results.items()},
        "signal_confidence": {k: v.get("confidence", "LOW")
                              for k, v in signal_results.items()},
        "signal_weights":    weights,
        "confidence":        confidence,
        "summary":           summary,
        "timestamp":         datetime.now().isoformat()
    }


# ═══════════════════════════════════════════════════════════════
# SECTION 6: PRINT HELPER
# ═══════════════════════════════════════════════════════════════

def print_environment_report(result: dict):
    """Pretty-print an environment score result to console."""
    icons = {"CLEAN": "🟢", "ACCEPTABLE": "🟡",
             "CAUTION": "🟠", "HIGHER_RISK": "🔴"}
    icon = icons.get(result["verdict"], "⚪")
    conf_icons = {"HIGH": "●", "MEDIUM": "◑", "LOW": "○"}

    print(f"\n{'═'*58}")
    print(f"  NOURA ENVIRONMENT SCORE — {result['product'].upper()}")
    print(f"{'═'*58}")
    print(f"  {icon} Score:      {result['environment_score']}/100")
    print(f"  Verdict:    {result['verdict']}")
    print(f"  Category:   {result['category']}")
    print(f"  Confidence: {result['confidence']}")

    if result["ecolabel_bonus"]:
        print(f"  EU Ecolabel bonus applied: +{result['ecolabel_bonus']}")

    print(f"\n  Signal breakdown:")
    for signal, score in result["signal_scores"].items():
        weight    = result["signal_weights"].get(signal, 0)
        conf      = result["signal_confidence"].get(signal, "LOW")
        conf_icon = conf_icons.get(conf, "○")
        stub_tag  = "  [stub]" if conf == "LOW" else ""
        print(f"    {conf_icon} {signal:<22} {score:>3}/100"
              f"  (weight: {weight*100:.0f}%){stub_tag}")

    print(f"\n  Weighted score: {result['weighted_score']}/100")
    print(f"  Final score:    {result['environment_score']}/100")

    if result["flags"]:
        print(f"\n  ⚠  Concerns ({result['flag_count']}):")
        for flag in result["flags"][:5]:
            print(f"     • {flag[:72]}")
        if result["flag_count"] > 5:
            print(f"     ... and {result['flag_count'] - 5} more")
    else:
        print(f"\n  ✅ No environmental concerns detected")

    if result["confidence"] == "LOW":
        print(f"\n  ℹ  LOW confidence — some signals awaiting data sources.")
        print(f"     Score reflects available evidence only.")

    print(f"{'═'*58}\n")


# ═══════════════════════════════════════════════════════════════
# SELF-TEST
# ═══════════════════════════════════════════════════════════════

if __name__ == "__main__":
    print("NOURA — Environment Engine Self-Test")
    print("Testing all four product categories\n")

    # ── Test 1: Clean cosmetics brand ─────────────────────────
    weleda = calculate_environment_score(
        product_name  = "Weleda Skin Food Original Ultra-Rich Cream",
        ingredients   = ["aqua", "glycerin", "lanolin", "tocopherol",
                         "beeswax", "sunflower seed oil", "citric acid",
                         "xanthan gum", "rosemary extract"],
        category      = "cosmetics",
        brand_name    = "Weleda",
        packaging_info= {"materials": ["glass", "cardboard"],
                         "recyclable": True, "refillable": False}
    )
    print_environment_report(weleda)

    # ── Test 2: Problematic cleaning product ──────────────────
    cleaner = calculate_environment_score(
        product_name  = "Generic Antibacterial Spray",
        ingredients   = ["water", "benzalkonium chloride",
                         "methylisothiazolinone", "fragrance",
                         "2-butoxyethanol", "edta"],
        category      = "cleaning",
        brand_name    = "Generic Brand",
        packaging_info= {"materials": ["pvc"], "recyclable": False}
    )
    print_environment_report(cleaner)

    # ── Test 3: Eco-certified cleaner ─────────────────────────
    ecover = calculate_environment_score(
        product_name  = "Ecover All Purpose Cleaner",
        ingredients   = ["water", "sodium bicarbonate", "citric acid",
                         "glycerin", "lactic acid"],
        category      = "cleaning",
        brand_name    = "Ecover",
        packaging_info= {"materials": ["hdpe"], "recyclable": True,
                         "concentrated": True}
    )
    print_environment_report(ecover)

    # ── Test 4: Food product (live Open Food Facts call) ──────
    food = calculate_environment_score(
        product_name  = "Innocent Smoothie",
        ingredients   = ["apple juice", "banana", "mango", "water"],
        category      = "food",
        barcode       = "5038862634106"   # Innocent smoothie barcode
    )
    print_environment_report(food)

    # ── Test 5: Baby product ──────────────────────────────────
    baby = calculate_environment_score(
        product_name  = "Johnson's Baby Lotion",
        ingredients   = ["aqua", "glycerin", "phenoxyethanol",
                         "fragrance", "benzophenone-3"],
        category      = "baby",
        brand_name    = "Johnson's"
    )
    print_environment_report(baby)

    # ── Sanity checks ──────────────────────────────────────────
    print("Sanity checks:")
    print("-" * 45)

    assert weleda["environment_score"] > cleaner["environment_score"], \
        "❌ FAIL: Weleda should score higher than generic antibacterial spray"
    print("  ✅ Weleda scores higher than generic antibacterial spray")

    assert ecover["environment_score"] > cleaner["environment_score"], \
        "❌ FAIL: Eco-certified Ecover should beat generic cleaner"
    print("  ✅ Ecover scores higher than generic cleaner")

    assert baby["environment_score"] < weleda["environment_score"], \
        "❌ FAIL: Baby product with benzophenone-3 should score lower than Weleda"
    print("  ✅ Baby product with persistent UV filter scores lower than Weleda")

    print("\n✅ ALL TESTS PASSED — Environment Engine operational")
    print("   Week 12 (partial): ECHA biodegradability, VOC, Open Food Facts live")
    print("   Remaining stubs: packaging (manual), pesticides (EFSA), palm oil (RSPO)")

Overwriting /content/noura_environment_engine.py


In [None]:
# ================================================================
# NOURA — Cell 32: Environment Engine Full Audit (Self-Contained)
# This cell loads everything itself — no dependencies on cell order.
# Just paste and run.
# ================================================================

import importlib.util, sys, requests, time

# ── Step 1: Load config ─────────────────────────────────────────
def _load_module(name, path):
    spec = importlib.util.spec_from_file_location(name, path)
    mod  = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    sys.modules[name] = mod
    return mod

try:
    cfg = _load_module("noura_environment_config",
                       "/content/noura_environment_config.py")
    print("✓ Config loaded")
except Exception as e:
    print(f"✗ Could not load config: {e}")
    print("  → Make sure /content/noura_environment_config.py exists")
    raise

# ── Step 2: Load engine ─────────────────────────────────────────
try:
    eng = _load_module("noura_environment_engine",
                       "/content/noura_environment_engine.py")
    print("✓ Engine loaded")
except Exception as e:
    print(f"✗ Could not load engine: {e}")
    print("  → Make sure /content/noura_environment_engine.py exists")
    raise

# ── Step 3: Pull functions into local scope ─────────────────────
calculate_environment_score = eng.calculate_environment_score
_score_packaging_stub       = eng._score_packaging_stub
PBT_FLAGS                   = eng.PBT_FLAGS
get_aquatic_hazard          = cfg.get_aquatic_hazard

print("✓ Functions ready")
print()

# ================================================================
PASS = "✅ PASS"
FAIL = "❌ FAIL"
results = []

def check(label, condition, detail="", severity="BUG"):
    status = PASS if condition else FAIL
    results.append((status, label, detail, severity))
    icon = "✅" if condition else "❌"
    print(f"  {icon}  {label}")
    if detail:
        print(f"       → {detail}")
    return condition

print("=" * 65)
print("  NOURA ENVIRONMENT ENGINE — AUDIT REPORT")
print("=" * 65)


# ── AUDIT 1: Duplicate flags ────────────────────────────────────
print()
print("AUDIT 1: Duplicate flag bug (benzophenone-3)")
print("-" * 65)

baby_test = calculate_environment_score(
    product_name="Baby Audit Test",
    ingredients=["aqua","glycerin","phenoxyethanol","fragrance","benzophenone-3"],
    category="baby",
    brand_name="TestBrand"
)

flags = baby_test.get("flags", [])
# A flag is a dict — build a dedup key from ingredient + first 40 chars of reason
flag_keys = []
for f in flags:
    if isinstance(f, dict):
        key = f.get("ingredient","") + f.get("reason","")[:40]
    else:
        key = str(f)[:60]
    flag_keys.append(key)

has_dupes = len(flag_keys) != len(set(flag_keys))
dupe_count = len(flag_keys) - len(set(flag_keys))

check(
    "No duplicate flags for same ingredient",
    not has_dupes,
    f"Total flags: {len(flags)}  |  Duplicates: {dupe_count}  |  "
    f"benzophenone-3 appears in both biodegradability + cumulative_exposure signals",
    "BUG"
)


# ── AUDIT 2: Ecolabel bonus — certification washing ─────────────
print()
print("AUDIT 2: Ecolabel bonus — certification washing risk")
print("-" * 65)

toxic_ecover = calculate_environment_score(
    product_name="Hypothetical Toxic Ecover Product",
    ingredients=["benzalkonium chloride","methylisothiazolinone","triclosan","edta","fragrance"],
    category="cleaning",
    brand_name="Ecover",
    packaging_info={"materials": ["pvc"], "recyclable": False}
)

weighted = toxic_ecover.get("weighted_score", 0)
final    = toxic_ecover.get("environment_score", 0)
bonus    = toxic_ecover.get("ecolabel_bonus", 0)

check(
    "Ecolabel bonus not applied when weighted score < 50",
    not (bonus > 0 and weighted < 50),
    f"Weighted (pre-bonus): {weighted}/100  |  Ecolabel bonus: +{bonus}  |  Final: {final}/100  |  "
    f"{'⚠ Brand bonus overrides toxic formula' if bonus > 0 and weighted < 50 else 'OK'}",
    "BUG"
)


# ── AUDIT 3: ECHA/PBT database coverage ─────────────────────────
print()
print("AUDIT 3: ECHA/PBT database coverage for common ingredients")
print("-" * 65)

common_ingredients = [
    "sodium lauryl sulfate",
    "sodium laureth sulfate",
    "titanium dioxide",
    "phenoxyethanol",
    "cyclopentasiloxane",
    "cocamidopropyl betaine",
    "petrolatum",
    "mineral oil",
    "palm oil",
    "microbeads",
    "propylene glycol",
    "butylated hydroxytoluene",
    "peg-100 stearate",
    "sodium benzoate",
    "cetearyl alcohol",
    "dimethicone",
]

missing = []
found   = []
for ing in common_ingredients:
    hazard = get_aquatic_hazard(ing)
    pbt    = PBT_FLAGS.get(ing.lower(), {})
    if hazard.get("deduction", 0) != 0 or pbt.get("pbt"):
        found.append(ing)
    else:
        missing.append(ing)

pct = round(len(found) / len(common_ingredients) * 100)
check(
    "ECHA/PBT covers ≥50% of 16 most common ingredients",
    pct >= 50,
    f"Coverage: {pct}%  ({len(found)}/{len(common_ingredients)} found)\n"
    f"       Found:   {found if found else 'none'}\n"
    f"       Missing: {missing[:8]}{'...' if len(missing)>8 else ''}",
    "GAP"
)


# ── AUDIT 4: Packaging score floor ──────────────────────────────
print()
print("AUDIT 4: Packaging score floor at 0")
print("-" * 65)

worst_pkg = _score_packaging_stub({"materials": ["pvc"], "recyclable": False})
pkg_score = worst_pkg.get("score", 0)

check(
    "Worst-case packaging scores above 0",
    pkg_score > 0,
    f"PVC + non-recyclable = {pkg_score}/100  |  "
    f"{'Score of 0 distorts weighted aggregation — floor at 5 recommended' if pkg_score == 0 else 'OK'}",
    "MINOR"
)


# ── AUDIT 5: Open Food Facts API ────────────────────────────────
print()
print("AUDIT 5: Open Food Facts API connectivity")
print("-" * 65)

off_reachable = False
eco_grade     = None
eco_score_val = None

try:
    r = requests.get(
        "https://world.openfoodfacts.org/api/v2/product/5038862634106",
        timeout=10,
        headers={"User-Agent": "NOURA-Audit/1.0 (contact@noura.ai)"}
    )
    if r.status_code == 200:
        data    = r.json()
        product = data.get("product", {})
        eco_grade     = product.get("ecoscore_grade", None)
        eco_score_val = product.get("ecoscore_score", None)
        off_reachable = True
        check(
            "Open Food Facts API reachable",
            True,
            f"Barcode 5011476110016 (Innocent Smoothie)  |  "
            f"Eco-grade: {eco_grade}  |  Eco-score: {eco_score_val}",
            "INTEGRATION"
        )
    else:
        check("Open Food Facts API reachable", False,
              f"HTTP {r.status_code}", "INTEGRATION")
except Exception as e:
    check("Open Food Facts API reachable", False, str(e)[:100], "INTEGRATION")

# Sub-test: does live eco data flow into the food score?
if off_reachable:
    food_test = calculate_environment_score(
        product_name="Innocent Smoothie",
        ingredients=["apple juice","mango","banana","passion fruit"],
        category="food",
        barcode="5038862634106"
    )
    cf_signal = food_test.get("signal_breakdown", {}).get("carbon_footprint", {})
    cf_score  = cf_signal.get("score", 65)
    cf_conf   = cf_signal.get("confidence", "LOW")

    check(
        "Live OFF eco-score flows into food carbon_footprint signal",
        cf_score != 65 or cf_conf != "LOW",
        f"carbon_footprint score: {cf_score}/100  confidence: {cf_conf}  |  "
        f"{'Returns stub 65 — live data not flowing into score' if cf_score == 65 else 'Live data flowing correctly ✓'}",
        "INTEGRATION"
    )


# ── AUDIT 6: Real formula coverage ──────────────────────────────
print()
print("AUDIT 6: Real-world formula coverage (CeraVe Moisturizing Cream)")
print("-" * 65)

cerave = [
    "glycerin","cetearyl alcohol","niacinamide","petrolatum",
    "cetyl alcohol","peg-40 castor oil","dimethicone","phenoxyethanol",
    "sodium lauroyl lactylate","ceramide np","cholesterol","sodium hyaluronate"
]

live_ings = []
stub_ings = []
for ing in cerave:
    hazard = get_aquatic_hazard(ing)
    pbt    = PBT_FLAGS.get(ing.lower(), {})
    if hazard.get("deduction", 0) != 0 or pbt.get("pbt"):
        live_ings.append(ing)
    else:
        stub_ings.append(ing)

live_pct = round(len(live_ings) / len(cerave) * 100)
check(
    "Biodegradability engine covers ≥40% of CeraVe formula",
    live_pct >= 40,
    f"Live data: {live_pct}%  ({len(live_ings)}/{len(cerave)} ingredients)\n"
    f"       Has data: {live_ings if live_ings else 'none'}\n"
    f"       Missing:  {stub_ings[:6]}{'...' if len(stub_ings)>6 else ''}",
    "GAP"
)


# ── AUDIT 7: Score calibration ───────────────────────────────────
print()
print("AUDIT 7: Score calibration sanity checks")
print("-" * 65)

cal_tests = [
    dict(
        label="Clean natural cosmetic scores ≥60",
        args=dict(product_name="Natural Cream",
                  ingredients=["aloe vera","shea butter","jojoba oil","rose hip oil"],
                  category="cosmetics", brand_name="Unknown"),
        check_fn=lambda r: r["environment_score"] >= 60,
        detail_fn=lambda r: f"Score: {r['environment_score']}/100"
    ),
    dict(
        label="Worst-case cleaning product scores ≤45",
        args=dict(product_name="Toxic Cleaner",
                  ingredients=["benzalkonium chloride","triclosan","methylisothiazolinone",
                               "formaldehyde","fragrance"],
                  category="cleaning",
                  packaging_info={"materials":["pvc"],"recyclable":False}),
        check_fn=lambda r: r["environment_score"] <= 45,
        detail_fn=lambda r: f"Score: {r['environment_score']}/100"
    ),
    dict(
        label="Clean baby product scores ≥60",
        args=dict(product_name="Clean Baby",
                  ingredients=["aqua","glycerin","sunflower seed oil","shea butter"],
                  category="baby", brand_name="Unknown"),
        check_fn=lambda r: r["environment_score"] >= 60,
        detail_fn=lambda r: f"Score: {r['environment_score']}/100"
    ),
    dict(
        label="Cosmetics with known PBT ingredient scores ≤80",
        args=dict(product_name="PBT Cosmetic",
                  ingredients=["aqua","glycerin","triclosan","synthetic musk","microplastics"],
                  category="cosmetics", brand_name="Unknown"),
        check_fn=lambda r: r["environment_score"] <= 80,
        detail_fn=lambda r: f"Score: {r['environment_score']}/100"
    ),
]

for t in cal_tests:
    try:
        res = calculate_environment_score(**t["args"])
        ok  = t["check_fn"](res)
        check(t["label"], ok, t["detail_fn"](res), "CALIBRATION")
    except Exception as e:
        check(t["label"], False, f"Error: {str(e)[:60]}", "CALIBRATION")


# ── FINAL SUMMARY ───────────────────────────────────────────────
print()
print("=" * 65)
print("  AUDIT SUMMARY")
print("=" * 65)

passes = sum(1 for r in results if r[0] == PASS)
fails  = sum(1 for r in results if r[0] == FAIL)

print(f"  Passed:  {passes}/{len(results)}")
print(f"  Failed:  {fails}/{len(results)}")
print()

by_sev = {}
for status, label, detail, sev in results:
    if status == FAIL:
        by_sev.setdefault(sev, []).append(label)

order = ["BUG","GAP","INTEGRATION","CALIBRATION","MINOR"]
icons = {"BUG":"🔴","GAP":"🟡","INTEGRATION":"🔵","CALIBRATION":"🟠","MINOR":"⚪"}
for sev in order:
    if sev in by_sev:
        for label in by_sev[sev]:
            print(f"  {icons[sev]} [{sev}] {label}")

if fails == 0:
    print("  🟢 All checks passed — engine is solid.")

print()
print("  Stub coverage summary:")
print("  ┌─────────────┬──────────────┬──────────────┐")
print("  │ Category    │ Live signals │ Stub signals │")
print("  ├─────────────┼──────────────┼──────────────┤")
print("  │ Cosmetics   │     40%      │     60%      │")
print("  │ Food        │   70%*       │     30%      │")
print("  │ Cleaning    │     80%      │     20%      │")
print("  │ Baby        │     50%      │     50%      │")
print("  └─────────────┴──────────────┴──────────────┘")
print("  * Food live % assumes OFF API reachable (Colab: yes)")
print("=" * 65)

✓ NOURA Environment Config v2 loaded
  ECHA database:    90 ingredients
  PBT flags:        41 entries
  Ecolabel brands:  13 brands
✓ Config loaded
✓ Engine loaded
✓ Functions ready

  NOURA ENVIRONMENT ENGINE — AUDIT REPORT

AUDIT 1: Duplicate flag bug (benzophenone-3)
-----------------------------------------------------------------

  🌿 Running environment analysis for: Baby Audit Test
     Category: baby | Ingredients: 5
     Biodegradability... score: 75
     Packaging... score: 65 (LOW confidence)
     Supply chain... score: 65 (stub)
     Cumulative exposure... score: 80
  ✅  No duplicate flags for same ingredient
       → Total flags: 3  |  Duplicates: 0  |  benzophenone-3 appears in both biodegradability + cumulative_exposure signals

AUDIT 2: Ecolabel bonus — certification washing risk
-----------------------------------------------------------------

  🌿 Running environment analysis for: Hypothetical Toxic Ecover Product
     Category: cleaning | Ingredients: 5
     Aquatic

In [None]:
# NOURA Animal Welfare Config v2 - Cell 33

config_code = """
ANIMAL_WELFARE_SIGNALS = {
    "cosmetics": {
        "cruelty_free":        {"weight": 0.40},
        "animal_ingredients":  {"weight": 0.25},
        "vegan_certified":     {"weight": 0.25},
        "china_testing_risk":  {"weight": 0.10},
    },
    "cleaning": {
        "cruelty_free":        {"weight": 0.50},
        "animal_ingredients":  {"weight": 0.30},
        "brand_ethics":        {"weight": 0.20},
    },
    "food": {
        "welfare_standards":   {"weight": 0.40},
        "animal_content":      {"weight": 0.30},
        "welfare_certs":       {"weight": 0.20},
        "supply_chain":        {"weight": 0.10},
    },
    "baby": {
        "cruelty_free":        {"weight": 0.40},
        "animal_ingredients":  {"weight": 0.35},
        "brand_ethics":        {"weight": 0.25},
    },
}

LEAPING_BUNNY_BRANDS = {
    "the ordinary":         {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "paulas choice":        {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "pai skincare":         {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "drunk elephant":       {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "tatcha":               {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "sunday riley":         {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "herbivore botanicals": {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "acure":                {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "yes to":               {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "dermalogica":          {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "the inkey list":       {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "medik8":               {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "cerave":               {"certified": False, "score": 50,  "certifier": None, "note": "Not certified - owned by LOreal"},
    "neutrogena":           {"certified": False, "score": 40,  "certifier": None, "note": "Not certified - JJ brand China presence"},
    "olay":                 {"certified": False, "score": 40,  "certifier": None, "note": "Not certified - PG brand"},
    "briogeo":              {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "not your mothers":     {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "giovanni":             {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "shea moisture":        {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "mielle organics":      {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "hask":                 {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "weleda":               {"certified": True,  "score": 100, "certifier": "Leaping Bunny + NATRUE"},
    "dr bronner":           {"certified": True,  "score": 100, "certifier": "Leaping Bunny + B Corp"},
    "bulldog skincare":     {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "faith in nature":      {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "green people":         {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "dove":                 {"certified": False, "score": 30,  "certifier": None, "note": "Unilever - China market presence"},
    "nivea":                {"certified": False, "score": 30,  "certifier": None, "note": "Beiersdorf - China market presence"},
    "elf cosmetics":        {"certified": True,  "score": 100, "certifier": "Leaping Bunny + PETA"},
    "nyx professional":     {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "milani":               {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "wet n wild":           {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "physicians formula":   {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "urban decay":          {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "too faced":            {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "rare beauty":          {"certified": True,  "score": 100, "certifier": "PETA"},
    "kylie cosmetics":      {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "ilia beauty":          {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "benefit":              {"certified": False, "score": 20,  "certifier": None, "note": "LVMH - China market presence"},
    "mac":                  {"certified": False, "score": 20,  "certifier": None, "note": "Estee Lauder - China market presence"},
    "nars":                 {"certified": False, "score": 20,  "certifier": None, "note": "Shiseido - China market presence"},
    "charlotte tilbury":    {"certified": False, "score": 20,  "certifier": None, "note": "China market presence"},
    "fenty beauty":         {"certified": False, "score": 30,  "certifier": None, "note": "LVMH - China market presence"},
    "clean reserve":        {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "phlur":                {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "chanel":               {"certified": False, "score": 20,  "certifier": None, "note": "China market presence"},
    "dior":                 {"certified": False, "score": 20,  "certifier": None, "note": "LVMH - China market presence"},
    "method":               {"certified": True,  "score": 100, "certifier": "Leaping Bunny + B Corp"},
    "seventh generation":   {"certified": True,  "score": 100, "certifier": "Leaping Bunny + B Corp"},
    "ecover":               {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "mrs meyers":           {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "attitude":             {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
    "bio-d":                {"certified": True,  "score": 100, "certifier": "Leaping Bunny + Vegan Society"},
    "astonish":             {"certified": True,  "score": 100, "certifier": "Leaping Bunny"},
}

VEGAN_CERTIFIED_BRANDS = {
    "elf cosmetics":        {"vegan_cert": True,  "certifier": "PETA Vegan",    "score": 100},
    "pacifica beauty":      {"vegan_cert": True,  "certifier": "PETA Vegan",    "score": 100},
    "milk makeup":          {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "kat von d beauty":     {"vegan_cert": True,  "certifier": "PETA Vegan",    "score": 100},
    "urban decay":          {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "medik8":               {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "the inkey list":       {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "bybi beauty":          {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "inika organic":        {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "green people":         {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "faith in nature":      {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "method":               {"vegan_cert": True,  "certifier": "PETA Vegan",    "score": 100},
    "attitude":             {"vegan_cert": True,  "certifier": "PETA Vegan",    "score": 100},
    "bio-d":                {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "alpro":                {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "oatly":                {"vegan_cert": True,  "certifier": "Vegan Society", "score": 100},
    "weleda":               {"vegan_cert": False, "certifier": None, "score": 60,
                             "note": "Contains beeswax and lanolin - no blanket vegan cert"},
    "lush":                 {"vegan_cert": False, "certifier": None, "score": 65,
                             "note": "Mix of vegan and non-vegan products"},
}

CHINA_SELLING_BRANDS = {
    "mac":               {"sells_china": True, "penalty": -50, "note": "Estee Lauder - China physical retail"},
    "nars":              {"sells_china": True, "penalty": -45, "note": "Shiseido - China physical retail"},
    "benefit":           {"sells_china": True, "penalty": -45, "note": "LVMH - China physical retail"},
    "charlotte tilbury": {"sells_china": True, "penalty": -45, "note": "China physical retail"},
    "chanel":            {"sells_china": True, "penalty": -50, "note": "China physical retail"},
    "dior":              {"sells_china": True, "penalty": -50, "note": "LVMH - China physical retail"},
    "ysl beauty":        {"sells_china": True, "penalty": -45, "note": "LOreal - China physical retail"},
    "lancome":           {"sells_china": True, "penalty": -45, "note": "LOreal - China physical retail"},
    "estee lauder":      {"sells_china": True, "penalty": -45, "note": "China physical retail"},
    "clinique":          {"sells_china": True, "penalty": -40, "note": "Estee Lauder - China physical retail"},
    "bobbi brown":       {"sells_china": True, "penalty": -40, "note": "Estee Lauder - China physical retail"},
    "shiseido":          {"sells_china": True, "penalty": -45, "note": "China physical retail"},
    "sk-ii":             {"sells_china": True, "penalty": -45, "note": "PG - China physical retail"},
    "la mer":            {"sells_china": True, "penalty": -45, "note": "Estee Lauder - China physical retail"},
    "la prairie":        {"sells_china": True, "penalty": -45, "note": "China physical retail"},
    "loreal":            {"sells_china": True, "penalty": -40, "note": "China physical retail - mass market"},
    "maybelline":        {"sells_china": True, "penalty": -40, "note": "LOreal - China physical retail"},
    "garnier":           {"sells_china": True, "penalty": -35, "note": "LOreal - China physical retail"},
    "fenty beauty":      {"sells_china": True, "penalty": -35, "note": "LVMH - China market presence"},
    "dove":              {"sells_china": True, "penalty": -30, "note": "Unilever - China market presence"},
    "nivea":             {"sells_china": True, "penalty": -30, "note": "Beiersdorf - China market presence"},
    "neutrogena":        {"sells_china": True, "penalty": -30, "note": "JJ - China market presence"},
    "olay":              {"sells_china": True, "penalty": -30, "note": "PG - China market presence"},
}

ANIMAL_DERIVED_INGREDIENTS = {
    "carmine":             {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Crushed cochineal insects",
                            "alternatives": "Beet juice, lycopene, paprika extract"},
    "ci 75470":            {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Carmine - crushed cochineal insects",
                            "alternatives": "Synthetic red pigments"},
    "cochineal":           {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Scale insects crushed for red pigment",
                            "alternatives": "Plant-based dyes"},
    "mink oil":            {"severity": "HIGH",   "vegan": False, "deduction": -35,
                            "source": "Fat from mink pelts - fur farming byproduct",
                            "alternatives": "Jojoba oil, plant-derived squalane"},
    "emu oil":             {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Fat from emu - slaughter byproduct",
                            "alternatives": "Plant oils"},
    "collagen":            {"severity": "HIGH",   "vegan": False, "deduction": -25,
                            "source": "Connective tissue from cows pigs or fish",
                            "alternatives": "Plant-based peptides, vegan collagen boosters"},
    "hydrolyzed collagen": {"severity": "HIGH",   "vegan": False, "deduction": -25,
                            "source": "Hydrolyzed animal connective tissue",
                            "alternatives": "Hydrolyzed wheat or soy protein"},
    "elastin":             {"severity": "HIGH",   "vegan": False, "deduction": -25,
                            "source": "Protein from animal ligaments and aorta tissue",
                            "alternatives": "Plant proteins, bakuchiol"},
    "keratin":             {"severity": "HIGH",   "vegan": False, "deduction": -25,
                            "source": "Ground horn hoof feathers - slaughter byproduct",
                            "alternatives": "Hydrolyzed wheat protein, oat protein"},
    "shellac":             {"severity": "HIGH",   "vegan": False, "deduction": -25,
                            "source": "Secretion from lac beetles",
                            "alternatives": "Zein (corn protein), plant-based resins"},
    "squalene":            {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Shark liver oil. Note: squalane (plant) is different.",
                            "alternatives": "Plant-derived squalane (olive or sugarcane)"},
    "guanine":             {"severity": "HIGH",   "vegan": False, "deduction": -20,
                            "source": "Fish scales - used for shimmer in cosmetics",
                            "alternatives": "Synthetic mica, bismuth oxychloride"},
    "ambergris":           {"severity": "HIGH",   "vegan": False, "deduction": -35,
                            "source": "Whale intestinal secretion",
                            "alternatives": "Synthetic ambergris (Ambroxide)"},
    "castoreum":           {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Secretion from beaver castor sacs",
                            "alternatives": "Synthetic raspberry and vanilla fragrance"},
    "civet":               {"severity": "HIGH",   "vegan": False, "deduction": -35,
                            "source": "Secretion from civet cats - cruelly farmed for perfume",
                            "alternatives": "Synthetic civettone"},
    "casein":              {"severity": "HIGH",   "vegan": False, "deduction": -20,
                            "source": "Milk protein - dairy-derived",
                            "alternatives": "Soy protein, oat protein"},
    "whey":                {"severity": "HIGH",   "vegan": False, "deduction": -20,
                            "source": "Dairy byproduct",
                            "alternatives": "Pea protein, rice protein"},
    "tallow":              {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Rendered beef or mutton fat - slaughter byproduct",
                            "alternatives": "Plant-derived fatty acids"},
    "sodium tallowate":    {"severity": "HIGH",   "vegan": False, "deduction": -30,
                            "source": "Soap from tallow (animal fat)",
                            "alternatives": "Sodium palmate, sodium cocoate"},
    "lanolin":             {"severity": "MEDIUM", "vegan": False, "deduction": -15,
                            "source": "Wool grease from sheep - shearing byproduct",
                            "alternatives": "Plant waxes, synthetic emollients"},
    "lanolin alcohol":     {"severity": "MEDIUM", "vegan": False, "deduction": -15,
                            "source": "Derived from lanolin (sheep wool wax)",
                            "alternatives": "Cetyl alcohol, stearyl alcohol"},
    "beeswax":             {"severity": "MEDIUM", "vegan": False, "deduction": -12,
                            "source": "Bee product - harvesting disrupts hive",
                            "alternatives": "Candelilla wax, carnauba wax, rice bran wax"},
    "cera alba":           {"severity": "MEDIUM", "vegan": False, "deduction": -12,
                            "source": "Beeswax (INCI name)",
                            "alternatives": "Candelilla wax, carnauba wax"},
    "honey":               {"severity": "MEDIUM", "vegan": False, "deduction": -10,
                            "source": "Bee product - non-vegan by Vegan Society standards",
                            "alternatives": "Plant-based humectants"},
    "mel":                 {"severity": "MEDIUM", "vegan": False, "deduction": -10,
                            "source": "Honey (INCI name)",
                            "alternatives": "Plant-based humectants"},
    "royal jelly":         {"severity": "MEDIUM", "vegan": False, "deduction": -10,
                            "source": "Secretion produced by worker bees",
                            "alternatives": "Plant-based actives"},
    "propolis":            {"severity": "MEDIUM", "vegan": False, "deduction": -10,
                            "source": "Resinous mixture produced by bees",
                            "alternatives": "Plant extracts with antimicrobial properties"},
    "silk":                {"severity": "MEDIUM", "vegan": False, "deduction": -15,
                            "source": "Silkworm cocoons - silkworms killed in production",
                            "alternatives": "Vegan silk proteins (plant fermentation)"},
    "hydrolyzed silk":     {"severity": "MEDIUM", "vegan": False, "deduction": -15,
                            "source": "Silk protein - silkworm derived",
                            "alternatives": "Hydrolyzed wheat protein"},
    "serica":              {"severity": "MEDIUM", "vegan": False, "deduction": -15,
                            "source": "Silk - Bombyx Mori (INCI name)",
                            "alternatives": "Vegan protein alternatives"},
    "cholesterol":         {"severity": "MEDIUM", "vegan": False, "deduction": -10,
                            "source": "Usually animal-derived from lanolin or wool",
                            "alternatives": "Plant sterols, phytosterols"},
    "stearic acid":        {"severity": "LOW",    "vegan": None,  "deduction": -5,
                            "source": "Plant (palm, soy) or animal (tallow) - verify with brand",
                            "alternatives": "Plant-certified stearic acid"},
    "glycerin":            {"severity": "LOW",    "vegan": None,  "deduction": -3,
                            "source": "Plant (palm, soy, coconut) or animal (tallow) derived",
                            "alternatives": "Plant-certified glycerin (widely available)"},
    "hyaluronic acid":     {"severity": "LOW",    "vegan": None,  "deduction": -2,
                            "source": "Traditionally from rooster combs; now mostly biofermentation",
                            "alternatives": "Verify biofermentation source"},
    "sodium hyaluronate":  {"severity": "LOW",    "vegan": None,  "deduction": -2,
                            "source": "Salt form of hyaluronic acid - mostly biofermentation now",
                            "alternatives": "Confirm biofermentation source with supplier"},
    "squalane":            {"severity": "LOW",    "vegan": None,  "deduction": -3,
                            "source": "Shark-derived or plant (olive, sugarcane) - verify",
                            "alternatives": "Plant-derived squalane (Biossance, Sophim)"},
    "retinol":             {"severity": "LOW",    "vegan": None,  "deduction": -3,
                            "source": "Animal-derived (fish liver oil) or synthetic - verify",
                            "alternatives": "Synthetic retinol, bakuchiol"},
    "oleic acid":          {"severity": "LOW",    "vegan": None,  "deduction": -3,
                            "source": "Plant (olive, sunflower) or animal (tallow) - verify",
                            "alternatives": "Plant-certified oleic acid"},
    "caprylic acid":       {"severity": "LOW",    "vegan": None,  "deduction": -2,
                            "source": "Usually coconut or palm but can be dairy-derived",
                            "alternatives": "Coconut-derived caprylic acid"},
}

FOOD_WELFARE_CERTIFICATIONS = {
    "rspca assured":           {"score": 95, "confidence": "HIGH", "audited": True,
                                "note": "RSPCA Assured - independently audited UK welfare standard"},
    "certified humane":        {"score": 90, "confidence": "HIGH", "audited": True,
                                "note": "Certified Humane - HFAC audited (US/international)"},
    "humane certified":        {"score": 90, "confidence": "HIGH", "audited": True,
                                "note": "Humane Certified - HFAC audited"},
    "animal welfare approved": {"score": 95, "confidence": "HIGH", "audited": True,
                                "note": "Animal Welfare Approved - American Humane"},
    "biodynamic":              {"score": 90, "confidence": "HIGH", "audited": True,
                                "note": "Demeter biodynamic - independently audited"},
    "vegan":                   {"score": 100, "confidence": "HIGH", "audited": True,
                                "note": "Vegan - no animal products; welfare N/A"},
    "organic":                 {"score": 75, "confidence": "MEDIUM", "audited": False,
                                "note": "Organic - basic welfare requirements, not a welfare audit"},
    "eu organic":              {"score": 78, "confidence": "MEDIUM", "audited": False,
                                "note": "EU Organic - some welfare provisions"},
    "usda organic":            {"score": 73, "confidence": "MEDIUM", "audited": False,
                                "note": "USDA Organic - some welfare provisions"},
    "free range":              {"score": 68, "confidence": "MEDIUM", "audited": False,
                                "note": "Free range - outdoor access required; not welfare-audited"},
    "pasture raised":          {"score": 80, "confidence": "MEDIUM", "audited": False,
                                "note": "Pasture raised - better than free range; not audited"},
    "grass fed":               {"score": 72, "confidence": "MEDIUM", "audited": False,
                                "note": "Grass-fed - diet claim; not a welfare scheme"},
    "cage free":               {"score": 58, "confidence": "MEDIUM", "audited": False,
                                "note": "Cage free - no battery cages; not welfare-audited"},
    "plant based":             {"score": 95, "confidence": "MEDIUM", "audited": False,
                                "note": "Plant-based - no animal ingredients; welfare N/A"},
    "plant-based":             {"score": 95, "confidence": "MEDIUM", "audited": False,
                                "note": "Plant-based - no animal ingredients; welfare N/A"},
}


def get_animal_welfare_signals(category):
    return ANIMAL_WELFARE_SIGNALS.get(category, ANIMAL_WELFARE_SIGNALS["cosmetics"])


def check_china_policy(brand):
    name = brand.lower().strip()
    for key, val in CHINA_SELLING_BRANDS.items():
        if key in name or name in key:
            return {
                "brand": brand, "sells_china": True,
                "penalty": val["penalty"], "note": val.get("note", ""),
                "csar_note": "Post-2021 CSAR: general cosmetics may qualify for exemptions. Special-use products still require testing."
            }
    return {"brand": brand, "sells_china": False, "penalty": 0}


def check_cruelty_free(brand):
    name = brand.lower().strip()
    for key, val in LEAPING_BUNNY_BRANDS.items():
        if key in name or name in key:
            result = {"brand": brand, **val}
            china  = check_china_policy(brand)
            if china["sells_china"] and val.get("certified"):
                result["china_conflict"] = True
                result["china_note"] = brand + " has Leaping Bunny cert but sells in mainland China - verify independently."
            return result
    china = check_china_policy(brand)
    if china["sells_china"]:
        return {"brand": brand, "certified": False,
                "score": max(0, 50 + china["penalty"]),
                "certifier": None, "note": china.get("note", "China testing risk")}
    return {"brand": brand, "certified": False, "score": 50,
            "certifier": None, "note": "No cruelty-free certification found"}


def check_vegan_certified(brand):
    name = brand.lower().strip()
    for key, val in VEGAN_CERTIFIED_BRANDS.items():
        if key in name or name in key:
            return {"brand": brand, **val}
    return {"brand": brand, "vegan_cert": False, "certifier": None,
            "score": 50, "note": "No vegan certification found"}


def scan_animal_ingredients(ingredients):
    total_deduction = 0
    flags, found_high, found_medium, found_low = [], [], [], []
    for ing in ingredients:
        name = ing.lower().strip()
        for key, val in ANIMAL_DERIVED_INGREDIENTS.items():
            if key in name or name in key:
                total_deduction += val["deduction"]
                flags.append({
                    "ingredient":   ing,
                    "severity":     val["severity"],
                    "source":       val["source"],
                    "deduction":    val["deduction"],
                    "alternatives": val.get("alternatives"),
                })
                sev = val["severity"]
                if sev == "HIGH":     found_high.append(ing)
                elif sev == "MEDIUM": found_medium.append(ing)
                else:                 found_low.append(ing)
                break
    score    = max(0, min(100, 100 + total_deduction))
    is_vegan = len(found_high) == 0 and len(found_medium) == 0
    return {
        "score": score, "total_deduction": total_deduction, "flags": flags,
        "found_high": found_high, "found_medium": found_medium, "found_low": found_low,
        "is_vegan": is_vegan,
        "vegan_status": (
            "VEGAN"        if is_vegan and len(found_low) == 0 else
            "LIKELY VEGAN" if is_vegan else
            "NOT VEGAN"
        ),
        "confidence": "HIGH" if len(ingredients) >= 3 else "LOW",
    }


def check_food_welfare(product_name, brand_name=""):
    text = (product_name + " " + brand_name).lower()
    found_certs, best_score, best_conf = [], 50, "LOW"
    for cert_key, cert_val in FOOD_WELFARE_CERTIFICATIONS.items():
        if cert_key in text:
            found_certs.append({
                "certification": cert_key,
                "score":         cert_val["score"],
                "confidence":    cert_val["confidence"],
                "audited":       cert_val.get("audited", False),
                "note":          cert_val["note"],
            })
            if cert_val["score"] > best_score:
                best_score = cert_val["score"]
                best_conf  = cert_val["confidence"]
    return {
        "certifications_found": found_certs,
        "score":      best_score,
        "confidence": best_conf if found_certs else "LOW",
        "note": (
            "Found: " + ", ".join(c["certification"] for c in found_certs)
            if found_certs else
            "No welfare certifications detected - neutral score"
        )
    }


print("Config loaded")
print("  Leaping Bunny brands: " + str(len(LEAPING_BUNNY_BRANDS)))
print("  Vegan brands:         " + str(len(VEGAN_CERTIFIED_BRANDS)))
print("  China risk brands:    " + str(len(CHINA_SELLING_BRANDS)))
print("  Animal ingredients:   " + str(len(ANIMAL_DERIVED_INGREDIENTS)))
print("  Food welfare certs:   " + str(len(FOOD_WELFARE_CERTIFICATIONS)))
"""

with open('/content/noura_animal_welfare_config.py', 'w', encoding='utf-8') as f:
    f.write(config_code)

exec(config_code)
print("Cell 33 complete - config written and loaded")

Config loaded
  Leaping Bunny brands: 54
  Vegan brands:         18
  China risk brands:    23
  Animal ingredients:   39
  Food welfare certs:   15
Cell 33 complete - config written and loaded


In [None]:
engine_code = """
import sys
from datetime import datetime

sys.path.insert(0, '/content')

from noura_animal_welfare_config import (
    ANIMAL_WELFARE_SIGNALS,
    get_animal_welfare_signals,
    check_cruelty_free,
    check_vegan_certified,
    check_china_policy,
    scan_animal_ingredients,
    check_food_welfare,
)


def _score_cruelty_free(brand_name):
    if not brand_name:
        return {"signal": "cruelty_free", "score": 50, "confidence": "LOW",
                "flags": [], "note": "No brand name provided - neutral score"}
    data  = check_cruelty_free(brand_name)
    score = data.get("score", 50)
    flags = []
    if data.get("certified"):
        note = brand_name + " certified: " + str(data.get("certifier", "unknown"))
        if data.get("china_conflict"):
            flags.append("China conflict: " + data.get("china_note", ""))
            score = max(score - 20, 30)
    else:
        note = data.get("note", "Not certified")
        if score < 40:
            flags.append(brand_name + ": " + data.get("note", "China testing risk"))
    return {
        "signal":    "cruelty_free",
        "score":     score,
        "confidence": "HIGH",
        "certified": data.get("certified", False),
        "certifier": data.get("certifier"),
        "flags":     flags,
        "note":      note,
    }


def _score_vegan_certification(brand_name, ingredient_scan):
    flags       = []
    brand_data  = check_vegan_certified(brand_name) if brand_name else {}
    brand_cert  = brand_data.get("vegan_cert", False)
    is_vegan    = ingredient_scan.get("is_vegan", True)
    vegan_status= ingredient_scan.get("vegan_status", "UNKNOWN")

    if brand_cert and is_vegan:
        score = 100
        note  = "Certified vegan + clean ingredient scan (" + vegan_status + ")"
    elif brand_cert and not is_vegan:
        score = 60
        note  = "Brand vegan cert but animal-derived ingredients found in formula"
        flags.append("Mismatch: brand vegan cert conflicts with formula ingredients")
    elif not brand_cert and is_vegan:
        score = 75
        note  = "No brand vegan cert - ingredient scan shows " + vegan_status
    else:
        score = max(0, ingredient_scan.get("score", 50))
        note  = "No vegan cert - " + vegan_status

    if ingredient_scan.get("found_high"):
        flags.append("Animal-derived HIGH: " + ", ".join(ingredient_scan["found_high"][:3]))
    if ingredient_scan.get("found_medium"):
        flags.append("Animal-derived MEDIUM: " + ", ".join(ingredient_scan["found_medium"][:3]))

    return {
        "signal":       "vegan_certified",
        "score":        score,
        "confidence":   "HIGH" if brand_name else "MEDIUM",
        "brand_cert":   brand_cert,
        "certifier":    brand_data.get("certifier"),
        "vegan_status": vegan_status,
        "flags":        flags,
        "note":         note,
    }


def _score_china_testing_risk(brand_name):
    if not brand_name:
        return {"signal": "china_testing_risk", "score": 50,
                "confidence": "LOW", "flags": [],
                "note": "No brand provided - China policy unknown"}
    data  = check_china_policy(brand_name)
    flags = []
    if data["sells_china"]:
        score = max(0, 50 + data["penalty"])
        flags.append(brand_name + " - China market presence. " + data.get("csar_note", ""))
        note  = data.get("note", "China market presence")
        conf  = "HIGH"
    else:
        lb    = check_cruelty_free(brand_name)
        score = 95 if lb.get("certified") else 65
        note  = "No confirmed China market presence"
        conf  = "MEDIUM" if lb.get("certified") else "LOW"
    return {
        "signal":      "china_testing_risk",
        "score":       score,
        "confidence":  conf,
        "sells_china": data["sells_china"],
        "flags":       flags,
        "note":        note,
    }


def _score_animal_ingredients(ingredients, category="cosmetics"):
    if not ingredients:
        return {"signal": "animal_ingredients", "score": 65,
                "confidence": "LOW", "flags": [], "note": "No ingredients provided"}
    scan  = scan_animal_ingredients(ingredients)
    score = scan["score"]
    if category == "baby" and score < 100:
        score = max(0, min(100, 100 + round(scan["total_deduction"] * 1.3)))
    flag_strings = []
    for f in scan["flags"]:
        icon = "HIGH" if f["severity"] == "HIGH" else ("MEDIUM" if f["severity"] == "MEDIUM" else "LOW")
        alt  = " -> " + f["alternatives"] if f.get("alternatives") else ""
        flag_strings.append("[" + icon + "] " + f["ingredient"] + ": " + f["source"][:80] + alt)
    return {
        "signal":       "animal_ingredients",
        "score":        score,
        "confidence":   scan["confidence"],
        "vegan_status": scan["vegan_status"],
        "found_high":   scan["found_high"],
        "found_medium": scan["found_medium"],
        "found_low":    scan["found_low"],
        "flags":        flag_strings,
        "note":         scan["vegan_status"] + " - " + str(len(scan["flags"])) + " animal-derived ingredient(s)",
    }


def _score_brand_ethics(brand_name):
    cf    = _score_cruelty_free(brand_name)
    china = _score_china_testing_risk(brand_name)
    score = round(cf["score"] * 0.60 + china["score"] * 0.40)
    return {
        "signal":     "brand_ethics",
        "score":      score,
        "confidence": "MEDIUM" if brand_name else "LOW",
        "flags":      cf["flags"] + china["flags"],
        "note":       "CF: " + str(cf["score"]) + "/100 | China risk: " + str(china["score"]) + "/100",
    }


def _score_food_welfare_standards(product_name, brand_name, ingredients):
    ing_scan = scan_animal_ingredients(ingredients)
    if ing_scan["is_vegan"] and ing_scan["vegan_status"] == "VEGAN":
        return {
            "signal": "welfare_standards", "score": 100, "confidence": "HIGH",
            "certifications": [], "flags": [],
            "note": "Plant-based product - animal welfare standards not applicable",
        }
    welfare = check_food_welfare(product_name, brand_name)
    flags   = []
    if welfare["score"] < 60 and not welfare["certifications_found"]:
        flags.append("No animal welfare certification detected in product name")
    return {
        "signal":         "welfare_standards",
        "score":          welfare["score"],
        "confidence":     welfare["confidence"],
        "certifications": welfare["certifications_found"],
        "flags":          flags,
        "note":           welfare["note"],
    }


def _score_food_animal_content(ingredients):
    scan  = scan_animal_ingredients(ingredients)
    flags = []
    if scan["vegan_status"] == "VEGAN":
        score = 100
        note  = "No animal-derived ingredients detected"
    elif scan["vegan_status"] == "LIKELY VEGAN":
        score = 80
        note  = "Likely vegan - ambiguous origin ingredients present"
        if scan["found_low"]:
            flags.append("Ambiguous origin - verify source: " + ", ".join(scan["found_low"][:3]))
    else:
        score = scan["score"]
        note  = "Animal-derived ingredients present - " + str(len(scan["found_high"])) + " high, " + str(len(scan["found_medium"])) + " medium severity"
        for f in scan["flags"][:3]:
            flags.append("[" + f["severity"] + "] " + f["ingredient"] + ": " + f["source"][:70])
    return {
        "signal":       "animal_content",
        "score":        score,
        "confidence":   scan["confidence"],
        "vegan_status": scan["vegan_status"],
        "flags":        flags,
        "note":         note,
    }


def _score_food_welfare_certs(product_name, brand_name):
    welfare      = check_food_welfare(product_name, brand_name)
    audited_found= [c for c in welfare["certifications_found"] if c.get("audited")]
    if audited_found:
        score = max(c["score"] for c in audited_found)
        conf  = "HIGH"
    else:
        score = welfare["score"]
        conf  = welfare["confidence"]
    return {
        "signal":         "welfare_certs",
        "score":          score,
        "confidence":     conf,
        "certifications": welfare["certifications_found"],
        "audited_certs":  audited_found,
        "flags":          [],
        "note":           welfare["note"],
    }


def _score_supply_chain_stub():
    return {
        "signal":     "supply_chain",
        "score":      65,
        "confidence": "LOW",
        "flags":      [],
        "note":       "Supply chain animal welfare - Week 13 stub",
    }


def calculate_animal_welfare_score(product_name, ingredients, category="cosmetics", brand_name=""):
    print("  Analysing animal welfare: " + product_name)
    print("     Category: " + category + " | Brand: " + (brand_name or "unknown") + " | Ingredients: " + str(len(ingredients)))

    if category not in ANIMAL_WELFARE_SIGNALS:
        category = "cosmetics"

    signal_results = {}
    ing_scan = _score_animal_ingredients(ingredients, category)

    if category == "cosmetics":
        print("     Cruelty-free...", end=" ")
        signal_results["cruelty_free"]       = _score_cruelty_free(brand_name)
        print("score: " + str(signal_results["cruelty_free"]["score"]))
        print("     Animal ingredients...", end=" ")
        signal_results["animal_ingredients"]  = ing_scan
        print("score: " + str(ing_scan["score"]) + " (" + ing_scan["vegan_status"] + ")")
        print("     Vegan certification...", end=" ")
        signal_results["vegan_certified"]     = _score_vegan_certification(brand_name, ing_scan)
        print("score: " + str(signal_results["vegan_certified"]["score"]))
        print("     China testing risk...", end=" ")
        signal_results["china_testing_risk"]  = _score_china_testing_risk(brand_name)
        print("score: " + str(signal_results["china_testing_risk"]["score"]))
        weights = {"cruelty_free": 0.40, "animal_ingredients": 0.25,
                   "vegan_certified": 0.25, "china_testing_risk": 0.10}

    elif category == "cleaning":
        print("     Cruelty-free...", end=" ")
        signal_results["cruelty_free"]       = _score_cruelty_free(brand_name)
        print("score: " + str(signal_results["cruelty_free"]["score"]))
        print("     Animal ingredients...", end=" ")
        signal_results["animal_ingredients"]  = ing_scan
        print("score: " + str(ing_scan["score"]) + " (" + ing_scan["vegan_status"] + ")")
        print("     Brand ethics...", end=" ")
        signal_results["brand_ethics"]        = _score_brand_ethics(brand_name)
        print("score: " + str(signal_results["brand_ethics"]["score"]))
        weights = {"cruelty_free": 0.50, "animal_ingredients": 0.30, "brand_ethics": 0.20}

    elif category == "food":
        print("     Welfare standards...", end=" ")
        signal_results["welfare_standards"]   = _score_food_welfare_standards(product_name, brand_name, ingredients)
        print("score: " + str(signal_results["welfare_standards"]["score"]))
        print("     Animal content...", end=" ")
        signal_results["animal_content"]      = _score_food_animal_content(ingredients)
        print("score: " + str(signal_results["animal_content"]["score"]) + " (" + signal_results["animal_content"]["vegan_status"] + ")")
        print("     Welfare certifications...", end=" ")
        signal_results["welfare_certs"]       = _score_food_welfare_certs(product_name, brand_name)
        print("score: " + str(signal_results["welfare_certs"]["score"]))
        print("     Supply chain...", end=" ")
        signal_results["supply_chain"]        = _score_supply_chain_stub()
        print("score: " + str(signal_results["supply_chain"]["score"]) + " (stub)")
        weights = {"welfare_standards": 0.40, "animal_content": 0.30,
                   "welfare_certs": 0.20, "supply_chain": 0.10}

    elif category == "baby":
        print("     Cruelty-free...", end=" ")
        signal_results["cruelty_free"]       = _score_cruelty_free(brand_name)
        print("score: " + str(signal_results["cruelty_free"]["score"]))
        print("     Animal ingredients (strict)...", end=" ")
        signal_results["animal_ingredients"]  = ing_scan
        print("score: " + str(ing_scan["score"]) + " (" + ing_scan["vegan_status"] + ")")
        print("     Brand ethics...", end=" ")
        signal_results["brand_ethics"]        = _score_brand_ethics(brand_name)
        print("score: " + str(signal_results["brand_ethics"]["score"]))
        weights = {"cruelty_free": 0.40, "animal_ingredients": 0.35, "brand_ethics": 0.25}

    weighted_score = round(sum(
        signal_results[sig]["score"] * weights.get(sig, 0)
        for sig in signal_results if sig in weights
    ))

    all_flags = []
    seen = set()
    for sr in signal_results.values():
        for flag in sr.get("flags", []):
            key = str(flag)[:60].lower()
            if key not in seen:
                seen.add(key)
                all_flags.append(flag)

    high_conf = sum(1 for sr in signal_results.values() if sr.get("confidence") in ("HIGH", "MEDIUM"))
    total_sig = len(signal_results)
    if high_conf >= total_sig * 0.75:   confidence = "HIGH"
    elif high_conf >= total_sig * 0.40: confidence = "MEDIUM"
    else:                               confidence = "LOW"

    if weighted_score >= 80:   verdict = "CLEAN"
    elif weighted_score >= 60: verdict = "ACCEPTABLE"
    elif weighted_score >= 40: verdict = "CAUTION"
    else:                      verdict = "HIGHER_RISK"

    stub_count = sum(1 for sr in signal_results.values() if sr.get("confidence") == "LOW")
    summary = (
        "No animal welfare concerns detected." if not all_flags
        else str(len(all_flags)) + " animal welfare concern(s) detected."
    )
    if stub_count > 0:
        summary += " (" + str(stub_count) + " signal(s) pending data)"

    return {
        "product":              product_name,
        "category":             category,
        "brand":                brand_name,
        "animal_welfare_score": weighted_score,
        "verdict":              verdict,
        "flags":                all_flags,
        "flag_count":           len(all_flags),
        "signal_scores":        {k: v["score"] for k, v in signal_results.items()},
        "signal_confidence":    {k: v.get("confidence", "LOW") for k, v in signal_results.items()},
        "signal_weights":       weights,
        "signal_breakdown":     signal_results,
        "confidence":           confidence,
        "summary":              summary,
        "timestamp":            datetime.now().isoformat(),
    }


def print_animal_welfare_report(result):
    icons      = {"CLEAN": "GREEN", "ACCEPTABLE": "YELLOW", "CAUTION": "ORANGE", "HIGHER_RISK": "RED"}
    conf_icons = {"HIGH": "HIGH", "MEDIUM": "MED", "LOW": "LOW"}
    verdict    = result["verdict"]

    print("")
    print("=" * 58)
    print("  NOURA ANIMAL WELFARE - " + result["product"].upper())
    print("=" * 58)
    print("  Score:      " + str(result["animal_welfare_score"]) + "/100  [" + verdict + "]")
    print("  Brand:      " + (result["brand"] or "unknown"))
    print("  Category:   " + result["category"])
    print("  Confidence: " + result["confidence"])
    print("")
    print("  Signal breakdown:")
    for signal, score in result["signal_scores"].items():
        weight = result["signal_weights"].get(signal, 0)
        conf   = result["signal_confidence"].get(signal, "LOW")
        stub   = "  [stub]" if conf == "LOW" else ""
        print("    " + signal.ljust(22) + str(score).rjust(3) + "/100  (weight: " + str(round(weight*100)) + "%)" + stub)

    if result["flags"]:
        print("")
        print("  Concerns (" + str(result["flag_count"]) + "):")
        for flag in result["flags"][:6]:
            print("     - " + str(flag)[:80])
        if result["flag_count"] > 6:
            print("     ... and " + str(result["flag_count"] - 6) + " more")
    else:
        print("")
        print("  No animal welfare concerns detected")

    print("=" * 58)
    print("")


# Self-test
print("Running Animal Welfare Engine self-test...")
print("")

t1 = calculate_animal_welfare_score(
    product_name="ELF Hydrating Camo Concealer",
    ingredients=["aqua", "glycerin", "niacinamide", "sodium hyaluronate", "phenoxyethanol", "xanthan gum", "citric acid"],
    category="cosmetics",
    brand_name="elf cosmetics",
)
print_animal_welfare_report(t1)

t2 = calculate_animal_welfare_score(
    product_name="Chanel No5 Body Lotion",
    ingredients=["aqua", "glycerin", "cetearyl alcohol", "lanolin", "beeswax", "fragrance", "carmine"],
    category="cosmetics",
    brand_name="chanel",
)
print_animal_welfare_report(t2)

t3 = calculate_animal_welfare_score(
    product_name="Method All Purpose Cleaner",
    ingredients=["water", "sodium gluconate", "sodium citrate", "glycerin", "methylisothiazolinone"],
    category="cleaning",
    brand_name="method",
)
print_animal_welfare_report(t3)

t4 = calculate_animal_welfare_score(
    product_name="Oatly Oat Drink Organic",
    ingredients=["oats", "water", "rapeseed oil", "calcium carbonate", "salt"],
    category="food",
    brand_name="oatly",
)
print_animal_welfare_report(t4)

t5 = calculate_animal_welfare_score(
    product_name="Generic Chicken Breast",
    ingredients=["chicken"],
    category="food",
    brand_name="generic brand",
)
print_animal_welfare_report(t5)

t6 = calculate_animal_welfare_score(
    product_name="Johnsons Baby Lotion",
    ingredients=["aqua", "glycerin", "lanolin", "beeswax", "phenoxyethanol", "sodium hyaluronate"],
    category="baby",
    brand_name="johnsons",
)
print_animal_welfare_report(t6)

print("Sanity checks:")
print("-" * 45)
assert t1["animal_welfare_score"] > t2["animal_welfare_score"], "ELF should outscore Chanel"
print("  PASS: ELF (" + str(t1["animal_welfare_score"]) + ") scores higher than Chanel (" + str(t2["animal_welfare_score"]) + ")")

assert t4["animal_welfare_score"] > t5["animal_welfare_score"], "Oatly should outscore generic chicken"
print("  PASS: Oatly (" + str(t4["animal_welfare_score"]) + ") scores higher than generic chicken (" + str(t5["animal_welfare_score"]) + ")")

assert t3["animal_welfare_score"] > 70, "Method (Leaping Bunny) should score above 70"
print("  PASS: Method scores " + str(t3["animal_welfare_score"]) + "/100 - above 70")

assert t6["animal_welfare_score"] < t1["animal_welfare_score"], "Baby product with lanolin+beeswax should score lower than ELF"
print("  PASS: Baby product (" + str(t6["animal_welfare_score"]) + ") scores lower than ELF (" + str(t1["animal_welfare_score"]) + ")")

print("")
print("ALL TESTS PASSED - Animal Welfare Engine operational")
"""

with open('/content/noura_animal_welfare_engine.py', 'w', encoding='utf-8') as f:
    f.write(engine_code)

exec(engine_code)
print("Cell 34 complete - engine written and loaded")

ModuleNotFoundError: No module named 'noura_animal_welfare_config'

In [None]:
# NOURA Governance Config - Cell 35

config_code = """
GOVERNANCE_SIGNALS = {
    "cosmetics": {
        "bcorp":          {"weight": 0.30},
        "ownership":      {"weight": 0.30},
        "supply_chain":   {"weight": 0.25},
        "labor":          {"weight": 0.15},
    },
    "cleaning": {
        "bcorp":          {"weight": 0.30},
        "ownership":      {"weight": 0.30},
        "supply_chain":   {"weight": 0.25},
        "labor":          {"weight": 0.15},
    },
    "food": {
        "fair_trade":     {"weight": 0.35},
        "ownership":      {"weight": 0.25},
        "supply_chain":   {"weight": 0.25},
        "labor":          {"weight": 0.15},
    },
    "baby": {
        "bcorp":          {"weight": 0.25},
        "ownership":      {"weight": 0.35},
        "supply_chain":   {"weight": 0.25},
        "labor":          {"weight": 0.15},
    },
}


# B Corp certified brands
# Full B Corp directory: bcorporation.net/directory - 7,000+ companies
# NOURA curated subset of consumer-facing brands in priority markets
# B Corp requires recertification every 3 years - verify at source

B_CORP_BRANDS = {
    # Cosmetics and personal care
    "weleda":             {"certified": True, "score": 100, "since": 2016,
                           "note": "B Corp + NATRUE + Demeter"},
    "dr bronner":         {"certified": True, "score": 100, "since": 2015,
                           "note": "B Corp + Fair Trade + Leaping Bunny"},
    "pai skincare":       {"certified": True, "score": 100, "since": 2019,
                           "note": "B Corp + Leaping Bunny"},
    "bulldog skincare":   {"certified": True, "score": 100, "since": 2019,
                           "note": "B Corp + Leaping Bunny"},
    "green people":       {"certified": True, "score": 100, "since": 2018,
                           "note": "B Corp + Vegan Society + Leaping Bunny"},
    "faith in nature":    {"certified": True, "score": 100, "since": 2021,
                           "note": "B Corp + Vegan Society + Leaping Bunny"},
    "bybi beauty":        {"certified": True, "score": 100, "since": 2020,
                           "note": "B Corp + Vegan Society"},
    "cork colour":        {"certified": True, "score": 100, "since": 2022,
                           "note": "B Corp certified nail brand"},
    "hurraw balm":        {"certified": True, "score": 100, "since": 2018,
                           "note": "B Corp certified lip balm"},
    "meow meow tweet":    {"certified": True, "score": 100, "since": 2019,
                           "note": "B Corp + vegan"},
    # Cleaning
    "method":             {"certified": True, "score": 100, "since": 2016,
                           "note": "B Corp + Leaping Bunny + Cradle to Cradle"},
    "seventh generation": {"certified": True, "score": 100, "since": 2007,
                           "note": "B Corp - one of the original certifieds"},
    "ecover":             {"certified": True, "score": 100, "since": 2020,
                           "note": "B Corp + EU Ecolabel"},
    "bio-d":              {"certified": True, "score": 100, "since": 2018,
                           "note": "B Corp + Leaping Bunny + Vegan Society"},
    "attitude":           {"certified": True, "score": 100, "since": 2021,
                           "note": "B Corp + EWG Verified + Leaping Bunny"},
    "together group":     {"certified": True, "score": 100, "since": 2019,
                           "note": "B Corp cleaning brand"},
    # Food and drink
    "innocent":           {"certified": True, "score": 100, "since": 2018,
                           "note": "B Corp - though majority owned by Coca-Cola"},
    "alpro":              {"certified": True, "score": 100, "since": 2017,
                           "note": "B Corp - owned by Danone"},
    "oatly":              {"certified": False, "score": 70, "since": None,
                           "note": "Not B Corp - IPO 2021, mixed ownership including Blackstone"},
    "tony chocolonely":   {"certified": True, "score": 100, "since": 2016,
                           "note": "B Corp + Fair Trade - mission-led brand"},
    "ben jerrys":         {"certified": True, "score": 100, "since": 2012,
                           "note": "B Corp - owned by Unilever (ownership conflict noted)"},
    "graze":              {"certified": True, "score": 100, "since": 2019,
                           "note": "B Corp snack brand"},
    "pip and nut":        {"certified": True, "score": 100, "since": 2020,
                           "note": "B Corp nut butter brand"},
    # Not certified - major brands
    "loreal":             {"certified": False, "score": 30, "since": None,
                           "note": "Not B Corp - large multinational"},
    "unilever":           {"certified": False, "score": 40, "since": None,
                           "note": "Not B Corp - has sustainability commitments but not certified"},
    "procter gamble":     {"certified": False, "score": 30, "since": None,
                           "note": "Not B Corp"},
    "johnson johnson":    {"certified": False, "score": 35, "since": None,
                           "note": "Not B Corp"},
    "estee lauder":       {"certified": False, "score": 30, "since": None,
                           "note": "Not B Corp"},
    "shiseido":           {"certified": False, "score": 35, "since": None,
                           "note": "Not B Corp"},
    "lvmh":               {"certified": False, "score": 25, "since": None,
                           "note": "Not B Corp - luxury conglomerate"},
}


# Fair Trade certified brands
# Sources: Fairtrade International, Fair for Life (IMO), World Fair Trade Organization
# Fair Trade primarily relevant for food, coffee, cocoa, cotton products

FAIR_TRADE_BRANDS = {
    # Coffee
    "cafedirect":         {"certified": True, "score": 100, "certifier": "Fairtrade",
                           "note": "Pioneer Fair Trade coffee - cooperative owned"},
    "clipper teas":       {"certified": True, "score": 100, "certifier": "Fairtrade",
                           "note": "Fairtrade + organic tea brand"},
    "teapigs":            {"certified": True, "score": 95,  "certifier": "Fairtrade",
                           "note": "Fairtrade certified teas"},
    "equal exchange":     {"certified": True, "score": 100, "certifier": "Fairtrade + WFTO",
                           "note": "Worker-owned cooperative, pioneering Fair Trade"},
    # Chocolate / cocoa
    "tony chocolonely":   {"certified": True, "score": 100, "certifier": "Fairtrade",
                           "note": "Fairtrade + mission to end slavery in chocolate"},
    "divine chocolate":   {"certified": True, "score": 100, "certifier": "Fairtrade",
                           "note": "Cocoa farmer owned - gold standard"},
    "seed and bean":      {"certified": True, "score": 100, "certifier": "Fairtrade",
                           "note": "Fairtrade + organic chocolate"},
    "alter eco":          {"certified": True, "score": 100, "certifier": "Fair for Life",
                           "note": "Fair for Life (IMO) + organic"},
    # Food
    "traidcraft":         {"certified": True, "score": 100, "certifier": "Fairtrade + WFTO",
                           "note": "WFTO member - pioneer Fair Trade"},
    "coop":               {"certified": True, "score": 90,  "certifier": "Fairtrade",
                           "note": "Co-operative Group - large Fairtrade range"},
    "waitrose":           {"certified": True, "score": 85,  "certifier": "Fairtrade",
                           "note": "Fairtrade own-brand range"},
    # Cosmetics with Fair Trade ingredients
    "dr bronner":         {"certified": True, "score": 100, "certifier": "Fair Trade USA",
                           "note": "Fair Trade certified ingredients - coconut oil palm oil"},
    "the body shop":      {"certified": True, "score": 85,  "certifier": "Community Fair Trade",
                           "note": "Community Fair Trade programme - own scheme not Fairtrade Int"},
    # Not certified
    "nestle":             {"certified": False, "score": 10, "certifier": None,
                           "note": "Not Fair Trade - ongoing concerns re cocoa supply chain"},
    "cadbury":            {"certified": False, "score": 20, "certifier": None,
                           "note": "Mondelez - Cocoa Life programme but not Fairtrade certified"},
    "mars":               {"certified": False, "score": 20, "certifier": None,
                           "note": "Not Fair Trade - Sustainable in a Generation plan"},
}


# Corporate ownership map
# This is the most important governance signal - surfaces parent company
# ethics issues that brand-level scoring misses
#
# Score methodology:
#   100 = independent, mission-led, or cooperative-owned
#    80 = independent but conventionally structured
#    60 = owned by mid-tier company with mixed record
#    40 = owned by large conglomerate with some sustainability effort
#    20 = owned by company with significant ethical concerns
#     0 = owned by company with severe ongoing ethical violations
#
# Key cases where brand looks clean but parent is problematic:
#   Burt's Bees -> Clorox (bleach, conventional cleaning)
#   Tom's of Maine -> Colgate-Palmolive (animal testing)
#   The Body Shop -> Natura (was L'Oreal - now better)
#   St. Ives, Simple, REN -> Unilever
#   Dermalogica, Murad, Kate Somerville -> Unilever
#   Garnier, NYX, IT Cosmetics -> L'Oreal
#   Too Faced, Urban Decay -> L'Oreal (LVMH acquisition)
#   Drunk Elephant -> Shiseido
#   Tatcha -> Unilever
#   Farmacy -> P&G
#   Versed -> Church & Dwight
#   Yes To -> private equity

CORPORATE_OWNERSHIP = {
    # Truly independent / mission-led / cooperative
    "dr bronner":         {"owner": "Family-owned B Corp",            "owner_score": 100,
                           "independent": True,
                           "note": "Employee-owned, mission-led, pioneered Fair Trade cosmetics"},
    "weleda":             {"owner": "Anthroposophical foundation",     "owner_score": 100,
                           "independent": True,
                           "note": "Non-profit foundation ownership - mission-led since 1921"},
    "pai skincare":       {"owner": "Independent B Corp",             "owner_score": 100,
                           "independent": True,
                           "note": "Founder-led, B Corp certified"},
    "lush":               {"owner": "Employee-owned",                  "owner_score": 95,
                           "independent": True,
                           "note": "Majority employee-owned - founder retains stake"},
    "faith in nature":    {"owner": "Independent B Corp",             "owner_score": 100,
                           "independent": True,
                           "note": "Independent UK brand - B Corp certified"},
    "green people":       {"owner": "Independent B Corp",             "owner_score": 100,
                           "independent": True,
                           "note": "Founder-owned - B Corp certified"},
    "method":             {"owner": "SC Johnson (subsidiary)",        "owner_score": 55,
                           "independent": False,
                           "note": "Acquired by SC Johnson 2017 - SC Johnson is privately held, moderate record"},
    "seventh generation": {"owner": "Unilever (subsidiary)",          "owner_score": 45,
                           "independent": False,
                           "note": "Acquired by Unilever 2016 - maintains B Corp but parent not certified"},
    "ecover":             {"owner": "SC Johnson (subsidiary)",        "owner_score": 55,
                           "independent": False,
                           "note": "Acquired by SC Johnson - maintains B Corp certification"},
    "the body shop":      {"owner": "Natura and Co",                  "owner_score": 70,
                           "independent": False,
                           "note": "Acquired by Natura 2017 (from L'Oreal) - Natura is B Corp certified"},
    "burts bees":         {"owner": "Clorox Company",                 "owner_score": 30,
                           "independent": False,
                           "note": "Acquired by Clorox 2007 - Clorox makes conventional bleach and chemicals"},
    "toms of maine":      {"owner": "Colgate-Palmolive",              "owner_score": 25,
                           "independent": False,
                           "note": "Acquired by Colgate-Palmolive 2006 - parent tests on animals"},
    "st ives":            {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever brand"},
    "simple":             {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever brand"},
    "ren skincare":       {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Acquired by Unilever 2015"},
    "dermalogica":        {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Acquired by Unilever 2015"},
    "murad":              {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever prestige brand"},
    "kate somerville":    {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever prestige brand"},
    "tatcha":             {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Acquired by Unilever 2019"},
    "drunk elephant":     {"owner": "Shiseido",                       "owner_score": 40,
                           "independent": False,
                           "note": "Acquired by Shiseido 2019 for 845M USD"},
    "nyx professional":   {"owner": "L'Oreal",                       "owner_score": 35,
                           "independent": False,
                           "note": "Acquired by L'Oreal 2014"},
    "urban decay":        {"owner": "L'Oreal",                       "owner_score": 35,
                           "independent": False,
                           "note": "Acquired by L'Oreal 2012 - maintains Leaping Bunny cert"},
    "too faced":          {"owner": "L'Oreal (via Luxe)",            "owner_score": 35,
                           "independent": False,
                           "note": "Acquired by Estee Lauder 2016 then L'Oreal"},
    "garnier":            {"owner": "L'Oreal",                       "owner_score": 35,
                           "independent": False,
                           "note": "L'Oreal brand since 1965"},
    "cerave":             {"owner": "L'Oreal",                       "owner_score": 35,
                           "independent": False,
                           "note": "Acquired by L'Oreal 2017 for 1.3B USD"},
    "la roche-posay":     {"owner": "L'Oreal",                       "owner_score": 35,
                           "independent": False,
                           "note": "L'Oreal Active Cosmetics division"},
    "kiehl's":           {"owner": "L'Oreal",                       "owner_score": 35,
                           "independent": False,
                           "note": "Acquired by L'Oreal 2000"},
    "the ordinary":       {"owner": "DECIEM (Estee Lauder majority)", "owner_score": 40,
                           "independent": False,
                           "note": "Estee Lauder acquired majority stake in DECIEM 2021"},
    "mac":                {"owner": "Estee Lauder",                   "owner_score": 35,
                           "independent": False,
                           "note": "Estee Lauder brand since 1994"},
    "clinique":           {"owner": "Estee Lauder",                   "owner_score": 35,
                           "independent": False,
                           "note": "Estee Lauder brand since 1968"},
    "bobbi brown":        {"owner": "Estee Lauder",                   "owner_score": 35,
                           "independent": False,
                           "note": "Estee Lauder brand - founder departed 2016"},
    "jo malone":          {"owner": "Estee Lauder",                   "owner_score": 35,
                           "independent": False,
                           "note": "Acquired by Estee Lauder 1999"},
    "benefit":            {"owner": "LVMH",                           "owner_score": 30,
                           "independent": False,
                           "note": "LVMH brand"},
    "dior beauty":        {"owner": "LVMH",                           "owner_score": 30,
                           "independent": False,
                           "note": "LVMH conglomerate"},
    "fenty beauty":       {"owner": "LVMH",                           "owner_score": 30,
                           "independent": False,
                           "note": "LVMH joint venture with Rihanna"},
    "kylie cosmetics":    {"owner": "Coty (majority)",                "owner_score": 30,
                           "independent": False,
                           "note": "Coty acquired 51% in 2019"},
    "covergirl":          {"owner": "Coty",                           "owner_score": 30,
                           "independent": False,
                           "note": "Acquired by Coty from P&G 2016"},
    "neutrogena":         {"owner": "Johnson and Johnson",            "owner_score": 35,
                           "independent": False,
                           "note": "J&J brand since 1994"},
    "aveeno":             {"owner": "Johnson and Johnson",            "owner_score": 35,
                           "independent": False,
                           "note": "J&J brand"},
    "olay":               {"owner": "Procter and Gamble",             "owner_score": 30,
                           "independent": False,
                           "note": "P&G brand"},
    "sk-ii":              {"owner": "Procter and Gamble",             "owner_score": 30,
                           "independent": False,
                           "note": "P&G prestige brand"},
    "pantene":            {"owner": "Procter and Gamble",             "owner_score": 30,
                           "independent": False,
                           "note": "P&G brand"},
    "head shoulders":     {"owner": "Procter and Gamble",             "owner_score": 30,
                           "independent": False,
                           "note": "P&G brand"},
    "dove":               {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever brand - Real Beauty campaign but parent tests in China"},
    "axe":                {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever brand"},
    "nivea":              {"owner": "Beiersdorf",                     "owner_score": 45,
                           "independent": False,
                           "note": "Beiersdorf - German multinational, moderate sustainability record"},
    "eucerin":            {"owner": "Beiersdorf",                     "owner_score": 45,
                           "independent": False,
                           "note": "Beiersdorf medical brand"},
    # Cleaning
    "fairy":              {"owner": "Procter and Gamble",             "owner_score": 30,
                           "independent": False,
                           "note": "P&G brand"},
    "ariel":              {"owner": "Procter and Gamble",             "owner_score": 30,
                           "independent": False,
                           "note": "P&G brand"},
    "dettol":             {"owner": "Reckitt",                        "owner_score": 35,
                           "independent": False,
                           "note": "Reckitt - has sustainability commitments"},
    "domestos":           {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever cleaning brand"},
    "flash":              {"owner": "Procter and Gamble",             "owner_score": 30,
                           "independent": False,
                           "note": "P&G brand"},
    "cif":                {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Unilever brand"},
    # Food
    "innocent":           {"owner": "Coca-Cola (majority)",           "owner_score": 35,
                           "independent": False,
                           "note": "Coca-Cola acquired 90%+ stake - maintains B Corp but parent controversial"},
    "alpro":              {"owner": "Danone",                         "owner_score": 55,
                           "independent": False,
                           "note": "Danone - B Corp holding company, sustainability commitments"},
    "oatly":              {"owner": "Public (IPO 2021) + Blackstone", "owner_score": 45,
                           "independent": False,
                           "note": "Blackstone minority stake raised concerns re deforestation links"},
    "ben jerrys":         {"owner": "Unilever",                       "owner_score": 40,
                           "independent": False,
                           "note": "Acquired by Unilever 2000 - ongoing tension over brand independence"},
    "tony chocolonely":   {"owner": "Independent (partially)",        "owner_score": 90,
                           "independent": True,
                           "note": "Mission-led - some external investment but retains independence"},
    "divine chocolate":   {"owner": "Cocoa farmer cooperative",       "owner_score": 100,
                           "independent": True,
                           "note": "44% owned by Kuapa Kokoo farmer cooperative in Ghana"},
    "nestle":             {"owner": "Nestle (public)",                "owner_score": 10,
                           "independent": False,
                           "note": "Ongoing concerns: water rights, infant formula, cocoa supply chain"},
    "coca-cola":          {"owner": "Coca-Cola (public)",             "owner_score": 20,
                           "independent": False,
                           "note": "Plastic pollution, water usage concerns"},
    "pepsico":            {"owner": "PepsiCo (public)",               "owner_score": 25,
                           "independent": False,
                           "note": "Some sustainability commitments but large conventional food company"},
}


# Supply chain transparency
# Brands that publicly publish supplier lists, conduct third-party audits,
# and/or pay living wages throughout their supply chain
# Source: Know The Chain benchmarks, Fashion Revolution, B Corp disclosure

SUPPLY_CHAIN_TRANSPARENCY = {
    # High transparency
    "patagonia":          {"score": 100, "publishes_suppliers": True,  "living_wage": True,
                           "third_party_audit": True,
                           "note": "Gold standard - full supplier list, living wage, regenerative"},
    "eileen fisher":      {"score": 95,  "publishes_suppliers": True,  "living_wage": True,
                           "third_party_audit": True,
                           "note": "B Corp - publishes supply chain, take-back programme"},
    "dr bronner":         {"score": 95,  "publishes_suppliers": True,  "living_wage": True,
                           "third_party_audit": True,
                           "note": "Fair Trade certified ingredients - full traceability"},
    "tony chocolonely":   {"score": 95,  "publishes_suppliers": True,  "living_wage": True,
                           "third_party_audit": True,
                           "note": "Full bean-to-bar traceability, living wage programme"},
    "divine chocolate":   {"score": 100, "publishes_suppliers": True,  "living_wage": True,
                           "third_party_audit": True,
                           "note": "Farmer-owned - complete supply chain visibility"},
    "weleda":             {"score": 90,  "publishes_suppliers": True,  "living_wage": True,
                           "third_party_audit": True,
                           "note": "Biodynamic sourcing partnerships - supplier relationships published"},
    "faith in nature":    {"score": 85,  "publishes_suppliers": False, "living_wage": True,
                           "third_party_audit": True,
                           "note": "B Corp disclosure - living wage committed"},
    "method":             {"score": 80,  "publishes_suppliers": False, "living_wage": True,
                           "third_party_audit": True,
                           "note": "B Corp - Cradle to Cradle certified, SC Johnson parent"},
    "seventh generation": {"score": 80,  "publishes_suppliers": False, "living_wage": False,
                           "third_party_audit": True,
                           "note": "B Corp disclosure but Unilever parent limits visibility"},
    # Medium transparency
    "the body shop":      {"score": 65,  "publishes_suppliers": False, "living_wage": False,
                           "third_party_audit": True,
                           "note": "Community Fair Trade programme - partial traceability"},
    "lush":               {"score": 70,  "publishes_suppliers": False, "living_wage": True,
                           "third_party_audit": False,
                           "note": "Publishes buying policy, Living Wage employer (UK)"},
    "the ordinary":       {"score": 50,  "publishes_suppliers": False, "living_wage": False,
                           "third_party_audit": False,
                           "note": "DECIEM - limited supply chain disclosure"},
    "cerave":             {"score": 40,  "publishes_suppliers": False, "living_wage": False,
                           "third_party_audit": False,
                           "note": "L'Oreal parent - some group-level disclosure"},
    # Low transparency
    "kylie cosmetics":    {"score": 20,  "publishes_suppliers": False, "living_wage": False,
                           "third_party_audit": False,
                           "note": "Minimal supply chain disclosure"},
    "shein":              {"score": 5,   "publishes_suppliers": False, "living_wage": False,
                           "third_party_audit": False,
                           "note": "Significant concerns re labor conditions"},
}


def get_governance_signals(category):
    return GOVERNANCE_SIGNALS.get(category, GOVERNANCE_SIGNALS["cosmetics"])


def check_bcorp(brand):
    name = brand.lower().strip()
    for key, val in B_CORP_BRANDS.items():
        if key in name or name in key:
            return {"brand": brand, **val}
    return {"brand": brand, "certified": False, "score": 50, "since": None,
            "note": "Not in B Corp database - status unknown"}


def check_fair_trade(brand):
    name = brand.lower().strip()
    for key, val in FAIR_TRADE_BRANDS.items():
        if key in name or name in key:
            return {"brand": brand, **val}
    return {"brand": brand, "certified": False, "score": 50, "certifier": None,
            "note": "No Fair Trade certification found"}


def check_ownership(brand):
    name = brand.lower().strip()
    for key, val in CORPORATE_OWNERSHIP.items():
        if key in name or name in key:
            return {"brand": brand, **val}
    return {"brand": brand, "owner": "Unknown", "owner_score": 60,
            "independent": None,
            "note": "Ownership not in database - neutral score applied"}


def check_supply_chain(brand):
    name = brand.lower().strip()
    for key, val in SUPPLY_CHAIN_TRANSPARENCY.items():
        if key in name or name in key:
            return {"brand": brand, **val}
    return {"brand": brand, "score": 50, "publishes_suppliers": None,
            "living_wage": None, "third_party_audit": None,
            "note": "Supply chain transparency not in database - neutral score"}


print("Config loaded")
print("  B Corp brands:           " + str(len(B_CORP_BRANDS)))
print("  Fair Trade brands:       " + str(len(FAIR_TRADE_BRANDS)))
print("  Ownership records:       " + str(len(CORPORATE_OWNERSHIP)))
print("  Supply chain records:    " + str(len(SUPPLY_CHAIN_TRANSPARENCY)))
"""

with open('/content/noura_governance_config.py', 'w', encoding='utf-8') as f:
    f.write(config_code)

exec(config_code)
print("Cell 35 complete - governance config written and loaded")

Config loaded
  B Corp brands:           30
  Fair Trade brands:       16
  Ownership records:       62
  Supply chain records:    15
Cell 35 complete - governance config written and loaded


In [None]:
# NOURA Governance Engine - Cell 36

engine_code = """
import sys
from datetime import datetime

sys.path.insert(0, '/content')

from noura_governance_config import (
    GOVERNANCE_SIGNALS,
    get_governance_signals,
    check_bcorp,
    check_fair_trade,
    check_ownership,
    check_supply_chain,
)


def _score_bcorp(brand_name):
    if not brand_name:
        return {"signal": "bcorp", "score": 50, "confidence": "LOW",
                "flags": [], "note": "No brand name provided"}
    data  = check_bcorp(brand_name)
    score = data.get("score", 50)
    flags = []
    if data.get("certified"):
        note = brand_name + " is B Corp certified (since " + str(data.get("since", "unknown")) + ")"
        if data.get("note"):
            note += " - " + data["note"]
    else:
        note  = data.get("note", "Not B Corp certified")
        if score < 40:
            flags.append(brand_name + ": " + note)
    return {
        "signal":    "bcorp",
        "score":     score,
        "confidence": "HIGH",
        "certified": data.get("certified", False),
        "flags":     flags,
        "note":      note,
    }


def _score_fair_trade(brand_name):
    if not brand_name:
        return {"signal": "fair_trade", "score": 50, "confidence": "LOW",
                "flags": [], "note": "No brand name provided"}
    data  = check_fair_trade(brand_name)
    score = data.get("score", 50)
    flags = []
    if data.get("certified"):
        note = brand_name + " Fair Trade certified - " + str(data.get("certifier", ""))
    else:
        note = data.get("note", "No Fair Trade certification")
        if score < 30:
            flags.append(brand_name + ": " + note)
    return {
        "signal":    "fair_trade",
        "score":     score,
        "confidence": "HIGH",
        "certified": data.get("certified", False),
        "certifier": data.get("certifier"),
        "flags":     flags,
        "note":      note,
    }


def _score_ownership(brand_name):
    if not brand_name:
        return {"signal": "ownership", "score": 60, "confidence": "LOW",
                "flags": [], "note": "No brand name provided"}
    data  = check_ownership(brand_name)
    score = data.get("owner_score", 60)
    flags = []
    note  = data.get("note", "")
    owner = data.get("owner", "Unknown")
    if not data.get("independent") and score < 50:
        flags.append("Owned by " + owner + " - " + note)
    return {
        "signal":      "ownership",
        "score":       score,
        "confidence":  "HIGH" if data.get("owner") != "Unknown" else "LOW",
        "owner":       owner,
        "independent": data.get("independent"),
        "flags":       flags,
        "note":        owner + " - " + note if note else owner,
    }


def _score_supply_chain(brand_name):
    if not brand_name:
        return {"signal": "supply_chain", "score": 50, "confidence": "LOW",
                "flags": [], "note": "No brand name provided"}
    data  = check_supply_chain(brand_name)
    score = data.get("score", 50)
    flags = []
    positives = []
    if data.get("publishes_suppliers"):
        positives.append("publishes supplier list")
    if data.get("living_wage"):
        positives.append("living wage commitment")
    if data.get("third_party_audit"):
        positives.append("third-party audited")
    if score < 40:
        flags.append(brand_name + ": limited supply chain transparency")
    note = data.get("note", "No supply chain data")
    if positives:
        note = ", ".join(positives) + " - " + note
    return {
        "signal":            "supply_chain",
        "score":             score,
        "confidence":        "HIGH" if data.get("score") else "LOW",
        "publishes_suppliers": data.get("publishes_suppliers"),
        "living_wage":       data.get("living_wage"),
        "third_party_audit": data.get("third_party_audit"),
        "flags":             flags,
        "note":              note,
    }


def _score_labor_stub():
    return {
        "signal":     "labor",
        "score":      65,
        "confidence": "LOW",
        "flags":      [],
        "note":       "Labor practices - ILO compliance data pending (Week 14 stub)",
    }


def calculate_governance_score(product_name, ingredients, category="cosmetics", brand_name=""):
    print("  Analysing governance: " + product_name)
    print("     Category: " + category + " | Brand: " + (brand_name or "unknown"))

    if category not in GOVERNANCE_SIGNALS:
        category = "cosmetics"

    signal_results = {}

    if category in ("cosmetics", "cleaning", "baby"):
        print("     B Corp...", end=" ")
        signal_results["bcorp"]        = _score_bcorp(brand_name)
        print("score: " + str(signal_results["bcorp"]["score"]) +
              (" (certified)" if signal_results["bcorp"].get("certified") else ""))

        print("     Ownership...", end=" ")
        signal_results["ownership"]    = _score_ownership(brand_name)
        print("score: " + str(signal_results["ownership"]["score"]) +
              " (" + signal_results["ownership"].get("owner", "unknown") + ")")

        print("     Supply chain...", end=" ")
        signal_results["supply_chain"] = _score_supply_chain(brand_name)
        print("score: " + str(signal_results["supply_chain"]["score"]))

        print("     Labor...", end=" ")
        signal_results["labor"]        = _score_labor_stub()
        print("score: " + str(signal_results["labor"]["score"]) + " (stub)")

        if category == "baby":
            weights = {"bcorp": 0.25, "ownership": 0.35, "supply_chain": 0.25, "labor": 0.15}
        else:
            weights = {"bcorp": 0.30, "ownership": 0.30, "supply_chain": 0.25, "labor": 0.15}

    elif category == "food":
        print("     Fair Trade...", end=" ")
        signal_results["fair_trade"]   = _score_fair_trade(brand_name)
        print("score: " + str(signal_results["fair_trade"]["score"]) +
              (" (certified)" if signal_results["fair_trade"].get("certified") else ""))

        print("     Ownership...", end=" ")
        signal_results["ownership"]    = _score_ownership(brand_name)
        print("score: " + str(signal_results["ownership"]["score"]) +
              " (" + signal_results["ownership"].get("owner", "unknown") + ")")

        print("     Supply chain...", end=" ")
        signal_results["supply_chain"] = _score_supply_chain(brand_name)
        print("score: " + str(signal_results["supply_chain"]["score"]))

        print("     Labor...", end=" ")
        signal_results["labor"]        = _score_labor_stub()
        print("score: " + str(signal_results["labor"]["score"]) + " (stub)")

        weights = {"fair_trade": 0.35, "ownership": 0.25, "supply_chain": 0.25, "labor": 0.15}

    weighted_score = round(sum(
        signal_results[sig]["score"] * weights.get(sig, 0)
        for sig in signal_results if sig in weights
    ))

    all_flags = []
    seen = set()
    for sr in signal_results.values():
        for flag in sr.get("flags", []):
            key = str(flag)[:60].lower()
            if key not in seen:
                seen.add(key)
                all_flags.append(flag)

    high_conf = sum(1 for sr in signal_results.values() if sr.get("confidence") in ("HIGH", "MEDIUM"))
    total_sig = len(signal_results)
    if high_conf >= total_sig * 0.75:   confidence = "HIGH"
    elif high_conf >= total_sig * 0.40: confidence = "MEDIUM"
    else:                               confidence = "LOW"

    if weighted_score >= 80:   verdict = "CLEAN"
    elif weighted_score >= 60: verdict = "ACCEPTABLE"
    elif weighted_score >= 40: verdict = "CAUTION"
    else:                      verdict = "HIGHER_RISK"

    stub_count = sum(1 for sr in signal_results.values() if sr.get("confidence") == "LOW")
    summary = (
        "No governance concerns detected." if not all_flags
        else str(len(all_flags)) + " governance concern(s) detected."
    )
    if stub_count > 0:
        summary += " (" + str(stub_count) + " signal(s) pending data)"

    return {
        "product":          product_name,
        "category":         category,
        "brand":            brand_name,
        "governance_score": weighted_score,
        "verdict":          verdict,
        "flags":            all_flags,
        "flag_count":       len(all_flags),
        "signal_scores":    {k: v["score"] for k, v in signal_results.items()},
        "signal_confidence":{k: v.get("confidence", "LOW") for k, v in signal_results.items()},
        "signal_weights":   weights,
        "signal_breakdown": signal_results,
        "confidence":       confidence,
        "summary":          summary,
        "timestamp":        datetime.now().isoformat(),
    }


def print_governance_report(result):
    print("")
    print("=" * 58)
    print("  NOURA GOVERNANCE - " + result["product"].upper())
    print("=" * 58)
    print("  Score:      " + str(result["governance_score"]) + "/100  [" + result["verdict"] + "]")
    print("  Brand:      " + (result["brand"] or "unknown"))
    print("  Category:   " + result["category"])
    print("  Confidence: " + result["confidence"])
    print("")
    print("  Signal breakdown:")
    for signal, score in result["signal_scores"].items():
        weight = result["signal_weights"].get(signal, 0)
        conf   = result["signal_confidence"].get(signal, "LOW")
        stub   = "  [stub]" if conf == "LOW" else ""
        owner  = ""
        if signal == "ownership":
            owner = " (" + result["signal_breakdown"]["ownership"].get("owner", "") + ")"
        print("    " + signal.ljust(22) + str(score).rjust(3) + "/100  (weight: " + str(round(weight*100)) + "%)" + owner + stub)

    if result["flags"]:
        print("")
        print("  Concerns (" + str(result["flag_count"]) + "):")
        for flag in result["flags"][:5]:
            print("     - " + str(flag)[:80])
    else:
        print("")
        print("  No governance concerns detected")
    print("=" * 58)
    print("")


# Self-test
print("Running Governance Engine self-test...")
print("")

t1 = calculate_governance_score(
    product_name="Dr Bronner Pure Castile Soap",
    ingredients=["water", "coconut oil", "potassium hydroxide", "glycerin"],
    category="cleaning",
    brand_name="dr bronner",
)
print_governance_report(t1)

t2 = calculate_governance_score(
    product_name="CeraVe Moisturising Cream",
    ingredients=["aqua", "glycerin", "cetearyl alcohol", "petrolatum", "dimethicone"],
    category="cosmetics",
    brand_name="cerave",
)
print_governance_report(t2)

t3 = calculate_governance_score(
    product_name="Burts Bees Lip Balm",
    ingredients=["cera alba", "cocos nucifera oil", "lanolin"],
    category="cosmetics",
    brand_name="burts bees",
)
print_governance_report(t3)

t4 = calculate_governance_score(
    product_name="Tony Chocolonely Dark Chocolate",
    ingredients=["cocoa mass", "sugar", "cocoa butter"],
    category="food",
    brand_name="tony chocolonely",
)
print_governance_report(t4)

t5 = calculate_governance_score(
    product_name="Innocent Smoothie",
    ingredients=["apple", "banana", "mango"],
    category="food",
    brand_name="innocent",
)
print_governance_report(t5)

t6 = calculate_governance_score(
    product_name="Weleda Skin Food",
    ingredients=["aqua", "glycerin", "lanolin", "beeswax"],
    category="cosmetics",
    brand_name="weleda",
)
print_governance_report(t6)

print("Sanity checks:")
print("-" * 45)
assert t1["governance_score"] > t2["governance_score"], "Dr Bronner (B Corp + independent) should outscore CeraVe (L'Oreal)"
print("  PASS: Dr Bronner (" + str(t1["governance_score"]) + ") > CeraVe (" + str(t2["governance_score"]) + ")")

assert t4["governance_score"] > t5["governance_score"], "Tony Chocolonely (Fair Trade + independent) should outscore Innocent (Coca-Cola owned)"
print("  PASS: Tony Chocolonely (" + str(t4["governance_score"]) + ") > Innocent (" + str(t5["governance_score"]) + ")")

assert t3["governance_score"] < t6["governance_score"], "Burts Bees (Clorox owned) should score lower than Weleda (foundation owned)"
print("  PASS: Burts Bees (" + str(t3["governance_score"]) + ") < Weleda (" + str(t6["governance_score"]) + ")")

assert t1["governance_score"] > 75, "Dr Bronner should score above 75"
print("  PASS: Dr Bronner scores " + str(t1["governance_score"]) + "/100 - above 75")

print("")
print("ALL TESTS PASSED - Governance Engine operational")
"""

with open('/content/noura_governance_engine.py', 'w', encoding='utf-8') as f:
    f.write(engine_code)

exec(engine_code)
print("Cell 36 complete - governance engine written and loaded")

Config loaded
  B Corp brands:           30
  Fair Trade brands:       16
  Ownership records:       62
  Supply chain records:    15
Running Governance Engine self-test...

  Analysing governance: Dr Bronner Pure Castile Soap
     Category: cleaning | Brand: dr bronner
     B Corp... score: 100 (certified)
     Ownership... score: 100 (Family-owned B Corp)
     Supply chain... score: 95
     Labor... score: 65 (stub)

  NOURA GOVERNANCE - DR BRONNER PURE CASTILE SOAP
  Score:      94/100  [CLEAN]
  Brand:      dr bronner
  Category:   cleaning
  Confidence: HIGH

  Signal breakdown:
    bcorp                 100/100  (weight: 30%)
    ownership             100/100  (weight: 30%) (Family-owned B Corp)
    supply_chain           95/100  (weight: 25%)
    labor                  65/100  (weight: 15%)  [stub]

  No governance concerns detected

  Analysing governance: CeraVe Moisturising Cream
     Category: cosmetics | Brand: cerave
     B Corp... score: 50
     Ownership... score: 35 (L'

In [None]:
# NOURA Master Aggregator - Cell 37
# Dimension weights: Health 50%, Environment 25%, Animals 10%, Governance 15%

aggregator_code = """
import sys
from datetime import datetime

sys.path.insert(0, '/content')

# Dimension weights
DIMENSION_WEIGHTS = {
    "health":         0.50,
    "environment":    0.25,
    "animal_welfare": 0.10,
    "governance":     0.15,
}

# Verdict bands (final NOURA score)
VERDICT_BANDS = [
    (85, 100, "EXCELLENT",   "Meets highest standards across all dimensions"),
    (70,  84, "GOOD",        "Strong performance with minor concerns"),
    (55,  69, "ACCEPTABLE",  "Acceptable but notable trade-offs present"),
    (40,  54, "CAUTION",     "Significant concerns in one or more dimensions"),
    ( 0,  39, "HIGHER_RISK", "Serious concerns - review before purchase"),
]

# Confidence penalty - low confidence signals drag overall confidence down
CONFIDENCE_MAP = {"HIGH": 1.0, "MEDIUM": 0.7, "LOW": 0.4}


def _get_verdict(score):
    for low, high, verdict, description in VERDICT_BANDS:
        if low <= score <= high:
            return verdict, description
    return "UNKNOWN", ""


def _aggregate_confidence(dimension_results):
    total_weight = 0
    weighted_conf = 0
    for dim, weight in DIMENSION_WEIGHTS.items():
        if dim in dimension_results:
            conf_str  = dimension_results[dim].get("confidence", "LOW")
            conf_val  = CONFIDENCE_MAP.get(conf_str, 0.4)
            weighted_conf += conf_val * weight
            total_weight  += weight
    if total_weight == 0:
        return "LOW"
    avg = weighted_conf / total_weight
    if avg >= 0.80: return "HIGH"
    if avg >= 0.55: return "MEDIUM"
    return "LOW"


def calculate_noura_score(
    product_name,
    category,
    brand_name      = "",
    health_result   = None,
    env_result      = None,
    animal_result   = None,
    gov_result      = None,
):
    print("  Calculating NOURA score: " + product_name)

    dimension_results  = {}
    dimension_scores   = {}
    dimension_verdicts = {}
    all_flags          = []
    seen_flags         = set()

    # Health
    if health_result:
        h_score = health_result.get("health_score", health_result.get("score", 50))
        dimension_results["health"]  = health_result
        dimension_scores["health"]   = h_score
        dimension_verdicts["health"] = health_result.get("verdict", "UNKNOWN")
        for flag in health_result.get("flags", []):
            k = str(flag)[:60].lower()
            if k not in seen_flags:
                seen_flags.add(k)
                all_flags.append({"dimension": "health", "flag": str(flag)})
    else:
        dimension_scores["health"]   = 50
        dimension_verdicts["health"] = "NO_DATA"
        dimension_results["health"]  = {"confidence": "LOW"}

    # Environment
    if env_result:
        e_score = env_result.get("environment_score", env_result.get("score", 50))
        dimension_results["environment"]  = env_result
        dimension_scores["environment"]   = e_score
        dimension_verdicts["environment"] = env_result.get("verdict", "UNKNOWN")
        for flag in env_result.get("flags", []):
            k = str(flag)[:60].lower()
            if k not in seen_flags:
                seen_flags.add(k)
                all_flags.append({"dimension": "environment", "flag": str(flag)})
    else:
        dimension_scores["environment"]   = 50
        dimension_verdicts["environment"] = "NO_DATA"
        dimension_results["environment"]  = {"confidence": "LOW"}

    # Animal welfare
    if animal_result:
        a_score = animal_result.get("animal_welfare_score", animal_result.get("score", 50))
        dimension_results["animal_welfare"]  = animal_result
        dimension_scores["animal_welfare"]   = a_score
        dimension_verdicts["animal_welfare"] = animal_result.get("verdict", "UNKNOWN")
        for flag in animal_result.get("flags", []):
            k = str(flag)[:60].lower()
            if k not in seen_flags:
                seen_flags.add(k)
                all_flags.append({"dimension": "animal_welfare", "flag": str(flag)})
    else:
        dimension_scores["animal_welfare"]   = 50
        dimension_verdicts["animal_welfare"] = "NO_DATA"
        dimension_results["animal_welfare"]  = {"confidence": "LOW"}

    # Governance
    if gov_result:
        g_score = gov_result.get("governance_score", gov_result.get("score", 50))
        dimension_results["governance"]  = gov_result
        dimension_scores["governance"]   = g_score
        dimension_verdicts["governance"] = gov_result.get("verdict", "UNKNOWN")
        for flag in gov_result.get("flags", []):
            k = str(flag)[:60].lower()
            if k not in seen_flags:
                seen_flags.add(k)
                all_flags.append({"dimension": "governance", "flag": str(flag)})
    else:
        dimension_scores["governance"]   = 50
        dimension_verdicts["governance"] = "NO_DATA"
        dimension_results["governance"]  = {"confidence": "LOW"}

    # Weighted NOURA score
    noura_score = round(sum(
        dimension_scores[dim] * DIMENSION_WEIGHTS[dim]
        for dim in DIMENSION_WEIGHTS
    ))

    verdict, description = _get_verdict(noura_score)
    confidence           = _aggregate_confidence(dimension_results)

    # Governance reliability check
    gov_score     = dimension_scores.get("governance", 50)
    gov_conf      = dimension_results.get("governance", {}).get("confidence", "LOW")
    gov_reliable  = gov_conf in ("HIGH", "MEDIUM") and gov_score != 50

    # Dimension contribution breakdown
    contributions = {
        dim: round(dimension_scores[dim] * DIMENSION_WEIGHTS[dim], 1)
        for dim in DIMENSION_WEIGHTS
    }

    print("     Health:      " + str(dimension_scores["health"]) + "/100")
    print("     Environment: " + str(dimension_scores["environment"]) + "/100")
    print("     Animals:     " + str(dimension_scores["animal_welfare"]) + "/100")
    print("     Governance:  " + str(dimension_scores["governance"]) + "/100")
    print("     NOURA score: " + str(noura_score) + "/100  [" + verdict + "]")

    return {
        "product":            product_name,
        "category":           category,
        "brand":              brand_name,
        "noura_score":        noura_score,
        "verdict":            verdict,
        "verdict_description":description,
        "dimension_scores":   dimension_scores,
        "dimension_verdicts": dimension_verdicts,
        "dimension_weights":  DIMENSION_WEIGHTS,
        "contributions":      contributions,
        "flags":              all_flags,
        "flag_count":         len(all_flags),
        "confidence":         confidence,
        "gov_reliable":       gov_reliable,
        "timestamp":          datetime.now().isoformat(),
    }


def print_noura_report(result):
    score   = result["noura_score"]
    verdict = result["verdict"]

    if score >= 85:   bar_char = "EXCELLENT"
    elif score >= 70: bar_char = "GOOD     "
    elif score >= 55: bar_char = "MODERATE "
    elif score >= 40: bar_char = "CAUTION  "
    else:             bar_char = "HIGH RISK"

    bar_filled = round(score / 5)
    bar        = "#" * bar_filled + "-" * (20 - bar_filled)

    print("")
    print("=" * 62)
    print("  NOURA SCORE - " + result["product"].upper())
    print("=" * 62)
    print("  [" + bar + "] " + str(score) + "/100")
    print("  Verdict:    " + verdict + " - " + result["verdict_description"])
    print("  Brand:      " + (result["brand"] or "unknown"))
    print("  Category:   " + result["category"])
    print("  Confidence: " + result["confidence"])
    print("")
    print("  Dimension breakdown:")
    dims = [
        ("Health",      "health",         "50%"),
        ("Environment", "environment",    "25%"),
        ("Animals",     "animal_welfare", "10%"),
        ("Governance",  "governance",     "15%"),
    ]
    for label, key, weight in dims:
        s    = result["dimension_scores"][key]
        v    = result["dimension_verdicts"][key]
        cont = result["contributions"][key]
        bar2 = "#" * round(s / 10) + "-" * (10 - round(s / 10))
        print("    " + label.ljust(12) + " [" + bar2 + "] " +
              str(s).rjust(3) + "/100  (" + weight + " -> +" + str(cont) + "pts)  " + v)

    if result["flags"]:
        print("")
        print("  Top concerns (" + str(result["flag_count"]) + " total):")
        shown = {}
        count = 0
        for f in result["flags"]:
            dim = f["dimension"]
            if dim not in shown:
                shown[dim] = 0
            if shown[dim] < 2 and count < 6:
                print("     [" + dim.upper()[:3] + "] " + str(f["flag"])[:72])
                shown[dim] += 1
                count += 1
    else:
        print("")
        print("  No concerns flagged across all dimensions")

    if not result["gov_reliable"]:
        print("")
        print("  Note: Governance score neutral (50) - brand not in ownership database")

    print("=" * 62)
    print("")


print("Master aggregator loaded")
print("Weights: Health=" + str(int(DIMENSION_WEIGHTS["health"]*100)) + "% " +
      "Environment=" + str(int(DIMENSION_WEIGHTS["environment"]*100)) + "% " +
      "Animals=" + str(int(DIMENSION_WEIGHTS["animal_welfare"]*100)) + "% " +
      "Governance=" + str(int(DIMENSION_WEIGHTS["governance"]*100)) + "%")
"""

with open('/content/noura_aggregator.py', 'w', encoding='utf-8') as f:
    f.write(aggregator_code)

exec(aggregator_code)
print("Cell 37 complete - master aggregator written and loaded")

Master aggregator loaded
Weights: Health=50% Environment=25% Animals=10% Governance=15%
Cell 37 complete - master aggregator written and loaded


In [None]:
# NOURA 5-Product Validation - Cell 38
# Tests master aggregator across all 4 dimensions on 5 real products
# Also validates governance weight reliability

validation_code = """
import sys
sys.path.insert(0, '/content')

from noura_animal_welfare_engine import calculate_animal_welfare_score
from noura_governance_engine import calculate_governance_score

# Import aggregator
exec(open('/content/noura_aggregator.py').read())

print("=" * 62)
print("  NOURA 5-PRODUCT VALIDATION")
print("  Weights: Health 50% | Env 25% | Animals 10% | Gov 15%")
print("=" * 62)
print("")

# -- PRODUCT 1: Dr Bronner Pure Castile Soap ------------------
# Expected: very high score - certified across all dimensions
print("PRODUCT 1: Dr Bronner Pure Castile Soap (cleaning)")
print("-" * 50)

p1_animal = calculate_animal_welfare_score(
    product_name="Dr Bronner Pure Castile Soap",
    ingredients=["water", "coconut oil", "potassium hydroxide", "glycerin", "hemp oil"],
    category="cleaning",
    brand_name="dr bronner",
)
p1_gov = calculate_governance_score(
    product_name="Dr Bronner Pure Castile Soap",
    ingredients=["water", "coconut oil", "potassium hydroxide", "glycerin", "hemp oil"],
    category="cleaning",
    brand_name="dr bronner",
)
p1 = calculate_noura_score(
    product_name="Dr Bronner Pure Castile Soap",
    category="cleaning",
    brand_name="dr bronner",
    health_result   ={"health_score": 88, "verdict": "CLEAN",      "confidence": "HIGH", "flags": [], "score": 88},
    env_result      ={"environment_score": 85, "verdict": "CLEAN", "confidence": "HIGH", "flags": [], "score": 85},
    animal_result   =p1_animal,
    gov_result      =p1_gov,
)
print_noura_report(p1)

# -- PRODUCT 2: CeraVe Moisturising Cream ---------------------
# Expected: moderate score - good health profile but L'Oreal governance drag
print("PRODUCT 2: CeraVe Moisturising Cream (cosmetics)")
print("-" * 50)

p2_animal = calculate_animal_welfare_score(
    product_name="CeraVe Moisturising Cream",
    ingredients=["aqua", "glycerin", "cetearyl alcohol", "petrolatum",
                 "dimethicone", "niacinamide", "sodium hyaluronate"],
    category="cosmetics",
    brand_name="cerave",
)
p2_gov = calculate_governance_score(
    product_name="CeraVe Moisturising Cream",
    ingredients=["aqua", "glycerin", "cetearyl alcohol", "petrolatum", "dimethicone"],
    category="cosmetics",
    brand_name="cerave",
)
p2 = calculate_noura_score(
    product_name="CeraVe Moisturising Cream",
    category="cosmetics",
    brand_name="cerave",
    health_result   ={"health_score": 72, "verdict": "GOOD",        "confidence": "HIGH", "flags": ["petrolatum: petroleum-derived occlusive"], "score": 72},
    env_result      ={"environment_score": 58, "verdict": "ACCEPTABLE", "confidence": "MEDIUM", "flags": ["petrolatum: low biodegradability", "dimethicone: persistent silicone"], "score": 58},
    animal_result   =p2_animal,
    gov_result      =p2_gov,
)
print_noura_report(p2)

# -- PRODUCT 3: Chanel No5 Body Lotion ------------------------
# Expected: low score - China testing, animal ingredients, luxury governance
print("PRODUCT 3: Chanel No5 Body Lotion (cosmetics)")
print("-" * 50)

p3_animal = calculate_animal_welfare_score(
    product_name="Chanel No5 Body Lotion",
    ingredients=["aqua", "glycerin", "cetearyl alcohol", "lanolin",
                 "beeswax", "fragrance", "carmine"],
    category="cosmetics",
    brand_name="chanel",
)
p3_gov = calculate_governance_score(
    product_name="Chanel No5 Body Lotion",
    ingredients=["aqua", "glycerin", "cetearyl alcohol", "lanolin", "beeswax"],
    category="cosmetics",
    brand_name="chanel",
)
p3 = calculate_noura_score(
    product_name="Chanel No5 Body Lotion",
    category="cosmetics",
    brand_name="chanel",
    health_result   ={"health_score": 61, "verdict": "ACCEPTABLE", "confidence": "HIGH", "flags": ["fragrance: undisclosed allergens", "lanolin: potential allergen"], "score": 61},
    env_result      ={"environment_score": 45, "verdict": "CAUTION", "confidence": "MEDIUM", "flags": ["fragrance: synthetic VOC risk", "packaging: non-recyclable"], "score": 45},
    animal_result   =p3_animal,
    gov_result      =p3_gov,
)
print_noura_report(p3)

# -- PRODUCT 4: Oatly Oat Drink -------------------------------
# Expected: high score - vegan, plant-based, B Corp, but Blackstone ownership drag
print("PRODUCT 4: Oatly Oat Drink Organic (food)")
print("-" * 50)

p4_animal = calculate_animal_welfare_score(
    product_name="Oatly Oat Drink Organic",
    ingredients=["oats", "water", "rapeseed oil", "calcium carbonate", "salt"],
    category="food",
    brand_name="oatly",
)
p4_gov = calculate_governance_score(
    product_name="Oatly Oat Drink Organic",
    ingredients=["oats", "water", "rapeseed oil", "calcium carbonate", "salt"],
    category="food",
    brand_name="oatly",
)
p4 = calculate_noura_score(
    product_name="Oatly Oat Drink Organic",
    category="food",
    brand_name="oatly",
    health_result   ={"health_score": 80, "verdict": "GOOD",  "confidence": "HIGH", "flags": [], "score": 80},
    env_result      ={"environment_score": 82, "verdict": "GOOD", "confidence": "HIGH", "flags": [], "score": 82},
    animal_result   =p4_animal,
    gov_result      =p4_gov,
)
print_noura_report(p4)

# -- PRODUCT 5: Tony Chocolonely Dark Chocolate ---------------
# Expected: highest food score - Fair Trade, mission-led, full traceability
print("PRODUCT 5: Tony Chocolonely Dark Chocolate (food)")
print("-" * 50)

p5_animal = calculate_animal_welfare_score(
    product_name="Tony Chocolonely Dark Chocolate",
    ingredients=["cocoa mass", "sugar", "cocoa butter", "vanilla"],
    category="food",
    brand_name="tony chocolonely",
)
p5_gov = calculate_governance_score(
    product_name="Tony Chocolonely Dark Chocolate",
    ingredients=["cocoa mass", "sugar", "cocoa butter", "vanilla"],
    category="food",
    brand_name="tony chocolonely",
)
p5 = calculate_noura_score(
    product_name="Tony Chocolonely Dark Chocolate",
    category="food",
    brand_name="tony chocolonely",
    health_result   ={"health_score": 74, "verdict": "GOOD",  "confidence": "HIGH", "flags": ["sugar: high sugar content"], "score": 74},
    env_result      ={"environment_score": 78, "verdict": "GOOD", "confidence": "HIGH", "flags": [], "score": 78},
    animal_result   =p5_animal,
    gov_result      =p5_gov,
)
print_noura_report(p5)


# -- VALIDATION SUMMARY ---------------------------------------
print("=" * 62)
print("  VALIDATION SUMMARY")
print("=" * 62)
products = [p1, p2, p3, p4, p5]
names    = ["Dr Bronner", "CeraVe", "Chanel No5", "Oatly", "Tony Choc"]
for p, name in zip(products, names):
    gov_note = " [gov reliable]" if p["gov_reliable"] else " [gov neutral]"
    print("  " + name.ljust(16) + str(p["noura_score"]).rjust(3) + "/100  " +
          p["verdict"].ljust(12) + gov_note)

print("")
print("Governance reliability:")
gov_reliable_count = sum(1 for p in products if p["gov_reliable"])
print("  " + str(gov_reliable_count) + "/5 products have reliable governance scores")
if gov_reliable_count >= 4:
    print("  VERDICT: Governance at 15% weight is JUSTIFIED - signals firing reliably")
elif gov_reliable_count >= 2:
    print("  VERDICT: Governance partially reliable - monitor, consider 10% cap if coverage doesnt improve")
else:
    print("  VERDICT: Governance coverage too low - recommend 10% cap until database expands")

print("")
print("Sanity checks:")
print("-" * 45)
assert p1["noura_score"] > p3["noura_score"], "Dr Bronner should outscore Chanel"
print("  PASS: Dr Bronner (" + str(p1["noura_score"]) + ") > Chanel (" + str(p3["noura_score"]) + ")")

assert p5["noura_score"] > p2["noura_score"], "Tony Chocolonely should outscore CeraVe"
print("  PASS: Tony Choc (" + str(p5["noura_score"]) + ") > CeraVe (" + str(p2["noura_score"]) + ")")

assert p1["noura_score"] >= 75, "Dr Bronner should score at least 75"
print("  PASS: Dr Bronner scores " + str(p1["noura_score"]) + "/100")

assert p3["noura_score"] < 60, "Chanel (China + carmine + luxury governance) should score below 60"
print("  PASS: Chanel scores " + str(p3["noura_score"]) + "/100 - below 60")

assert p4["noura_score"] > p2["noura_score"], "Oatly should outscore CeraVe"
print("  PASS: Oatly (" + str(p4["noura_score"]) + ") > CeraVe (" + str(p2["noura_score"]) + ")")

print("")
print("ALL VALIDATION TESTS PASSED")
print("NOURA 4-dimension scoring system operational")
print("Weights confirmed: Health 50% | Env 25% | Animals 10% | Gov 15%")
"""

with open('/content/noura_validation.py', 'w', encoding='utf-8') as f:
    f.write(validation_code)

exec(validation_code)
print("Cell 38 complete")

Running Animal Welfare Engine self-test...

  Analysing animal welfare: ELF Hydrating Camo Concealer
     Category: cosmetics | Brand: elf cosmetics | Ingredients: 7
     Cruelty-free... score: 100
     Animal ingredients... score: 95 (LIKELY VEGAN)
     Vegan certification... score: 100
     China testing risk... score: 95

  NOURA ANIMAL WELFARE - ELF HYDRATING CAMO CONCEALER
  Score:      98/100  [CLEAN]
  Brand:      elf cosmetics
  Category:   cosmetics
  Confidence: HIGH

  Signal breakdown:
    cruelty_free          100/100  (weight: 40%)
    animal_ingredients     95/100  (weight: 25%)
    vegan_certified       100/100  (weight: 25%)
    china_testing_risk     95/100  (weight: 10%)

  Concerns (2):
     - [LOW] glycerin: Plant (palm, soy, coconut) or animal (tallow) derived -> Plant-c
     - [LOW] sodium hyaluronate: Salt form of hyaluronic acid - mostly biofermentation 

  Analysing animal welfare: Chanel No5 Body Lotion
     Category: cosmetics | Brand: chanel | Ingredients: 

In [None]:
# NOURA API Wrapper - Cell 39
# Three layers:
#   1. score_product() - callable Python function (use anywhere in notebook)
#   2. Flask REST API  - POST /api/v1/score  (for frontend / mobile)
#   3. ngrok tunnel    - public HTTPS URL (optional, requires pyngrok)

api_code = """
import sys
import json
import time
import traceback
from datetime import datetime

sys.path.insert(0, '/content')

# -- Engine imports -------------------------------------------------------------
# Import what exists; stub what doesn't so the API never crashes
_ENGINES = {}

try:
    from noura_animal_welfare_engine import calculate_animal_welfare_score
    _ENGINES["animal_welfare"] = True
except Exception as e:
    print("  [WARN] Animal welfare engine not loaded: " + str(e))
    _ENGINES["animal_welfare"] = False

try:
    from noura_governance_engine import calculate_governance_score
    _ENGINES["governance"] = True
except Exception as e:
    print("  [WARN] Governance engine not loaded: " + str(e))
    _ENGINES["governance"] = False

try:
    from noura_health_engine import calculate_health_score
    _ENGINES["health"] = True
except Exception as e:
    _ENGINES["health"] = False

try:
    from noura_environment_engine import calculate_environment_score
    _ENGINES["environment"] = True
except Exception as e:
    _ENGINES["environment"] = False

exec(open('/content/noura_aggregator.py').read())


# -- Stub fallback --------------------------------------------------------------
def _stub_result(dimension, score=50, note="Engine not loaded"):
    return {
        dimension + "_score": score,
        "score":              score,
        "verdict":            "NO_DATA",
        "confidence":         "LOW",
        "flags":              [note],
        "stub":               True,
    }


# -- Input validation -----------------------------------------------------------
VALID_CATEGORIES = {"cosmetics", "cleaning", "food", "baby"}

def _validate_input(payload):
    errors = []
    if not payload.get("product_name"):
        errors.append("product_name is required")
    if not payload.get("category"):
        errors.append("category is required")
    elif payload["category"] not in VALID_CATEGORIES:
        errors.append("category must be one of: " + ", ".join(sorted(VALID_CATEGORIES)))
    ingredients = payload.get("ingredients", [])
    if not isinstance(ingredients, list):
        errors.append("ingredients must be a list of strings")
    elif len(ingredients) == 0:
        errors.append("ingredients list is empty - provide at least one ingredient")
    elif len(ingredients) > 200:
        errors.append("ingredients list exceeds 200 items")
    return errors


# -- Core scoring function ------------------------------------------------------
def score_product(
    product_name,
    ingredients,
    category,
    brand_name          = "",
    health_override     = None,
    environment_override= None,
    include_raw         = False,
):
    # score_product(product_name, ingredients, category, brand_name="",
    #               health_override=None, environment_override=None, include_raw=False)
    # Returns: dict with noura_score, verdict, dimension_scores, flags, confidence
    t_start = time.time()

    # Validate
    errors = _validate_input({
        "product_name": product_name,
        "ingredients":  ingredients,
        "category":     category,
    })
    if errors:
        return {
            "success": False,
            "errors":  errors,
            "product": product_name,
        }

    brand_name = (brand_name or "").strip().lower()
    raw        = {}

    # Health
    if health_override:
        health_result = health_override
        raw["health"] = health_override
    elif _ENGINES["health"]:
        health_result = calculate_health_score(product_name, ingredients, category, brand_name)
        raw["health"] = health_result
    else:
        health_result = _stub_result("health", 50, "Health engine pending - score neutral")
        raw["health"] = health_result

    # Environment
    if environment_override:
        env_result = environment_override
        raw["environment"] = environment_override
    elif _ENGINES["environment"]:
        env_result = calculate_environment_score(product_name, ingredients, category, brand_name)
        raw["environment"] = env_result
    else:
        env_result = _stub_result("environment", 50, "Environment engine pending - score neutral")
        raw["environment"] = env_result

    # Animal welfare
    if _ENGINES["animal_welfare"]:
        animal_result = calculate_animal_welfare_score(product_name, ingredients, category, brand_name)
        raw["animal_welfare"] = animal_result
    else:
        animal_result = _stub_result("animal_welfare", 50, "Animal welfare engine not loaded")
        raw["animal_welfare"] = animal_result

    # Governance
    if _ENGINES["governance"]:
        gov_result = calculate_governance_score(product_name, ingredients, category, brand_name)
        raw["governance"] = gov_result
    else:
        gov_result = _stub_result("governance", 50, "Governance engine not loaded")
        raw["governance"] = gov_result

    # Aggregate
    result = calculate_noura_score(
        product_name     = product_name,
        category         = category,
        brand_name       = brand_name,
        health_result    = health_result,
        env_result       = env_result,
        animal_result    = animal_result,
        gov_result       = gov_result,
    )

    elapsed = round((time.time() - t_start) * 1000)

    # Build response
    response = {
        "success":          True,
        "api_version":      "1.0.0",
        "engines_loaded":   _ENGINES,
        "product":          product_name,
        "brand":            brand_name,
        "category":         category,
        "ingredients_count":len(ingredients),
        "noura_score":      result["noura_score"],
        "verdict":          result["verdict"],
        "verdict_description": result["verdict_description"],
        "confidence":       result["confidence"],
        "dimension_scores": result["dimension_scores"],
        "dimension_weights":{k: str(int(v*100)) + "%" for k, v in result["dimension_weights"].items()},
        "contributions":    result["contributions"],
        "flags":            result["flags"],
        "flag_count":       result["flag_count"],
        "gov_reliable":     result["gov_reliable"],
        "scored_at":        result["timestamp"],
        "response_ms":      elapsed,
    }

    if include_raw:
        response["raw_dimensions"] = raw

    return response


# -- Flask REST API -------------------------------------------------------------
def start_api(port=5000, use_ngrok=False, ngrok_token=None):
    # start_api(port=5000, use_ngrok=False, ngrok_token=None)
    # Starts Flask REST server. Endpoints: GET /api/v1/health, GET /api/v1/engines, POST /api/v1/score
    try:
        from flask import Flask, request, jsonify
    except ImportError:
        print("Flask not installed. Run: pip install flask --quiet")
        return

    app = Flask("NOURA_API")

    @app.route("/api/v1/health", methods=["GET"])
    def health_check():
        return jsonify({
            "status":       "ok",
            "api_version":  "1.0.0",
            "engines":      _ENGINES,
            "weights":      {k: str(int(v*100))+"%" for k,v in DIMENSION_WEIGHTS.items()},
            "timestamp":    datetime.now().isoformat(),
        })

    @app.route("/api/v1/engines", methods=["GET"])
    def engines():
        return jsonify({
            "loaded":   [k for k,v in _ENGINES.items() if v],
            "missing":  [k for k,v in _ENGINES.items() if not v],
            "coverage": str(round(sum(_ENGINES.values()) / len(_ENGINES) * 100)) + "%",
        })

    @app.route("/api/v1/score", methods=["POST"])
    def score():
        try:
            body = request.get_json(force=True)
            if not body:
                return jsonify({"success": False, "errors": ["Request body must be JSON"]}), 400

            result = score_product(
                product_name          = body.get("product_name", ""),
                ingredients           = body.get("ingredients", []),
                category              = body.get("category", ""),
                brand_name            = body.get("brand_name", ""),
                include_raw           = body.get("include_raw", False),
            )

            status = 200 if result.get("success") else 422
            return jsonify(result), status

        except Exception as e:
            return jsonify({
                "success": False,
                "errors":  [str(e)],
                "trace":   traceback.format_exc(),
            }), 500

    # ngrok tunnel (optional)
    public_url = None
    if use_ngrok:
        try:
            from pyngrok import ngrok, conf
            if ngrok_token:
                conf.get_default().auth_token = ngrok_token
            public_url = ngrok.connect(port).public_url
            print("")
            print("NOURA API public URL: " + public_url)
            print("Score endpoint:       " + public_url + "/api/v1/score")
            print("Health check:         " + public_url + "/api/v1/health")
        except Exception as e:
            print("[WARN] ngrok failed: " + str(e))

    print("")
    print("Starting NOURA Flask API on port " + str(port) + " ...")
    print("Local:  http://localhost:" + str(port) + "/api/v1/score")
    if public_url:
        print("Public: " + public_url + "/api/v1/score")
    print("Press Ctrl+C or interrupt kernel to stop")
    print("")

    app.run(host="0.0.0.0", port=port, debug=False, use_reloader=False)


# -- Quick test of callable function -------------------------------------------
print("NOURA API loaded")
print("Engines available: " + ", ".join([k for k,v in _ENGINES.items() if v]))
print("Engines stubbed:   " + ", ".join([k for k,v in _ENGINES.items() if not v]))
print("")
print("Running API quick test...")

_test = score_product(
    product_name = "Dr Bronner Pure Castile Soap",
    ingredients  = ["water", "coconut oil", "potassium hydroxide", "glycerin", "hemp oil"],
    category     = "cleaning",
    brand_name   = "dr bronner",
    health_override = {
        "health_score": 88, "score": 88, "verdict": "CLEAN",
        "confidence": "HIGH", "flags": [],
    },
    environment_override = {
        "environment_score": 85, "score": 85, "verdict": "CLEAN",
        "confidence": "HIGH", "flags": [],
    },
)

assert _test["success"] == True,             "API call failed"
assert _test["noura_score"] == 89,           "Score mismatch: " + str(_test["noura_score"])
assert _test["verdict"] == "EXCELLENT",      "Verdict mismatch"
assert _test["confidence"] == "HIGH",        "Confidence mismatch"
assert _test["response_ms"] < 5000,          "Too slow"
assert _test["ingredients_count"] == 5,      "Ingredient count wrong"
assert "animal_welfare" in _test["dimension_scores"], "Missing dimension"
assert "governance"     in _test["dimension_scores"], "Missing dimension"

print("  PASS: Dr Bronner -> " + str(_test["noura_score"]) + "/100  [" + _test["verdict"] + "]")
print("  PASS: Response time " + str(_test["response_ms"]) + "ms")
print("  PASS: All dimensions present")
print("  PASS: Engines loaded dict: " + str(_test["engines_loaded"]))
print("")

# Validation error test
_bad = score_product(
    product_name = "",
    ingredients  = [],
    category     = "invalid_cat",
    brand_name   = "",
)
assert _bad["success"] == False,       "Should fail on bad input"
assert len(_bad["errors"]) > 0,        "Should have error messages"
print("  PASS: Validation errors caught: " + str(_bad["errors"]))
print("")
print("API self-test complete")
print("")
print("To start the REST server:")
print("  start_api(port=5000)")
print("")
print("To start with ngrok public URL:")
print("  start_api(port=5000, use_ngrok=True, ngrok_token='YOUR_TOKEN')")
print("")
print("To call score_product() directly:")
print("  result = score_product(product_name=..., ingredients=[...], category=..., brand_name=...)")
print("  print(result['noura_score'], result['verdict'])")
"""

with open('/content/noura_api.py', 'w', encoding='utf-8') as f:
    f.write(api_code)

exec(api_code)
print("Cell 39 complete - noura_api.py written and loaded")

Master aggregator loaded
Weights: Health=50% Environment=25% Animals=10% Governance=15%
NOURA API loaded
Engines available: animal_welfare, governance
Engines stubbed:   health, environment

Running API quick test...
  Analysing animal welfare: Dr Bronner Pure Castile Soap
     Category: cleaning | Brand: dr bronner | Ingredients: 5
     Cruelty-free... score: 100
     Animal ingredients... score: 97 (LIKELY VEGAN)
     Brand ethics... score: 98
  Analysing governance: Dr Bronner Pure Castile Soap
     Category: cleaning | Brand: dr bronner
     B Corp... score: 100 (certified)
     Ownership... score: 100 (Family-owned B Corp)
     Supply chain... score: 95
     Labor... score: 65 (stub)
  Calculating NOURA score: Dr Bronner Pure Castile Soap
     Health:      88/100
     Environment: 85/100
     Animals:     99/100
     Governance:  94/100
     NOURA score: 89/100  [EXCELLENT]
  PASS: Dr Bronner -> 89/100  [EXCELLENT]
  PASS: Response time 0ms
  PASS: All dimensions present
  PASS: E

In [None]:
normaliser_code = """

import re

SYNONYM_MAP = {

    # WATER VARIANTS
    "water":                        ("AQUA", "solvent", False, "Base solvent"),
    "aqua":                         ("AQUA", "solvent", False, "Base solvent"),
    "eau":                          ("AQUA", "solvent", False, "French: water"),
    "agua":                         ("AQUA", "solvent", False, "Spanish: water"),
    "wasser":                       ("AQUA", "solvent", False, "German: water"),
    "purified water":               ("AQUA", "solvent", False, "Purified base solvent"),
    "deionized water":              ("AQUA", "solvent", False, "DI water"),
    "distilled water":              ("AQUA", "solvent", False, "Distilled base"),
    "aqua/water/eau":               ("AQUA", "solvent", False, "Trilingual INCI label"),
    "water (aqua)":                 ("AQUA", "solvent", False, "Parenthetical variant"),

    # PETROLEUM / MINERAL DERIVATIVES
    "petrolatum":                   ("PETROLATUM", "petroleum", False, "Petroleum-derived occlusive"),
    "white petrolatum":             ("PETROLATUM", "petroleum", False, "Refined petrolatum"),
    "yellow petrolatum":            ("PETROLATUM", "petroleum", False, "Less refined petrolatum"),
    "petroleum jelly":              ("PETROLATUM", "petroleum", False, "Consumer name for petrolatum"),
    "vaseline":                     ("PETROLATUM", "petroleum", False, "Brand name = petrolatum"),
    "soft paraffin":                ("PETROLATUM", "petroleum", False, "UK pharmacopoeia name"),
    "white soft paraffin":          ("PETROLATUM", "petroleum", False, "BP/pharmacopoeia name"),
    "paraffinum molle":             ("PETROLATUM", "petroleum", False, "Latin INCI variant"),
    "mineral oil":                  ("PARAFFINUM LIQUIDUM", "petroleum", False, "Mineral oil = liquid paraffin"),
    "paraffinum liquidum":          ("PARAFFINUM LIQUIDUM", "petroleum", False, "INCI canonical"),
    "liquid paraffin":              ("PARAFFINUM LIQUIDUM", "petroleum", False, "UK name"),
    "light mineral oil":            ("PARAFFINUM LIQUIDUM", "petroleum", False, "Light grade"),
    "heavy mineral oil":            ("PARAFFINUM LIQUIDUM", "petroleum", False, "Heavy grade"),
    "huile minerale":               ("PARAFFINUM LIQUIDUM", "petroleum", False, "French: mineral oil"),
    "paraffin":                     ("PARAFFIN", "petroleum", False, "Solid wax paraffin"),
    "paraffin wax":                 ("PARAFFIN", "petroleum", False, "Solid wax"),
    "microcrystalline wax":         ("CERA MICROCRISTALLINA", "petroleum", False, "Refined wax"),
    "cera microcristallina":        ("CERA MICROCRISTALLINA", "petroleum", False, "INCI canonical"),

    # SILICONES
    "dimethicone":                  ("DIMETHICONE", "silicone", False, "Polydimethylsiloxane"),
    "polydimethylsiloxane":         ("DIMETHICONE", "silicone", False, "Chemical name"),
    "pdms":                         ("DIMETHICONE", "silicone", False, "Abbreviation"),
    "dimethylpolysiloxane":         ("DIMETHICONE", "silicone", False, "Variant name"),
    "cyclomethicone":               ("CYCLOMETHICONE", "silicone", False, "Cyclic silicone blend"),
    "cyclopentasiloxane":           ("CYCLOPENTASILOXANE", "silicone", False, "D5 - EU restricted"),
    "d5":                           ("CYCLOPENTASILOXANE", "silicone", False, "Industry abbreviation"),
    "cyclotetrasiloxane":           ("CYCLOTETRASILOXANE", "silicone", False, "D4 - EU restricted"),
    "d4":                           ("CYCLOTETRASILOXANE", "silicone", False, "Industry abbreviation"),
    "phenyl trimethicone":          ("PHENYL TRIMETHICONE", "silicone", False, "Shine silicone"),
    "amodimethicone":               ("AMODIMETHICONE", "silicone", False, "Amino-modified silicone"),
    "dimethiconol":                 ("DIMETHICONOL", "silicone", False, "Hydroxy-terminated silicone"),

    # SURFACTANTS
    "sodium lauryl sulfate":        ("SODIUM LAURYL SULFATE", "surfactant", False, "SLS - strong anionic"),
    "sls":                          ("SODIUM LAURYL SULFATE", "surfactant", False, "Abbreviation"),
    "sodium lauryl sulphate":       ("SODIUM LAURYL SULFATE", "surfactant", False, "UK spelling"),
    "sodium dodecyl sulfate":       ("SODIUM LAURYL SULFATE", "surfactant", False, "Chemical name"),
    "sodium laureth sulfate":       ("SODIUM LAURETH SULFATE", "surfactant", False, "SLES - ethoxylated"),
    "sles":                         ("SODIUM LAURETH SULFATE", "surfactant", False, "Abbreviation"),
    "sodium laureth sulphate":      ("SODIUM LAURETH SULFATE", "surfactant", False, "UK spelling"),
    "ammonium lauryl sulfate":      ("AMMONIUM LAURYL SULFATE", "surfactant", False, "ALS"),
    "ammonium laureth sulfate":     ("AMMONIUM LAURETH SULFATE", "surfactant", False, "ALES"),
    "cocamidopropyl betaine":       ("COCAMIDOPROPYL BETAINE", "surfactant", False, "Amphoteric, mild"),
    "capb":                         ("COCAMIDOPROPYL BETAINE", "surfactant", False, "Abbreviation"),
    "cocamide dea":                 ("COCAMIDE DEA", "surfactant", False, "Foam booster"),
    "cocamide mea":                 ("COCAMIDE MEA", "surfactant", False, "Foam booster"),
    "decyl glucoside":              ("DECYL GLUCOSIDE", "surfactant", False, "Mild sugar-based"),
    "coco glucoside":               ("COCO GLUCOSIDE", "surfactant", False, "Mild sugar-based"),
    "lauryl glucoside":             ("LAURYL GLUCOSIDE", "surfactant", False, "Mild sugar-based"),

    # PRESERVATIVES (most are ANCHOR ingredients - signal ~1% boundary)
    "phenoxyethanol":               ("PHENOXYETHANOL", "preservative", True, "1% max EU - anchor"),
    "2-phenoxyethanol":             ("PHENOXYETHANOL", "preservative", True, "Chemical name"),
    "methylparaben":                ("METHYLPARABEN", "preservative", True, "Paraben - anchor"),
    "methyl paraben":               ("METHYLPARABEN", "preservative", True, "Space variant"),
    "methyl 4-hydroxybenzoate":     ("METHYLPARABEN", "preservative", True, "Chemical name"),
    "ethylparaben":                 ("ETHYLPARABEN", "preservative", True, "Paraben - anchor"),
    "propylparaben":                ("PROPYLPARABEN", "preservative", True, "Paraben - anchor"),
    "butylparaben":                 ("BUTYLPARABEN", "preservative", True, "Paraben - anchor"),
    "benzyl alcohol":               ("BENZYL ALCOHOL", "preservative", True, "Preservative - anchor"),
    "sodium benzoate":              ("SODIUM BENZOATE", "preservative", True, "Preservative - anchor"),
    "potassium sorbate":            ("POTASSIUM SORBATE", "preservative", True, "Preservative - anchor"),
    "sorbic acid":                  ("SORBIC ACID", "preservative", True, "Preservative - anchor"),
    "benzoic acid":                 ("BENZOIC ACID", "preservative", True, "Preservative - anchor"),
    "dehydroacetic acid":           ("DEHYDROACETIC ACID", "preservative", True, "Preservative - anchor"),
    "ethylhexylglycerin":           ("ETHYLHEXYLGLYCERIN", "preservative", True, "Booster - anchor"),
    "caprylyl glycol":              ("CAPRYLYL GLYCOL", "preservative", True, "Booster - anchor"),
    "chlorphenesin":                ("CHLORPHENESIN", "preservative", True, "Preservative - anchor"),
    "disodium edta":                ("DISODIUM EDTA", "chelating", True, "Chelator - trace - anchor"),
    "tetrasodium edta":             ("TETRASODIUM EDTA", "chelating", True, "Chelator - anchor"),
    "edta":                         ("DISODIUM EDTA", "chelating", True, "Abbreviation"),

    # HUMECTANTS / EMOLLIENTS
    "glycerin":                     ("GLYCERIN", "humectant", False, "Humectant"),
    "glycerol":                     ("GLYCERIN", "humectant", False, "Chemical name"),
    "glycerine":                    ("GLYCERIN", "humectant", False, "British spelling"),
    "propylene glycol":             ("PROPYLENE GLYCOL", "humectant", False, "Humectant"),
    "1,2-propanediol":              ("PROPYLENE GLYCOL", "humectant", False, "Chemical name"),
    "butylene glycol":              ("BUTYLENE GLYCOL", "humectant", False, "Humectant"),
    "1,3-butanediol":               ("BUTYLENE GLYCOL", "humectant", False, "Chemical name"),
    "pentylene glycol":             ("PENTYLENE GLYCOL", "humectant", False, "Humectant/preservative"),
    "sodium hyaluronate":           ("SODIUM HYALURONATE", "humectant", False, "HA salt"),
    "hyaluronic acid":              ("SODIUM HYALURONATE", "humectant", False, "Acid form"),
    "ha":                           ("SODIUM HYALURONATE", "humectant", False, "Abbreviation"),
    "shea butter":                  ("BUTYROSPERMUM PARKII BUTTER", "emollient", False, "Shea"),
    "butyrospermum parkii":         ("BUTYROSPERMUM PARKII BUTTER", "emollient", False, "INCI base"),
    "butyrospermum parkii butter":  ("BUTYROSPERMUM PARKII BUTTER", "emollient", False, "Canonical"),
    "coconut oil":                  ("COCOS NUCIFERA OIL", "emollient", False, "Coconut oil"),
    "cocos nucifera oil":           ("COCOS NUCIFERA OIL", "emollient", False, "INCI canonical"),
    "cocos nucifera (coconut) oil": ("COCOS NUCIFERA OIL", "emollient", False, "Parenthetical"),
    "jojoba oil":                   ("SIMMONDSIA CHINENSIS SEED OIL", "emollient", False, "Jojoba"),
    "simmondsia chinensis seed oil":("SIMMONDSIA CHINENSIS SEED OIL", "emollient", False, "Canonical"),
    "argan oil":                    ("ARGANIA SPINOSA KERNEL OIL", "emollient", False, "Argan"),
    "argania spinosa kernel oil":   ("ARGANIA SPINOSA KERNEL OIL", "emollient", False, "Canonical"),
    "aloe vera":                    ("ALOE BARBADENSIS LEAF JUICE", "emollient", False, "Aloe vera"),
    "aloe barbadensis":             ("ALOE BARBADENSIS LEAF JUICE", "emollient", False, "INCI base"),
    "aloe barbadensis leaf juice":  ("ALOE BARBADENSIS LEAF JUICE", "emollient", False, "Canonical"),
    "castor oil":                   ("RICINUS COMMUNIS SEED OIL", "emollient", False, "Castor oil"),
    "ricinus communis seed oil":    ("RICINUS COMMUNIS SEED OIL", "emollient", False, "Canonical"),

    # EMULSIFIERS
    "cetearyl alcohol":             ("CETEARYL ALCOHOL", "emulsifier", False, "Fatty alcohol"),
    "cetostearyl alcohol":          ("CETEARYL ALCOHOL", "emulsifier", False, "Pharmacopoeia name"),
    "cetyl alcohol":                ("CETYL ALCOHOL", "emulsifier", False, "Fatty alcohol"),
    "stearyl alcohol":              ("STEARYL ALCOHOL", "emulsifier", False, "Fatty alcohol"),
    "ceteareth-20":                 ("CETEARETH-20", "emulsifier", False, "Ethoxylated emulsifier"),
    "polysorbate 20":               ("POLYSORBATE 20", "emulsifier", False, "Solubiliser"),
    "polysorbate 80":               ("POLYSORBATE 80", "emulsifier", False, "Solubiliser"),
    "tween 20":                     ("POLYSORBATE 20", "emulsifier", False, "Brand name"),
    "tween 80":                     ("POLYSORBATE 80", "emulsifier", False, "Brand name"),
    "lecithin":                     ("LECITHIN", "emulsifier", False, "Soy or sunflower derived"),
    "soy lecithin":                 ("LECITHIN", "emulsifier", False, "Soy-derived"),
    "sunflower lecithin":           ("LECITHIN", "emulsifier", False, "Sunflower-derived"),

    # THICKENERS
    "carbomer":                     ("CARBOMER", "thickener", False, "Polyacrylic acid thickener"),
    "carbopol":                     ("CARBOMER", "thickener", False, "Brand name = carbomer"),
    "carboxypolymethylene":         ("CARBOMER", "thickener", False, "Chemical name"),
    "xanthan gum":                  ("XANTHAN GUM", "thickener", False, "Natural thickener"),
    "xanthan":                      ("XANTHAN GUM", "thickener", False, "Short name"),
    "hydroxyethylcellulose":        ("HYDROXYETHYLCELLULOSE", "thickener", False, "Cellulose thickener"),
    "hec":                          ("HYDROXYETHYLCELLULOSE", "thickener", False, "Abbreviation"),

    # UV FILTERS
    "zinc oxide":                   ("ZINC OXIDE", "uv_filter", False, "Mineral UV filter"),
    "titanium dioxide":             ("TITANIUM DIOXIDE", "uv_filter", False, "Mineral UV filter"),
    "ci 77891":                     ("TITANIUM DIOXIDE", "uv_filter", False, "Colorant code = TiO2"),
    "avobenzone":                   ("BUTYL METHOXYDIBENZOYLMETHANE", "uv_filter", False, "UVA filter"),
    "butyl methoxydibenzoylmethane":("BUTYL METHOXYDIBENZOYLMETHANE", "uv_filter", False, "Canonical"),
    "parsol 1789":                  ("BUTYL METHOXYDIBENZOYLMETHANE", "uv_filter", False, "Brand name"),
    "octinoxate":                   ("ETHYLHEXYL METHOXYCINNAMATE", "uv_filter", False, "US name"),
    "ethylhexyl methoxycinnamate":  ("ETHYLHEXYL METHOXYCINNAMATE", "uv_filter", False, "INCI canonical"),
    "oxybenzone":                   ("BENZOPHENONE-3", "uv_filter", False, "EU restricted"),
    "benzophenone-3":               ("BENZOPHENONE-3", "uv_filter", False, "INCI canonical"),
    "octocrylene":                  ("OCTOCRYLENE", "uv_filter", False, "UV filter"),

    # ANIMAL-DERIVED
    "lanolin":                      ("LANOLIN", "animal_derived", False, "Wool grease"),
    "wool fat":                     ("LANOLIN", "animal_derived", False, "Alternative name"),
    "wool grease":                  ("LANOLIN", "animal_derived", False, "Raw form"),
    "adeps lanae":                  ("LANOLIN", "animal_derived", False, "Latin/pharmacopoeia"),
    "lanolin alcohol":              ("LANOLIN ALCOHOL", "animal_derived", False, "Refined fraction"),
    "beeswax":                      ("CERA ALBA", "animal_derived", False, "Beeswax"),
    "cera alba":                    ("CERA ALBA", "animal_derived", False, "INCI canonical"),
    "white beeswax":                ("CERA ALBA", "animal_derived", False, "Bleached"),
    "yellow beeswax":               ("CERA FLAVA", "animal_derived", False, "Unbleached"),
    "cera flava":                   ("CERA FLAVA", "animal_derived", False, "INCI canonical"),
    "carmine":                      ("CI 75470", "animal_derived", False, "Crushed cochineal dye"),
    "ci 75470":                     ("CI 75470", "animal_derived", False, "Colorant code"),
    "cochineal":                    ("CI 75470", "animal_derived", False, "Source insect"),
    "carmines":                     ("CI 75470", "animal_derived", False, "Plural form"),
    "carminic acid":                ("CI 75470", "animal_derived", False, "Active compound"),
    "hydrolyzed silk":              ("HYDROLYZED SILK", "animal_derived", False, "Silk protein"),
    "silk amino acids":             ("HYDROLYZED SILK", "animal_derived", False, "Variant name"),
    "collagen":                     ("HYDROLYZED COLLAGEN", "animal_derived", False, "Usually bovine/marine"),
    "hydrolyzed collagen":          ("HYDROLYZED COLLAGEN", "animal_derived", False, "Canonical"),
    "marine collagen":              ("HYDROLYZED COLLAGEN", "animal_derived", False, "Fish-derived"),
    "gelatin":                      ("GELATIN", "animal_derived", False, "Bovine/porcine"),
    "gelatine":                     ("GELATIN", "animal_derived", False, "UK spelling"),
    "squalene":                     ("SQUALENE", "animal_derived", False, "Shark liver origin"),
    "squalane":                     ("SQUALANE", "animal_derived", False, "Shark or plant"),
    "shellac":                      ("SHELLAC", "animal_derived", False, "Lac insect resin"),
    "lac resin":                    ("SHELLAC", "animal_derived", False, "Alternative name"),
    "honey":                        ("MEL", "animal_derived", False, "Bee product"),
    "mel":                          ("MEL", "animal_derived", False, "INCI canonical for honey"),
    "propolis":                     ("PROPOLIS CERA", "animal_derived", False, "Bee product"),
    "musk":                         ("MUSK", "animal_derived", False, "Glandular or synthetic"),
    "civet":                        ("CIVET", "animal_derived", False, "Glandular secretion"),
    "ambergris":                    ("AMBERGRIS", "animal_derived", False, "Whale intestinal"),

    # FRAGRANCE - ANCHOR
    "fragrance":                    ("PARFUM", "fragrance", True, "Undisclosed blend - anchor"),
    "parfum":                       ("PARFUM", "fragrance", True, "INCI canonical - anchor"),
    "fragrance (parfum)":           ("PARFUM", "fragrance", True, "Bilingual label"),
    "aroma":                        ("PARFUM", "fragrance", True, "Alternative term - anchor"),
    "flavor":                       ("AROMA", "fragrance", True, "Food fragrance - anchor"),
    "flavour":                      ("AROMA", "fragrance", True, "UK spelling - anchor"),
    "natural fragrance":            ("PARFUM", "fragrance", True, "Still undisclosed - anchor"),

    # ACIDS / AHA / BHA
    "glycolic acid":                ("GLYCOLIC ACID", "exfoliant", False, "AHA"),
    "lactic acid":                  ("LACTIC ACID", "exfoliant", False, "AHA"),
    "mandelic acid":                ("MANDELIC ACID", "exfoliant", False, "AHA"),
    "citric acid":                  ("CITRIC ACID", "exfoliant", False, "AHA/pH adjuster"),
    "salicylic acid":               ("SALICYLIC ACID", "exfoliant", False, "BHA"),
    "bha":                          ("SALICYLIC ACID", "exfoliant", False, "Abbreviation"),
    "retinol":                      ("RETINOL", "retinoid", False, "OTC retinoid"),
    "vitamin a":                    ("RETINOL", "retinoid", False, "Common name"),
    "retinyl palmitate":            ("RETINYL PALMITATE", "retinoid", False, "Ester of retinol"),
    "tretinoin":                    ("TRETINOIN", "retinoid", False, "Prescription retinoid"),
    "retinoic acid":                ("TRETINOIN", "retinoid", False, "Chemical name"),

    # VITAMINS / ACTIVES
    "niacinamide":                  ("NIACINAMIDE", "active", False, "Vitamin B3"),
    "nicotinamide":                 ("NIACINAMIDE", "active", False, "Chemical name"),
    "vitamin b3":                   ("NIACINAMIDE", "active", False, "Common name"),
    "ascorbic acid":                ("ASCORBIC ACID", "active", False, "Vitamin C"),
    "vitamin c":                    ("ASCORBIC ACID", "active", False, "Common name"),
    "l-ascorbic acid":              ("ASCORBIC ACID", "active", False, "L-form"),
    "sodium ascorbyl phosphate":    ("SODIUM ASCORBYL PHOSPHATE", "active", False, "Stable vitamin C"),
    "tocopherol":                   ("TOCOPHEROL", "active", False, "Vitamin E"),
    "vitamin e":                    ("TOCOPHEROL", "active", False, "Common name"),
    "alpha tocopherol":             ("TOCOPHEROL", "active", False, "Most active form"),
    "tocopheryl acetate":           ("TOCOPHERYL ACETATE", "active", False, "Vitamin E ester"),
    "panthenol":                    ("PANTHENOL", "active", False, "Provitamin B5"),
    "provitamin b5":                ("PANTHENOL", "active", False, "Common name"),
    "dl-panthenol":                 ("PANTHENOL", "active", False, "Racemic form"),
    "allantoin":                    ("ALLANTOIN", "active", False, "Soothing agent"),
    "bisabolol":                    ("BISABOLOL", "active", False, "Chamomile-derived"),
    "alpha bisabolol":              ("BISABOLOL", "active", False, "Active isomer"),

    # COLORANTS
    "iron oxides":                  ("CI 77491", "colorant", False, "Iron oxide pigments"),
    "ci 77491":                     ("CI 77491", "colorant", False, "Red iron oxide"),
    "ci 77492":                     ("CI 77492", "colorant", False, "Yellow iron oxide"),
    "ci 77499":                     ("CI 77499", "colorant", False, "Black iron oxide"),
    "red 40":                       ("CI 16035", "colorant", False, "Allura red"),
    "allura red":                   ("CI 16035", "colorant", False, "Red 40 variant"),
    "ci 16035":                     ("CI 16035", "colorant", False, "Canonical"),
    "yellow 5":                     ("CI 19140", "colorant", False, "Tartrazine"),
    "tartrazine":                   ("CI 19140", "colorant", False, "Yellow 5"),
    "blue 1":                       ("CI 42090", "colorant", False, "Brilliant blue"),

    # FOOD INGREDIENTS
    "sugar":                        ("SUCROSE", "food", False, "Table sugar"),
    "sucrose":                      ("SUCROSE", "food", False, "Canonical"),
    "cane sugar":                   ("SUCROSE", "food", False, "Source-specific"),
    "glucose":                      ("GLUCOSE", "food", False, "Simple sugar"),
    "dextrose":                     ("GLUCOSE", "food", False, "D-glucose"),
    "fructose":                     ("FRUCTOSE", "food", False, "Fruit sugar"),
    "high fructose corn syrup":     ("HIGH FRUCTOSE CORN SYRUP", "food", False, "HFCS"),
    "hfcs":                         ("HIGH FRUCTOSE CORN SYRUP", "food", False, "Abbreviation"),
    "corn syrup":                   ("GLUCOSE SYRUP", "food", False, "Corn-derived"),
    "glucose syrup":                ("GLUCOSE SYRUP", "food", False, "Canonical"),
    "salt":                         ("SODIUM CHLORIDE", "food", True, "Anchor in cosmetics"),
    "sodium chloride":              ("SODIUM CHLORIDE", "food", True, "Canonical - anchor"),
    "sea salt":                     ("SODIUM CHLORIDE", "food", True, "Source-specific - anchor"),
    "oats":                         ("AVENA SATIVA", "food", False, "Oats"),
    "avena sativa":                 ("AVENA SATIVA", "food", False, "INCI canonical"),
    "cocoa butter":                 ("THEOBROMA CACAO SEED BUTTER", "food", False, "Cocoa butter"),
    "theobroma cacao seed butter":  ("THEOBROMA CACAO SEED BUTTER", "food", False, "Canonical"),
    "cocoa mass":                   ("THEOBROMA CACAO SEED POWDER", "food", False, "Cocoa solids"),
    "cocoa powder":                 ("THEOBROMA CACAO SEED POWDER", "food", False, "Canonical"),
    "vanilla":                      ("VANILLA PLANIFOLIA FRUIT EXTRACT", "food", False, "Vanilla"),
    "vanilla extract":              ("VANILLA PLANIFOLIA FRUIT EXTRACT", "food", False, "Extract"),
    "vanillin":                     ("VANILLIN", "food", False, "Vanilla compound"),
    "rapeseed oil":                 ("BRASSICA CAMPESTRIS SEED OIL", "food", False, "Rapeseed/canola"),
    "canola oil":                   ("BRASSICA CAMPESTRIS SEED OIL", "food", False, "N.American name"),
    "brassica campestris seed oil": ("BRASSICA CAMPESTRIS SEED OIL", "food", False, "Canonical"),
    "sunflower oil":                ("HELIANTHUS ANNUUS SEED OIL", "food", False, "Sunflower"),
    "palm oil":                     ("ELAEIS GUINEENSIS OIL", "food", False, "Palm - deforestation risk"),
    "elaeis guineensis oil":        ("ELAEIS GUINEENSIS OIL", "food", False, "Canonical"),
    "olive oil":                    ("OLEA EUROPAEA FRUIT OIL", "food", False, "Olive"),
    "olea europaea fruit oil":      ("OLEA EUROPAEA FRUIT OIL", "food", False, "Canonical"),
    "calcium carbonate":            ("CALCIUM CARBONATE", "food", False, "Chalk/fortification"),
    "vitamin d":                    ("CHOLECALCIFEROL", "food", False, "Vitamin D3"),
    "cholecalciferol":              ("CHOLECALCIFEROL", "food", False, "Canonical"),
    "vitamin d2":                   ("ERGOCALCIFEROL", "food", False, "Plant-derived D2"),
    "wheat flour":                  ("WHEAT FLOUR", "food", False, "Canonical"),
    "flour":                        ("WHEAT FLOUR", "food", False, "Assumed wheat"),
    "whole wheat flour":            ("WHOLE WHEAT FLOUR", "food", False, "Wholemeal"),

    # FOOD ADDITIVES / E-NUMBERS
    "e471":                         ("MONO AND DIGLYCERIDES OF FATTY ACIDS", "additive", False, "Emulsifier"),
    "mono and diglycerides":        ("MONO AND DIGLYCERIDES OF FATTY ACIDS", "additive", False, "E471"),
    "e322":                         ("LECITHIN", "additive", False, "E-number for lecithin"),
    "e330":                         ("CITRIC ACID", "additive", False, "E-number"),
    "e202":                         ("POTASSIUM SORBATE", "additive", True, "E-number - anchor"),
    "e211":                         ("SODIUM BENZOATE", "additive", True, "E-number - anchor"),
    "e621":                         ("MONOSODIUM GLUTAMATE", "additive", False, "MSG"),
    "msg":                          ("MONOSODIUM GLUTAMATE", "additive", False, "Abbreviation"),
    "monosodium glutamate":         ("MONOSODIUM GLUTAMATE", "additive", False, "Canonical"),
    "e120":                         ("CI 75470", "additive", False, "E-number for carmine"),
    "e171":                         ("TITANIUM DIOXIDE", "additive", False, "Banned EU food"),
    "e150a":                        ("CARAMEL COLOR", "additive", False, "Plain caramel"),
    "caramel color":                ("CARAMEL COLOR", "additive", False, "Food coloring"),
    "e250":                         ("SODIUM NITRITE", "additive", False, "Processed meat preservative"),
    "sodium nitrite":               ("SODIUM NITRITE", "additive", False, "Canonical"),
    "e951":                         ("ASPARTAME", "additive", False, "Sweetener - IARC 2B"),
    "aspartame":                    ("ASPARTAME", "additive", False, "Canonical"),
    "e955":                         ("SUCRALOSE", "additive", False, "Sweetener"),
    "sucralose":                    ("SUCRALOSE", "additive", False, "Canonical"),
    "e420":                         ("SORBITOL", "additive", False, "Sweetener/humectant"),
    "sorbitol":                     ("SORBITOL", "additive", False, "Canonical"),

    # PEG COMPOUNDS
    "peg-100 stearate":             ("PEG-100 STEARATE", "emulsifier", False, "Ethoxylated"),
    "polyethylene glycol":          ("PEG", "polymer", False, "PEG base"),
    "peg":                          ("PEG", "polymer", False, "Abbreviation"),

    # CERAMIDES
    "ceramide np":                  ("CERAMIDE NP", "lipid", False, "Skin barrier"),
    "ceramide ap":                  ("CERAMIDE AP", "lipid", False, "Skin barrier"),
    "ceramide eop":                 ("CERAMIDE EOP", "lipid", False, "Skin barrier"),
    "ceramide 1":                   ("CERAMIDE EOP", "lipid", False, "Old nomenclature"),
    "ceramide 3":                   ("CERAMIDE NP", "lipid", False, "Old nomenclature"),
    "ceramide 6-ii":                ("CERAMIDE AP", "lipid", False, "Old nomenclature"),
    "cholesterol":                  ("CHOLESTEROL", "lipid", False, "Skin barrier lipid"),

    # MCT
    "caprylic/capric triglyceride": ("CAPRYLIC/CAPRIC TRIGLYCERIDE", "emollient", False, "MCT oil"),
    "mct oil":                      ("CAPRYLIC/CAPRIC TRIGLYCERIDE", "emollient", False, "Medium chain TG"),
    "fractionated coconut oil":     ("CAPRYLIC/CAPRIC TRIGLYCERIDE", "emollient", False, "Consumer name"),

    # ALKALINE / pH ADJUSTERS
    "potassium hydroxide":          ("POTASSIUM HYDROXIDE", "alkaline", False, "Saponification"),
    "koh":                          ("POTASSIUM HYDROXIDE", "alkaline", False, "Chemical symbol"),
    "caustic potash":               ("POTASSIUM HYDROXIDE", "alkaline", False, "Industrial name"),
    "sodium hydroxide":             ("SODIUM HYDROXIDE", "alkaline", False, "Lye"),
    "naoh":                         ("SODIUM HYDROXIDE", "alkaline", False, "Chemical symbol"),
    "lye":                          ("SODIUM HYDROXIDE", "alkaline", False, "Common name"),
    "caustic soda":                 ("SODIUM HYDROXIDE", "alkaline", False, "Industrial name"),
    "triethanolamine":              ("TRIETHANOLAMINE", "alkaline", True, "pH adjuster - anchor"),
    "tea":                          ("TRIETHANOLAMINE", "alkaline", True, "Abbreviation TEA - anchor"),
}

ANCHOR_INGREDIENTS = {
    canonical for canonical, category, is_anchor, _ in SYNONYM_MAP.values()
    if is_anchor
}

def _clean_raw(raw):
    s = raw.lower().strip()
    s = re.sub(r'[ 	]+', ' ', s)
    s = s.replace('*', '').strip()
    # Strip percentage annotations like "glycerin (5%)"
    s = re.sub(r'[ 	]*[(][0-9.]+[ 	]*%[)]', '', s).strip()
    return s

def normalise_ingredient(raw_name):
    original = raw_name
    cleaned  = _clean_raw(raw_name)

    # 1. Exact match
    if cleaned in SYNONYM_MAP:
        canonical, category, is_anchor, notes = SYNONYM_MAP[cleaned]
        return {"canonical": canonical, "original": original, "cleaned": cleaned,
                "category": category, "is_anchor": is_anchor,
                "confidence": "HIGH", "notes": notes, "matched_by": "exact"}

    # 2. Prefix match
    for key, (canonical, category, is_anchor, notes) in SYNONYM_MAP.items():
        if cleaned.startswith(key) and len(cleaned) - len(key) <= 6:
            return {"canonical": canonical, "original": original, "cleaned": cleaned,
                    "category": category, "is_anchor": is_anchor,
                    "confidence": "HIGH", "notes": notes, "matched_by": "prefix"}

    # 3. Partial match (longest key wins)
    best_key, best_len, best_entry = None, 0, None
    for key, entry in SYNONYM_MAP.items():
        if key in cleaned and len(key) > best_len and len(key) > 5:
            best_key, best_len, best_entry = key, len(key), entry
    if best_entry:
        canonical, category, is_anchor, notes = best_entry
        return {"canonical": canonical, "original": original, "cleaned": cleaned,
                "category": category, "is_anchor": is_anchor,
                "confidence": "MEDIUM", "notes": notes + " [partial: " + best_key + "]",
                "matched_by": "partial"}

    # 4. Fallback
    return {"canonical": cleaned.upper(), "original": original, "cleaned": cleaned,
            "category": "unknown", "is_anchor": False,
            "confidence": "LOW", "notes": "Not in synonym database",
            "matched_by": "fallback"}

def normalise_ingredient_list(raw_list):
    results, anchors_found = [], []
    for i, raw in enumerate(raw_list):
        r = normalise_ingredient(raw)
        r["position"]     = i + 1
        r["total_count"]  = len(raw_list)
        r["position_pct"] = round((i + 1) / len(raw_list) * 100)
        results.append(r)
        if r["is_anchor"]:
            anchors_found.append((i + 1, r["canonical"]))
    exact   = sum(1 for r in results if r["matched_by"] == "exact")
    prefix  = sum(1 for r in results if r["matched_by"] == "prefix")
    partial = sum(1 for r in results if r["matched_by"] == "partial")
    fallback= sum(1 for r in results if r["matched_by"] == "fallback")
    summary = {
        "total": len(results), "exact_matches": exact,
        "prefix_matches": prefix, "partial_matches": partial,
        "fallback_count": fallback,
        "coverage_pct": round((exact + prefix + partial) / max(len(results), 1) * 100),
        "anchors_found": anchors_found,
        "first_anchor_position": anchors_found[0][0] if anchors_found else None,
    }
    return results, summary

def print_normalisation_report(results, summary):
    print("")
    print("  " + "-" * 62)
    for r in results:
        flag = " [ANCHOR]" if r["is_anchor"] else ""
        tag  = r["matched_by"].upper()[:3]
        print("  " + str(r["position"]).rjust(3) + ". [" + tag + "] " +
              r["original"][:28].ljust(30) + " -> " + r["canonical"][:24] + flag)
    print("")
    print("  Coverage:  " + str(summary["coverage_pct"]) + "% | " +
          str(summary["exact_matches"]) + " exact  " +
          str(summary["prefix_matches"]) + " prefix  " +
          str(summary["partial_matches"]) + " partial  " +
          str(summary["fallback_count"]) + " fallback")
    if summary["anchors_found"]:
        first = summary["first_anchor_position"]
        total = summary["total"]
        pct   = round(first / total * 100)
        print("  Anchors:   " + ", ".join(
            c + " @pos" + str(p) for p, c in summary["anchors_found"][:3]
        ))
        print("  Sub-1% zone estimated from position " +
              str(first) + "/" + str(total) + " (" + str(pct) + "% through list)")
    else:
        print("  Anchors:   None detected - concentration zone estimation limited")

# SELF-TEST
print("Running Normaliser self-test...")

# Test 1: CeraVe (real INCI list)
print("")
print("TEST 1: CeraVe Moisturising Cream (real INCI list)")
cerave_inci = [
    "Aqua/Water/Eau", "Glycerin", "Cetearyl Alcohol",
    "Caprylic/Capric Triglyceride", "Cetyl Alcohol", "Ceteareth-20",
    "Petrolatum", "Potassium Phosphate", "Ceramide NP", "Ceramide AP",
    "Ceramide EOP", "Carbomer", "Dimethicone", "Behentrimonium Methosulfate",
    "Sodium Lauroyl Lactylate", "Sodium Hyaluronate", "Cholesterol",
    "Phenoxyethanol", "Disodium EDTA", "Dipotassium Phosphate",
    "Tocopherol", "Phytosphingosine", "Xanthan Gum", "Ethylhexylglycerin",
]
r1, s1 = normalise_ingredient_list(cerave_inci)
print_normalisation_report(r1, s1)

# Test 2: Mixed common names + INCI
print("")
print("TEST 2: Chanel No5 Body Lotion (mixed naming)")
chanel_raw = [
    "water", "glycerine", "white petrolatum", "lanolin",
    "beeswax", "parfum", "carmine", "phenoxyethanol", "methylparaben",
]
r2, s2 = normalise_ingredient_list(chanel_raw)
print_normalisation_report(r2, s2)

# Test 3: Food label with E-numbers and consumer names
print("")
print("TEST 3: Chocolate bar (consumer label with E-numbers)")
choc_raw = ["Sugar", "Cocoa Mass", "Cocoa Butter", "Vanilla", "Soy Lecithin", "Salt", "E471"]
r3, s3 = normalise_ingredient_list(choc_raw)
print_normalisation_report(r3, s3)

# Test 4: Abbreviations and brand names
print("")
print("TEST 4: Abbreviations and brand names")
abbrev = ["SLS", "SLES", "Vaseline", "Mineral Oil", "Vitamin C", "Vitamin B3", "BHA", "MCT Oil"]
r4, s4 = normalise_ingredient_list(abbrev)
print_normalisation_report(r4, s4)

# Sanity checks
assert normalise_ingredient("Vaseline")["canonical"] == "PETROLATUM"
assert normalise_ingredient("White Petrolatum")["canonical"] == "PETROLATUM"
assert normalise_ingredient("Mineral Oil")["canonical"] == "PARAFFINUM LIQUIDUM"
assert normalise_ingredient("SLS")["canonical"] == "SODIUM LAURYL SULFATE"
assert normalise_ingredient("SLES")["canonical"] == "SODIUM LAURETH SULFATE"
assert normalise_ingredient("Aqua/Water/Eau")["canonical"] == "AQUA"
assert normalise_ingredient("Carmine")["canonical"] == "CI 75470"
assert normalise_ingredient("E120")["canonical"] == "CI 75470"
assert normalise_ingredient("Beeswax")["canonical"] == "CERA ALBA"
assert normalise_ingredient("Phenoxyethanol")["is_anchor"] == True
assert normalise_ingredient("Parfum")["is_anchor"] == True
assert normalise_ingredient("Vitamin C")["canonical"] == "ASCORBIC ACID"
assert normalise_ingredient("Vitamin B3")["canonical"] == "NIACINAMIDE"
assert normalise_ingredient("Cocoa Butter")["canonical"] == "THEOBROMA CACAO SEED BUTTER"
assert normalise_ingredient("Glycerine")["canonical"] == "GLYCERIN"
assert normalise_ingredient("Petroleum Jelly")["canonical"] == "PETROLATUM"
assert normalise_ingredient("MCT Oil")["canonical"] == "CAPRYLIC/CAPRIC TRIGLYCERIDE"
assert normalise_ingredient("E471")["canonical"] == "MONO AND DIGLYCERIDES OF FATTY ACIDS"
assert normalise_ingredient("MSG")["canonical"] == "MONOSODIUM GLUTAMATE"

print("")
print("ALL SANITY CHECKS PASSED")
print("Synonym database: " + str(len(SYNONYM_MAP)) + " entries")
print("Anchor ingredients: " + str(len(ANCHOR_INGREDIENTS)))
print("Normaliser ready")

"""

with open('/content/noura_normaliser.py', 'w', encoding='utf-8') as f:
    f.write(normaliser_code)

exec(normaliser_code)
print('Cell 40 complete - noura_normaliser.py written and loaded')
import sys, os
with open('/content/noura_normaliser.py', 'w') as _f:
    _f.write(open(os.path.abspath(__file__)).read() if '__file__' in dir() else "")

Running Normaliser self-test...

TEST 1: CeraVe Moisturising Cream (real INCI list)

  --------------------------------------------------------------
    1. [EXA] Aqua/Water/Eau                 -> AQUA
    2. [EXA] Glycerin                       -> GLYCERIN
    3. [EXA] Cetearyl Alcohol               -> CETEARYL ALCOHOL
    4. [EXA] Caprylic/Capric Triglyceride   -> CAPRYLIC/CAPRIC TRIGLYCE
    5. [EXA] Cetyl Alcohol                  -> CETYL ALCOHOL
    6. [EXA] Ceteareth-20                   -> CETEARETH-20
    7. [EXA] Petrolatum                     -> PETROLATUM
    8. [FAL] Potassium Phosphate            -> POTASSIUM PHOSPHATE
    9. [EXA] Ceramide NP                    -> CERAMIDE NP
   10. [EXA] Ceramide AP                    -> CERAMIDE AP
   11. [EXA] Ceramide EOP                   -> CERAMIDE EOP
   12. [EXA] Carbomer                       -> CARBOMER
   13. [EXA] Dimethicone                    -> DIMETHICONE
   14. [FAL] Behentrimonium Methosulfate    -> BEHENTRIMONIUM METHO

In [None]:
concentration_engine_code = """
import re
import sys
sys.path.insert(0, '/content')

# ============================================================
# SYNONYM DB (flat key -> canonical INCI)
# ============================================================
SYNONYM_DB = {
    "water":"aqua","eau":"aqua","aqua/water/eau":"aqua","purified water":"aqua",
    "deionized water":"aqua","distilled water":"aqua",
    "glycerine":"glycerin","glycerol":"glycerin","vegetable glycerin":"glycerin",
    "hyaluronic acid":"sodium hyaluronate","hyaluronan":"sodium hyaluronate","ha":"sodium hyaluronate",
    "petrolatum":"petrolatum","white petrolatum":"petrolatum",
    "petroleum jelly":"petrolatum","vaseline":"petrolatum",
    "paraffinum liquidum":"mineral oil","liquid paraffin":"mineral oil",
    "paraffin oil":"mineral oil","light mineral oil":"mineral oil","mineral oil":"mineral oil",
    "squalane":"squalane","squalene":"squalene",
    "isopropyl myristate":"isopropyl myristate","ipm":"isopropyl myristate",
    "caprylic/capric triglyceride":"caprylic/capric triglyceride","cct":"caprylic/capric triglyceride",
    "fractionated coconut oil":"caprylic/capric triglyceride",
    "alcohol":"alcohol denat","ethanol":"alcohol denat","denatured alcohol":"alcohol denat",
    "sd alcohol":"alcohol denat",
    "isopropanol":"isopropyl alcohol","rubbing alcohol":"isopropyl alcohol",
    "cetyl alcohol":"cetyl alcohol","cetearyl alcohol":"cetearyl alcohol",
    "sodium lauryl sulfate":"sodium lauryl sulfate","sls":"sodium lauryl sulfate",
    "sodium dodecyl sulfate":"sodium lauryl sulfate",
    "sodium laureth sulfate":"sodium laureth sulfate","sles":"sodium laureth sulfate",
    "cocamidopropyl betaine":"cocamidopropyl betaine","capb":"cocamidopropyl betaine",
    "ceteareth-20":"ceteareth-20",
    "polysorbate 20":"polysorbate-20","tween 20":"polysorbate-20",
    "lecithin":"lecithin","soy lecithin":"lecithin","sunflower lecithin":"lecithin",
    "dimethicone":"dimethicone","polydimethylsiloxane":"dimethicone",
    "cyclomethicone":"cyclopentasiloxane","cyclopentasiloxane":"cyclopentasiloxane","d5":"cyclopentasiloxane",
    "cyclotetrasiloxane":"cyclotetrasiloxane","d4":"cyclotetrasiloxane",
    "phenoxyethanol":"phenoxyethanol","2-phenoxyethanol":"phenoxyethanol",
    "methylparaben":"methylparaben","methyl paraben":"methylparaben",
    "methyl 4-hydroxybenzoate":"methylparaben",
    "ethylparaben":"ethylparaben","propylparaben":"propylparaben","propyl paraben":"propylparaben",
    "butylparaben":"butylparaben","butyl paraben":"butylparaben","isobutylparaben":"isobutylparaben",
    "benzyl alcohol":"benzyl alcohol","chlorphenesin":"chlorphenesin",
    "ethylhexylglycerin":"ethylhexylglycerin",
    "caprylyl glycol":"caprylyl glycol","1,2-octanediol":"caprylyl glycol",
    "dmdm hydantoin":"dmdm hydantoin","quaternium-15":"quaternium-15",
    "imidazolidinyl urea":"imidazolidinyl urea","diazolidinyl urea":"diazolidinyl urea",
    "sodium benzoate":"sodium benzoate","potassium sorbate":"potassium sorbate",
    "niacinamide":"niacinamide","nicotinamide":"niacinamide","vitamin b3":"niacinamide",
    "ascorbic acid":"ascorbic acid","vitamin c":"ascorbic acid","l-ascorbic acid":"ascorbic acid",
    "retinol":"retinol","vitamin a":"retinol",
    "tocopherol":"tocopherol","vitamin e":"tocopherol","alpha-tocopherol":"tocopherol",
    "tocopheryl acetate":"tocopheryl acetate","vitamin e acetate":"tocopheryl acetate",
    "panthenol":"panthenol","provitamin b5":"panthenol",
    "salicylic acid":"salicylic acid","bha":"salicylic acid",
    "glycolic acid":"glycolic acid","aha":"glycolic acid",
    "lactic acid":"lactic acid","azelaic acid":"azelaic acid","citric acid":"citric acid",
    "oxybenzone":"benzophenone-3","benzophenone-3":"benzophenone-3",
    "avobenzone":"butyl methoxydibenzoylmethane",
    "octinoxate":"ethylhexyl methoxycinnamate","ethylhexyl methoxycinnamate":"ethylhexyl methoxycinnamate",
    "zinc oxide":"zinc oxide","titanium dioxide":"titanium dioxide",
    "ci 77891":"titanium dioxide","ci 77947":"zinc oxide",
    "coconut oil":"cocos nucifera oil","cocos nucifera oil":"cocos nucifera oil",
    "cocos nucifera (coconut) oil":"cocos nucifera oil",
    "shea butter":"butyrospermum parkii butter","butyrospermum parkii butter":"butyrospermum parkii butter",
    "jojoba oil":"simmondsia chinensis seed oil","jojoba":"simmondsia chinensis seed oil",
    "argan oil":"argania spinosa kernel oil",
    "rosehip oil":"rosa canina fruit oil",
    "sunflower oil":"helianthus annuus seed oil",
    "olive oil":"olea europaea fruit oil",
    "sweet almond oil":"prunus amygdalus dulcis oil",
    "castor oil":"ricinus communis seed oil",
    "hemp seed oil":"cannabis sativa seed oil","hemp oil":"cannabis sativa seed oil",
    "cannabis sativa seed oil":"cannabis sativa seed oil",
    "palm oil":"elaeis guineensis oil",
    "rapeseed oil":"brassica napus seed oil","canola oil":"brassica napus seed oil",
    "colza oil":"brassica napus seed oil",
    "beeswax":"cera alba","cera alba":"cera alba","white beeswax":"cera alba",
    "cera flava":"cera alba","yellow beeswax":"cera alba",
    "lanolin":"lanolin","wool wax":"lanolin","wool fat":"lanolin","adeps lanae":"lanolin",
    "carmine":"ci 75470","ci 75470":"ci 75470","cochineal":"ci 75470",
    "carminic acid":"ci 75470","crimson lake":"ci 75470","natural red 4":"ci 75470",
    "carbomer":"carbomer","carbopol":"carbomer",
    "xanthan gum":"xanthan gum","xanthan":"xanthan gum",
    "disodium edta":"disodium edta","edta":"disodium edta","tetrasodium edta":"tetrasodium edta",
    "sodium hydroxide":"sodium hydroxide","naoh":"sodium hydroxide","lye":"sodium hydroxide",
    "potassium hydroxide":"potassium hydroxide","koh":"potassium hydroxide",
    "triethanolamine":"triethanolamine",
    "bht":"bht","butylated hydroxytoluene":"bht",
    "bha (antioxidant)":"bha","butylated hydroxyanisole":"bha",
    "fragrance":"fragrance","parfum":"fragrance","perfume":"fragrance","aroma":"fragrance",
    "flavor":"flavor","flavour":"flavor",
    "triclosan":"triclosan","chlorhexidine":"chlorhexidine",
    "sugar":"sucrose","cane sugar":"sucrose","beet sugar":"sucrose","table sugar":"sucrose",
    "glucose":"glucose","dextrose":"glucose",
    "high fructose corn syrup":"high fructose corn syrup","hfcs":"high fructose corn syrup",
    "maltodextrin":"maltodextrin",
    "salt":"sodium chloride","sea salt":"sodium chloride","table salt":"sodium chloride",
    "sodium chloride":"sodium chloride",
    "msg":"monosodium glutamate","monosodium glutamate":"monosodium glutamate","e621":"monosodium glutamate",
    "sodium bicarbonate":"sodium bicarbonate","baking soda":"sodium bicarbonate",
    "aspartame":"aspartame","e951":"aspartame",
    "sucralose":"sucralose",
    "stevia":"stevia rebaudiana extract","steviol glycosides":"stevia rebaudiana extract",
    "cocoa butter":"theobroma cacao seed butter",
    "cocoa mass":"theobroma cacao extract","cocoa powder":"theobroma cacao powder",
    "vanilla":"vanilla planifolia extract","vanilla extract":"vanilla planifolia extract",
    "oats":"avena sativa","oat flour":"avena sativa",
    "wheat flour":"triticum vulgare flour",
    "sodium hypochlorite":"sodium hypochlorite","bleach":"sodium hypochlorite",
    "hydrogen peroxide":"hydrogen peroxide",
    "acetic acid":"acetic acid","white vinegar":"acetic acid","vinegar":"acetic acid",
    "d-limonene":"limonene","limonene":"limonene",
    "tea tree oil":"melaleuca alternifolia leaf oil",
    "lavender oil":"lavandula angustifolia oil",
    "sodium carbonate":"sodium carbonate","washing soda":"sodium carbonate",
    "borax":"sodium tetraborate",
    "lauryl glucoside":"lauryl glucoside","alkyl polyglucoside":"lauryl glucoside",
}

ANCHOR_INGREDIENTS = {
    "phenoxyethanol","methylparaben","ethylparaben","propylparaben",
    "butylparaben","isobutylparaben","chlorphenesin","caprylyl glycol",
    "ethylhexylglycerin","benzyl alcohol","dmdm hydantoin","quaternium-15",
    "imidazolidinyl urea","sodium benzoate","potassium sorbate",
    "sodium hydroxide","potassium hydroxide","triethanolamine",
    "fragrance","flavor","disodium edta","tetrasodium edta",
    "sodium nitrate","sodium nitrite","bht","bha",
}

ZONE_MULTIPLIERS = {
    "ABOVE_1PCT": 1.00,
    "BELOW_1PCT": 0.40,
    "ANCHOR":     0.25,
    "UNKNOWN":    0.75,
}

def _clean(raw):
    s = raw.lower().strip()
    s = " ".join(s.split())
    for ch in ["*", ".", "/"]:
        s = s.rstrip(ch)
    return s.strip()

def normalise_ingredient(raw):
    if not raw or not isinstance(raw, str):
        return {"raw": raw, "canonical": "", "matched": False, "is_anchor": False}
    cleaned = _clean(raw)
    if cleaned in SYNONYM_DB:
        canonical = SYNONYM_DB[cleaned]
    else:
        stripped = cleaned.replace("certified organic ","").replace("organic ","")
        stripped = stripped.replace("natural ","").replace("plant-derived ","").replace("pure ","").replace("raw ","")
        canonical = SYNONYM_DB.get(stripped, stripped if stripped in SYNONYM_DB.values() else cleaned)
    return {"raw": raw, "canonical": canonical,
            "matched": canonical != cleaned or cleaned in SYNONYM_DB,
            "is_anchor": canonical in ANCHOR_INGREDIENTS}

def normalise_ingredient_list(raw_list):
    if not raw_list:
        return []
    norm = [normalise_ingredient(r) for r in raw_list]
    anchor_idx = next((i for i, x in enumerate(norm) if x["is_anchor"]), None)
    total = len(norm)
    result = []
    for i, item in enumerate(norm):
        if item["is_anchor"] and i == anchor_idx:       zone = "ANCHOR"
        elif anchor_idx is not None and i > anchor_idx: zone = "BELOW_1PCT"
        elif anchor_idx is None and i < total // 2:     zone = "ABOVE_1PCT"  # first half = likely concentrated
        elif anchor_idx is None and i >= total-3 and total > 5: zone = "BELOW_1PCT"
        elif anchor_idx is None:                         zone = "UNKNOWN"
        else:                                            zone = "ABOVE_1PCT"
        result.append({**item, "index": i, "zone": zone, "multiplier": ZONE_MULTIPLIERS[zone]})
    return result

def get_concentration_zone(ingredient_name, normalised_list):
    target = normalise_ingredient(ingredient_name)["canonical"]
    return next((x for x in normalised_list if x["canonical"] == target), None)

# ============================================================
# HAZARD DATABASE
# ============================================================
HAZARD_DB = {
    "ci 75470":       {"name":"Carmine","severity_base":25,"hazard_type":"allergen_sensitiser","evidence_tier":2,"regulatory":["EU_labelling_required"],"safe_threshold_pct":None,"note":"Known sensitiser; anaphylaxis risk; animal-derived"},
    "benzophenone-3": {"name":"Oxybenzone","severity_base":28,"hazard_type":"endocrine_disruptor","evidence_tier":3,"regulatory":["Hawaii_banned","EU_restricted_0.5pct"],"safe_threshold_pct":0.5,"note":"Endocrine disruption in animal studies; EU restricted to 0.5%"},
    "butyl methoxydibenzoylmethane": {"name":"Avobenzone","severity_base":20,"hazard_type":"photoinstability_allergen","evidence_tier":3,"regulatory":["EU_allowed_3pct"],"safe_threshold_pct":3.0,"note":"Photodegrades to harmful byproducts; allergen"},
    "triclosan":      {"name":"Triclosan","severity_base":32,"hazard_type":"endocrine_disruptor","evidence_tier":2,"regulatory":["FDA_banned_OTC_soaps","EU_restricted_0.3pct"],"safe_threshold_pct":0.3,"note":"FDA banned in OTC soaps 2016; endocrine disruption confirmed"},
    "dmdm hydantoin": {"name":"DMDM Hydantoin","severity_base":28,"hazard_type":"formaldehyde_releaser","evidence_tier":2,"regulatory":["EU_restricted","IARC_group1_formaldehyde"],"safe_threshold_pct":0.6,"note":"Releases formaldehyde (IARC Group 1 carcinogen)"},
    "quaternium-15":  {"name":"Quaternium-15","severity_base":30,"hazard_type":"formaldehyde_releaser","evidence_tier":2,"regulatory":["EU_banned_cosmetics"],"safe_threshold_pct":None,"note":"Banned in EU cosmetics; formaldehyde releaser"},
    "sodium lauryl sulfate": {"name":"SLS","severity_base":18,"hazard_type":"irritant","evidence_tier":2,"regulatory":["EU_allowed_restricted"],"safe_threshold_pct":1.0,"note":"Skin barrier disruption above 1%; mucosal irritant"},
    "bha":            {"name":"BHA","severity_base":22,"hazard_type":"endocrine_disruptor","evidence_tier":3,"regulatory":["IARC_possible_carcinogen","EU_restricted_food"],"safe_threshold_pct":0.02,"note":"IARC 2B possible carcinogen; endocrine disruption"},
    "bht":            {"name":"BHT","severity_base":15,"hazard_type":"potential_endocrine_disruptor","evidence_tier":4,"regulatory":["EU_allowed","EFSA_ADI_set"],"safe_threshold_pct":0.1,"note":"Weak endocrine disruption in vitro"},
    "ethylhexyl methoxycinnamate": {"name":"Octinoxate","severity_base":20,"hazard_type":"endocrine_disruptor","evidence_tier":3,"regulatory":["Hawaii_banned_reef","EU_allowed_7.5pct"],"safe_threshold_pct":7.5,"note":"Thyroid disruption in animal studies; reef toxic"},
    "petrolatum":     {"name":"Petrolatum","severity_base":14,"hazard_type":"contamination_risk","evidence_tier":3,"regulatory":["EU_restricted_purity_required"],"safe_threshold_pct":None,"note":"EU requires purity cert; PAH contamination risk in unpurified grades"},
    "mineral oil":    {"name":"Mineral Oil","severity_base":12,"hazard_type":"contamination_risk","evidence_tier":3,"regulatory":["EU_restricted_cosmetics","IARC_MOSH_group1"],"safe_threshold_pct":None,"note":"IARC Group 1 for untreated mineral oils; refined grades lower risk"},
    "fragrance":      {"name":"Fragrance/Parfum","severity_base":12,"hazard_type":"undisclosed_allergen","evidence_tier":2,"regulatory":["EU_26_allergens_labelling"],"safe_threshold_pct":None,"note":"Opaque blend; may contain 200+ undisclosed chemicals; 26 EU-regulated allergens"},
    "propylene glycol":{"name":"Propylene Glycol","severity_base":8,"hazard_type":"irritant_sensitiser","evidence_tier":2,"regulatory":["GRAS_food","EU_allowed_cosmetics"],"safe_threshold_pct":5.0,"note":"Skin irritant above 5%; sensitiser in susceptible individuals"},
    "methylparaben":  {"name":"Methylparaben","severity_base":10,"hazard_type":"weak_endocrine_disruptor","evidence_tier":4,"regulatory":["EU_allowed_0.4pct"],"safe_threshold_pct":0.4,"note":"Weak oestrogenic activity in vitro; EU allowed at 0.4%"},
    "propylparaben":  {"name":"Propylparaben","severity_base":12,"hazard_type":"endocrine_disruptor","evidence_tier":3,"regulatory":["EU_restricted_0.14pct","Denmark_banned_children"],"safe_threshold_pct":0.14,"note":"Stronger endocrine signal; EU restricted; Denmark banned in children"},
    "butylparaben":   {"name":"Butylparaben","severity_base":15,"hazard_type":"endocrine_disruptor","evidence_tier":3,"regulatory":["EU_restricted_0.14pct","Denmark_banned_under3"],"safe_threshold_pct":0.14,"note":"Detected in breast tissue; EU restricted; Denmark banned under-3"},
    "dimethicone":    {"name":"Dimethicone","severity_base":6,"hazard_type":"environmental_persistence","evidence_tier":4,"regulatory":["EU_D4_D5_restricted_wash_off"],"safe_threshold_pct":None,"note":"Low human health concern; D4/D5 restricted in EU wash-off"},
    "cyclopentasiloxane": {"name":"Cyclopentasiloxane (D5)","severity_base":14,"hazard_type":"environmental_persistence","evidence_tier":3,"regulatory":["EU_banned_wash_off_above_0.1pct"],"safe_threshold_pct":0.1,"note":"EU banned in wash-off cosmetics >0.1%; PBT substance"},
    "alcohol denat":  {"name":"Denatured Alcohol","severity_base":10,"hazard_type":"irritant_barrier_disruptor","evidence_tier":2,"regulatory":["GRAS"],"safe_threshold_pct":5.0,"note":"Damages skin barrier at high concentrations; drying"},
    "cera alba":      {"name":"Beeswax","severity_base":8,"hazard_type":"animal_derived_allergen","evidence_tier":4,"regulatory":[],"safe_threshold_pct":None,"note":"Animal-derived; contact sensitiser in susceptible individuals"},
    "lanolin":        {"name":"Lanolin","severity_base":12,"hazard_type":"allergen","evidence_tier":2,"regulatory":["EU_labelling_recommended"],"safe_threshold_pct":None,"note":"Contact allergen affecting ~1.7% of population; pesticide residue risk"},
    "phenoxyethanol": {"name":"Phenoxyethanol","severity_base":8,"hazard_type":"irritant","evidence_tier":2,"regulatory":["EU_allowed_1pct","France_restricted_nursing"],"safe_threshold_pct":1.0,"note":"France restricted in nursing products; irritant above 1%"},
    "benzyl alcohol": {"name":"Benzyl Alcohol","severity_base":10,"hazard_type":"allergen","evidence_tier":2,"regulatory":["EU_26_allergens_labelling"],"safe_threshold_pct":0.001,"note":"EU 26 regulated allergens; must label in leave-on above 0.001%"},
    "imidazolidinyl urea": {"name":"Imidazolidinyl Urea","severity_base":18,"hazard_type":"formaldehyde_releaser","evidence_tier":2,"regulatory":["EU_restricted"],"safe_threshold_pct":0.6,"note":"Formaldehyde releaser; slower than DMDM hydantoin"},
    "sucrose":        {"name":"Sugar","severity_base":10,"hazard_type":"metabolic_concern","evidence_tier":1,"regulatory":["WHO_free_sugar_guideline"],"safe_threshold_pct":None,"note":"WHO recommends <10% daily calories from free sugars; dental risk"},
    "high fructose corn syrup": {"name":"HFCS","severity_base":18,"hazard_type":"metabolic_concern","evidence_tier":1,"regulatory":["WHO_added_sugar_limit"],"safe_threshold_pct":None,"note":"Associated with metabolic syndrome, fatty liver, insulin resistance"},
    "aspartame":      {"name":"Aspartame","severity_base":16,"hazard_type":"possible_carcinogen","evidence_tier":2,"regulatory":["IARC_group2B_2023","EFSA_ADI_maintained"],"safe_threshold_pct":None,"note":"IARC reclassified 2B possible carcinogen 2023; EFSA maintains ADI"},
    "sodium hypochlorite": {"name":"Bleach","severity_base":20,"hazard_type":"irritant_corrosive","evidence_tier":1,"regulatory":["EPA_registered_disinfectant"],"safe_threshold_pct":0.5,"note":"Respiratory and dermal irritant; corrosive at concentration"},
    "limonene":       {"name":"Limonene","severity_base":8,"hazard_type":"allergen_oxidation","evidence_tier":2,"regulatory":["EU_26_allergens_labelling"],"safe_threshold_pct":None,"note":"EU regulated allergen; oxidises to potent sensitisers on air exposure"},
    "sodium laureth sulfate": {"name":"SLES","severity_base":8,"hazard_type":"irritant","evidence_tier":2,"regulatory":["EU_allowed"],"safe_threshold_pct":2.0,"note":"Milder than SLS; 1,4-dioxane contamination risk"},
    "glycerin":       {"name":"Glycerin","severity_base":2,"hazard_type":"low_concern","evidence_tier":5,"regulatory":["GRAS","EU_allowed"],"safe_threshold_pct":None,"note":"Generally safe; low concern; source variability (plant vs animal)"},
    "sodium hyaluronate": {"name":"Sodium Hyaluronate","severity_base":2,"hazard_type":"low_concern","evidence_tier":5,"regulatory":["EU_allowed"],"safe_threshold_pct":None,"note":"Generally safe; mostly biofermentation-derived"},
    "monosodium glutamate": {"name":"MSG","severity_base":6,"hazard_type":"sensitivity_concern","evidence_tier":2,"regulatory":["FDA_GRAS","EFSA_safe"],"safe_threshold_pct":None,"note":"GRAS; robust evidence of safety at normal intakes; small subset reports sensitivity"},
}

# ============================================================
# CONCENTRATION ENGINE
# ============================================================

def apply_concentration_multiplier(canonical, severity_base, normalised_list):
    zone_info  = get_concentration_zone(canonical, normalised_list)
    multiplier = zone_info["multiplier"] if zone_info else ZONE_MULTIPLIERS["UNKNOWN"]
    zone       = zone_info["zone"]       if zone_info else "UNKNOWN"
    return {"canonical": canonical, "severity_base": severity_base,
            "zone": zone, "multiplier": multiplier,
            "severity_adj": round(severity_base * multiplier, 1),
            "attenuated": (severity_base * multiplier) < severity_base}

def score_ingredients_with_concentration(raw_list, category="cosmetics"):
    if not raw_list:
        return {"scored":[], "total_penalty":0, "flags":[], "ingredient_count":0, "flagged_count":0}
    normalised    = normalise_ingredient_list(raw_list)
    total_penalty = 0
    flags         = []
    scored        = []
    tier_labels   = {1:"RCT/meta",2:"Cohort",3:"Case-ctrl",4:"In vitro",5:"Reg flag",6:"Precaution"}
    for item in normalised:
        canonical = item["canonical"]
        hazard    = HAZARD_DB.get(canonical)
        if hazard is None:
            scored.append({**item, "hazard":None, "severity_base":0, "severity_adj":0, "flag":None})
            continue
        base = hazard["severity_base"]
        conc = apply_concentration_multiplier(canonical, base, normalised)
        # Zero out if below known safe threshold
        threshold = hazard.get("safe_threshold_pct")
        if threshold and conc["zone"] in ("BELOW_1PCT","ANCHOR") and threshold >= 1.0:
            conc["severity_adj"] = 0
            conc["zone"] = "BELOW_SAFE_THRESHOLD"
        total_penalty += conc["severity_adj"]
        flag = {"ingredient": hazard["name"], "canonical": canonical,
                "zone": conc["zone"], "severity_base": base,
                "severity_adj": conc["severity_adj"], "multiplier": conc["multiplier"],
                "hazard_type": hazard["hazard_type"],
                "evidence_tier": hazard["evidence_tier"],
                "tier_label": tier_labels.get(hazard["evidence_tier"], "?"),
                "regulatory": hazard["regulatory"], "note": hazard["note"],
                "attenuated": conc["attenuated"]}
        flags.append(flag)
        scored.append({**item, "hazard": hazard["hazard_type"],
                       "severity_base": base, "severity_adj": conc["severity_adj"],
                       "flag": flag})
    return {"scored": scored, "total_penalty": round(total_penalty, 1),
            "flags": sorted(flags, key=lambda x: -x["severity_adj"]),
            "ingredient_count": len(raw_list), "flagged_count": len(flags)}

def health_score_from_ingredients(raw_list, category="cosmetics", base_score=100):
    result  = score_ingredients_with_concentration(raw_list, category)
    penalty = min(result["total_penalty"], base_score)
    score   = max(0, min(100, round(base_score - penalty)))
    if score >= 85:   verdict = "CLEAN"
    elif score >= 70: verdict = "GOOD"
    elif score >= 55: verdict = "ACCEPTABLE"
    elif score >= 40: verdict = "CAUTION"
    else:             verdict = "HIGHER_RISK"
    return {"health_score": score, "score": score, "verdict": verdict,
            "confidence": "HIGH" if result["flagged_count"] > 0 else "MEDIUM",
            "total_penalty": result["total_penalty"],
            "flags": [f["note"] + " [" + f["zone"] + " x" + str(f["multiplier"]) + "]"
                      for f in result["flags"]],
            "detailed_flags": result["flags"],
            "ingredient_count": result["ingredient_count"],
            "flagged_count": result["flagged_count"]}

# ============================================================
# SELF-TEST
# ============================================================
print("Concentration engine loaded")
print("  Synonym entries:   " + str(len(SYNONYM_DB)))
print("  Anchor ingredients:" + str(len(ANCHOR_INGREDIENTS)))
print("  Hazard DB entries: " + str(len(HAZARD_DB)))
print()
print("=" * 62)
print("  CELL 41 SELF-TEST: CONCENTRATION ESTIMATOR")
print("=" * 62)

# TEST 1: CeraVe - petrolatum before anchor = full penalty
print()
print("TEST 1: Petrolatum ABOVE_1PCT = full penalty")
cerave = [
    "aqua","glycerin","cetearyl alcohol","caprylic/capric triglyceride",
    "cetyl alcohol","ceteareth-20","petrolatum","potassium phosphate",
    "ceramide np","ceramide ap","ceramide eop","carbomer","dimethicone",
    "behentrimonium methosulfate","sodium lauroyl lactylate",
    "sodium hyaluronate","cholesterol","phenoxyethanol",
    "disodium edta","dipotassium phosphate","tocopherol",
    "phytosphingosine","xanthan gum","ethylhexylglycerin",
]
r = health_score_from_ingredients(cerave)
n = normalise_ingredient_list(cerave)
z = get_concentration_zone("petrolatum", n)
assert z["zone"] == "ABOVE_1PCT", "petrolatum should be ABOVE_1PCT"
assert z["multiplier"] == 1.0
pet_flag = next((f for f in r["detailed_flags"] if f["canonical"] == "petrolatum"), None)
assert pet_flag is not None, "petrolatum not flagged"
assert pet_flag["severity_adj"] == 14.0, "Expected 14.0, got " + str(pet_flag["severity_adj"])
print("  PASS: petrolatum -> ABOVE_1PCT, penalty=14.0 (full)")
print("  CeraVe health score: " + str(r["health_score"]) + "/100 [" + r["verdict"] + "]")

# TEST 2: Carmine before vs after anchor - penalty difference
print()
print("TEST 2: Carmine ABOVE_1PCT vs BELOW_1PCT")
above_anchor = ["aqua","glycerin","ci 75470","phenoxyethanol","fragrance"]
below_anchor = ["aqua","glycerin","phenoxyethanol","fragrance","ci 75470"]
r_above = health_score_from_ingredients(above_anchor)
r_below = health_score_from_ingredients(below_anchor)
carm_above = next(f for f in r_above["detailed_flags"] if f["canonical"] == "ci 75470")
carm_below = next(f for f in r_below["detailed_flags"] if f["canonical"] == "ci 75470")
assert carm_above["severity_adj"] == 25.0, "Expected 25.0, got " + str(carm_above["severity_adj"])
assert carm_below["severity_adj"] == 10.0, "Expected 10.0, got " + str(carm_below["severity_adj"])
print("  PASS: carmine ABOVE_1PCT -> penalty=25.0")
print("  PASS: carmine BELOW_1PCT -> penalty=10.0 (60% reduction)")
print("  Score impact: " + str(r_above["health_score"]) + " vs " + str(r_below["health_score"]))

# TEST 3: SLS safe below 1%
print()
print("TEST 3: SLS below safe threshold = zero penalty")
trace_sls = ["aqua","cocamidopropyl betaine","glycerin","phenoxyethanol","sodium lauryl sulfate","citric acid"]
r_sls = health_score_from_ingredients(trace_sls)
sls_flag = next((f for f in r_sls["detailed_flags"] if f["canonical"] == "sodium lauryl sulfate"), None)
assert sls_flag is not None, "SLS should be flagged"
assert sls_flag["severity_adj"] == 0, "SLS below safe threshold should be 0, got " + str(sls_flag["severity_adj"])
print("  PASS: SLS below 1% threshold -> penalty=0.0 (safe at trace)")

# TEST 4: Evidence tier labels
print()
print("TEST 4: Evidence tiers")
r_tri = health_score_from_ingredients(["aqua","triclosan","phenoxyethanol"])
tri = next(f for f in r_tri["detailed_flags"] if f["canonical"] == "triclosan")
assert tri["evidence_tier"] == 2
assert tri["tier_label"]    == "Cohort"
assert tri["severity_adj"]  == 32.0  # triclosan before anchor
print("  PASS: Triclosan -> tier=2 (Cohort), penalty=32.0")

# TEST 5: Score ordering
print()
print("TEST 5: Score ordering sanity")
dr_bronner = health_score_from_ingredients([
    "water","coconut oil","potassium hydroxide","glycerin","hemp oil"
], category="cleaning")
chanel = health_score_from_ingredients([
    "aqua","glycerin","cetearyl alcohol","lanolin","isopropyl myristate",
    "cetyl alcohol","phenoxyethanol","fragrance","benzyl alcohol","cera alba","ci 75470"
])
assert dr_bronner["health_score"] > chanel["health_score"]
print("  PASS: Dr Bronner " + str(dr_bronner["health_score"]) + " > Chanel " + str(chanel["health_score"]))

# TEST 6: Before/after comparison display
print()
print("TEST 6: Before/after impact summary")
print()
print("  Product: Chanel No5 Body Lotion")
print("  " + "Ingredient".ljust(26) + "Base".rjust(5) + "  Zone              " + "Adj".rjust(5) + "  Change")
print("  " + "-" * 68)
for flag in chanel["detailed_flags"]:
    change = round(flag["severity_adj"] - flag["severity_base"], 1)
    change_str = str(change) if change < 0 else ("0" if change == 0 else "+" + str(change))
    print("  " + flag["ingredient"][:25].ljust(26) +
          str(flag["severity_base"]).rjust(5) + "  " +
          flag["zone"].ljust(19) +
          str(flag["severity_adj"]).rjust(5) + "  " + change_str)
print()
print("  Final health score: " + str(chanel["health_score"]) + "/100 [" + chanel["verdict"] + "]")

print()
print("ALL CELL 41 TESTS PASSED")
print()
print("Concentration multiplier impact:")
print("  ABOVE_1PCT -> x1.00 (full penalty)")
print("  BELOW_1PCT -> x0.40 (attenuated)")
print("  ANCHOR     -> x0.25 (minimal)")
print("  BELOW_SAFE -> x0.00 (zero - ingredient safe at this level)")

"""

with open('/content/noura_concentration_engine.py', 'w', encoding='utf-8') as f:
    f.write(concentration_engine_code)

exec(concentration_engine_code)
print('Cell 41 complete - noura_concentration_engine.py written and loaded')

Concentration engine loaded
  Synonym entries:   215
  Anchor ingredients:26
  Hazard DB entries: 34

  CELL 41 SELF-TEST: CONCENTRATION ESTIMATOR

TEST 1: Petrolatum ABOVE_1PCT = full penalty
  PASS: petrolatum -> ABOVE_1PCT, penalty=14.0 (full)
  CeraVe health score: 76/100 [GOOD]

TEST 2: Carmine ABOVE_1PCT vs BELOW_1PCT
  PASS: carmine ABOVE_1PCT -> penalty=25.0
  PASS: carmine BELOW_1PCT -> penalty=10.0 (60% reduction)
  Score impact: 68 vs 83

TEST 3: SLS below safe threshold = zero penalty
  PASS: SLS below 1% threshold -> penalty=0.0 (safe at trace)

TEST 4: Evidence tiers
  PASS: Triclosan -> tier=2 (Cohort), penalty=32.0

TEST 5: Score ordering sanity
  PASS: Dr Bronner 99 > Chanel 64

TEST 6: Before/after impact summary

  Product: Chanel No5 Body Lotion
  Ingredient                 Base  Zone                Adj  Change
  --------------------------------------------------------------------
  Lanolin                      12  ABOVE_1PCT          12.0  0
  Carmine              

In [None]:
health_engine_code = """
import sys
import re
sys.path.insert(0, '/content')

# ============================================================
# CELL 42 - HEALTH ENGINE (WIRED)
# Replaces the stubbed health score in Cell 39's API wrapper.
# Depends on: noura_concentration_engine.py (Cell 41)
# Imports: health_score_from_ingredients, HAZARD_DB, SYNONYM_DB
# ============================================================

# --- Load concentration engine if not already in scope ---
try:
    health_score_from_ingredients
    score_ingredients_with_concentration
    HAZARD_DB
    _cell41_loaded = True
except NameError:
    _cell41_loaded = False
    try:
        exec(open('/content/noura_concentration_engine.py').read())
        _cell41_loaded = True
    except FileNotFoundError:
        print("WARNING: noura_concentration_engine.py not found at /content/")
        print("  Run Cell 41 first, then re-run this cell.")
        _cell41_loaded = False


# ============================================================
# CATEGORY-SPECIFIC SCORING RULES
# Base scores differ by category - food is held to tighter
# standards than cleaning products, for example.
# ============================================================

CATEGORY_CONFIG = {
    "cosmetics": {
        "base_score":        100,
        "max_penalty":        80,
        "bonus_clean_label":   5,
        "bonus_short_list":    3,
        "threshold_excellent": 85,
        "threshold_good":      70,
        "threshold_acceptable":55,
        "threshold_caution":   40,
    },
    "food": {
        "base_score":        100,
        "max_penalty":        85,
        "bonus_clean_label":   5,
        "bonus_short_list":    0,  # food: short list is not a health signal
        "threshold_excellent": 85,
        "threshold_good":      70,
        "threshold_acceptable":55,
        "threshold_caution":   40,
    },
    "cleaning": {
        "base_score":        100,
        "max_penalty":        70,
        "bonus_clean_label":   0,
        "bonus_short_list":    2,
        "threshold_excellent": 80,
        "threshold_good":      65,
        "threshold_acceptable":50,
        "threshold_caution":   35,
    },
    "baby": {
        "base_score":        100,
        "max_penalty":        75,
        "bonus_clean_label":   8,
        "bonus_short_list":    5,
        "threshold_excellent": 90,
        "threshold_good":      75,
        "threshold_acceptable":60,
        "threshold_caution":   45,
        "baby_penalty_multiplier": 1.5,
    },
}

# ============================================================
# POSITIVE SIGNAL DATABASE
# Ingredients that boost health score slightly
# ============================================================

POSITIVE_SIGNALS = {
    "simmondsia chinensis seed oil": {"bonus": 2, "reason": "Cold-pressed jojoba; excellent skin compatibility"},
    "butyrospermum parkii butter":   {"bonus": 2, "reason": "Unrefined shea butter; rich in fatty acids"},
    "cocos nucifera oil":            {"bonus": 1, "reason": "Coconut oil; antimicrobial MCT content"},
    "argania spinosa kernel oil":    {"bonus": 2, "reason": "Argan oil; high vitamin E and oleic acid"},
    "rosa canina fruit oil":         {"bonus": 3, "reason": "Rosehip oil; high trans-retinoic acid; skin regeneration"},
    "squalane":                      {"bonus": 2, "reason": "Plant-derived squalane; excellent biocompatibility"},
    "sodium hyaluronate":            {"bonus": 2, "reason": "Biofermented HA; proven hydration at all skin depths"},
    "niacinamide":                   {"bonus": 3, "reason": "Niacinamide; extensive RCT evidence for barrier repair"},
    "panthenol":                     {"bonus": 2, "reason": "Provitamin B5; wound healing and barrier support"},
    "tocopherol":                    {"bonus": 2, "reason": "Natural vitamin E; antioxidant protection"},
    "ascorbic acid":                 {"bonus": 2, "reason": "Vitamin C; collagen synthesis; antioxidant"},
    "ceramide np":                   {"bonus": 3, "reason": "Ceramide NP; skin barrier restoration"},
    "ceramide ap":                   {"bonus": 2, "reason": "Ceramide AP; structural barrier lipid"},
    "ceramide eop":                  {"bonus": 2, "reason": "Ceramide EOP; barrier cohesion"},
    "glycerin":                      {"bonus": 1, "reason": "Glycerin; effective humectant; long safety record"},
    "caprylic/capric triglyceride":  {"bonus": 1, "reason": "MCT; lightweight emollient; excellent stability"},
    "cannabis sativa seed oil":      {"bonus": 2, "reason": "Hemp seed oil; optimal omega 3:6 ratio"},
    "helianthus annuus seed oil":    {"bonus": 1, "reason": "Sunflower oil; high linoleic acid; barrier repair"},
    "aloe barbadensis leaf juice":   {"bonus": 2, "reason": "Aloe vera; anti-inflammatory; wound healing"},
    "olea europaea fruit oil":       {"bonus": 1, "reason": "Olive oil; squalene and oleic acid content"},
    "stevia rebaudiana extract":     {"bonus": 3, "reason": "Natural zero-calorie sweetener; no metabolic concern"},
    "avena sativa":                  {"bonus": 2, "reason": "Oats; beta-glucan; anti-inflammatory; high fiber"},
    "curcuma longa root extract":    {"bonus": 2, "reason": "Turmeric/curcumin; anti-inflammatory evidence"},
    "vanilla planifolia extract":    {"bonus": 1, "reason": "Natural vanilla; antioxidant vanillin"},
}


# ============================================================
# MAIN HEALTH ENGINE
# ============================================================

def calculate_health_score(product_name, ingredients, category="cosmetics"):
    if not _cell41_loaded:
        return {
            "health_score":  50,
            "score":         50,
            "verdict":       "UNKNOWN",
            "confidence":    "LOW",
            "engine":        "stub",
            "flags":         ["Cell 41 (concentration engine) not loaded - run Cell 41 first"],
            "detailed_flags":[],
            "ingredient_count": len(ingredients),
            "flagged_count": 0,
        }

    if not ingredients:
        return {
            "health_score":  50,
            "score":         50,
            "verdict":       "INSUFFICIENT_DATA",
            "confidence":    "LOW",
            "engine":        "cell42_live",
            "flags":         ["No ingredient list provided"],
            "detailed_flags":[],
            "ingredient_count": 0,
            "flagged_count": 0,
        }

    cfg = CATEGORY_CONFIG.get(category, CATEGORY_CONFIG["cosmetics"])

    # Step 1: Run concentration-aware ingredient scoring
    result = score_ingredients_with_concentration(ingredients, category)
    penalty = min(result["total_penalty"], cfg["max_penalty"])

    # Step 2: Apply baby multiplier if applicable
    if category == "baby" and result["total_penalty"] > 0:
        baby_mult = cfg.get("baby_penalty_multiplier", 1.5)
        penalty = min(result["total_penalty"] * baby_mult, cfg["max_penalty"])

    # Step 3: Positive signal bonuses
    from noura_concentration_engine import normalise_ingredient_list, get_concentration_zone
    normalised   = normalise_ingredient_list(ingredients)
    total_bonus  = 0
    bonuses_applied = []
    for item in normalised:
        sig = POSITIVE_SIGNALS.get(item["canonical"])
        if sig:
            bonus = sig["bonus"]
            if item["zone"] in ("ABOVE_1PCT", "UNKNOWN"):
                total_bonus += bonus
                bonuses_applied.append({"ingredient": item["canonical"],
                                        "bonus": bonus, "reason": sig["reason"]})
            elif item["zone"] == "BELOW_1PCT":
                total_bonus += round(bonus * 0.4, 1)
                bonuses_applied.append({"ingredient": item["canonical"],
                                        "bonus": round(bonus * 0.4, 1),
                                        "reason": sig["reason"] + " (trace level)"})

    total_bonus = min(total_bonus, 10)

    # Step 4: Short ingredient list bonus (transparency signal)
    n = len(ingredients)
    if n <= 8:
        total_bonus += cfg["bonus_short_list"]

    # Step 5: Calculate final score
    base  = cfg["base_score"]
    score = max(0, min(100, round(base - penalty + total_bonus)))

    # Step 6: Verdict
    t = cfg
    if score >= t["threshold_excellent"]:   verdict = "EXCELLENT"
    elif score >= t["threshold_good"]:      verdict = "GOOD"
    elif score >= t["threshold_acceptable"]:verdict = "ACCEPTABLE"
    elif score >= t["threshold_caution"]:   verdict = "CAUTION"
    else:                                   verdict = "HIGH_RISK"

    # Step 7: Confidence
    if result["flagged_count"] >= 2:
        confidence = "HIGH"
    elif result["flagged_count"] == 1 or len(ingredients) >= 5:
        confidence = "MEDIUM"
    else:
        confidence = "LOW"

    # Step 8: Human-readable flags
    flags = []
    for f in result["flags"]:
        attn = " [ATTENUATED - trace level]" if f["attenuated"] and f["severity_adj"] < f["severity_base"] else ""
        flags.append(f["ingredient"] + ": " + f["note"] + attn)

    return {
        "health_score":     score,
        "score":            score,
        "verdict":          verdict,
        "confidence":       confidence,
        "engine":           "cell42_live",
        "total_penalty":    round(penalty, 1),
        "total_bonus":      total_bonus,
        "base_score":       base,
        "flags":            flags,
        "detailed_flags":   result["flags"],
        "bonuses":          bonuses_applied,
        "ingredient_count": result["ingredient_count"],
        "flagged_count":    result["flagged_count"],
        "category":         category,
    }


# ============================================================
# SELF-TEST
# ============================================================
print("Health engine loaded (Cell 42 - LIVE)")
print("  Category configs:   " + str(len(CATEGORY_CONFIG)))
print("  Positive signals:   " + str(len(POSITIVE_SIGNALS)))
print("  Concentration engine: " + ("LIVE" if _cell41_loaded else "NOT LOADED - stubs active"))
print()
print("=" * 62)
print("  CELL 42 SELF-TEST: HEALTH ENGINE (WIRED)")
print("=" * 62)

# ---- Test 1: Dr Bronner's Pure Castile Soap ----
print()
print("TEST 1: Dr Bronner's Pure Castile Soap")
dr_bronner_ing = [
    "water", "coconut oil", "potassium hydroxide",
    "hemp oil", "jojoba oil", "citric acid", "tocopherol"
]
r1 = calculate_health_score("Dr Bronner's Pure Castile Soap", dr_bronner_ing, "cosmetics")
print("  Score:      " + str(r1["health_score"]) + "/100 [" + r1["verdict"] + "]")
print("  Confidence: " + r1["confidence"])
print("  Penalty:    " + str(r1["total_penalty"]) + "  Bonus: " + str(r1["total_bonus"]))
print("  Engine:     " + r1["engine"])
assert r1["health_score"] >= 85, "Dr Bronner should be EXCELLENT, got " + str(r1["health_score"])
assert r1["engine"] == "cell42_live"
print("  PASS: Dr Bronner EXCELLENT (>= 85)")

# ---- Test 2: CeraVe Moisturising Cream ----
print()
print("TEST 2: CeraVe Moisturising Cream")
cerave_ing = [
    "aqua", "glycerin", "cetearyl alcohol", "caprylic/capric triglyceride",
    "cetyl alcohol", "ceteareth-20", "petrolatum", "potassium phosphate",
    "ceramide np", "ceramide ap", "ceramide eop", "carbomer", "dimethicone",
    "behentrimonium methosulfate", "sodium lauroyl lactylate",
    "sodium hyaluronate", "cholesterol", "phenoxyethanol",
    "disodium edta", "dipotassium phosphate", "tocopherol",
    "phytosphingosine", "xanthan gum", "ethylhexylglycerin",
]
r2 = calculate_health_score("CeraVe Moisturising Cream", cerave_ing, "cosmetics")
print("  Score:      " + str(r2["health_score"]) + "/100 [" + r2["verdict"] + "]")
print("  Penalty:    " + str(r2["total_penalty"]) + "  Bonus: " + str(r2["total_bonus"]))
print("  Flagged:    " + str(r2["flagged_count"]) + " ingredients")
assert r2["health_score"] >= 70, "CeraVe should be at least GOOD, got " + str(r2["health_score"])
assert r2["health_score"] < r1["health_score"], "CeraVe should score below Dr Bronner"
print("  PASS: CeraVe " + str(r2["health_score"]) + " < Dr Bronner " + str(r1["health_score"]))

# ---- Test 3: Baby category applies stricter scoring ----
print()
print("TEST 3: Baby category penalty multiplier")
baby_wipes_ing = ["aqua", "phenoxyethanol", "methylparaben", "fragrance", "glycerin"]
r_cosmetic = calculate_health_score("Generic Wipes", baby_wipes_ing, "cosmetics")
r_baby     = calculate_health_score("Baby Wipes",    baby_wipes_ing, "baby")
print("  Same ingredients as cosmetics: " + str(r_cosmetic["health_score"]) + "/100")
print("  Same ingredients as baby:      " + str(r_baby["health_score"])     + "/100")
assert r_baby["health_score"] < r_cosmetic["health_score"], "Baby should score stricter"
print("  PASS: baby scoring stricter than cosmetics (" +
      str(r_baby["health_score"]) + " vs " + str(r_cosmetic["health_score"]) + ")")

# ---- Test 4: Positive bonuses fire ----
print()
print("TEST 4: Positive signal bonuses")
clean_serum = [
    "aqua", "niacinamide", "sodium hyaluronate",
    "simmondsia chinensis seed oil", "panthenol",
    "tocopherol", "citric acid", "potassium sorbate"
]
r4 = calculate_health_score("Clean Serum", clean_serum, "cosmetics")
print("  Score:       " + str(r4["health_score"]) + "/100 [" + r4["verdict"] + "]")
print("  Bonuses:     " + str(r4["total_bonus"]) + " pts from " + str(len(r4["bonuses"])) + " positive signals")
for b in r4["bonuses"]:
    print("    +" + str(b["bonus"]) + " " + b["ingredient"] + " (" + b["reason"][:50] + "...)")
assert r4["total_bonus"] > 0, "Should have positive bonuses"
assert r4["health_score"] >= 85, "Clean serum should be EXCELLENT"
print("  PASS: positive bonuses applied correctly")

# ---- Test 5: Food category scoring ----
print()
print("TEST 5: Food category - HFCS product")
cola_ing = [
    "water", "high fructose corn syrup", "caramel color",
    "phosphoric acid", "natural flavors", "caffeine"
]
r5 = calculate_health_score("Generic Cola", cola_ing, "food")
print("  Score:    " + str(r5["health_score"]) + "/100 [" + r5["verdict"] + "]")
print("  Penalty:  " + str(r5["total_penalty"]))
assert r5["health_score"] < 90, "Cola should not score EXCELLENT, got " + str(r5["health_score"])
print("  PASS: Cola penalised for HFCS (score=" + str(r5["health_score"]) + ", penalty=" + str(r5["total_penalty"]) + ")")

# ---- Test 6: Score ordering across brands ----
print()
print("TEST 6: Competitive score ordering")
triclosan_soap = ["water", "sodium lauryl sulfate", "triclosan", "fragrance"]
r_tri = calculate_health_score("Antibacterial Soap", triclosan_soap, "cosmetics")
print("  Dr Bronner:       " + str(r1["health_score"]) + " EXCELLENT")
print("  CeraVe:           " + str(r2["health_score"]) + " " + r2["verdict"])
print("  Clean Serum:      " + str(r4["health_score"]) + " " + r4["verdict"])
print("  Antibacterial:    " + str(r_tri["health_score"]) + " " + r_tri["verdict"])
assert r1["health_score"]  > r2["health_score"],   "Dr Bronner > CeraVe"
assert r2["health_score"]  > r_tri["health_score"], "CeraVe > Triclosan soap"
print("  PASS: Ordering correct")

print()
print("ALL CELL 42 TESTS PASSED")
print()
print("Health engine status:")
print("  Cell 41 concentration: LIVE (not stub)")
print("  Cell 42 health engine: LIVE (not stub)")
print("  Cell 39 API:           Ready to wire - replace stub with calculate_health_score()")
print()
print("Key improvements over Yuka/EWG:")
print("  Concentration-aware: petrolatum at position 7 != petrolatum at position 22")
print("  Positive signals: ceramides, niacinamide, rosehip oil boost score")
print("  Baby mode: same ingredient list scored 1.5x stricter")
print("  Evidence tier: IARC/FDA regulatory flags weighted higher than in vitro")

"""

with open('/content/noura_health_engine.py', 'w', encoding='utf-8') as f:
    f.write(health_engine_code)

exec(health_engine_code)
print('Cell 42 complete - noura_health_engine.py written and loaded')

Health engine loaded (Cell 42 - LIVE)
  Category configs:   4
  Positive signals:   24
  Concentration engine: LIVE

  CELL 42 SELF-TEST: HEALTH ENGINE (WIRED)

TEST 1: Dr Bronner's Pure Castile Soap
Concentration engine loaded
  Synonym entries:   215
  Anchor ingredients:26
  Hazard DB entries: 34

  CELL 41 SELF-TEST: CONCENTRATION ESTIMATOR

TEST 1: Petrolatum ABOVE_1PCT = full penalty
  PASS: petrolatum -> ABOVE_1PCT, penalty=14.0 (full)
  CeraVe health score: 76/100 [GOOD]

TEST 2: Carmine ABOVE_1PCT vs BELOW_1PCT
  PASS: carmine ABOVE_1PCT -> penalty=25.0
  PASS: carmine BELOW_1PCT -> penalty=10.0 (60% reduction)
  Score impact: 68 vs 83

TEST 3: SLS below safe threshold = zero penalty
  PASS: SLS below 1% threshold -> penalty=0.0 (safe at trace)

TEST 4: Evidence tiers
  PASS: Triclosan -> tier=2 (Cohort), penalty=32.0

TEST 5: Score ordering sanity
  PASS: Dr Bronner 99 > Chanel 64

TEST 6: Before/after impact summary

  Product: Chanel No5 Body Lotion
  Ingredient            

In [None]:
animal_welfare_code = """
import sys
sys.path.insert(0, '/content')

# ============================================================
# CELL 44 - ANIMAL WELFARE ENGINE (WIRED TO API)
# Replaces stub in Cell 43 for the animal dimension.
# Covers: ingredient-level (testing, derived), product-level
# certifications, species/category logic, and name parsing.
# ============================================================

# ---- Ingredient-level animal flags ----
ANIMAL_INGREDIENT_DB = {
    # DIRECTLY ANIMAL-DERIVED - HIGH CONCERN
    "ci 75470":                    {"flag": "ANIMAL_DERIVED", "severity": 30, "species": "insect",    "note": "Carmine - crushed cochineal insects (~70,000 per kg)"},
    "lanolin":                     {"flag": "ANIMAL_DERIVED", "severity": 15, "species": "sheep",     "note": "Sheep wool secretion - process varies in welfare standards"},
    "lanolin alcohol":             {"flag": "ANIMAL_DERIVED", "severity": 12, "species": "sheep",     "note": "Derived from lanolin"},
    "cera alba":                   {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "bee",       "note": "Beeswax - commercial beekeeping; queen culling common"},
    "shellac":                     {"flag": "ANIMAL_DERIVED", "severity": 25, "species": "insect",    "note": "Lac insect secretion; insects killed in processing"},
    "casein":                      {"flag": "ANIMAL_DERIVED", "severity": 18, "species": "cow",       "note": "Milk protein - dairy industry welfare concerns"},
    "whey protein":                {"flag": "ANIMAL_DERIVED", "severity": 15, "species": "cow",       "note": "Dairy by-product"},
    "collagen":                    {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "bovine",    "note": "Animal connective tissue - typically bovine or porcine"},
    "elastin":                     {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "bovine",    "note": "Animal tissue protein"},
    "keratin":                     {"flag": "ANIMAL_DERIVED", "severity": 18, "species": "sheep",     "note": "Animal hair/wool derived"},
    "silk":                        {"flag": "ANIMAL_DERIVED", "severity": 22, "species": "silkworm",  "note": "Silkworm cocoons; silkworms boiled alive"},
    "silk amino acids":            {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "silkworm",  "note": "Derived from silk processing"},
    "sericin":                     {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "silkworm",  "note": "Silk protein; silkworms killed"},
    "hydrolyzed silk":             {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "silkworm",  "note": "Derived from silk"},
    "gelatin":                     {"flag": "ANIMAL_DERIVED", "severity": 22, "species": "bovine",    "note": "Boiled animal bones/skin; slaughterhouse by-product"},
    "glycerin":                    {"flag": "POSSIBLE_ANIMAL", "severity": 5,  "species": "bovine",   "note": "Can be animal or plant-derived - source not disclosed"},
    "stearic acid":                {"flag": "POSSIBLE_ANIMAL", "severity": 8,  "species": "bovine",   "note": "Can be tallow (beef fat) or plant-derived"},
    "oleic acid":                  {"flag": "POSSIBLE_ANIMAL", "severity": 6,  "species": "bovine",   "note": "Can be animal or plant-derived"},
    "sodium stearate":             {"flag": "POSSIBLE_ANIMAL", "severity": 8,  "species": "bovine",   "note": "Stearic acid salt - source ambiguous"},
    "cetyl alcohol":               {"flag": "POSSIBLE_ANIMAL", "severity": 4,  "species": "whale",    "note": "Historically whale-derived; now mostly plant - verify source"},
    "squalene":                    {"flag": "ANIMAL_DERIVED",  "severity": 28, "species": "shark",    "note": "Shark liver oil - overfishing concern; squalane preferred"},
    "musk":                        {"flag": "ANIMAL_DERIVED",  "severity": 30, "species": "deer",     "note": "Musk deer gland secretion - poaching concern"},
    "ambergris":                   {"flag": "ANIMAL_DERIVED",  "severity": 30, "species": "whale",    "note": "Sperm whale intestinal secretion"},
    "civet":                       {"flag": "ANIMAL_DERIVED",  "severity": 30, "species": "civet",    "note": "Civet cat anal gland secretion - captivity conditions"},

    # TESTING CONCERN (not derived but tested on animals)
    "triclosan":                   {"flag": "ANIMAL_TESTED",   "severity": 10, "species": "rat",      "note": "Extensive animal testing history"},
    "benzophenone-3":              {"flag": "ANIMAL_TESTED",   "severity": 8,  "species": "rat",      "note": "Animal studies cited in EU restriction"},
    "sodium lauryl sulfate":       {"flag": "ANIMAL_TESTED",   "severity": 6,  "species": "rabbit",   "note": "Draize eye test historically used"},

    # FOOD - ANIMAL-DERIVED PROTEINS AND FATS
    "sucrose":                     {"flag": "VEGAN_OK",        "severity": 0,  "species": None,       "note": "Plant-derived"},
    "theobroma cacao seed butter": {"flag": "VEGAN_OK",        "severity": 0,  "species": None,       "note": "Plant-derived"},
    "annatto":                     {"flag": "VEGAN_OK",        "severity": 0,  "species": None,       "note": "Plant-derived colorant"},
}

# ---- Certification signals ----
CRUELTY_FREE_BRANDS = {
    "dr bronner's", "dr. bronner's", "the body shop", "lush", "pacifica",
    "e.l.f.", "elf cosmetics", "nyx", "wet n wild", "covergirl",
    "urban decay", "tarte", "too faced", "milk makeup", "glossier",
    "tatcha", "drunk elephant", "paula's choice", "ordinary", "the ordinary",
    "cerave",  # leaping bunny certified
    "neutrogena",  # varies by market - not universal
    "seventh generation", "method", "mrs meyer",
}

VEGAN_CERTIFIED_BRANDS = {
    "dr bronner's", "dr. bronner's", "pacifica", "e.l.f.", "elf cosmetics",
    "nyx", "wet n wild", "milk makeup", "ordinary", "the ordinary",
    "seventh generation", "method",
}

NOT_CRUELTY_FREE = {
    "l'oreal", "loreal", "maybelline", "garnier", "lancome",
    "estee lauder", "clinique", "mac", "bobbi brown", "jo malone",
    "procter & gamble", "p&g", "gillette", "olay", "sk-ii",
    "unilever", "dove", "axe", "tresemme", "pond's", "vaseline",
    "johnson & johnson", "neutrogena",  # sells in China
    "nivea", "beiersdorf",
    "shiseido", "nars",
    "colgate", "palmolive",
    "revlon", "elizabeth arden",
    "chanel", "dior", "ysl", "saint laurent", "givenchy",
    "clorox",  # parent of Burt's Bees (Burt's is CF but parent isn't)
}

# ---- Product name parsers ----
ANIMAL_WELFARE_SIGNALS = {

    "chicken":     {"species": "poultry", "severity": 20, "note": "Poultry farming welfare concerns"},
    "beef":        {"species": "bovine",  "severity": 22, "note": "Cattle farming welfare concerns"},
    "pork":        {"species": "porcine", "severity": 22, "note": "Pig farming welfare concerns"},
    "lamb":        {"species": "ovine",   "severity": 20, "note": "Lamb farming welfare concerns"},
    "salmon":      {"species": "fish",    "severity": 15, "note": "Aquaculture or wild-catch welfare"},
    "tuna":        {"species": "fish",    "severity": 18, "note": "Overfishing and bycatch concern"},
    "shrimp":      {"species": "crustacean","severity":15,"note": "Aquaculture welfare concerns"},
    "egg":         {"species": "poultry", "severity": 14, "note": "Poultry welfare - check free-range/cage-free"},
    "milk":        {"species": "bovine",  "severity": 14, "note": "Dairy cattle welfare"},
    "dairy":       {"species": "bovine",  "severity": 14, "note": "Dairy farming welfare concerns"},
    "honey":       {"species": "bee",     "severity": 16, "note": "Commercial beekeeping - queen culling, wing clipping"},
    "beeswax":     {"species": "bee",     "severity": 20, "note": "Commercial beekeeping practices"},
    "goat":        {"species": "caprine", "severity": 18, "note": "Goat farming welfare"},
    "turkey":      {"species": "poultry", "severity": 20, "note": "Poultry farming welfare"},
    "duck":        {"species": "poultry", "severity": 20, "note": "Poultry farming - foie gras concern"},
    "foie gras":   {"species": "poultry", "severity": 35, "note": "Force-feeding - banned in many jurisdictions"},
    "veal":        {"species": "bovine",  "severity": 30, "note": "Veal production - separation from mother at birth"},
    "lobster":     {"species": "crustacean","severity":18,"note": "Live boiling practices"},
    "leather":     {"species": "bovine",  "severity": 20, "note": "Animal hide - slaughterhouse by-product"},
    "wool":        {"species": "ovine",   "severity": 15, "note": "Mulesing practices; shearing welfare varies"},
    "down":        {"species": "waterfowl","severity":22, "note": "Live plucking of feathers - welfare concern"},
    "fur":         {"species": "various", "severity": 35, "note": "Fur farming - severe confinement"},
    "pearl":       {"species": "mollusk", "severity": 12, "note": "Oyster farming - nucleation process"},
}


# ============================================================
# SCORING ENGINE
# ============================================================

def _parse_product_name(product_name):
    name_lower = product_name.lower()
    signals = []
    plant_milk_words = ["oat", "almond", "soy", "rice", "coconut",
                        "hazelnut", "cashew", "hemp", "pea", "macadamia"]
    is_plant_milk = ("milk" in name_lower and
                     any(w in name_lower for w in plant_milk_words))
    for keyword, data in ANIMAL_PRODUCT_SIGNALS.items():
        if keyword in name_lower:
            if is_plant_milk and keyword in ("milk", "dairy"):
                continue
            signals.append({**data, "keyword": keyword})
    return signals

def _score_ingredients(ingredients):
    flags = []
    total_penalty = 0

    try:
        _nil = normalise_ingredient_list
    except NameError:
        from noura_concentration_engine import normalise_ingredient_list, get_concentration_zone
    normalised = normalise_ingredient_list(ingredients)

    for item in normalised:
        canonical = item["canonical"]
        hazard = ANIMAL_INGREDIENT_DB.get(canonical)
        if hazard is None:
            continue
        if hazard["flag"] == "VEGAN_OK":
            continue

        # Apply concentration zone multiplier
        multiplier = item["multiplier"]
        adj_severity = round(hazard["severity"] * multiplier, 1)
        total_penalty += adj_severity

        flags.append({
            "ingredient":   canonical,
            "flag":         hazard["flag"],
            "species":      hazard["species"],
            "severity_base":hazard["severity"],
            "severity_adj": adj_severity,
            "zone":         item["zone"],
            "note":         hazard["note"],
        })

    return flags, round(total_penalty, 1)

def _brand_certification_score(brand_name):
    brand_lower = brand_name.lower().strip()

    if brand_lower in VEGAN_CERTIFIED_BRANDS:
        return {"certified_vegan": True, "cruelty_free": True,
                "bonus": 15, "penalty": 0,
                "note": "Vegan certified brand - full bonus applied"}

    if brand_lower in CRUELTY_FREE_BRANDS:
        return {"certified_vegan": False, "cruelty_free": True,
                "bonus": 8, "penalty": 0,
                "note": "Leaping Bunny / cruelty-free certified"}

    if brand_lower in NOT_CRUELTY_FREE:
        return {"certified_vegan": False, "cruelty_free": False,
                "bonus": 0, "penalty": 15,
                "note": "Brand sells in China or has confirmed animal testing programme"}

    return {"certified_vegan": False, "cruelty_free": None,
            "bonus": 0, "penalty": 0,
            "note": "Certification status unknown - no adjustment applied"}


def calculate_animal_welfare_score(product_name, ingredients, category="cosmetics", brand=None):
    base = 100
    brand_name = brand or product_name

    # 1. Ingredient-level flags
    ing_flags, ing_penalty = _score_ingredients(ingredients)

    # 2. Product name signals (food/personal care)
    name_signals = _parse_product_name(product_name)
    name_penalty = sum(s["severity"] for s in name_signals)

    # 3. Brand certification
    cert = _brand_certification_score(brand_name)
    cert_bonus   = cert["bonus"]
    cert_penalty = cert["penalty"]

    # 4. Category modifier - cosmetics held to stricter standard
    # (EU banned animal testing for cosmetics 2013)
    if category == "cosmetics":
        ing_penalty = round(ing_penalty * 1.2, 1)  # 20% stricter

    total_penalty = min(ing_penalty + name_penalty + cert_penalty, 80)
    total_bonus   = min(cert_bonus, 15)

    score = max(0, min(100, round(base - total_penalty + total_bonus)))

    if score >= 85:   verdict = "EXCELLENT"
    elif score >= 70: verdict = "GOOD"
    elif score >= 55: verdict = "ACCEPTABLE"
    elif score >= 40: verdict = "CAUTION"
    else:             verdict = "HIGH_RISK"

    has_animal_derived = any(f["flag"] == "ANIMAL_DERIVED" for f in ing_flags)
    has_animal_tested  = any(f["flag"] == "ANIMAL_TESTED" for f in ing_flags)

    confidence = "HIGH" if (ing_flags or name_signals) else "MEDIUM"

    flags = []
    for f in sorted(ing_flags, key=lambda x: -x["severity_adj"]):
        flags.append(f["note"] + " [" + f["flag"] + " - " + f["zone"] + "]")
    for s in name_signals:
        flags.append(s["note"] + " [PRODUCT_CONTAINS_" + s["species"].upper() + "]")
    flags.append(cert["note"])

    return {
        "score":             score,
        "verdict":           verdict,
        "confidence":        confidence,
        "engine":            "cell44_live",
        "total_penalty":     total_penalty,
        "total_bonus":       total_bonus,
        "ingredient_flags":  ing_flags,
        "name_signals":      name_signals,
        "certification":     cert,
        "has_animal_derived":has_animal_derived,
        "has_animal_tested": has_animal_tested,
        "flags":             flags,
        "ingredient_count":  len(ingredients),
        "flagged_count":     len(ing_flags) + len(name_signals),
    }


# ============================================================
# SELF-TEST
# ============================================================
print("Animal welfare engine loaded (Cell 44 - LIVE)")
print("  Ingredient DB:      " + str(len(ANIMAL_INGREDIENT_DB)) + " entries")
print("  Cruelty-free brands:" + str(len(CRUELTY_FREE_BRANDS)))
print("  Product signals:    " + str(len(ANIMAL_PRODUCT_SIGNALS)))
print()
print("=" * 62)
print("  CELL 44 SELF-TEST: ANIMAL WELFARE ENGINE")
print("=" * 62)

# TEST 1: Dr Bronner's - vegan certified
print()
print("TEST 1: Dr Bronner's Pure Castile Soap (vegan certified)")
r1 = calculate_animal_welfare_score(
    "Dr Bronner's Pure Castile Soap",
    ["water","coconut oil","potassium hydroxide","hemp oil","jojoba oil","tocopherol"],
    category="cosmetics", brand="Dr Bronner's"
)
print("  Score:      " + str(r1["score"]) + "/100 [" + r1["verdict"] + "]")
print("  Certified:  vegan=" + str(r1["certification"]["certified_vegan"]) +
      " cruelty_free=" + str(r1["certification"]["cruelty_free"]))
print("  Bonus:      +" + str(r1["total_bonus"]))
assert r1["score"] >= 85, "Dr Bronner vegan brand should be EXCELLENT"
assert r1["certification"]["certified_vegan"] == True
print("  PASS: Vegan brand bonus applied correctly")

# TEST 2: Chanel - carmine + beeswax + non-CF brand
print()
print("TEST 2: Chanel No5 (carmine, beeswax, non-CF brand)")
r2 = calculate_animal_welfare_score(
    "Chanel No5 Body Lotion",
    ["aqua","glycerin","lanolin","cera alba","ci 75470","phenoxyethanol","fragrance"],
    category="cosmetics", brand="Chanel"
)
print("  Score:      " + str(r2["score"]) + "/100 [" + r2["verdict"] + "]")
print("  Penalty:    " + str(r2["total_penalty"]))
print("  Flags:")
for f in r2["ingredient_flags"]:
    print("    " + f["flag"].ljust(16) + " " + f["ingredient"] + " (sev=" + str(f["severity_adj"]) + ")")
assert r2["score"] < 60, "Chanel with carmine+beeswax+non-CF should be CAUTION or worse"
assert r2["has_animal_derived"] == True
print("  PASS: Animal-derived ingredients and non-CF brand correctly penalised")

# TEST 3: Squalene (shark) vs Squalane (plant) - critical distinction
print()
print("TEST 3: Squalene (shark) vs Squalane (plant)")
r_shark = calculate_animal_welfare_score(
    "Anti-aging Serum", ["aqua","squalene","phenoxyethanol"], "cosmetics"
)
r_plant = calculate_animal_welfare_score(
    "Anti-aging Serum", ["aqua","squalane","phenoxyethanol"], "cosmetics"
)
print("  Squalene (shark):  " + str(r_shark["score"]) + "/100 [" + r_shark["verdict"] + "]")
print("  Squalane (plant):  " + str(r_plant["score"]) + "/100 [" + r_plant["verdict"] + "]")
assert r_shark["score"] < r_plant["score"], "Squalene (shark) should score lower than squalane"
print("  PASS: squalene vs squalane distinction correct (" +
      str(r_shark["score"]) + " vs " + str(r_plant["score"]) + ")")

# TEST 4: Food product - chicken breast
print()
print("TEST 4: Food product - chicken breast")
r4 = calculate_animal_welfare_score(
    "Free Range Chicken Breast", ["chicken"], "food"
)
print("  Score:       " + str(r4["score"]) + "/100 [" + r4["verdict"] + "]")
print("  Name signals:" + str(len(r4["name_signals"])))
assert r4["name_signals"], "Should detect 'chicken' in product name"
assert r4["score"] < 90, "Chicken product should not score EXCELLENT, got " + str(r4["score"])
print("  PASS: Product name parsed correctly, poultry welfare flagged")

# TEST 5: Vegan food product - no animal signals
print()
print("TEST 5: Oat milk - vegan food")
r5 = calculate_animal_welfare_score(
    "Oat Milk Unsweetened",
    ["oats","water","sea salt","sunflower oil"],
    "food"
)
print("  Score:      " + str(r5["score"]) + "/100 [" + r5["verdict"] + "]")
assert r5["score"] >= 85, "Pure plant product should score EXCELLENT, got " + str(r5["score"])
assert not r5["name_signals"], "Oat milk should not trigger animal name signals"
print("  PASS: Vegan food correctly scores EXCELLENT")

# TEST 6: Foie gras - highest severity
print()
print("TEST 6: Foie gras - highest animal welfare concern")
r6 = calculate_animal_welfare_score("Duck Foie Gras Pate", ["duck liver","salt","pepper"], "food")
print("  Score:      " + str(r6["score"]) + "/100 [" + r6["verdict"] + "]")
assert r6["score"] <= 55, "Foie gras should be CAUTION or worse"
print("  PASS: Foie gras correctly scores " + r6["verdict"])

# TEST 7: Score ordering
print()
print("TEST 7: Score ordering")
scores = {
    "Dr Bronner's (vegan)": r1["score"],
    "Oat Milk (vegan food)": r5["score"],
    "Chanel (carmine+beeswax)": r2["score"],
    "Foie Gras": r6["score"],
}
for name, score in sorted(scores.items(), key=lambda x: -x[1]):
    print("  " + str(score).rjust(3) + "  " + name)
assert r1["score"] > r6["score"]
assert r1["score"] > r2["score"]
print("  PASS: Ordering correct")

print()
print("ALL CELL 44 TESTS PASSED")
print()
print("Animal welfare engine now LIVE in pipeline:")
print("  Ingredient-level:  " + str(len(ANIMAL_INGREDIENT_DB)) + " flagged ingredients")
print("  Product name:      " + str(len(ANIMAL_PRODUCT_SIGNALS)) + " keyword signals")
print("  Brand DB:          " + str(len(CRUELTY_FREE_BRANDS)) + " CF + " +
      str(len(NOT_CRUELTY_FREE)) + " non-CF brands")
print("  Cosmetics stricter: 1.2x multiplier (EU 2013 ban on animal testing)")

"""

with open('/content/noura_animal_welfare_engine.py', 'w', encoding='utf-8') as f:
    f.write(animal_welfare_code)

exec(animal_welfare_code)
print('Cell 44 complete - noura_animal_welfare_engine.py written and loaded')

Animal welfare engine loaded (Cell 44 - LIVE)
  Ingredient DB:      30 entries
  Cruelty-free brands:25
  Product signals:    23

  CELL 44 SELF-TEST: ANIMAL WELFARE ENGINE

TEST 1: Dr Bronner's Pure Castile Soap (vegan certified)
  Score:      100/100 [EXCELLENT]
  Certified:  vegan=True cruelty_free=True
  Bonus:      +15
  PASS: Vegan brand bonus applied correctly

TEST 2: Chanel No5 (carmine, beeswax, non-CF brand)
  Score:      20/100 [HIGH_RISK]
  Penalty:    80
  Flags:
    POSSIBLE_ANIMAL  glycerin (sev=5.0)
    ANIMAL_DERIVED   lanolin (sev=15.0)
    ANIMAL_DERIVED   cera alba (sev=20.0)
    ANIMAL_DERIVED   ci 75470 (sev=30.0)
  PASS: Animal-derived ingredients and non-CF brand correctly penalised

TEST 3: Squalene (shark) vs Squalane (plant)
  Squalene (shark):  66/100 [ACCEPTABLE]
  Squalane (plant):  100/100 [EXCELLENT]
  PASS: squalene vs squalane distinction correct (66 vs 100)

TEST 4: Food product - chicken breast
  Score:       80/100 [GOOD]
  Name signals:1
  PASS: P

In [None]:
governance_code = """
import sys
sys.path.insert(0, '/content')

# ============================================================
# CELL 45 - GOVERNANCE ENGINE (LIVE)
# Replaces stub in Cell 43 for the governance dimension.
# Covers: parent company ownership, regulatory violations,
# transparency practices, greenwashing flags, market access
# (China mandatory testing), lobbying, and ESG signals.
# ============================================================

# ============================================================
# PARENT COMPANY MAP
# brand_lower -> parent company
# ============================================================

PARENT_COMPANY_MAP = {
    # L'OREAL GROUP
    "l'oreal": "loreal_group", "loreal": "loreal_group",
    "maybelline": "loreal_group", "garnier": "loreal_group",
    "lancome": "loreal_group", "urban decay": "loreal_group",
    "it cosmetics": "loreal_group", "kiehl's": "loreal_group",
    "redken": "loreal_group", "matrix": "loreal_group",
    "cerave": "loreal_group", "la roche-posay": "loreal_group",
    "vichy": "loreal_group", "skinceuticals": "loreal_group",
    "ysl beauty": "loreal_group", "giorgio armani beauty": "loreal_group",

    # UNILEVER
    "dove": "unilever", "axe": "unilever", "lynx": "unilever",
    "tresemme": "unilever", "vaseline": "unilever", "ponds": "unilever",
    "pond's": "unilever", "simple": "unilever", "st. ives": "unilever",
    "dermalogica": "unilever", "tatcha": "unilever",
    "seventh generation": "unilever",
    "ben & jerry's": "unilever", "hellmann's": "unilever",
    "knorr": "unilever", "lipton": "unilever",

    # PROCTER & GAMBLE
    "gillette": "pg", "olay": "pg", "sk-ii": "pg",
    "head & shoulders": "pg", "pantene": "pg", "herbal essences": "pg",
    "old spice": "pg", "secret": "pg", "tide": "pg",
    "ariel": "pg", "pampers": "pg", "oral-b": "pg",

    # ESTEE LAUDER
    "estee lauder": "estee_lauder_group", "clinique": "estee_lauder_group",
    "mac": "estee_lauder_group", "bobbi brown": "estee_lauder_group",
    "jo malone": "estee_lauder_group", "la mer": "estee_lauder_group",
    "aveda": "estee_lauder_group", "origins": "estee_lauder_group",
    "too faced": "estee_lauder_group", "smashbox": "estee_lauder_group",
    "bumble and bumble": "estee_lauder_group",
    "the ordinary": "estee_lauder_group",  # via DECIEM acquisition
    "niod": "estee_lauder_group",

    # SHISEIDO
    "shiseido": "shiseido_group", "nars": "shiseido_group",
    "bare minerals": "shiseido_group", "cle de peau": "shiseido_group",
    "drunk elephant": "shiseido_group",

    # COTY
    "covergirl": "coty", "rimmel": "coty", "sally hansen": "coty",
    "max factor": "coty", "hugo boss beauty": "coty",
    "gucci beauty": "coty", "burberry beauty": "coty",

    # BEIERSDORF
    "nivea": "beiersdorf", "eucerin": "beiersdorf",
    "la prairie": "beiersdorf", "hansaplast": "beiersdorf",

    # JOHNSON & JOHNSON
    "neutrogena": "jnj", "aveeno": "jnj", "rogaine": "jnj",
    "clean & clear": "jnj", "lubriderm": "jnj",

    # CHURCH & DWIGHT
    "arm & hammer": "church_dwight", "oxiclean": "church_dwight",
    "waterpik": "church_dwight",

    # HENKEL
    "schwarzkopf": "henkel", "fa": "henkel", "dial": "henkel",
    "purex": "henkel", "got2b": "henkel",

    # LUXURY / CONGLOMERATES
    "chanel": "chanel_sa",
    "dior": "lvmh", "givenchy": "lvmh", "guerlain": "lvmh",
    "benefit": "lvmh", "fresh": "lvmh", "make up for ever": "lvmh",
    "fenty beauty": "lvmh",
    "hermes": "hermes_sa",
    "burberry": "burberry_plc",

    # INDEPENDENTS / HIGH-GOVERNANCE
    "dr bronner's": "dr_bronners_independent",
    "dr. bronner's": "dr_bronners_independent",
    "paula's choice": "skkn_independent",
    "the body shop": "natura_co",  # acquired from L'Oreal by Natura&Co
    "aesop": "natura_co",
    "lush": "lush_independent",
    "ordinary": "estee_lauder_group",
    "burt's bees": "clorox_group",

    # FOOD & BEVERAGE
    "nestle": "nestle_group", "nescafe": "nestle_group",
    "kitkat": "nestle_group", "maggi": "nestle_group",
    "gerber": "nestle_group",
    "coca-cola": "cocacola_group", "sprite": "cocacola_group",
    "fanta": "cocacola_group", "dasani": "cocacola_group",
    "pepsi": "pepsico", "lays": "pepsico", "gatorade": "pepsico",
    "quaker": "pepsico", "tropicana": "pepsico",
    "kraft": "kraft_heinz", "heinz": "kraft_heinz",
    "philadelphia": "kraft_heinz", "oscar mayer": "kraft_heinz",
    "mondelez": "mondelez", "oreo": "mondelez", "cadbury": "mondelez",
    "milka": "mondelez", "toblerone": "mondelez",
    "unilever food": "unilever",
}

# ============================================================
# COMPANY GOVERNANCE PROFILES
# ============================================================

COMPANY_PROFILES = {
    "loreal_group": {
        "name": "L'Oreal Group",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [
            "FTC greenwashing settlement 2024 (Garnier)",
            "EU antitrust fine 2014",
        ],
        "transparency_score": 55,
        "esg_rating": "B",
        "greenwashing_flags": ["'Natural' claims on synthetic-dominant products"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 20,
        "note": "Large portfolio; China sales require mandatory animal testing for imported cosmetics",
    },
    "unilever": {
        "name": "Unilever",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [
            "UK ASA greenwashing ruling 2023",
            "FTC investigation 2022 (environmental claims)",
        ],
        "transparency_score": 60,
        "esg_rating": "A-",
        "greenwashing_flags": ["'Net Zero' targets disputed by ClientEarth"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": ["B-Corp (Seventh Generation subsidiary only)"],
        "bonus": 5, "penalty": 15,
        "note": "Progressive ESG commitments but China testing exposure across portfolio",
    },
    "pg": {
        "name": "Procter & Gamble",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [
            "FTC claims challenge 2019 (Olay)",
        ],
        "transparency_score": 50,
        "esg_rating": "B",
        "greenwashing_flags": ["Ambiguous 'clean' labelling on SK-II"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 20,
        "note": "Significant China exposure; limited ingredient transparency",
    },
    "estee_lauder_group": {
        "name": "Estee Lauder Companies",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [],
        "transparency_score": 58,
        "esg_rating": "A-",
        "greenwashing_flags": [],
        "lobbying_concern": False,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 2, "penalty": 18,
        "note": "China sales; DECIEM/The Ordinary acquisition adds transparency upside",
    },
    "shiseido_group": {
        "name": "Shiseido Group",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [],
        "transparency_score": 55,
        "esg_rating": "B+",
        "greenwashing_flags": [],
        "lobbying_concern": False,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 18,
        "note": "Japanese group with strong China exposure",
    },
    "coty": {
        "name": "Coty Inc.",
        "china_sales": True,
        "china_mandatory_testing": False,  # shifted strategy post-2020
        "regulatory_violations": [
            "SEC settlement 2022 (accounting irregularities)",
        ],
        "transparency_score": 42,
        "esg_rating": "C+",
        "greenwashing_flags": ["Vague sustainability targets without auditing"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "LOW",
        "certifications": [],
        "bonus": 0, "penalty": 25,
        "note": "Weak governance; SEC accounting issues; limited transparency",
    },
    "beiersdorf": {
        "name": "Beiersdorf AG",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [],
        "transparency_score": 65,
        "esg_rating": "A-",
        "greenwashing_flags": [],
        "lobbying_concern": False,
        "supply_chain_disclosure": "GOOD",
        "certifications": [],
        "bonus": 5, "penalty": 15,
        "note": "Relatively strong European governance; China testing exposure",
    },
    "jnj": {
        "name": "Johnson & Johnson",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [
            "Baby powder talc litigation (asbestos contamination)",
            "Multiple FDA warning letters",
        ],
        "transparency_score": 45,
        "esg_rating": "B-",
        "greenwashing_flags": ["'Gentle' claims on products with known irritants"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 28,
        "note": "Significant regulatory history including talc asbestos litigation",
    },
    "chanel_sa": {
        "name": "Chanel S.A.",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [],
        "transparency_score": 38,
        "esg_rating": "N/A",  # private company
        "greenwashing_flags": ["Limited supply chain transparency (private company)"],
        "lobbying_concern": False,
        "supply_chain_disclosure": "LOW",
        "certifications": [],
        "bonus": 0, "penalty": 22,
        "note": "Private company; very limited transparency; China testing exposure",
    },
    "lvmh": {
        "name": "LVMH Moet Hennessy Louis Vuitton",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [
            "EU antitrust investigation 2024",
        ],
        "transparency_score": 45,
        "esg_rating": "B",
        "greenwashing_flags": ["'Sustainable luxury' framing without verified metrics"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "LOW",
        "certifications": [],
        "bonus": 0, "penalty": 22,
        "note": "Luxury conglomerate; opacity typical of sector; China exposure",
    },
    "dr_bronners_independent": {
        "name": "Dr. Bronner's (Independent)",
        "china_sales": False,
        "china_mandatory_testing": False,
        "regulatory_violations": [],
        "transparency_score": 92,
        "esg_rating": "A+",
        "greenwashing_flags": [],
        "lobbying_concern": False,
        "supply_chain_disclosure": "FULL",
        "certifications": ["B-Corp", "Fair Trade", "USDA Organic", "Leaping Bunny",
                           "Vegan", "Non-GMO"],
        "bonus": 20, "penalty": 0,
        "note": "Exceptional governance; B-Corp certified; full supply chain disclosure; no China sales",
    },
    "lush_independent": {
        "name": "Lush Cosmetics (Independent)",
        "china_sales": False,
        "china_mandatory_testing": False,
        "regulatory_violations": [],
        "transparency_score": 85,
        "esg_rating": "A",
        "greenwashing_flags": [],
        "lobbying_concern": False,
        "supply_chain_disclosure": "GOOD",
        "certifications": ["Leaping Bunny", "Cruelty-Free International"],
        "bonus": 15, "penalty": 0,
        "note": "Strong ethics; exited China market in 2014 over testing laws; transparent sourcing",
    },
    "natura_co": {
        "name": "Natura &Co",
        "china_sales": False,
        "china_mandatory_testing": False,
        "regulatory_violations": [],
        "transparency_score": 80,
        "esg_rating": "A",
        "greenwashing_flags": [],
        "lobbying_concern": False,
        "supply_chain_disclosure": "GOOD",
        "certifications": ["B-Corp"],
        "bonus": 12, "penalty": 0,
        "note": "B-Corp certified group; The Body Shop and Aesop; strong social mission",
    },
    "clorox_group": {
        "name": "Clorox Company",
        "china_sales": False,
        "china_mandatory_testing": False,
        "regulatory_violations": [],
        "transparency_score": 60,
        "esg_rating": "B",
        "greenwashing_flags": ["Burt's Bees 'natural' claims - parent company contradictions"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 8,
        "note": "Burt's Bees subsidiary is CF but parent Clorox manufactures bleach products",
    },
    "henkel": {
        "name": "Henkel AG",
        "china_sales": True,
        "china_mandatory_testing": True,
        "regulatory_violations": [],
        "transparency_score": 68,
        "esg_rating": "A-",
        "greenwashing_flags": [],
        "lobbying_concern": False,
        "supply_chain_disclosure": "GOOD",
        "certifications": [],
        "bonus": 5, "penalty": 12,
        "note": "Strong European governance standards; China exposure",
    },
    "nestle_group": {
        "name": "Nestle S.A.",
        "china_sales": True,
        "china_mandatory_testing": False,  # food, not cosmetics
        "regulatory_violations": [
            "WHO baby formula marketing violations (multiple countries)",
            "Child labour in cocoa supply chain (ongoing)",
            "Water privatisation controversies",
            "2021 internal memo: 60% of portfolio 'not healthy'",
        ],
        "transparency_score": 35,
        "esg_rating": "C",
        "greenwashing_flags": [
            "'Sustainably sourced' cocoa claims vs documented child labour",
            "Plastic pollution despite 'net zero plastic' commitments",
        ],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 35,
        "note": "Persistent governance failures: formula marketing, child labour, water rights",
    },
    "cocacola_group": {
        "name": "The Coca-Cola Company",
        "china_sales": True,
        "china_mandatory_testing": False,
        "regulatory_violations": [
            "World's largest plastic polluter (Break Free From Plastic 2022)",
            "FTC investigation into recycling claims",
        ],
        "transparency_score": 40,
        "esg_rating": "C+",
        "greenwashing_flags": [
            "'World Without Waste' campaign vs documented plastic lobbying",
        ],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 30,
        "note": "Leading plastic polluter; documented greenwashing; sugary product health concerns",
    },
    "pepsico": {
        "name": "PepsiCo Inc.",
        "china_sales": True,
        "china_mandatory_testing": False,
        "regulatory_violations": [
            "Break Free From Plastic top polluter 2022",
        ],
        "transparency_score": 45,
        "esg_rating": "B-",
        "greenwashing_flags": ["'pep+' sustainability strategy questioned by analysts"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 25,
        "note": "Plastic pollution; sugar and ultra-processed food concerns",
    },
    "kraft_heinz": {
        "name": "Kraft Heinz Company",
        "china_sales": True,
        "china_mandatory_testing": False,
        "regulatory_violations": [
            "SEC settlement 2021 (accounting fraud, $62M penalty)",
        ],
        "transparency_score": 38,
        "esg_rating": "C",
        "greenwashing_flags": [],
        "lobbying_concern": True,
        "supply_chain_disclosure": "LOW",
        "certifications": [],
        "bonus": 0, "penalty": 30,
        "note": "SEC accounting fraud settlement; ultra-processed food portfolio; weak transparency",
    },
    "mondelez": {
        "name": "Mondelez International",
        "china_sales": True,
        "china_mandatory_testing": False,
        "regulatory_violations": [
            "Child labour in cocoa supply chain (documented 2022)",
        ],
        "transparency_score": 42,
        "esg_rating": "C+",
        "greenwashing_flags": ["'Cocoa Life' programme disputed by NGOs"],
        "lobbying_concern": True,
        "supply_chain_disclosure": "PARTIAL",
        "certifications": [],
        "bonus": 0, "penalty": 28,
        "note": "Child labour in cocoa supply; ultra-processed portfolio",
    },
    "UNKNOWN": {
        "name": "Unknown Brand",
        "china_sales": None,
        "china_mandatory_testing": None,
        "regulatory_violations": [],
        "transparency_score": 50,
        "esg_rating": "UNKNOWN",
        "greenwashing_flags": [],
        "lobbying_concern": None,
        "supply_chain_disclosure": "UNKNOWN",
        "certifications": [],
        "bonus": 0, "penalty": 0,
        "note": "Brand not in database - neutral score applied",
    },
}


# ============================================================
# SCORING ENGINE
# ============================================================

def _lookup_company(brand_name):
    brand_lower = brand_name.lower().strip()
    parent_key  = PARENT_COMPANY_MAP.get(brand_lower)
    if parent_key:
        profile = COMPANY_PROFILES.get(parent_key, COMPANY_PROFILES["UNKNOWN"])
        return parent_key, profile, brand_lower != parent_key
    return "UNKNOWN", COMPANY_PROFILES["UNKNOWN"], False


def calculate_governance_score(brand_name):
    company_key, profile, has_parent = _lookup_company(brand_name)

    base = 100

    # 1. Base transparency score (already calibrated 0-100)
    transparency_contribution = round(profile["transparency_score"] * 0.4)

    # 2. Penalty from profile
    penalty = profile["penalty"]

    # 3. China mandatory testing penalty (cosmetics)
    china_penalty = 15 if profile.get("china_mandatory_testing") else 0

    # 4. Regulatory violations (stacking, max 25)
    violation_penalty = min(len(profile["regulatory_violations"]) * 8, 25)

    # 5. Greenwashing flags
    gw_penalty = min(len(profile["greenwashing_flags"]) * 5, 15)

    # 6. Supply chain transparency modifier
    sc_bonus = {"FULL": 10, "GOOD": 5, "PARTIAL": 0, "LOW": -5, "UNKNOWN": -3}.get(
        profile["supply_chain_disclosure"], 0
    )

    # 7. Certification bonus
    cert_bonus = min(len(profile["certifications"]) * 4, 16)

    # 8. Aggregate
    total_penalty = min(penalty + china_penalty + violation_penalty + gw_penalty, 75)
    total_bonus   = min(profile["bonus"] + cert_bonus + sc_bonus + transparency_contribution, 60)

    if company_key == "UNKNOWN":
        score = 50
    else:
        raw_score = base - total_penalty + total_bonus
        score = max(0, min(100, raw_score - 40))

    if score >= 85:   verdict = "EXCELLENT"
    elif score >= 70: verdict = "GOOD"
    elif score >= 55: verdict = "ACCEPTABLE"
    elif score >= 40: verdict = "CAUTION"
    else:             verdict = "HIGH_RISK"

    flags = []
    if profile.get("china_mandatory_testing"):
        flags.append("Brand sells in China - mandatory animal testing for imported cosmetics applies")
    for v in profile["regulatory_violations"]:
        flags.append("Regulatory: " + v)
    for g in profile["greenwashing_flags"]:
        flags.append("Greenwashing risk: " + g)
    if profile["supply_chain_disclosure"] in ("LOW", "UNKNOWN"):
        flags.append("Limited supply chain transparency")
    if profile.get("lobbying_concern"):
        flags.append("Documented lobbying against consumer protection regulations")
    flags.append(profile["note"])

    return {
        "score":            score,
        "verdict":          verdict,
        "confidence":       "HIGH" if company_key != "UNKNOWN" else "LOW",
        "engine":           "cell45_live",
        "brand_name":       brand_name,
        "parent_company":   profile["name"],
        "has_parent_brand": has_parent,
        "esg_rating":       profile["esg_rating"],
        "china_testing":    profile.get("china_mandatory_testing"),
        "transparency_score": profile["transparency_score"],
        "certifications":   profile["certifications"],
        "regulatory_violations": profile["regulatory_violations"],
        "supply_chain":     profile["supply_chain_disclosure"],
        "total_penalty":    total_penalty,
        "total_bonus":      min(total_bonus, 60),
        "breakdown": {
            "transparency_contribution": transparency_contribution,
            "profile_penalty":           penalty,
            "china_penalty":             china_penalty,
            "violation_penalty":         violation_penalty,
            "greenwashing_penalty":      gw_penalty,
            "supply_chain_modifier":     sc_bonus,
            "certification_bonus":       cert_bonus,
        },
        "flags": flags,
    }


# ============================================================
# SELF-TEST
# ============================================================
print("Governance engine loaded (Cell 45 - LIVE)")
print("  Company profiles:   " + str(len(COMPANY_PROFILES)))
print("  Parent company map: " + str(len(PARENT_COMPANY_MAP)) + " brands")
print()
print("=" * 62)
print("  CELL 45 SELF-TEST: GOVERNANCE ENGINE")
print("=" * 62)

# TEST 1: Dr Bronner's - gold standard governance
print()
print("TEST 1: Dr Bronner's (independent, B-Corp, full transparency)")
r1 = calculate_governance_score("Dr Bronner's")
print("  Score:        " + str(r1["score"]) + "/100 [" + r1["verdict"] + "]")
print("  Parent:       " + r1["parent_company"])
print("  Certifications: " + str(r1["certifications"]))
print("  China testing: " + str(r1["china_testing"]))
print("  Supply chain:  " + r1["supply_chain"])
assert r1["score"] >= 80, "Dr Bronner's should score EXCELLENT, got " + str(r1["score"])
assert r1["china_testing"] == False
assert "B-Corp" in r1["certifications"]
print("  PASS: Dr Bronner EXCELLENT governance")

# TEST 2: Nestle - worst governance in DB
print()
print("TEST 2: Nestle (child labour, formula marketing violations)")
r2 = calculate_governance_score("Nestle")
print("  Score:        " + str(r2["score"]) + "/100 [" + r2["verdict"] + "]")
print("  ESG rating:   " + r2["esg_rating"])
print("  Violations:   " + str(len(r2["regulatory_violations"])))
assert r2["score"] < 40, "Nestle should be HIGH_RISK, got " + str(r2["score"])
print("  PASS: Nestle correctly HIGH_RISK governance")

# TEST 3: CeraVe -> L'Oreal parent surfacing
print()
print("TEST 3: CeraVe (owned by L'Oreal - parent surfacing)")
r3 = calculate_governance_score("CeraVe")
print("  Score:        " + str(r3["score"]) + "/100 [" + r3["verdict"] + "]")
print("  Parent:       " + r3["parent_company"])
print("  Has parent:   " + str(r3["has_parent_brand"]))
print("  China testing:" + str(r3["china_testing"]))
assert r3["parent_company"] == "L'Oreal Group"
assert r3["has_parent_brand"] == True
assert r3["china_testing"] == True
print("  PASS: CeraVe -> L'Oreal surfaced correctly")

# TEST 4: Unknown brand - neutral
print()
print("TEST 4: Unknown indie brand")
r4 = calculate_governance_score("AnaLux Beauty Co")
print("  Score:        " + str(r4["score"]) + "/100 [" + r4["verdict"] + "]")
print("  Confidence:   " + r4["confidence"])
assert r4["confidence"] == "LOW"
assert r4["score"] == 50, "Unknown brand should return neutral 50, got " + str(r4["score"])
print("  PASS: Unknown brand -> neutral 50, LOW confidence")

# TEST 5: Comparative ranking
print()
print("TEST 5: Comparative governance ranking")
brands = [
    ("Dr Bronner's", calculate_governance_score("Dr Bronner's")["score"]),
    ("Lush",         calculate_governance_score("Lush")["score"]),
    ("Beiersdorf",   calculate_governance_score("Beiersdorf")["score"]),
    ("Unilever",     calculate_governance_score("Unilever")["score"]),
    ("L'Oreal",      calculate_governance_score("L'Oreal")["score"]),
    ("Chanel",       calculate_governance_score("Chanel")["score"]),
    ("Coty",         calculate_governance_score("Coty")["score"]),
    ("J&J",          calculate_governance_score("Neutrogena")["score"]),
    ("Nestle",       calculate_governance_score("Nestle")["score"]),
]
brands.sort(key=lambda x: -x[1])
print()
print("  " + "Brand".ljust(20) + "Score")
print("  " + "-" * 28)
for brand, score in brands:
    print("  " + brand.ljust(20) + str(score))

dr_score  = next(s for b, s in brands if b == "Dr Bronner's")
nes_score = next(s for b, s in brands if b == "Nestle")
assert dr_score > nes_score, "Dr Bronner should outscore Nestle"
print()
print("  PASS: Ranking order correct")

# TEST 6: Breakdown transparency
print()
print("TEST 6: Score breakdown transparency")
r6 = calculate_governance_score("Chanel")
print("  Chanel breakdown:")
for k, v in r6["breakdown"].items():
    print("    " + k.ljust(28) + str(v))
print("  Key flag: " + r6["flags"][0])
assert "China" in r6["flags"][0], "First flag should be China testing"
print("  PASS: Breakdown and flags correctly structured")

print()
print("ALL CELL 45 TESTS PASSED")
print()
print("Governance engine now LIVE in pipeline:")
print("  " + str(len(COMPANY_PROFILES)) + " company profiles")
print("  " + str(len(PARENT_COMPANY_MAP)) + " brand->parent mappings")
print("  Dimensions: China testing, violations, greenwashing, supply chain, certifications")
print()
print("Pipeline status:")
print("  Health (50%):      LIVE")
print("  Environment (25%): STUB (planned)")
print("  Animal (10%):      LIVE")
print("  Governance (15%):  LIVE  <- just activated")

"""

with open('/content/noura_governance_engine.py', 'w', encoding='ascii') as f:
    f.write(governance_code)

exec(governance_code)
print('Cell 45 complete - noura_governance_engine.py written and loaded')

Governance engine loaded (Cell 45 - LIVE)
  Company profiles:   21
  Parent company map: 128 brands

  CELL 45 SELF-TEST: GOVERNANCE ENGINE

TEST 1: Dr Bronner's (independent, B-Corp, full transparency)
  Score:        100/100 [EXCELLENT]
  Parent:       Dr. Bronner's (Independent)
  Certifications: ['B-Corp', 'Fair Trade', 'USDA Organic', 'Leaping Bunny', 'Vegan', 'Non-GMO']
  China testing: False
  Supply chain:  FULL
  PASS: Dr Bronner EXCELLENT governance

TEST 2: Nestle (child labour, formula marketing violations)
  Score:        4/100 [HIGH_RISK]
  ESG rating:   C
  Violations:   4
  PASS: Nestle correctly HIGH_RISK governance

TEST 3: CeraVe (owned by L'Oreal - parent surfacing)
  Score:        26/100 [HIGH_RISK]
  Parent:       L'Oreal Group
  Has parent:   True
  China testing:True
  PASS: CeraVe -> L'Oreal surfaced correctly

TEST 4: Unknown indie brand
  Score:        50/100 [CAUTION]
  Confidence:   LOW
  PASS: Unknown brand -> neutral 50, LOW confidence

TEST 5: Comparativ

In [None]:
api_code = """
import sys
import time
sys.path.insert(0, '/content')

# ============================================================
# CELL 43 - API WRAPPER (FULLY WIRED)
# All four dimensions live.
# Health:      Cell 42 (calculate_health_score)
# Environment: Cell 31 (calculate_environment_score)
# Animal:      Cell 44 (calculate_animal_welfare_score)
# Governance:  Cell 45 (calculate_governance_score)
# ============================================================

# --- Load dependencies ---
_engines = {}

try:
    exec(open('/content/noura_concentration_engine.py').read())
    exec(open('/content/noura_health_engine.py').read())
    _engines["health"] = "live"
except Exception as e:
    _engines["health"] = "stub:" + str(e)[:40]

try:
    # Write minimal environment config
    _env_config = '''
ENVIRONMENT_SIGNALS = {
    "cosmetics": {"biodegradability":{"weight":0.40},"packaging":{"weight":0.30},"manufacturing":{"weight":0.20},"palm_oil":{"weight":0.10}},
    "food":      {"carbon_footprint":{"weight":0.40},"packaging":{"weight":0.30},"pesticides":{"weight":0.20},"water_usage":{"weight":0.10}},
    "cleaning":  {"aquatic_toxicity":{"weight":0.50},"vocs":{"weight":0.30},"packaging":{"weight":0.20}},
    "baby":      {"biodegradability":{"weight":0.40},"packaging":{"weight":0.30},"supply_chain":{"weight":0.20},"cumulative_exposure":{"weight":0.10}},
}
ECHA_AQUATIC_HAZARDS = {
    "benzalkonium chloride":{"aquatic_class":"Acute 1 / Chronic 1","biodegradable":False,"concern":"Highly toxic to aquatic organisms, persistent","deduction":-30},
    "methylisothiazolinone":{"aquatic_class":"Acute 1 / Chronic 1","biodegradable":False,"concern":"Extremely toxic to aquatic life","deduction":-35},
    "triclosan":{"aquatic_class":"Chronic 1","biodegradable":False,"concern":"Persistent endocrine disruptor, aquatic toxicity","deduction":-30},
    "glycerin":{"aquatic_class":"None","biodegradable":True,"concern":None,"deduction":0},
    "citric acid":{"aquatic_class":"None","biodegradable":True,"concern":None,"deduction":0},
    "tocopherol":{"aquatic_class":"None","biodegradable":True,"concern":None,"deduction":0},
    "benzophenone-3":{"aquatic_class":"Chronic 2","biodegradable":False,"concern":"Coral reef toxicity, bioaccumulates in fish","deduction":-20},
    "edta":{"aquatic_class":"Chronic 3","biodegradable":False,"concern":"Persistent chelating agent","deduction":-15},
    "sodium lauryl sulfate":{"aquatic_class":"Chronic 2","biodegradable":True,"concern":"Aquatic toxicity at high concentration","deduction":-10},
}
EU_ECOLABEL_BRANDS = {
    "weleda":{"certified":True,"bonus":8,"category":"cosmetics"},
    "ecover":{"certified":True,"bonus":10,"category":"cleaning"},
    "dr bronner":{"certified":True,"bonus":8,"category":"cosmetics"},
}
def get_environment_signals(cat): return ENVIRONMENT_SIGNALS.get(cat, ENVIRONMENT_SIGNALS["cosmetics"])
def get_aquatic_hazard(ing):
    name = ing.lower().strip()
    for k, v in ECHA_AQUATIC_HAZARDS.items():
        if k in name: return {"ingredient":ing,**v}
    return {"ingredient":ing,"aquatic_class":"Unknown","biodegradable":None,"concern":None,"deduction":0}
def check_ecolabel(brand, weighted_score=0):
    name = brand.lower().strip()
    for k, v in EU_ECOLABEL_BRANDS.items():
        if k in name:
            bonus = v["bonus"] if weighted_score >= 50 else 0
            blocked = weighted_score < 50
            return {"brand":brand,**v,"bonus":bonus,"bonus_blocked":blocked,
                    "bonus_blocked_reason":"formula score too low" if blocked else ""}
    return {"brand":brand,"certified":False,"bonus":0,"bonus_blocked":False}
'''
    with open('/content/noura_environment_config.py', 'w') as _f:
        _f.write(_env_config)
    _orig_name = globals().get('__name__', '__main__')
    globals()['__name__'] = 'noura_import'
    exec(open('/content/noura_environment_engine.py').read())
    globals()['__name__'] = _orig_name
    _engines["environment"] = "live"
except Exception as _env_err:
    globals()['__name__'] = globals().get('_orig_name', '__main__')
    _engines["environment"] = "stub:" + str(_env_err)[:60]

try:
    # Write animal welfare config including ANIMAL_WELFARE_SIGNALS
    _animal_config = '''
ANIMAL_WELFARE_SIGNALS = {
    "cosmetics": {"ingredient_flags": {"weight": 0.60}, "certification": {"weight": 0.25}, "product_name": {"weight": 0.15}},
    "food":      {"ingredient_flags": {"weight": 0.40}, "certification": {"weight": 0.10}, "product_name": {"weight": 0.50}},
    "cleaning":  {"ingredient_flags": {"weight": 0.70}, "certification": {"weight": 0.20}, "product_name": {"weight": 0.10}},
    "baby":      {"ingredient_flags": {"weight": 0.55}, "certification": {"weight": 0.30}, "product_name": {"weight": 0.15}},
}
ANIMAL_INGREDIENT_DB = {
    "ci 75470":                    {"flag": "ANIMAL_DERIVED", "severity": 30, "species": "insect",    "note": "Carmine - crushed cochineal insects (~70,000 per kg)"},
    "lanolin":                     {"flag": "ANIMAL_DERIVED", "severity": 15, "species": "sheep",     "note": "Sheep wool secretion - process varies in welfare standards"},
    "lanolin alcohol":             {"flag": "ANIMAL_DERIVED", "severity": 12, "species": "sheep",     "note": "Derived from lanolin"},
    "cera alba":                   {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "bee",       "note": "Beeswax - commercial beekeeping; queen culling common"},
    "shellac":                     {"flag": "ANIMAL_DERIVED", "severity": 25, "species": "insect",    "note": "Lac insect secretion; insects killed in processing"},
    "casein":                      {"flag": "ANIMAL_DERIVED", "severity": 18, "species": "cow",       "note": "Milk protein - dairy industry welfare concerns"},
    "whey protein":                {"flag": "ANIMAL_DERIVED", "severity": 15, "species": "cow",       "note": "Dairy by-product"},
    "collagen":                    {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "bovine",    "note": "Animal connective tissue - typically bovine or porcine"},
    "elastin":                     {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "bovine",    "note": "Animal tissue protein"},
    "keratin":                     {"flag": "ANIMAL_DERIVED", "severity": 18, "species": "sheep",     "note": "Animal hair/wool derived"},
    "silk":                        {"flag": "ANIMAL_DERIVED", "severity": 22, "species": "silkworm",  "note": "Silkworm cocoons; silkworms boiled alive"},
    "silk amino acids":            {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "silkworm",  "note": "Derived from silk processing"},
    "sericin":                     {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "silkworm",  "note": "Silk protein; silkworms killed"},
    "hydrolyzed silk":             {"flag": "ANIMAL_DERIVED", "severity": 20, "species": "silkworm",  "note": "Derived from silk"},
    "gelatin":                     {"flag": "ANIMAL_DERIVED", "severity": 22, "species": "bovine",    "note": "Boiled animal bones/skin; slaughterhouse by-product"},
    "glycerin":                    {"flag": "POSSIBLE_ANIMAL", "severity": 5,  "species": "bovine",   "note": "Can be animal or plant-derived - source not disclosed"},
    "stearic acid":                {"flag": "POSSIBLE_ANIMAL", "severity": 8,  "species": "bovine",   "note": "Can be tallow (beef fat) or plant-derived"},
    "oleic acid":                  {"flag": "POSSIBLE_ANIMAL", "severity": 6,  "species": "bovine",   "note": "Can be animal or plant-derived"},
    "sodium stearate":             {"flag": "POSSIBLE_ANIMAL", "severity": 8,  "species": "bovine",   "note": "Stearic acid salt - source ambiguous"},
    "cetyl alcohol":               {"flag": "POSSIBLE_ANIMAL", "severity": 4,  "species": "whale",    "note": "Historically whale-derived; now mostly plant - verify source"},
    "squalene":                    {"flag": "ANIMAL_DERIVED",  "severity": 28, "species": "shark",    "note": "Shark liver oil - overfishing concern; squalane preferred"},
    "musk":                        {"flag": "ANIMAL_DERIVED",  "severity": 30, "species": "deer",     "note": "Musk deer gland secretion - poaching concern"},
    "ambergris":                   {"flag": "ANIMAL_DERIVED",  "severity": 30, "species": "whale",    "note": "Sperm whale intestinal secretion"},
    "civet":                       {"flag": "ANIMAL_DERIVED",  "severity": 30, "species": "civet",    "note": "Civet cat anal gland secretion - captivity conditions"},
    "triclosan":                   {"flag": "ANIMAL_TESTED",   "severity": 10, "species": "rat",      "note": "Extensive animal testing history"},
    "benzophenone-3":              {"flag": "ANIMAL_TESTED",   "severity": 8,  "species": "rat",      "note": "Animal studies cited in EU restriction"},
    "sodium lauryl sulfate":       {"flag": "ANIMAL_TESTED",   "severity": 6,  "species": "rabbit",   "note": "Draize eye test historically used"},
    "sucrose":                     {"flag": "VEGAN_OK",        "severity": 0,  "species": None,       "note": "Plant-derived"},
    "theobroma cacao seed butter": {"flag": "VEGAN_OK",        "severity": 0,  "species": None,       "note": "Plant-derived"},
    "annatto":                     {"flag": "VEGAN_OK",        "severity": 0,  "species": None,       "note": "Plant-derived colorant"},
}
CRUELTY_FREE_BRANDS = {
    "dr bronner\'s", "dr. bronner\'s", "the body shop", "lush", "pacifica",
    "e.l.f.", "elf cosmetics", "nyx", "wet n wild", "covergirl",
    "urban decay", "tarte", "too faced", "milk makeup", "glossier",
    "tatcha", "drunk elephant", "paula\'s choice", "ordinary", "the ordinary",
    "cerave", "neutrogena", "seventh generation", "method", "mrs meyer",
}
VEGAN_CERTIFIED_BRANDS = {
    "dr bronner\'s", "dr. bronner\'s", "pacifica", "e.l.f.", "elf cosmetics",
    "nyx", "wet n wild", "milk makeup", "ordinary", "the ordinary",
    "seventh generation", "method",
}
NOT_CRUELTY_FREE = {
    "l\'oreal", "loreal", "maybelline", "garnier", "lancome",
    "estee lauder", "clinique", "mac", "bobbi brown", "jo malone",
    "procter & gamble", "p&g", "gillette", "olay", "sk-ii",
    "unilever", "dove", "axe", "tresemme", "pond\'s", "vaseline",
    "johnson & johnson", "neutrogena", "nivea", "beiersdorf",
    "shiseido", "nars", "colgate", "palmolive", "revlon", "elizabeth arden",
    "chanel", "dior", "ysl", "saint laurent", "givenchy", "clorox",
}
ANIMAL_PRODUCT_SIGNALS = {
    "chicken":   {"species": "poultry",     "severity": 20, "note": "Poultry farming welfare concerns"},
    "beef":      {"species": "bovine",      "severity": 22, "note": "Cattle farming welfare concerns"},
    "pork":      {"species": "porcine",     "severity": 22, "note": "Pig farming welfare concerns"},
    "lamb":      {"species": "ovine",       "severity": 20, "note": "Lamb farming welfare concerns"},
    "salmon":    {"species": "fish",        "severity": 15, "note": "Aquaculture or wild-catch welfare"},
    "tuna":      {"species": "fish",        "severity": 18, "note": "Overfishing and bycatch concern"},
    "shrimp":    {"species": "crustacean",  "severity": 15, "note": "Aquaculture welfare concerns"},
    "egg":       {"species": "poultry",     "severity": 14, "note": "Poultry welfare - check free-range/cage-free"},
    "milk":      {"species": "bovine",      "severity": 14, "note": "Dairy cattle welfare"},
    "dairy":     {"species": "bovine",      "severity": 14, "note": "Dairy farming welfare concerns"},
    "honey":     {"species": "bee",         "severity": 16, "note": "Commercial beekeeping - queen culling, wing clipping"},
    "beeswax":   {"species": "bee",         "severity": 20, "note": "Commercial beekeeping practices"},
    "goat":      {"species": "caprine",     "severity": 18, "note": "Goat farming welfare"},
    "turkey":    {"species": "poultry",     "severity": 20, "note": "Poultry farming welfare"},
    "duck":      {"species": "poultry",     "severity": 20, "note": "Poultry farming - foie gras concern"},
    "foie gras": {"species": "poultry",     "severity": 35, "note": "Force-feeding - banned in many jurisdictions"},
    "veal":      {"species": "bovine",      "severity": 30, "note": "Veal production - separation from mother at birth"},
    "lobster":   {"species": "crustacean",  "severity": 18, "note": "Live boiling practices"},
    "leather":   {"species": "bovine",      "severity": 20, "note": "Animal hide - slaughterhouse by-product"},
    "wool":      {"species": "ovine",       "severity": 15, "note": "Mulesing practices; shearing welfare varies"},
    "down":      {"species": "waterfowl",   "severity": 22, "note": "Live plucking of feathers - welfare concern"},
    "fur":       {"species": "various",     "severity": 35, "note": "Fur farming - severe confinement"},
    "pearl":     {"species": "mollusk",     "severity": 12, "note": "Oyster farming - nucleation process"},
}
'''
    with open('/content/noura_animal_welfare_config.py', 'w') as _f:
        _f.write(_animal_config)
    _orig_name2 = globals().get('__name__', '__main__')
    globals()['__name__'] = 'noura_import'
    exec(open('/content/noura_animal_welfare_engine.py').read())
    globals()['__name__'] = _orig_name2
    _engines["animal"] = "live"
except Exception as _animal_err:
    globals()['__name__'] = globals().get('_orig_name2', '__main__')
    _engines["animal"] = "stub:" + str(_animal_err)[:60]

try:
    exec(open('/content/noura_governance_engine.py').read())
    _engines["governance"] = "live"
except Exception:
    _engines["governance"] = "stub"


# ============================================================
# STUB FALLBACKS (used only when engine file missing)
# ============================================================

def _stub_health(product_name, ingredients, category):
    return {"score": 50, "health_score": 50, "verdict": "UNKNOWN",
            "confidence": "LOW", "engine": "stub", "flags": [],
            "detailed_flags": [], "flagged_count": 0, "ingredient_count": len(ingredients)}

def _stub_environment(product_name, ingredients, category):
    return {"score": 50, "verdict": "UNKNOWN", "confidence": "LOW", "engine": "stub",
            "flags": ["Environment engine not yet wired"]}

def _stub_animal(product_name, ingredients, category):
    return {"score": 70, "verdict": "UNKNOWN", "confidence": "LOW", "engine": "stub",
            "flags": ["Animal welfare engine file not found"]}

def _stub_governance(brand_name):
    return {"score": 65, "verdict": "UNKNOWN", "confidence": "LOW", "engine": "stub",
            "flags": ["Governance engine file not found"]}


# ============================================================
# DIMENSION WEIGHTS
# ============================================================
WEIGHTS = {
    "health":      0.50,
    "environment": 0.25,
    "animal":      0.10,
    "governance":  0.15,
}

VERDICT_THRESHOLDS = {
    "EXCELLENT": 85,
    "GOOD":      70,
    "ACCEPTABLE":55,
    "CAUTION":   40,
}


def _verdict(score):
    for label, threshold in VERDICT_THRESHOLDS.items():
        if score >= threshold:
            return label
    return "HIGH_RISK"


def _confidence(dim_results):
    confidences = [r.get("confidence", "LOW") for r in dim_results.values()]
    if all(c == "HIGH" for c in confidences):   return "HIGH"
    if any(c == "HIGH" for c in confidences):   return "MEDIUM"
    if any(c == "MEDIUM" for c in confidences): return "LOW"
    return "LOW"


# ============================================================
# MAIN SCORING FUNCTION
# ============================================================

def score_product(product_name, ingredients, category="cosmetics", brand=None):
    if not product_name or not product_name.strip():
        return {"success": False, "error": "product_name is required"}
    if category not in ("cosmetics", "food", "cleaning", "baby"):
        return {"success": False, "error": "category must be cosmetics|food|cleaning|baby"}
    if not isinstance(ingredients, list) or len(ingredients) == 0:
        return {"success": False, "error": "ingredients must be a non-empty list"}
    if len(ingredients) > 200:
        return {"success": False, "error": "ingredients list exceeds 200 item limit"}

    brand_name = brand or product_name
    t0 = time.time()

    # ---- Health ----
    if _engines["health"] == "live":
        try:
            h = calculate_health_score(product_name, ingredients, category)
        except Exception as e:
            h = _stub_health(product_name, ingredients, category)
            h["flags"] = ["Health engine error: " + str(e)[:60]]
    else:
        h = _stub_health(product_name, ingredients, category)

    # ---- Environment ----
    if _engines["environment"] == "live":
        try:
            e_result = calculate_environment_score(product_name, ingredients, category, brand_name)
            e = {
                "score":      e_result.get("environment_score", 50),
                "verdict":    e_result.get("verdict", "UNKNOWN"),
                "confidence": e_result.get("confidence", "LOW"),
                "flags":      e_result.get("flags", []),
                "engine":     "live"
            }
        except Exception as ex:
            e = _stub_environment(product_name, ingredients, category)
            e["flags"] = ["Environment engine error: " + str(ex)[:60]]
    else:
        e = _stub_environment(product_name, ingredients, category)

    # ---- Animal Welfare ----
    if _engines["animal"] == "live":
        try:
            a = calculate_animal_welfare_score(product_name, ingredients, category, brand_name)
        except Exception as ex:
            a = _stub_animal(product_name, ingredients, category)
    else:
        a = _stub_animal(product_name, ingredients, category)

    # ---- Governance ----
    if _engines["governance"] == "live":
        try:
            g = calculate_governance_score(brand_name)
        except Exception as ex:
            g = _stub_governance(brand_name)
    else:
        g = _stub_governance(brand_name)

    # ---- Aggregate ----
    h_score = h.get("health_score", h.get("score", 50))
    e_score = e.get("score", 50)
    a_score = a.get("score", 50)
    g_score = g.get("score", 50)

    noura_score = round(
        h_score * WEIGHTS["health"] +
        e_score * WEIGHTS["environment"] +
        a_score * WEIGHTS["animal"] +
        g_score * WEIGHTS["governance"]
    )

    dim_results = {"health": h, "environment": e, "animal": a, "governance": g}

    # ---- Flags (cross-dimension) ----
    all_flags = []
    for dim, result in dim_results.items():
        for flag in result.get("flags", []):
            all_flags.append("[" + dim.upper() + "] " + str(flag))

    response_ms = round((time.time() - t0) * 1000)

    return {
        "success":          True,
        "product":          product_name,
        "brand":            brand_name,
        "category":         category,
        "noura_score":      noura_score,
        "verdict":          _verdict(noura_score),
        "confidence":       _confidence(dim_results),
        "dimension_scores": {
            "health":      h_score,
            "environment": e_score,
            "animal":      a_score,
            "governance":  g_score,
        },
        "dimension_weights":  {k: str(int(v*100)) + "%" for k, v in WEIGHTS.items()},
        "dimension_engines":  _engines,
        "contributions": {
            "health":      round(h_score * WEIGHTS["health"], 1),
            "environment": round(e_score * WEIGHTS["environment"], 1),
            "animal":      round(a_score * WEIGHTS["animal"], 1),
            "governance":  round(g_score * WEIGHTS["governance"], 1),
        },
        "flags":          all_flags[:10],
        "health_detail":  h.get("detailed_flags", []),
        "bonuses":        h.get("bonuses", []),
        "response_ms":    response_ms,
    }


# ============================================================
# SELF-TEST
# ============================================================
print("Cell 43 API loaded")
print("  Engine status:")
for dim, status in _engines.items():
    icon = "LIVE" if status == "live" else "STUB"
    print("    " + dim.ljust(12) + ": " + icon + (" (" + status[5:] + ")" if status.startswith("stub:") else ""))
print()
print("=" * 62)
print("  CELL 43 SELF-TEST: FULLY WIRED API")
print("=" * 62)

# TEST 1: Dr Bronner's
print()
print("TEST 1: Dr Bronner's Pure Castile Soap")
r1 = score_product(
    "Dr Bronner's Pure Castile Soap",
    ["water","coconut oil","potassium hydroxide","hemp oil","jojoba oil","citric acid","tocopherol"],
    category="cosmetics",
    brand="Dr Bronner's"
)
assert r1["success"]
print("  NOURA score:  " + str(r1["noura_score"]) + "/100 [" + r1["verdict"] + "]")
print("  Health:       " + str(r1["dimension_scores"]["health"]) + " (engine=" + r1["dimension_engines"]["health"] + ")")
print("  Environment:  " + str(r1["dimension_scores"]["environment"]) + " (engine=" + r1["dimension_engines"]["environment"] + ")")
print("  Animal:       " + str(r1["dimension_scores"]["animal"]) + " (engine=" + r1["dimension_engines"]["animal"] + ")")
print("  Governance:   " + str(r1["dimension_scores"]["governance"]))
print("  Response:     " + str(r1["response_ms"]) + "ms")
assert r1["dimension_engines"]["health"] == "live", "Health engine should be live"
assert r1["dimension_engines"]["environment"] == "live", "Environment engine should be live"
assert r1["dimension_engines"]["animal"] == "live", "Animal engine should be live"
print("  PASS: All four engines LIVE")

# TEST 2: CeraVe
print()
print("TEST 2: CeraVe Moisturising Cream")
r2 = score_product(
    "CeraVe Moisturising Cream",
    ["aqua","glycerin","cetearyl alcohol","caprylic/capric triglyceride",
     "cetyl alcohol","ceteareth-20","petrolatum","phenoxyethanol",
     "ceramide np","ceramide ap","ceramide eop","dimethicone",
     "sodium hyaluronate","tocopherol","xanthan gum","ethylhexylglycerin"],
    category="cosmetics",
    brand="CeraVe"
)
print("  NOURA score:  " + str(r2["noura_score"]) + "/100 [" + r2["verdict"] + "]")
print("  Health:       " + str(r2["dimension_scores"]["health"]))
print("  Environment:  " + str(r2["dimension_scores"]["environment"]))
print("  Animal:       " + str(r2["dimension_scores"]["animal"]))
print("  Contributions: H=" + str(r2["contributions"]["health"]) +
      " E=" + str(r2["contributions"]["environment"]) +
      " A=" + str(r2["contributions"]["animal"]) +
      " G=" + str(r2["contributions"]["governance"]))
assert r2["noura_score"] > 0
assert r2["dimension_scores"]["health"] != 50, "Health should not be stub"
assert r2["dimension_scores"]["environment"] != 50, "Environment should not be stub"
print("  PASS: Real scores across all dimensions")

# TEST 3: Validation errors
print()
print("TEST 3: Input validation")
e1 = score_product("", ["water"], "cosmetics")
assert not e1["success"] and "product_name" in e1["error"]
print("  PASS: Empty product name rejected")

e2 = score_product("Test", ["water"], "perfume")
assert not e2["success"] and "category" in e2["error"]
print("  PASS: Invalid category rejected")

e3 = score_product("Test", [], "cosmetics")
assert not e3["success"] and "ingredients" in e3["error"]
print("  PASS: Empty ingredients rejected")

e4 = score_product("Test", ["x"] * 201, "cosmetics")
assert not e4["success"] and "200" in e4["error"]
print("  PASS: Oversized list rejected")

# TEST 4: Score ordering
print()
print("TEST 4: Score ordering")
antibac = score_product(
    "Antibacterial Soap",
    ["water","sodium lauryl sulfate","triclosan","fragrance"],
    "cosmetics"
)
print("  Dr Bronner:     " + str(r1["noura_score"]))
print("  CeraVe:         " + str(r2["noura_score"]))
print("  Antibacterial:  " + str(antibac["noura_score"]))
assert antibac["noura_score"] < r2["noura_score"], "Triclosan soap should score below CeraVe"
print("  PASS: Ordering correct")

# TEST 5: Full response structure
print()
print("TEST 5: Response schema")
required_keys = ["success","product","brand","category","noura_score","verdict",
                 "confidence","dimension_scores","dimension_weights","contributions",
                 "flags","response_ms","dimension_engines"]
missing = [k for k in required_keys if k not in r1]
assert not missing, "Missing keys: " + str(missing)
print("  PASS: All " + str(len(required_keys)) + " required keys present")

print()
print("ALL CELL 43 TESTS PASSED")
print()
print("NOURA scoring pipeline status:")
print("  Health (50%):      " + _engines["health"].upper())
print("  Environment (25%): " + _engines["environment"].upper())
print("  Animal (10%):      " + _engines["animal"].upper())
print("  Governance (15%):  " + _engines["governance"].upper())
print()
print("All four dimensions live. NOURA pipeline complete.")

"""

with open('/content/noura_api.py', 'w', encoding='utf-8') as f:
    f.write(api_code)

exec(api_code)
print('Cell 43 complete - noura_api.py written and loaded')

Concentration engine loaded
  Synonym entries:   215
  Anchor ingredients:26
  Hazard DB entries: 34

  CELL 41 SELF-TEST: CONCENTRATION ESTIMATOR

TEST 1: Petrolatum ABOVE_1PCT = full penalty
  PASS: petrolatum -> ABOVE_1PCT, penalty=14.0 (full)
  CeraVe health score: 76/100 [GOOD]

TEST 2: Carmine ABOVE_1PCT vs BELOW_1PCT
  PASS: carmine ABOVE_1PCT -> penalty=25.0
  PASS: carmine BELOW_1PCT -> penalty=10.0 (60% reduction)
  Score impact: 68 vs 83

TEST 3: SLS below safe threshold = zero penalty
  PASS: SLS below 1% threshold -> penalty=0.0 (safe at trace)

TEST 4: Evidence tiers
  PASS: Triclosan -> tier=2 (Cohort), penalty=32.0

TEST 5: Score ordering sanity
  PASS: Dr Bronner 99 > Chanel 64

TEST 6: Before/after impact summary

  Product: Chanel No5 Body Lotion
  Ingredient                 Base  Zone                Adj  Change
  --------------------------------------------------------------------
  Lanolin                      12  ABOVE_1PCT          12.0  0
  Carmine              

In [None]:
exec(open('/content/noura_api.py').read())

result = score_product(
    "Dr Bronner's Pure Castile Soap",
    ["water", "coconut oil", "potassium hydroxide",
     "hemp oil", "jojoba oil", "citric acid", "tocopherol"],
    category="cosmetics",
    brand="Dr Bronner's"
)

print("NOURA Score: " + str(result["noura_score"]) + "/100 [" + result["verdict"] + "]")
print()
print("Dimension breakdown:")
for dim, score in result["dimension_scores"].items():
    contrib = result["contributions"][dim]
    engine  = result["dimension_engines"].get(dim, "?")
    print("  " + dim.ljust(14) + str(score).rjust(3) + "/100   contributes " + str(contrib) + "  [" + engine + "]")
print()
print("Flags:")
for f in result["flags"]:
    print("  - " + f[:90])

Concentration engine loaded
  Synonym entries:   215
  Anchor ingredients:26
  Hazard DB entries: 34

  CELL 41 SELF-TEST: CONCENTRATION ESTIMATOR

TEST 1: Petrolatum ABOVE_1PCT = full penalty
  PASS: petrolatum -> ABOVE_1PCT, penalty=14.0 (full)
  CeraVe health score: 76/100 [GOOD]

TEST 2: Carmine ABOVE_1PCT vs BELOW_1PCT
  PASS: carmine ABOVE_1PCT -> penalty=25.0
  PASS: carmine BELOW_1PCT -> penalty=10.0 (60% reduction)
  Score impact: 68 vs 83

TEST 3: SLS below safe threshold = zero penalty
  PASS: SLS below 1% threshold -> penalty=0.0 (safe at trace)

TEST 4: Evidence tiers
  PASS: Triclosan -> tier=2 (Cohort), penalty=32.0

TEST 5: Score ordering sanity
  PASS: Dr Bronner 99 > Chanel 64

TEST 6: Before/after impact summary

  Product: Chanel No5 Body Lotion
  Ingredient                 Base  Zone                Adj  Change
  --------------------------------------------------------------------
  Lanolin                      12  ABOVE_1PCT          12.0  0
  Carmine              

In [None]:
import os

print("=== ENGINE FILES ON DISK ===")
files = [f for f in os.listdir('/content/') if f.startswith('noura_')]
for f in sorted(files):
    size = os.path.getsize('/content/' + f)
    print("  " + f.ljust(45) + str(size) + " bytes")

print()
print("=== NOTEBOOK CELL COUNT ===")
print("  Check manually - how many cells total?")

print()
print("=== FUNCTIONS IN MEMORY ===")
for fn in ["score_product", "calculate_health_score",
           "calculate_animal_welfare_score", "calculate_governance_score",
           "normalise_ingredient_list"]:
    try:
        eval(fn)
        print("  " + fn + ": LOADED")
    except NameError:
        print("  " + fn + ": not in scope")

=== ENGINE FILES ON DISK ===
  noura_animal_welfare_engine.py               19362 bytes
  noura_api.py                                 11377 bytes
  noura_concentration_engine.py                28339 bytes
  noura_governance_engine.py                   25918 bytes
  noura_health_engine.py                       15735 bytes
  noura_normaliser.py                          0 bytes

=== NOTEBOOK CELL COUNT ===
  Check manually - how many cells total?

=== FUNCTIONS IN MEMORY ===
  score_product: LOADED
  calculate_health_score: LOADED
  calculate_animal_welfare_score: LOADED
  calculate_governance_score: LOADED
  normalise_ingredient_list: LOADED


In [None]:
import os

print("=== NOURA ENGINE FILES - FINAL CHECK ===")
files = [f for f in os.listdir('/content/') if f.startswith('noura_')]
for f in sorted(files):
    size = os.path.getsize('/content/' + f)
    print("  " + f.ljust(45) + str(size) + " bytes")

print()
print("=== RISK CHECK: OLD ENGINE FILES ===")
risky = [
    'noura_health_scorer.py',
    'noura_animal_welfare.py',
    'noura_governance.py',
    'noura_normaliser.py',
    'noura_scorer.py',
    'noura_pipeline.py',
]
found_risk = False
for f in risky:
    if os.path.exists('/content/' + f):
        print("  WARNING: " + f + " exists - old version may conflict")
        found_risk = True
if not found_risk:
    print("  No conflicting old files found - disk is clean")

=== NOURA ENGINE FILES - FINAL CHECK ===
  noura_animal_welfare_engine.py               19362 bytes
  noura_api.py                                 11377 bytes
  noura_concentration_engine.py                28339 bytes
  noura_governance_engine.py                   25918 bytes
  noura_health_engine.py                       15735 bytes

=== RISK CHECK: OLD ENGINE FILES ===
  No conflicting old files found - disk is clean


In [None]:
import subprocess

subprocess.run(['git', '-C', '/content', 'init'], capture_output=True)
subprocess.run(['git', '-C', '/content', 'config', 'user.email', 'noura@build.ai'], capture_output=True)
subprocess.run(['git', '-C', '/content', 'config', 'user.name', 'NOURA Build'], capture_output=True)

files = [
    'noura_environment_engine.py',
    'noura_environment_config.py',
    'noura_api.py',
]
for f in files:
    subprocess.run(['git', '-C', '/content', 'add', f], capture_output=True)

result = subprocess.run(
    ['git', '-C', '/content', 'commit', '-m',
     'Week 14 final: all 4 dimensions LIVE - environment engine wired, 3 bugs patched'],
    capture_output=True, text=True
)
print(result.stdout)
print(result.stderr)

[master (root-commit) 9df2d26] Week 14 final: all 4 dimensions LIVE - environment engine wired, 3 bugs patched
 3 files changed, 1331 insertions(+)
 create mode 100644 noura_api.py
 create mode 100644 noura_environment_config.py
 create mode 100644 noura_environment_engine.py




In [46]:
cell_47_code = """
import sys
import requests
import time
sys.path.insert(0, '/content')

# ============================================================
# CELL 47 - PRODUCT RESOLVER
# Turns a barcode or product name into a scoreable product.
# Sources: Open Food Facts (food) + Open Beauty Facts (cosmetics)
# Output feeds directly into score_product() in Cell 43.
# ============================================================

OFF_API    = "https://world.openfoodfacts.org/api/v2/product"
OFF_SEARCH = "https://world.openfoodfacts.org/cgi/search.pl"
OBF_API    = "https://world.openbeautyfacts.org/api/v2/product"
OBF_SEARCH = "https://world.openbeautyfacts.org/cgi/search.pl"

HEADERS = {"User-Agent": "NOURA-Scanner/1.0 (contact@noura.ai)"}
TIMEOUT = 8

# ---- Category detection from OFF product data ----
FOOD_CATEGORIES = {
    "en:beverages", "en:drinks", "en:dairy", "en:snacks", "en:cereals",
    "en:fruits", "en:vegetables", "en:meats", "en:seafood", "en:sauces",
    "en:soups", "en:breads", "en:chocolates", "en:biscuits", "en:frozen-foods",
    "en:baby-foods", "en:condiments", "en:oils", "en:sugars", "en:coffees",
    "en:teas", "en:waters", "en:juices",
}
BABY_SIGNALS = {
    "baby", "infant", "toddler", "newborn", "neonatal", "for babies",
    "for infants", "0+", "0-3", "children's"
}
CLEANING_SIGNALS = {
    "cleaner", "detergent", "washing", "laundry", "dishwash",
    "disinfect", "sanitiz", "bleach", "degreaser", "surface spray"
}


def _detect_category(product_name, categories_tags, source):
    \"\"\"Infer NOURA category from product data.\"\"\"
    name_lower = product_name.lower()

    # Baby check first (highest priority)
    if any(sig in name_lower for sig in BABY_SIGNALS):
        return "baby"

    # Cleaning
    if any(sig in name_lower for sig in CLEANING_SIGNALS):
        return "cleaning"

    # Food Facts source = food by default
    if source == "open_food_facts":
        return "food"

    # Beauty Facts source = cosmetics by default
    if source == "open_beauty_facts":
        return "cosmetics"

    # Check OFF category tags for food confirmation
    if categories_tags:
        for tag in categories_tags:
            if any(fc in tag for fc in FOOD_CATEGORIES):
                return "food"

    return "cosmetics"  # safe default


def _parse_ingredients(product):
    \"\"\"Extract ingredient list from OFF/OBF product dict.\"\"\"
    # Prefer structured ingredient objects
    ing_objects = product.get("ingredients", [])
    if ing_objects and isinstance(ing_objects, list):
        names = []
        for obj in ing_objects:
            if isinstance(obj, dict):
                text = obj.get("text") or obj.get("id") or ""
                if text:
                    # Strip en: prefix and clean
                    text = text.replace("en:", "").replace("-", " ").strip()
                    if text:
                        names.append(text.lower())
            elif isinstance(obj, str):
                names.append(obj.lower().strip())
        if names:
            return names

    # Fall back to ingredients_text
    ing_text = product.get("ingredients_text") or product.get("ingredients_text_en") or ""
    if ing_text:
        # Split on commas, semicolons, periods
        import re
        parts = re.split(r"[,;.]+", ing_text)
        names = []
        for p in parts:
            p = p.strip().lower()
            # Remove percentage annotations like "(5%)"
            p = re.sub(r"\\s*\\(.*?\\)", "", p).strip()
            # Remove asterisks (organic markers)
            p = p.replace("*", "").strip()
            if len(p) > 1:
                names.append(p)
        if names:
            return names

    return []


def _parse_product(product, source):
    \"\"\"Convert raw OFF/OBF product dict to NOURA resolver format.\"\"\"
    name = (
        product.get("product_name_en")
        or product.get("product_name")
        or product.get("abbreviated_product_name")
        or ""
    ).strip()

    brand = (
        product.get("brands") or ""
    ).split(",")[0].strip()

    categories_tags = product.get("categories_tags", [])
    category = _detect_category(name, categories_tags, source)

    ingredients = _parse_ingredients(product)

    barcode = (
        product.get("code")
        or product.get("_id")
        or ""
    )

    return {
        "found":        True,
        "product_name": name,
        "brand":        brand,
        "category":     category,
        "ingredients":  ingredients,
        "source":       source,
        "barcode":      barcode,
        "categories":   categories_tags[:5],
        "raw_nutriscore":  product.get("nutriscore_grade", ""),
        "raw_ecoscore":    product.get("ecoscore_grade", ""),
    }


def _lookup_barcode(barcode):
    \"\"\"Try barcode on OFF then OBF. Returns (parsed_product, source) or None.\"\"\"
    for api, source in [
        (OFF_API, "open_food_facts"),
        (OBF_API, "open_beauty_facts"),
    ]:
        try:
            r = requests.get(
                f"{api}/{barcode}",
                headers=HEADERS, timeout=TIMEOUT
            )
            if r.status_code == 200:
                data = r.json()
                product = data.get("product", {})
                if product and product.get("product_name"):
                    parsed = _parse_product(product, source)
                    parsed["confidence"] = "HIGH"
                    parsed["match_method"] = "barcode"
                    return parsed
        except requests.exceptions.RequestException:
            continue
    return None


def _search_name(product_name):
    \"\"\"Try name search on OFF then OBF. Returns parsed_product or None.\"\"\"
    fields = (
        "product_name,product_name_en,brands,categories_tags,"
        "ingredients,ingredients_text,ingredients_text_en,"
        "ecoscore_grade,nutriscore_grade,code"
    )

    for search_url, source in [
        (OFF_SEARCH, "open_food_facts"),
        (OBF_SEARCH, "open_beauty_facts"),
    ]:
        try:
            params = {
                "search_terms":  product_name,
                "search_simple": 1,
                "action":        "process",
                "json":          1,
                "page_size":     5,
                "fields":        fields,
            }
            r = requests.get(
                search_url, params=params,
                headers=HEADERS, timeout=TIMEOUT
            )
            if r.status_code == 200:
                data = r.json()
                products = data.get("products", [])
                # Pick best match: prefer one with ingredients
                for p in products:
                    if p.get("ingredients") or p.get("ingredients_text"):
                        parsed = _parse_product(p, source)
                        parsed["confidence"] = "MEDIUM"
                        parsed["match_method"] = "name_search"
                        return parsed
                # Accept first result even without ingredients
                if products:
                    parsed = _parse_product(products[0], source)
                    parsed["confidence"] = "LOW"
                    parsed["match_method"] = "name_search_no_ingredients"
                    return parsed
        except requests.exceptions.RequestException:
            continue
    return None


# ============================================================
# PUBLIC API
# ============================================================

def resolve_product(barcode=None, product_name=None):
    \"\"\"
    Resolve a barcode or product name to a scoreable product dict.

    Args:
        barcode:      EAN barcode string (optional)
        product_name: Product name string (optional)

    Returns:
        {
            "found":        bool,
            "product_name": str,
            "brand":        str,
            "category":     str,   # cosmetics|food|cleaning|baby
            "ingredients":  list,
            "source":       str,   # open_food_facts|open_beauty_facts
            "barcode":      str,
            "confidence":   str,   # HIGH|MEDIUM|LOW
            "match_method": str,
            "error":        str    # only if found=False
        }
    \"\"\"
    if not barcode and not product_name:
        return {"found": False, "error": "barcode or product_name required"}

    # 1. Try barcode first (most reliable)
    if barcode:
        result = _lookup_barcode(barcode)
        if result:
            return result

    # 2. Fall back to name search
    if product_name:
        result = _search_name(product_name)
        if result:
            return result

    # 3. Nothing found
    return {
        "found":        False,
        "product_name": product_name or "",
        "brand":        "",
        "category":     "cosmetics",
        "ingredients":  [],
        "source":       None,
        "barcode":      barcode or "",
        "confidence":   "NONE",
        "match_method": "not_found",
        "error":        "Product not found in Open Food Facts or Open Beauty Facts"
    }


def resolve_and_score(barcode=None, product_name=None):
    \"\"\"
    Convenience: resolve a product then score it with Cell 43 pipeline.
    Returns combined resolver + scoring result.
    \"\"\"
    resolved = resolve_product(barcode=barcode, product_name=product_name)

    if not resolved["found"] or not resolved["ingredients"]:
        return {
            "resolved": resolved,
            "scored":   None,
            "error":    "Cannot score: " + (
                "product not found" if not resolved["found"]
                else "no ingredients available"
            )
        }

    scored = score_product(
        resolved["product_name"],
        resolved["ingredients"],
        category=resolved["category"],
        brand=resolved["brand"] or resolved["product_name"]
    )

    return {
        "resolved": resolved,
        "scored":   scored,
    }


# ============================================================
# SELF-TEST
# ============================================================
print("Cell 47 - Product Resolver loaded")
print()
print("=" * 62)
print("  CELL 47 SELF-TEST: PRODUCT RESOLVER")
print("=" * 62)

# TEST 1: Barcode lookup (Innocent Smoothie for Kids - verified real barcode)
print()
print("TEST 1: Barcode lookup - Innocent Smoothie (5038862634106)")
t0 = time.time()
r1 = resolve_product(barcode="5038862634106")
ms = round((time.time() - t0) * 1000)
print(f"  Found:      {r1['found']}")
print(f"  Product:    {r1.get('product_name', 'N/A')}")
print(f"  Brand:      {r1.get('brand', 'N/A')}")
print(f"  Category:   {r1.get('category', 'N/A')}")
print(f"  Source:     {r1.get('source', 'N/A')}")
print(f"  Confidence: {r1.get('confidence', 'N/A')}")
print(f"  Method:     {r1.get('match_method', 'N/A')}")
print(f"  Ingredients ({len(r1.get('ingredients', []))}): {r1.get('ingredients', [])[:5]}")
print(f"  Time:       {ms}ms")
if r1["found"]:
    assert r1["confidence"] == "HIGH", "Barcode match should be HIGH confidence"
    assert r1["match_method"] == "barcode"
    assert r1["category"] == "food"
    print("  PASS: Barcode lookup successful, HIGH confidence, food category")
else:
    print("  WARN: Product not found (API may be unavailable) - continuing tests")

# TEST 2: Name search - food product
print()
print("TEST 2: Name search - Heinz Tomato Ketchup")
r2 = resolve_product(product_name="Heinz Tomato Ketchup")
print(f"  Found:      {r2['found']}")
print(f"  Product:    {r2.get('product_name', 'N/A')}")
print(f"  Category:   {r2.get('category', 'N/A')}")
print(f"  Confidence: {r2.get('confidence', 'N/A')}")
print(f"  Method:     {r2.get('match_method', 'N/A')}")
print(f"  Ingredients ({len(r2.get('ingredients', []))}): {r2.get('ingredients', [])[:4]}")
if r2["found"]:
    assert r2["match_method"] in ("name_search", "name_search_no_ingredients")
    print("  PASS: Name search successful")
else:
    print("  WARN: Not found (API may be unavailable)")

# TEST 3: Cosmetics barcode - Dr Bronner's (Open Beauty Facts)
print()
print("TEST 3: Cosmetics barcode - Dr Bronner's Pure Castile Soap")
r3 = resolve_product(barcode="018787220238")
print(f"  Found:      {r3['found']}")
print(f"  Product:    {r3.get('product_name', 'N/A')}")
print(f"  Source:     {r3.get('source', 'N/A')}")
print(f"  Category:   {r3.get('category', 'N/A')}")
print(f"  Ingredients ({len(r3.get('ingredients', []))}): {r3.get('ingredients', [])[:5]}")
if r3["found"]:
    print("  PASS: Cosmetics barcode resolved")
else:
    print("  WARN: Not found - trying name search fallback")
    r3b = resolve_product(product_name="Dr Bronner's Pure Castile Soap")
    print(f"  Name fallback found: {r3b['found']} | source: {r3b.get('source')}")

# TEST 4: Baby product detection
print()
print("TEST 4: Baby product category detection")
r4 = resolve_product(product_name="Aptamil infant formula baby milk")
print(f"  Found:      {r4['found']}")
print(f"  Category:   {r4.get('category', 'N/A')}")
print(f"  Confidence: {r4.get('confidence', 'N/A')}")
if r4["found"] and r4["category"] == "baby":
    print("  PASS: Baby category correctly detected from product name")
elif r4["found"]:
    print(f"  WARN: Category detected as '{r4['category']}' (not 'baby') - checking name signals")
    name = r4.get("product_name", "")
    print(f"  Product name: '{name}'")
else:
    print("  WARN: Not found (API may be unavailable)")

# TEST 5: Missing barcode graceful failure
print()
print("TEST 5: Non-existent barcode fails gracefully")
r5 = resolve_product(barcode="0000000000000")
print(f"  Found:      {r5['found']}")
print(f"  Error:      {r5.get('error', 'N/A')}")
assert not r5["found"]
print("  PASS: Non-existent barcode returns found=False cleanly")

# TEST 6: No input validation
print()
print("TEST 6: No input returns error")
r6 = resolve_product()
assert not r6["found"]
assert "required" in r6["error"]
print("  PASS: Empty call returns error cleanly")

# TEST 7: End-to-end resolve + score (if Cell 43 score_product is available)
print()
print("TEST 7: End-to-end resolve_and_score()")
try:
    score_product  # check Cell 43 is loaded
    e2e = resolve_and_score(barcode="5038862634106")
    if e2e.get("scored"):
        s = e2e["scored"]
        r = e2e["resolved"]
        print(f"  Product:    {r['product_name']}")
        print(f"  NOURA:      {s['noura_score']}/100 [{s['verdict']}]")
        print(f"  Health:     {s['dimension_scores']['health']}")
        print(f"  Environment:{s['dimension_scores']['environment']}")
        print(f"  Animal:     {s['dimension_scores']['animal']}")
        print(f"  Governance: {s['dimension_scores']['governance']}")
        assert s["success"]
        print("  PASS: Full pipeline barcode → score working")
    else:
        print(f"  WARN: {e2e.get('error', 'Score not returned')}")
        print("  (resolve worked, score skipped due to missing ingredients)")
except NameError:
    print("  SKIP: score_product not loaded - run Cell 43 first")

print()
print("ALL CELL 47 TESTS COMPLETE")
print()
print("resolver status:")
print("  resolve_product(barcode, product_name) -> product dict")
print("  resolve_and_score(barcode, product_name) -> resolved + scored")
print()
print("Supported sources:")
print("  Open Food Facts   : food, baby, cleaning products")
print("  Open Beauty Facts : cosmetics, personal care")
print()
print("Next: Cell 48 - PubMed live query wired into health engine")
"""

# Write and execute
with open('/content/noura_product_resolver.py', 'w', encoding='utf-8') as f:
    f.write(cell_47_code)

exec(cell_47_code)
print("Cell 47 complete - noura_product_resolver.py written and loaded")

Cell 47 - Product Resolver loaded

  CELL 47 SELF-TEST: PRODUCT RESOLVER

TEST 1: Barcode lookup - Innocent Smoothie (5038862634106)
  Found:      True
  Product:    Innocent smoothie for kids
  Brand:      Innocent
  Category:   food
  Source:     open_food_facts
  Confidence: HIGH
  Method:     barcode
  Ingredients (8): ['½ of a apple', '½ of a squeezed orange', '½', '3 of a mashed banana', '3 pressed grapes']
  Time:       1230ms
  PASS: Barcode lookup successful, HIGH confidence, food category

TEST 2: Name search - Heinz Tomato Ketchup
  Found:      False
  Product:    Heinz Tomato Ketchup
  Category:   cosmetics
  Confidence: NONE
  Method:     not_found
  Ingredients (0): []
  WARN: Not found (API may be unavailable)

TEST 3: Cosmetics barcode - Dr Bronner's Pure Castile Soap
  Found:      False
  Product:    
  Source:     None
  Category:   cosmetics
  Ingredients (0): []
  WARN: Not found - trying name search fallback
  Name fallback found: False | source: None

TEST 4: Baby

In [9]:
cell_48_code = """
import sys, base64, json, re, requests, time, os
sys.path.insert(0, '/content')

# ============================================================
# CELL 48 - VISION INPUT LAYER
# One unified prompt handles any photo angle:
# front label, back label, ingredient panel, any orientation.
# Output schema identical to resolve_product() in Cell 47.
# ============================================================

ANTHROPIC_API = "https://api.anthropic.com/v1/messages"
VISION_MODEL  = "claude-opus-4-6"
HEADERS_AI    = {
    "Content-Type":      "application/json",
    "anthropic-version": "2023-06-01",
}

PROMPT_UNIVERSAL = \"\"\"You are NOURA's product intelligence engine.

Analyze this product photo. It could be a front label, back label, ingredient panel, or any angle — handle all cases.

Return ONLY this JSON object:
{
  "product_name": "string or null",
  "brand": "string or null",
  "category": "cosmetics|food|cleaning|baby",
  "ingredients": ["ingredient1", "ingredient2", ...],
  "label_type": "ingredients_visible|front_only|partial_ingredients|unclear",
  "confidence": "HIGH|MEDIUM|LOW",
  "notes": "brief description of what is visible in the image, or null"
}

Rules:
- Scan the ENTIRE image for any ingredient list, regardless of orientation or position
- ingredients: extract ALL visible ingredients, lowercase, no percentages, no asterisks, no numbers
- If NO ingredient list is visible anywhere: ingredients=[], label_type="front_only"
- If ingredient list is PARTIALLY visible (cut off, blurry, small): extract what you can, label_type="partial_ingredients"
- If full ingredient list is clearly visible: label_type="ingredients_visible"
- If image is too unclear to identify anything: label_type="unclear"
- category: infer from product type visible anywhere on pack
- confidence: HIGH=full ingredient list clearly readable, MEDIUM=partial list, LOW=no list or unreadable
- Return ONLY valid JSON, no preamble, no markdown fences\"\"\"

# Complete fallback — every key always present
_EMPTY = {
    "found": False, "product_name": "", "brand": "", "category": "cosmetics",
    "ingredients": [], "source": "vision", "barcode": "", "confidence": "NONE",
    "match_method": "vision_error", "label_type": "unknown", "notes": "",
    "needs_back_label": False, "error": "",
}


def _get_off_image_url(barcode):
    \"\"\"Fetch a full-resolution ingredient image URL from OFF API for a given barcode.\"\"\"
    try:
        r = requests.get(
            f"https://world.openfoodfacts.org/api/v2/product/{barcode}",
            params={"fields": "selected_images,images,code"},
            headers={"User-Agent": "NOURA-Scanner/1.0"},
            timeout=8
        )
        if r.status_code != 200:
            return None
        product = r.json().get("product", {})

        # Try selected_images first (most reliable)
        sel     = product.get("selected_images", {})
        ing_img = sel.get("ingredients", {})
        for size in ("display", "small"):
            for lang in ("en", "fr", "de", "es"):
                url = (ing_img.get(size) or {}).get(lang)
                if url:
                    # Upgrade to full resolution
                    return re.sub(r'\\.\\d+\\.jpg$', '.full.jpg',
                                  re.sub(r'\\.400\\.jpg$', '.full.jpg', url))

        # Fall back: scan images dict for any ingredients key
        images = product.get("images", {})
        code   = product.get("code", barcode)
        folder = re.sub(r'(...)(...)(...)(.+)', r'\\1/\\2/\\3/\\4', code.zfill(13))
        base   = f"https://images.openfoodfacts.org/images/products/{folder}"
        for key in sorted(images.keys(), reverse=True):
            if "ingredients" in key:
                rev = images[key].get("rev", "")
                if rev:
                    return f"{base}/{key}.{rev}.full.jpg"
        return None
    except Exception:
        return None


def _encode_image_url(url):
    \"\"\"Fetch URL and base64 encode.\"\"\"
    r = requests.get(url, timeout=15, headers={"User-Agent": "NOURA-Scanner/1.0"})
    r.raise_for_status()
    ct = r.headers.get("Content-Type", "image/jpeg").split(";")[0].strip()
    if ct not in ("image/jpeg", "image/png", "image/gif", "image/webp"):
        ct = "image/jpeg"
    return base64.standard_b64encode(r.content).decode("utf-8"), ct


def _call_vision(image_source, api_key):
    \"\"\"Call Claude vision API and return parsed JSON result.\"\"\"
    if image_source["type"] == "url":
        data, media_type = _encode_image_url(image_source["url"])
        img_block = {
            "type": "image",
            "source": {"type": "base64", "media_type": media_type, "data": data}
        }
    else:
        img_block = {
            "type": "image",
            "source": {
                "type":       "base64",
                "media_type": image_source.get("media_type", "image/jpeg"),
                "data":       image_source["data"],
            }
        }

    payload = {
        "model":      VISION_MODEL,
        "max_tokens": 1024,
        "messages": [{
            "role": "user",
            "content": [
                img_block,
                {"type": "text", "text": PROMPT_UNIVERSAL}
            ]
        }]
    }

    resp = requests.post(
        ANTHROPIC_API,
        headers={**HEADERS_AI, "x-api-key": api_key},
        json=payload,
        timeout=40
    )
    resp.raise_for_status()
    raw = resp.json()["content"][0]["text"].strip()
    # Strip any accidental markdown fences
    raw = re.sub(r"^```json\\s*", "", raw)
    raw = re.sub(r"\\s*```$",     "", raw)
    return json.loads(raw)


def scan_label(image_source, api_key):
    \"\"\"
    Scan any product photo and extract product info + ingredients.

    Works with any photo angle: front label, back label,
    ingredient panel, crumpled pack — NOURA handles it all.

    Args:
        image_source: one of:
            {"type": "url",    "url": "https://..."}
            {"type": "base64", "data": "<b64>", "media_type": "image/jpeg"}
            {"type": "file",   "path": "/path/to/image.jpg"}
        api_key: Anthropic API key string

    Returns dict with keys:
        found, product_name, brand, category, ingredients,
        source, barcode, confidence, match_method, label_type,
        needs_back_label, notes, error
    \"\"\"
    # Handle file path
    if image_source.get("type") == "file":
        path = image_source["path"]
        ext  = path.rsplit(".", 1)[-1].lower()
        mt   = {"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png",
                "webp":"image/webp","gif":"image/gif"}.get(ext, "image/jpeg")
        with open(path, "rb") as f:
            b64 = base64.standard_b64encode(f.read()).decode("utf-8")
        image_source = {"type": "base64", "data": b64, "media_type": mt}

    try:
        result = _call_vision(image_source, api_key)
    except json.JSONDecodeError as e:
        return {**_EMPTY, "error": f"Vision returned invalid JSON: {e}"}
    except requests.exceptions.HTTPError as e:
        return {**_EMPTY, "error": f"API error: {e}"}
    except Exception as e:
        return {**_EMPTY, "error": str(e)}

    # Clean ingredients
    ingredients = [
        i.strip().lower() for i in result.get("ingredients", [])
        if isinstance(i, str) and len(i.strip()) > 1
    ]

    label_type = result.get("label_type", "unclear")
    found      = bool(result.get("product_name") or ingredients)

    # Signal when a back label is needed
    needs_back_label = label_type in ("front_only", "unclear") and not ingredients

    return {
        "found":            found,
        "product_name":     result.get("product_name") or "",
        "brand":            result.get("brand") or "",
        "category":         result.get("category") or "cosmetics",
        "ingredients":      ingredients,
        "source":           "vision",
        "barcode":          "",
        "confidence":       result.get("confidence") or "LOW",
        "match_method":     "vision_universal",
        "label_type":       label_type,
        "needs_back_label": needs_back_label,
        "notes":            result.get("notes") or "",
        "error":            "",
    }


def scan_and_score(image_source, api_key):
    \"\"\"
    Scan any product photo then score it with the full Cell 43 pipeline.
    Returns combined resolver + scoring result.
    \"\"\"
    resolved = scan_label(image_source, api_key)

    if not resolved["found"]:
        return {"resolved": resolved, "scored": None,
                "error": resolved.get("error", "Could not identify product")}

    if resolved["needs_back_label"]:
        return {
            "resolved": resolved,
            "scored":   None,
            "error":    (
                f"I can see this is {resolved['product_name'] or 'a product'} "
                f"by {resolved['brand'] or 'an unknown brand'}, but the ingredient list "
                f"isn't visible. Please send a photo of the back label."
            )
        }

    if not resolved["ingredients"]:
        return {"resolved": resolved, "scored": None,
                "error": "No ingredients found in image. Please send a clearer photo of the ingredient label."}

    scored = score_product(
        resolved["product_name"] or "Unknown Product",
        resolved["ingredients"],
        category=resolved["category"],
        brand=resolved["brand"] or resolved["product_name"] or "Unknown"
    )
    return {"resolved": resolved, "scored": scored}


# ============================================================
# SELF-TEST
# ============================================================
print("Cell 48 - Vision Input Layer loaded")
print()
print("=" * 62)
print("  CELL 48 SELF-TEST: VISION LAYER (unified prompt)")
print("=" * 62)

_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
if not _api_key:
    try:
        from google.colab import userdata
        _api_key = userdata.get("ANTHROPIC_API_KEY") or ""
    except Exception:
        pass

if not _api_key:
    print("  API key not set. Set ANTHROPIC_API_KEY and re-run.")
else:
    print(f"  API key: ...{_api_key[-6:]}")
    print()

    # Fetch live image URLs from OFF
    print("  Fetching test image URLs from Open Food Facts...")
    NUTELLA_IMG  = _get_off_image_url("3017620422003")
    INNOCENT_IMG = _get_off_image_url("5038862634106")
    print(f"  Nutella image:   {NUTELLA_IMG or 'NOT FOUND'}")
    print(f"  Innocent image:  {INNOCENT_IMG or 'NOT FOUND'}")
    print()

    # TEST 1: Food label - Nutella
    if NUTELLA_IMG:
        print("TEST 1: Nutella (any angle - unified prompt)")
        t0 = time.time()
        r1 = scan_label({"type": "url", "url": NUTELLA_IMG}, _api_key)
        ms = round((time.time() - t0) * 1000)
        print(f"  Found:            {r1['found']}")
        print(f"  Product:          {r1['product_name']}")
        print(f"  Brand:            {r1['brand']}")
        print(f"  Category:         {r1['category']}")
        print(f"  Label type:       {r1['label_type']}")
        print(f"  Confidence:       {r1['confidence']}")
        print(f"  Needs back label: {r1['needs_back_label']}")
        print(f"  Ingredients ({len(r1['ingredients'])}): {r1['ingredients'][:6]}")
        print(f"  Notes:            {r1['notes']}")
        print(f"  Time:             {ms}ms")
        if r1["found"] and r1["ingredients"]:
            print("  PASS: Ingredients extracted")
        elif r1["found"] and r1["needs_back_label"]:
            print("  PASS: Product identified, correctly requesting back label")
        elif r1["found"]:
            print("  WARN: Product found, no ingredients — label type: " + r1["label_type"])
        else:
            print(f"  WARN: {r1['error']}")

    # TEST 2: Innocent Smoothie
    if INNOCENT_IMG:
        print()
        print("TEST 2: Innocent Smoothie (any angle - unified prompt)")
        t0 = time.time()
        r2 = scan_label({"type": "url", "url": INNOCENT_IMG}, _api_key)
        ms = round((time.time() - t0) * 1000)
        print(f"  Found:            {r2['found']}")
        print(f"  Product:          {r2['product_name']}")
        print(f"  Label type:       {r2['label_type']}")
        print(f"  Confidence:       {r2['confidence']}")
        print(f"  Needs back label: {r2['needs_back_label']}")
        print(f"  Ingredients ({len(r2['ingredients'])}): {r2['ingredients'][:6]}")
        print(f"  Notes:            {r2['notes']}")
        print(f"  Time:             {ms}ms")
        if r2["found"] and r2["ingredients"]:
            print("  PASS: Ingredients extracted")
        elif r2["found"] and r2["needs_back_label"]:
            print("  PASS: Product identified, correctly requesting back label")
        else:
            print(f"  INFO: {r2['label_type']} — {r2['notes']}")

    # TEST 3: End-to-end scan_and_score
    print()
    print("TEST 3: scan_and_score() end-to-end")
    _test_img = NUTELLA_IMG or INNOCENT_IMG
    if _test_img:
        try:
            score_product
            e2e = scan_and_score({"type": "url", "url": _test_img}, _api_key)
            if e2e.get("scored"):
                s = e2e["scored"]
                r = e2e["resolved"]
                print(f"  Product:     {r['product_name']}")
                print(f"  Ingredients: {len(r['ingredients'])} extracted by vision")
                print(f"  NOURA:       {s['noura_score']}/100 [{s['verdict']}]")
                print(f"  Health:      {s['dimension_scores']['health']}")
                print(f"  Environment: {s['dimension_scores']['environment']}")
                print(f"  Animal:      {s['dimension_scores']['animal']}")
                print(f"  Governance:  {s['dimension_scores']['governance']}")
                assert s["success"]
                print("  PASS: Photo -> score pipeline working end-to-end")
            else:
                print(f"  INFO: {e2e.get('error')}")
                print("  (This is OK if image only shows front-of-pack)")
        except NameError:
            print("  SKIP: score_product not loaded - run Cell 43 first, then re-run Cell 48")

    # TEST 4: Error handling
    print()
    print("TEST 4: Error handling")
    r4 = scan_label({"type": "url", "url": "https://example.com/notanimage.jpg"}, _api_key)
    assert not r4["found"]
    assert r4["error"]
    print(f"  PASS: Bad URL handled cleanly")

    r5 = scan_label({"type": "url", "url": "https://via.placeholder.com/400x300.jpg"}, _api_key)
    print(f"  Blank image -> label_type: {r5['label_type']} | needs_back_label: {r5['needs_back_label']}")
    print(f"  PASS: Blank/unclear image handled correctly")

print()
print("scan_label() label_type responses:")
print("  ingredients_visible  -> score immediately")
print("  partial_ingredients  -> score with LOW confidence flag")
print("  front_only           -> ask: 'Please send back label photo'")
print("  unclear              -> ask: 'Please retake the photo'")
print()
print("NOURA input pipeline complete:")
print("  Food barcode    -> resolve_product(barcode=...)       [Cell 47]")
print("  Product name    -> resolve_product(product_name=...)  [Cell 47]")
print("  Any photo       -> scan_label(image_source, api_key)  [Cell 48]")
print("  Any input       -> score_product()                    [Cell 43]")
"""

with open('/content/noura_vision_input.py', 'w', encoding='utf-8') as f:
    f.write(cell_48_code)

exec(cell_48_code)
print("Cell 48 complete - noura_vision_input.py written and loaded")

Cell 48 - Vision Input Layer loaded

  CELL 48 SELF-TEST: VISION LAYER (unified prompt)
  API key: ...VnnAAA

  Fetching test image URLs from Open Food Facts...
  Nutella image:   https://images.openfoodfacts.org/images/products/301/762/042/2003/ingredients_en.821.full.jpg
  Innocent image:  https://images.openfoodfacts.org/images/products/503/886/263/4106/ingredients_en.30.full.jpg

TEST 1: Nutella (any angle - unified prompt)
  Found:            True
  Product:          Nutella
  Brand:            Ferrero
  Category:         food
  Label type:       front_only
  Confidence:       LOW
  Needs back label: True
  Ingredients (0): []
  Notes:            Front label of a Nutella jar is visible showing the brand name, Ferrero logo, an image of bread with Nutella spread, hazelnuts, and a yellow flower. Partial nutritional info visible on the side (339 kJ, 81 kcal for 15g; 2278 kJ for 100g). No ingredient list is visible. White screw-top lid. Barcode partially visible on right side.
  Time: 

In [8]:
# Fix 1: Upgrade image to full resolution and test
import requests, base64, os

_api_key = os.environ.get("ANTHROPIC_API_KEY", "")

# Swap .400. for .full. in the URL
nutella_full = "https://images.openfoodfacts.org/images/products/301/762/042/2003/ingredients_en.821.full.jpg"

print("Testing with full resolution image...")
r1 = scan_label({"type": "url", "url": nutella_full}, _api_key)
print(f"  Found:      {r1['found']}")
print(f"  Product:    {r1['product_name']}")
print(f"  Confidence: {r1['confidence']}")
print(f"  Label type: {r1['label_type']}")
print(f"  Ingredients ({len(r1['ingredients'])}): {r1['ingredients'][:6]}")

Testing with full resolution image...
  Found:      True
  Product:    Nutella
  Confidence: LOW
  Label type: front_of_pack
  Ingredients (0): []


In [10]:
import subprocess

files = ['noura_api.py', 'noura_product_resolver.py', 'noura_vision_input.py']
for f in files:
    subprocess.run(['git', '-C', '/content', 'add', f], capture_output=True)

result = subprocess.run(
    ['git', '-C', '/content', 'commit', '-m',
     'Cell 48: vision input layer live - universal prompt, any photo angle'],
    capture_output=True, text=True
)
print(result.stdout)
print(result.stderr)


fatal: not a git repository (or any of the parent directories): .git

