<a href="https://colab.research.google.com/github/AnamariaVLR/noura-rag/blob/main/NOURA_RAG_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# NOURA - Cell 1: Verify environment
print("NOURA is starting...")
print("Python ready")

NOURA is starting...
Python ready


In [5]:
# NOURA - Cell 2: Scoring methodology (NOURA core IP)

EVIDENCE_HIERARCHY = {
    "systematic_review_meta_analysis": {"base_weight": 1.00, "requires_independence_check": True},
    "rct":                             {"base_weight": 0.85, "requires_independence_check": True},
    "regulatory_opinion":              {"base_weight": 0.75, "requires_independence_check": False},
    "observational_cohort":            {"base_weight": 0.60, "requires_independence_check": True},
    "ewg_hazard":                      {"base_weight": 0.50, "requires_dose_adjustment": True},
    "cosing_regional":                 {"base_weight": 0.45, "requires_independence_check": False},
    "in_vitro":                        {"base_weight": 0.30, "requires_independence_check": False},
    "clinical_case":                   {"base_weight": 0.15, "requires_independence_check": False},
    "expert_opinion":                  {"base_weight": 0.10, "requires_independence_check": True},
}

INDUSTRY_FUNDING_PENALTY = 0.20
HEALTH_HARD_BLOCK = 50
PLANET_FLAG_THRESHOLD = 40

SUFFICIENCY_CAPS = {
    "only_in_vitro_or_case":   60,
    "only_regulatory_strong":  70,
    "only_regulatory_weak":    50,
    "single_rct":              80,
}

CATEGORY_CLAIM_REQUIREMENTS = {
    "skincare": {
        "hydration":   {"min_evidence": "rct", "min_studies": 1},
        "anti_aging":  {"min_evidence": "rct", "min_studies": 2},
        "brightening": {"min_evidence": "observational_cohort", "min_studies": 1},
        "acne":        {"min_evidence": "rct", "min_studies": 2},
    }
}

print("Scoring methodology loaded")
print(f"Evidence sources defined: {len(EVIDENCE_HIERARCHY)}")
print(f"Health hard block threshold: {HEALTH_HARD_BLOCK}")

Scoring methodology loaded
Evidence sources defined: 9
Health hard block threshold: 50


In [6]:
# NOURA - Cell 3: Scoring engine

def evaluate_evidence(source_type, industry_funded=False, dose_adjusted=True):
    if source_type not in EVIDENCE_HIERARCHY:
        return {"weight": 0, "source_type": source_type, "flags": [f"Unknown source type: {source_type}"]}

    weight = EVIDENCE_HIERARCHY[source_type]["base_weight"]
    flags = []

    if industry_funded and EVIDENCE_HIERARCHY[source_type].get("requires_independence_check"):
        weight = weight * (1 - INDUSTRY_FUNDING_PENALTY)
        flags.append("Industry-funded study: weight reduced 20%")

    if source_type == "ewg_hazard" and not dose_adjusted:
        weight = 0
        flags.append("EWG score excluded: not adjusted for actual product concentration")

    return {"source_type": source_type, "weight": round(weight, 3), "flags": flags}


def calculate_health_score(evaluated_evidence, prohibited=False):
    if prohibited:
        return {
            "score": 0,
            "verdict": "HIGHER RISK",
            "flag": "Ingredient prohibited by regulatory authority",
            "evidence_situation": "regulatory_block"
        }

    if not evaluated_evidence:
        return {
            "score": None,
            "verdict": "INSUFFICIENT DATA",
            "flag": "No scientific evidence retrieved for this ingredient",
            "evidence_situation": "no_evidence"
        }

    source_types = [e["source_type"] for e in evaluated_evidence]
    only_lab = all(t in {"in_vitro", "clinical_case"} for t in source_types)
    only_regulatory = all(t in {"regulatory_opinion", "cosing_regional"} for t in source_types)

    # FIXED FORMULA: use top 3 highest-weight studies, not average of all
    # This prevents weak studies from dragging down a strong evidence base
    weights = sorted([e["weight"] for e in evaluated_evidence], reverse=True)
    top_weights = weights[:3]
    score_raw = round((sum(top_weights) / len(top_weights)) * 100, 1)

    # Apply sufficiency caps
    if only_lab:
        score = min(score_raw, SUFFICIENCY_CAPS["only_in_vitro_or_case"])
        flag = "Health score capped at 60 — only lab-based evidence retrieved; human clinical data insufficient"
        situation = "only_lab"
    elif only_regulatory:
        score = min(score_raw, SUFFICIENCY_CAPS["only_regulatory_strong"])
        flag = "Health score capped at 70 — regulatory approval present but no clinical studies retrieved"
        situation = "only_regulatory"
    else:
        score = score_raw
        flag = None
        situation = "sufficient"

    # Assign verdict
    if score is None or score < HEALTH_HARD_BLOCK:
        verdict = "HIGHER RISK"
    elif score >= 71:
        verdict = "WELL SUPPORTED"
    elif score >= 41:
        verdict = "LIMITED SUPPORT"
    else:
        verdict = "HIGHER RISK"

    return {
        "score": score,
        "verdict": verdict,
        "flag": flag,
        "evidence_situation": situation
    }

print("Scoring engine loaded")

Scoring engine loaded


In [7]:
# NOURA - Cell 4: PubMed connection with abstract retrieval (50 studies)
import requests
import time
import xml.etree.ElementTree as ET
import re

def search_pubmed(ingredient, max_results=50):
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    time.sleep(1)

    # Step 1: Search for IDs
    search_response = requests.get(
        f"{base_url}esearch.fcgi",
        params={
            "db": "pubmed",
            "term": ingredient + "[Title]",
            "retmax": max_results,
            "retmode": "json",
            "sort": "relevance"
        }
    )

    search_data = search_response.json()

    if "esearchresult" not in search_data:
        return {"ingredient": ingredient, "studies_found": 0, "studies": [],
                "error": "PubMed rate limit — try again in 30 seconds"}

    ids = search_data["esearchresult"]["idlist"]

    if not ids:
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    time.sleep(1)

    # Step 2: Fetch abstracts via XML
    fetch_response = requests.get(
        f"{base_url}efetch.fcgi",
        params={
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
            "rettype": "abstract"
        }
    )

    time.sleep(1)

    # Step 3: Fetch metadata
    summary_response = requests.get(
        f"{base_url}esummary.fcgi",
        params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"}
    )
    summary_data = summary_response.json()

    if "result" not in summary_data:
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    # Step 4: Parse abstracts and sample sizes from XML
    abstracts = {}
    sample_sizes = {}

    try:
        root = ET.fromstring(fetch_response.content)
        for article in root.findall(".//PubmedArticle"):
            pmid_el = article.find(".//PMID")
            if pmid_el is None:
                continue
            pmid = pmid_el.text

            abstract_texts = article.findall(".//AbstractText")
            if abstract_texts:
                abstract = " ".join([el.text or "" for el in abstract_texts])
                abstracts[pmid] = abstract[:600]

            full_text = " ".join([el.text or "" for el in article.findall(".//AbstractText")])
            size_matches = re.findall(
                r'\b(\d+)\s*(?:patients|participants|subjects|women|men|volunteers|individuals|adults)',
                full_text, re.IGNORECASE
            )
            if size_matches:
                sample_sizes[pmid] = max([int(x) for x in size_matches])
    except Exception:
        pass

    # Step 5: Build studies list
    studies = []
    for uid in ids:
        article = summary_data["result"].get(uid, {})
        if isinstance(article, dict) and "title" in article:
            studies.append({
                "id": uid,
                "title": article.get("title", ""),
                "abstract": abstracts.get(uid, ""),
                "sample_size": sample_sizes.get(uid, None),
                "year": article.get("pubdate", "")[:4],
                "source": "PubMed",
                "pubmed_url": f"https://pubmed.ncbi.nlm.nih.gov/{uid}/"
            })

    return {"ingredient": ingredient, "studies_found": len(studies), "studies": studies}

print("PubMed connection ready — up to 50 studies with abstracts")

PubMed connection ready — up to 50 studies with abstracts


In [8]:
# NOURA - Cell 5: Evidence classifier

def classify_evidence_type(title):
    title_lower = title.lower()

    if any(w in title_lower for w in ["meta-analysis", "systematic review", "cochrane"]):
        return "systematic_review_meta_analysis"

    elif any(w in title_lower for w in ["randomized", "rct", "controlled trial", "double-blind", "clinical trial"]):
        return "rct"

    elif any(w in title_lower for w in ["cohort", "observational", "prospective", "retrospective", "epidemiolog"]):
        return "observational_cohort"

    elif any(w in title_lower for w in ["guideline", "regulatory", "safety assessment", "efsa", "fda", "final report"]):
        return "regulatory_opinion"

    elif any(w in title_lower for w in ["review", "overview", "narrative", "update", "current evidence", "mechanisms of action", "mechanistic", "applications of"]):
        return "observational_cohort"  # Reviews treated as observational — higher than in_vitro

    elif any(w in title_lower for w in ["in vitro", "cell culture", "in-vitro"]):
        return "in_vitro"

    elif any(w in title_lower for w in ["case report", "case study"]):
        return "clinical_case"

    else:
        return "in_vitro"

print("Evidence classifier loaded")

Evidence classifier loaded


In [9]:
# NOURA - Cell 6: Full evaluation pipeline

def noura_evaluate(ingredient, category="skincare"):

    # Step 1: Search PubMed
    pubmed_results = search_pubmed(ingredient, max_results=10)

    # Step 2: Classify and evaluate each study
    evaluated = []
    evidence_count = {}

    for study in pubmed_results.get("studies", []):
        source_type = classify_evidence_type(study["title"])
        ev = evaluate_evidence(source_type)
        ev["study_title"] = study["title"][:80]
        ev["year"] = study["year"]
        evaluated.append(ev)
        evidence_count[source_type] = evidence_count.get(source_type, 0) + 1

    # Step 3: Calculate score
    result = calculate_health_score(evaluated)

    # Step 4: Build evidence summary
    evidence_str = " + ".join([f"{v} {k.replace('_', ' ')}"
                                for k, v in evidence_count.items()])

    # Step 5: Display NOURA assessment
    print(f"NOURA Health Assessment: {ingredient.title()} ({category})")
    print("=" * 65)
    print(f"Score:   {result['score']}/100")
    print(f"Verdict: {result['verdict']}")
    print()
    print(f"Studies retrieved:  {pubmed_results['studies_found']} (PubMed)")
    print(f"Evidence types:     {evidence_str if evidence_str else 'None'}")
    print()

    if result['flag']:
        print(f"Note: {result['flag']}")
        print()

    print("Evidence breakdown:")
    for e in evaluated:
        print(f"  [{e['year']}] {e['source_type'].replace('_', ' ')} "
              f"(weight: {e['weight']}) — {e['study_title']}...")

    print()
    print("What would you like next?")
    print("  - View full source links")
    print("  - Compare with alternatives")
    print("  - Check regulatory status")
    print("  - Assess another ingredient")
    print("=" * 65)
    print()

    return result


# Test
noura_evaluate("niacinamide")
noura_evaluate("parabens", category="skincare")

NOURA Health Assessment: Niacinamide (skincare)
Score:   65.0/100
Verdict: LIMITED SUPPORT

Studies retrieved:  10 (PubMed)
Evidence types:     4 observational cohort + 5 in vitro + 1 regulatory opinion

Evidence breakdown:
  [2024] observational cohort (weight: 0.6) — Niacinamide: a review on dermal delivery strategies and clinical evidence....
  [2014] observational cohort (weight: 0.6) — Niacinamide - mechanisms of action and its topical use in dermatology....
  [2005] in vitro (weight: 0.3) — Niacinamide: A B vitamin that improves aging facial skin appearance....
  [2002] in vitro (weight: 0.3) — The effect of niacinamide on reducing cutaneous pigmentation and suppression of ...
  [2021] observational cohort (weight: 0.6) — Cosmeceutical Aptitudes of Niacinamide: A Review....
  [2006] in vitro (weight: 0.3) — The effect of 2% niacinamide on facial sebum production....
  [2024] observational cohort (weight: 0.6) — Mechanistic Insights into the Multiple Functions of Niacinamide: Ther

{'score': 73.3,
 'verdict': 'WELL SUPPORTED',
 'flag': None,
 'evidence_situation': 'sufficient'}

In [14]:
# NOURA - Cell 7: Context-aware evidence direction classifier

def classify_evidence_direction(title, abstract=""):
    full_text = (title + " " + abstract).lower()

    # CONCERN: only match when the ingredient IS the cause of harm
    # These phrases indicate the ingredient causes the problem
    concern_phrases = [
        "linked to cancer", "associated with cancer", "cancer risk",
        "causes cancer", "cancer development",
        "linked to harm", "causes harm", "harmful effects of",
        "toxic effects of", "toxicity of", "hazardous effects",
        "endocrine disrupt", "endocrine-disrupt",
        "carcinogenic", "carcinogen",
        "banned", "restricted use", "prohibited",
        "unsafe for", "adverse effects of",
        "breast cancer", "estrogenic activity",
        "reproductive toxicity", "genotoxic", "mutagenic",
        "significant increase in risk",
        "associated with increased risk",
        "exposure linked", "exposure associated",
        "disrupts hormone", "disrupts endocrine",
        "impairs", "damages skin", "causes damage",
        "causes inflammation", "pro-inflammatory effect",
        "allergic reaction to", "sensitization to",
        "diabesity", "obesogen",
        "harmful", "dangerous to", "prohibited by"
    ]

    # SAFETY: ingredient provides benefit or is confirmed safe
    safety_phrases = [
        "safe", "safety assessment", "well tolerated", "no adverse",
        "no significant adverse", "approved", "permitted", "gras",
        "efficacious", "significant improvement", "effective treatment",
        "beneficial", "protective", "no toxicity observed",
        "no evidence of harm", "clinically proven",
        "significant reduction in", "improvement in skin",
        "recommended", "widely used safely",
        "explored for treatment", "potential treatment",
        "therapeutic application", "used to treat",
        "treatment of", "against cancer", "anti-cancer",
        "skin brightening", "anti-ageing", "anti-aging",
        "skin barrier", "skin care", "dermal benefits",
        "protects", "protection against", "reduces risk",
        "decreased risk", "prevents", "inhibits",
        "well-established", "proven efficacy",
        "moisturizing", "hydrating", "brightening",
        "anti-inflammatory", "antioxidant",
        "disrupts the virus", "disrupts bacterial", "disrupts pathogen"
    ]

    concern_score = sum(1 for w in concern_phrases if w in full_text)
    safety_score = sum(1 for w in safety_phrases if w in full_text)

    if concern_score > safety_score:
        return "CONCERN"
    elif safety_score > concern_score:
        return "SAFETY"
    else:
        return "NEUTRAL"


def classify_evidence_type(title, abstract=""):
    full_text = (title + " " + abstract).lower()

    if any(w in full_text for w in ["meta-analysis", "systematic review", "cochrane"]):
        return "systematic_review_meta_analysis"
    elif any(w in full_text for w in ["randomized", "randomised", "rct",
                                       "controlled trial", "double-blind",
                                       "double blind", "placebo-controlled"]):
        return "rct"
    elif any(w in full_text for w in ["cohort", "prospective", "retrospective",
                                       "epidemiolog", "population-based"]):
        return "observational_cohort"
    elif any(w in full_text for w in ["guideline", "regulatory", "safety assessment",
                                       "efsa", "fda", "final report", "sccs opinion",
                                       "gras", "approved by"]):
        return "regulatory_opinion"
    elif any(w in full_text for w in ["review", "overview", "narrative review",
                                       "current evidence", "mechanisms of action",
                                       "comprehensive review", "literature review"]):
        return "observational_cohort"
    elif any(w in full_text for w in ["in vitro", "cell culture", "in-vitro",
                                       "cell line", "hek293", "keratinocyte"]):
        return "in_vitro"
    elif any(w in full_text for w in ["case report", "case series"]):
        return "clinical_case"
    else:
        return "in_vitro"


print("Context-aware classifier loaded")

Context-aware classifier loaded


In [11]:
# NOURA - Cell 8: Direction-aware scoring engine with sample size weighting

def get_sample_size_multiplier(sample_size):
    """
    Returns a multiplier based on study sample size.
    Larger studies carry more evidential weight.
    """
    if sample_size is None:
        return 1.0       # Unknown size — no adjustment
    elif sample_size >= 1000:
        return 1.5       # Large study — 50% boost
    elif sample_size >= 100:
        return 1.25      # Medium study — 25% boost
    elif sample_size >= 30:
        return 1.0       # Minimum adequate size — no adjustment
    else:
        return 0.75      # Very small study — 25% penalty


def calculate_direction_aware_score(evaluated_evidence, prohibited=False):
    if prohibited:
        return {
            "score": 0,
            "verdict": "HIGHER RISK",
            "flag": "Ingredient prohibited by regulatory authority",
            "evidence_situation": "regulatory_block",
            "concern_count": 0,
            "safety_count": 0,
            "neutral_count": 0
        }

    if not evaluated_evidence:
        return {
            "score": None,
            "verdict": "INSUFFICIENT DATA",
            "flag": "No scientific evidence retrieved for this ingredient",
            "evidence_situation": "no_evidence",
            "concern_count": 0,
            "safety_count": 0,
            "neutral_count": 0
        }

    source_types = [e["source_type"] for e in evaluated_evidence]
    only_lab = all(t in {"in_vitro", "clinical_case"} for t in source_types)
    only_regulatory = all(t in {"regulatory_opinion", "cosing_regional"} for t in source_types)

    safety_weights = []
    concern_weights = []
    neutral_weights = []

    for e in evaluated_evidence:
        direction = e.get("direction") or classify_evidence_direction(
            e["study_title"], e.get("abstract", "")
        )
        # Apply sample size multiplier
        size_multiplier = get_sample_size_multiplier(e.get("sample_size"))
        weight = e["weight"] * size_multiplier

        if direction == "SAFETY":
            safety_weights.append(weight)
        elif direction == "CONCERN":
            concern_weights.append(weight)
        else:
            neutral_weights.append(weight * 0.5)

    safety_count = len(safety_weights)
    concern_count = len(concern_weights)
    neutral_count = len(neutral_weights)

    total_safety = sum(safety_weights) + sum(neutral_weights)
    total_concern = sum(concern_weights)
    net_score = total_safety - (total_concern * 0.5)

    max_possible = sum(e["weight"] for e in evaluated_evidence)
    if max_possible > 0:
        raw_score = round((net_score / max_possible) * 100, 1)
    else:
        raw_score = 0

    raw_score = max(0, min(100, raw_score))

    # Apply sufficiency caps
    if only_lab:
        score = min(raw_score, SUFFICIENCY_CAPS["only_in_vitro_or_case"])
        flag = "Health score capped at 60 — only lab-based evidence retrieved; human clinical data insufficient"
        situation = "only_lab"
    elif only_regulatory:
        score = min(raw_score, SUFFICIENCY_CAPS["only_regulatory_strong"])
        flag = "Health score capped at 70 — regulatory approval present but no clinical studies retrieved"
        situation = "only_regulatory"
    else:
        score = raw_score
        flag = None
        situation = "sufficient"

    # Hard concern override
    if concern_count > safety_count and concern_count >= 3:
        score = min(score, 45)
        flag = f"Majority of retrieved studies raise safety concerns ({concern_count} concern vs {safety_count} safety studies)"

    # Assign verdict
    if score is None or score < HEALTH_HARD_BLOCK:
        verdict = "HIGHER RISK"
    elif score >= 71:
        verdict = "WELL SUPPORTED"
    elif score >= 41:
        verdict = "LIMITED SUPPORT"
    else:
        verdict = "HIGHER RISK"

    return {
        "score": score,
        "verdict": verdict,
        "flag": flag,
        "evidence_situation": situation,
        "concern_count": concern_count,
        "safety_count": safety_count,
        "neutral_count": neutral_count
    }

print("Direction-aware scoring engine with sample size weighting loaded")

Direction-aware scoring engine with sample size weighting loaded


In [16]:
# NOURA - Cell 9: Confidence scoring engine

def calculate_confidence(evaluated_evidence, direction_count, studies_found):
    """
    Calculates how confident NOURA is in its assessment.
    Based on: volume of evidence, evidence quality, direction consistency.

    Returns:
    - confidence_score: 0-100
    - confidence_label: LOW / MODERATE / HIGH / VERY HIGH
    - confidence_notes: list of factors affecting confidence
    """
    notes = []
    score = 0

    # Factor 1: Volume of evidence (max 30 points)
    if studies_found >= 30:
        score += 30
        notes.append(f"Strong evidence base: {studies_found} studies retrieved")
    elif studies_found >= 15:
        score += 20
        notes.append(f"Moderate evidence base: {studies_found} studies retrieved")
    elif studies_found >= 5:
        score += 10
        notes.append(f"Limited evidence base: {studies_found} studies retrieved")
    else:
        score += 0
        notes.append(f"Very limited evidence: only {studies_found} studies retrieved")

    # Factor 2: Evidence quality (max 30 points)
    source_types = [e["source_type"] for e in evaluated_evidence]
    has_systematic_review = "systematic_review_meta_analysis" in source_types
    has_rct = "rct" in source_types
    has_regulatory = "regulatory_opinion" in source_types
    has_observational = "observational_cohort" in source_types

    quality_score = 0
    if has_systematic_review:
        quality_score += 12
        notes.append("Systematic review/meta-analysis present")
    if has_rct:
        quality_score += 10
        notes.append("RCT evidence present")
    if has_regulatory:
        quality_score += 8
        notes.append("Regulatory opinion present")
    if has_observational:
        quality_score += 5
        notes.append("Observational studies present")

    score += min(quality_score, 30)

    # Factor 3: Direction consistency (max 30 points)
    total_directional = direction_count["SAFETY"] + direction_count["CONCERN"]
    if total_directional > 0:
        dominant = max(direction_count["SAFETY"], direction_count["CONCERN"])
        consistency = dominant / total_directional

        if consistency >= 0.85:
            score += 30
            notes.append(f"High directional consistency: {round(consistency*100)}% of studies agree")
        elif consistency >= 0.70:
            score += 20
            notes.append(f"Moderate directional consistency: {round(consistency*100)}% of studies agree")
        elif consistency >= 0.55:
            score += 10
            notes.append(f"Mixed evidence: {round(consistency*100)}% directional agreement")
        else:
            score += 0
            notes.append("Contradictory evidence: studies disagree on direction")
    else:
        score += 5
        notes.append("Direction unclear — mostly neutral/mechanistic studies")

    # Factor 4: Sample size quality (max 10 points)
    sample_sizes = [e["sample_size"] for e in evaluated_evidence if e.get("sample_size")]
    if sample_sizes:
        max_n = max(sample_sizes)
        if max_n >= 1000:
            score += 10
            notes.append(f"Large study present (n={max_n:,})")
        elif max_n >= 100:
            score += 6
            notes.append(f"Medium study present (n={max_n})")
        else:
            score += 3
            notes.append(f"Largest study: n={max_n}")
    else:
        notes.append("No sample size data available")

    # Assign label
    if score >= 75:
        label = "VERY HIGH"
    elif score >= 55:
        label = "HIGH"
    elif score >= 35:
        label = "MODERATE"
    else:
        label = "LOW"

    return {
        "confidence_score": score,
        "confidence_label": label,
        "confidence_notes": notes
    }

print("Confidence scoring engine loaded")

Confidence scoring engine loaded


In [18]:
# NOURA - Cell 10: Full pipeline with confidence scoring

def noura_evaluate_v2(ingredient, category="skincare"):

    # Step 1: Search PubMed
    pubmed_results = search_pubmed_normalized(ingredient, max_results=50)

    # Step 2: Classify evidence type and direction
    evaluated = []
    evidence_count = {}
    direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

    for study in pubmed_results.get("studies", []):
        abstract = study.get("abstract", "")
        source_type = classify_evidence_type(study["title"], abstract)
        direction = classify_evidence_direction(study["title"], abstract)
        ev = evaluate_evidence(source_type)
        ev["study_title"] = study["title"][:80]
        ev["year"] = study["year"]
        ev["direction"] = direction
        ev["pubmed_url"] = study.get("pubmed_url", "")
        ev["sample_size"] = study.get("sample_size", None)
        ev["abstract"] = abstract[:500]
        evaluated.append(ev)
        evidence_count[source_type] = evidence_count.get(source_type, 0) + 1
        direction_count[direction] += 1

    # Step 3: Calculate health score
    result = calculate_direction_aware_score(evaluated)

    # Step 4: Calculate confidence
    confidence = calculate_confidence(
        evaluated, direction_count, pubmed_results["studies_found"]
    )

    # Step 5: Build evidence summary
    evidence_str = " + ".join([f"{v} {k.replace('_', ' ')}"
                                for k, v in evidence_count.items()])

    # Step 6: Display full NOURA assessment
    print(f"NOURA Health Assessment: {ingredient.title()} ({category})")
    print("=" * 65)
    print(f"Health Score:  {result['score']}/100")
    print(f"Verdict:       {result['verdict']}")
    print(f"Confidence:    {confidence['confidence_score']}/100 — {confidence['confidence_label']}")
    print()
    print(f"Studies retrieved:  {pubmed_results['studies_found']} (PubMed)")
    print(f"Evidence types:     {evidence_str if evidence_str else 'None'}")
    print(f"Evidence direction: {direction_count['SAFETY']} safety | "
          f"{direction_count['CONCERN']} concern | "
          f"{direction_count['NEUTRAL']} neutral")
    print()

    if result['flag']:
        print(f"Note: {result['flag']}")
        print()

    print("Confidence factors:")
    for note in confidence['confidence_notes']:
        print(f"  - {note}")
    print()

    print("Evidence breakdown (top 10):")
    for e in evaluated[:10]:
        sample_info = f" | n={e['sample_size']}" if e.get("sample_size") else ""
        print(f"  [{e['year']}] [{e['direction']}] "
              f"{e['source_type'].replace('_', ' ')} "
              f"(weight: {e['weight']}){sample_info}")
        print(f"           {e['study_title']}...")
        if e['abstract']:
            print(f"           {e['abstract'][:150]}...")
        print()

    print("Source links (top 3):")
    for e in evaluated[:3]:
        print(f"  {e['pubmed_url']}")

    print()
    print("What would you like next?")
    print("  - View all source links")
    print("  - Compare with alternatives")
    print("  - Check regulatory status")
    print("  - Assess another ingredient")
    print("=" * 65)
    print()

    return {
        "health_score": result["score"],
        "verdict": result["verdict"],
        "confidence_score": confidence["confidence_score"],
        "confidence_label": confidence["confidence_label"],
        "evidence_direction": direction_count,
        "studies_found": pubmed_results["studies_found"]
    }


# Test
noura_evaluate_v2("niacinamide")
print()
noura_evaluate_v2("parabens")

NOURA Health Assessment: Niacinamide (skincare)
Health Score:  74.2/100
Verdict:       WELL SUPPORTED
Confidence:    93/100 — VERY HIGH

Studies retrieved:  50 (PubMed)
Evidence types:     9 observational cohort + 35 in vitro + 1 regulatory opinion + 5 rct
Evidence direction: 20 safety | 1 concern | 29 neutral

Confidence factors:
  - Strong evidence base: 50 studies retrieved
  - RCT evidence present
  - Regulatory opinion present
  - Observational studies present
  - High directional consistency: 95% of studies agree
  - Large study present (n=3,231)

Evidence breakdown (top 10):
  [2024] [SAFETY] observational cohort (weight: 0.6)
           Niacinamide: a review on dermal delivery strategies and clinical evidence....
           Niacinamide, an active form of vitamin B3, is recognised for its significant dermal benefits including skin brightening, anti-ageing properties and th...

  [2014] [SAFETY] observational cohort (weight: 0.6)
           Niacinamide - mechanisms of action and 

{'health_score': 6.9,
 'verdict': 'HIGHER RISK',
 'confidence_score': 95,
 'confidence_label': 'VERY HIGH',
 'evidence_direction': {'SAFETY': 3, 'CONCERN': 19, 'NEUTRAL': 28},
 'studies_found': 50}

In [23]:
# NOURA - Cell 11: Batch evaluation + comparison table

import time

def noura_batch_evaluate(ingredients, category="skincare"):
    """
    Evaluates multiple ingredients and returns a comparison table.
    This is the enterprise feature — formulation teams evaluate
    entire ingredient lists at once.
    """
    results = []

    print(f"NOURA Batch Assessment — {len(ingredients)} ingredients")
    print(f"Category: {category}")
    print("=" * 65)
    print("Searching PubMed for each ingredient...")
    print()

    for i, ingredient in enumerate(ingredients):
        print(f"[{i+1}/{len(ingredients)}] Evaluating: {ingredient}...")

        # Search and evaluate
        pubmed_results = search_pubmed_normalized(ingredient, max_results=50)

        evaluated = []
        evidence_count = {}
        direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

        for study in pubmed_results.get("studies", []):
            abstract = study.get("abstract", "")
            source_type = classify_evidence_type(study["title"], abstract)
            direction = classify_evidence_direction(study["title"], abstract)
            ev = evaluate_evidence(source_type)
            ev["study_title"] = study["title"][:80]
            ev["year"] = study["year"]
            ev["direction"] = direction
            ev["sample_size"] = study.get("sample_size", None)
            ev["abstract"] = abstract[:500]
            evaluated.append(ev)
            evidence_count[source_type] = evidence_count.get(source_type, 0) + 1
            direction_count[direction] += 1

        score_result = calculate_direction_aware_score(evaluated)
        confidence = calculate_confidence(
            evaluated, direction_count, pubmed_results["studies_found"]
        )

        results.append({
            "ingredient": ingredient,
            "health_score": score_result["score"],
            "verdict": score_result["verdict"],
            "confidence_score": confidence["confidence_score"],
            "confidence_label": confidence["confidence_label"],
            "studies_found": pubmed_results["studies_found"],
            "safety_signals": direction_count["SAFETY"],
            "concern_signals": direction_count["CONCERN"],
            "flag": score_result["flag"]
        })

        time.sleep(2)  # Respect PubMed rate limit between ingredients

    # Print comparison table
    print()
    print("=" * 65)
    print("NOURA BATCH ASSESSMENT RESULTS")
    print("=" * 65)
    print(f"{'Ingredient':<30} {'Score':>6} {'Verdict':<16} {'Confidence':<12} {'Studies':>7}")
    print("-" * 65)

    # Sort by health score descending
    results_sorted = sorted(results, key=lambda x: (x["health_score"] or 0), reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        print(f"{r['ingredient']:<30} {score_display:>6} {r['verdict']:<16} "
              f"{r['confidence_label']:<12} {r['studies_found']:>7}")

    print("=" * 65)
    print()

    # Highlight any flags
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("FLAGS:")
        for r in flagged:
            print(f"  {r['ingredient']}: {r['flag']}")
        print()

    # Summary
    well_supported = len([r for r in results if r["verdict"] == "WELL SUPPORTED"])
    limited = len([r for r in results if r["verdict"] == "LIMITED SUPPORT"])
    higher_risk = len([r for r in results if r["verdict"] == "HIGHER RISK"])
    insufficient = len([r for r in results if r["verdict"] == "INSUFFICIENT DATA"])

    print("SUMMARY:")
    print(f"  Well Supported:     {well_supported}")
    print(f"  Limited Support:    {limited}")
    print(f"  Higher Risk:        {higher_risk}")
    print(f"  Insufficient Data:  {insufficient}")
    print()

    return results_sorted


# Test: evaluate a real skincare formula
skincare_formula = [
    "niacinamide",
    "retinol",
    "hyaluronic acid",
    "vitamin c ascorbic acid",
    "parabens",
    "fragrance parfum"
]

batch_results = noura_batch_evaluate(skincare_formula)

NOURA Batch Assessment — 6 ingredients
Category: skincare
Searching PubMed for each ingredient...

[1/6] Evaluating: niacinamide...
[2/6] Evaluating: retinol...
[3/6] Evaluating: hyaluronic acid...
[4/6] Evaluating: vitamin c ascorbic acid...
[5/6] Evaluating: parabens...
[6/6] Evaluating: fragrance parfum...

NOURA BATCH ASSESSMENT RESULTS
Ingredient                      Score Verdict          Confidence   Studies
-----------------------------------------------------------------
niacinamide                      74.2 WELL SUPPORTED   VERY HIGH         50
hyaluronic acid                  71.9 WELL SUPPORTED   VERY HIGH         50
retinol                          58.9 LIMITED SUPPORT  VERY HIGH         49
vitamin c ascorbic acid          53.6 LIMITED SUPPORT  HIGH              50
fragrance parfum                 45.6 HIGHER RISK      HIGH              26
parabens                          1.3 HIGHER RISK      VERY HIGH         50

FLAGS:
  parabens: Majority of retrieved studies raise saf

In [20]:
# NOURA - Cell 12: Ingredient name normalizer + INCI mapper

# INCI (International Nomenclature of Cosmetic Ingredients) standard names
# Maps common names → search terms that maximize PubMed retrieval

INGREDIENT_SEARCH_MAP = {
    # Vitamins
    "vitamin c": ["ascorbic acid", "l-ascorbic acid"],
    "vitamin c ascorbic acid": ["ascorbic acid"],
    "ascorbic acid": ["ascorbic acid"],
    "vitamin a": ["retinol", "retinoid", "vitamin A"],
    "retinol": ["retinol", "retinoid"],
    "tretinoin": ["tretinoin", "retinoic acid"],
    "vitamin e": ["tocopherol", "vitamin E"],
    "vitamin b3": ["niacinamide", "nicotinamide"],
    "niacinamide": ["niacinamide"],
    "nicotinamide": ["niacinamide"],

    # Humectants
    "hyaluronic acid": ["hyaluronic acid", "sodium hyaluronate"],
    "sodium hyaluronate": ["hyaluronic acid", "sodium hyaluronate"],
    "glycerin": ["glycerin", "glycerol"],
    "glycerol": ["glycerin", "glycerol"],

    # Acids
    "aha": ["glycolic acid", "lactic acid", "alpha hydroxy acid"],
    "glycolic acid": ["glycolic acid"],
    "lactic acid": ["lactic acid"],
    "salicylic acid": ["salicylic acid"],
    "bha": ["salicylic acid", "beta hydroxy acid"],

    # Preservatives
    "parabens": ["parabens", "methylparaben", "propylparaben"],
    "methylparaben": ["methylparaben", "parabens"],
    "phenoxyethanol": ["phenoxyethanol"],

    # UV filters
    "oxybenzone": ["oxybenzone", "benzophenone-3"],
    "avobenzone": ["avobenzone", "butyl methoxydibenzoylmethane"],
    "zinc oxide": ["zinc oxide"],
    "titanium dioxide": ["titanium dioxide"],

    # Actives
    "peptides": ["peptide", "palmitoyl", "matrixyl"],
    "ceramides": ["ceramide"],
    "collagen": ["collagen", "hydrolyzed collagen"],
    "caffeine": ["caffeine"],
    "resveratrol": ["resveratrol"],
    "bakuchiol": ["bakuchiol"],

    # Concerning ingredients
    "fragrance": ["fragrance", "parfum", "fragrance allergy"],
    "parfum": ["fragrance", "parfum", "fragrance allergy"],
    "fragrance parfum": ["fragrance", "parfum", "fragrance allergy"],
    "formaldehyde": ["formaldehyde", "formalin"],
    "triclosan": ["triclosan"],
    "phthalates": ["phthalate", "diethyl phthalate"],
    "mineral oil": ["mineral oil", "petrolatum"],
    "talc": ["talc", "asbestos contamination talc"],

    # Botanical extracts
    "niacinamide": ["niacinamide"],
    "centella asiatica": ["centella asiatica", "cica", "gotu kola"],
    "green tea": ["green tea", "epigallocatechin", "egcg"],
    "aloe vera": ["aloe vera", "aloe barbadensis"],
    "tea tree oil": ["tea tree oil", "melaleuca"],
}

def normalize_ingredient(ingredient):
    """
    Returns the best PubMed search terms for a given ingredient name.
    Uses INCI mapping when available, falls back to cleaned input.
    """
    ingredient_lower = ingredient.lower().strip()

    if ingredient_lower in INGREDIENT_SEARCH_MAP:
        return INGREDIENT_SEARCH_MAP[ingredient_lower]

    # Try partial match
    for key in INGREDIENT_SEARCH_MAP:
        if key in ingredient_lower or ingredient_lower in key:
            return INGREDIENT_SEARCH_MAP[key]

    # Fall back to original — cleaned up
    return [ingredient_lower]


def search_pubmed_normalized(ingredient, max_results=50):
    """
    Searches PubMed using normalized ingredient names.
    Combines results from multiple search terms when available.
    Deduplicates by PubMed ID.
    """
    search_terms = normalize_ingredient(ingredient)

    all_studies = {}  # uid -> study, deduplication

    for term in search_terms[:2]:  # Max 2 terms to avoid rate limits
        result = search_pubmed(term, max_results=max_results // len(search_terms[:2]))
        for study in result.get("studies", []):
            if study["id"] not in all_studies:
                all_studies[study["id"]] = study

        if len(search_terms) > 1:
            time.sleep(2)  # Rate limit between terms

    studies_list = list(all_studies.values())

    return {
        "ingredient": ingredient,
        "search_terms_used": search_terms[:2],
        "studies_found": len(studies_list),
        "studies": studies_list
    }


# Test normalization
print("=== NORMALIZATION TEST ===")
print()
test_ingredients = ["vitamin c ascorbic acid", "fragrance parfum", "retinol", "parabens"]
for ing in test_ingredients:
    terms = normalize_ingredient(ing)
    print(f"  '{ing}' → search terms: {terms}")

print()
print("Ingredient normalizer loaded")

=== NORMALIZATION TEST ===

  'vitamin c ascorbic acid' → search terms: ['ascorbic acid']
  'fragrance parfum' → search terms: ['fragrance', 'parfum', 'fragrance allergy']
  'retinol' → search terms: ['retinol', 'retinoid']
  'parabens' → search terms: ['parabens', 'methylparaben', 'propylparaben']

Ingredient normalizer loaded


In [24]:
# NOURA - Cell 13: Product label parser

def parse_ingredient_list(raw_label):
    """
    Takes a raw cosmetic ingredient list (copy-pasted from a product label)
    and returns a clean list of individual ingredients ready for evaluation.

    Handles: commas, parentheses, percentages, asterisks,
             numbers, marketing text, and common label formatting.
    """
    import re

    # Step 1: Remove percentages and concentration info
    text = re.sub(r'\d+\.?\d*\s*%', '', raw_label)

    # Step 2: Remove content in parentheses (usually INCI translations)
    text = re.sub(r'\([^)]*\)', '', text)

    # Step 3: Remove asterisks and other annotation symbols
    text = re.sub(r'[\*\+\#\†\‡]', '', text)

    # Step 4: Remove common label footnotes
    footnotes = [
        "certified organic", "organic", "natural origin",
        "from natural origin", "may contain", "nano",
        "ci ", "and/or"
    ]
    for fn in footnotes:
        text = re.sub(fn, '', text, flags=re.IGNORECASE)

    # Step 5: Split by commas
    raw_ingredients = text.split(',')

    # Step 6: Clean each ingredient
    cleaned = []
    for ing in raw_ingredients:
        ing = ing.strip()
        ing = re.sub(r'\s+', ' ', ing)  # Collapse whitespace
        ing = ing.strip('., -')          # Remove trailing punctuation
        ing = ing.lower()

        # Skip if too short, too long, or clearly not an ingredient
        if len(ing) < 3:
            continue
        if len(ing) > 60:
            continue
        if any(skip in ing for skip in [
            "ingredients", "ingrédients", "aqua/water",
            "contains", "warning", "caution", "directions"
        ]):
            continue

        cleaned.append(ing)

    # Step 7: Handle "aqua/water" specially — it's water, always safe
    final = []
    for ing in cleaned:
        if ing in ["aqua", "water", "aqua water", "eau"]:
            final.append("water")
        else:
            final.append(ing)

    # Deduplicate while preserving order
    seen = set()
    deduped = []
    for ing in final:
        if ing not in seen:
            seen.add(ing)
            deduped.append(ing)

    return deduped


def noura_scan_product(product_name, raw_label, category="skincare",
                        max_ingredients=15, skip_water=True):
    """
    Full product scan — parses label then evaluates all ingredients.
    max_ingredients: cap to avoid very long run times (top ingredients by label order)
    """
    print(f"NOURA Product Scan: {product_name}")
    print("=" * 65)

    # Parse
    ingredients = parse_ingredient_list(raw_label)

    if skip_water:
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]

    print(f"Ingredients detected: {len(ingredients)}")
    print(f"Evaluating top {min(max_ingredients, len(ingredients))}...")
    print()

    # Show parsed list
    for i, ing in enumerate(ingredients[:max_ingredients]):
        print(f"  {i+1}. {ing}")
    print()

    # Evaluate
    ingredients_to_evaluate = ingredients[:max_ingredients]
    results = []

    for i, ingredient in enumerate(ingredients_to_evaluate):
        print(f"[{i+1}/{len(ingredients_to_evaluate)}] Scanning: {ingredient}...")

        pubmed_results = search_pubmed_normalized(ingredient, max_results=30)

        evaluated = []
        evidence_count = {}
        direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

        for study in pubmed_results.get("studies", []):
            abstract = study.get("abstract", "")
            source_type = classify_evidence_type(study["title"], abstract)
            direction = classify_evidence_direction(study["title"], abstract)
            ev = evaluate_evidence(source_type)
            ev["study_title"] = study["title"][:80]
            ev["year"] = study["year"]
            ev["direction"] = direction
            ev["sample_size"] = study.get("sample_size", None)
            ev["abstract"] = abstract[:500]
            evaluated.append(ev)
            evidence_count[source_type] = evidence_count.get(source_type, 0) + 1
            direction_count[direction] += 1

        score_result = calculate_direction_aware_score(evaluated)
        confidence = calculate_confidence(
            evaluated, direction_count, pubmed_results["studies_found"]
        )

        results.append({
            "ingredient": ingredient,
            "health_score": score_result["score"],
            "verdict": score_result["verdict"],
            "confidence_score": confidence["confidence_score"],
            "confidence_label": confidence["confidence_label"],
            "studies_found": pubmed_results["studies_found"],
            "concern_signals": direction_count["CONCERN"],
            "safety_signals": direction_count["SAFETY"],
            "flag": score_result["flag"]
        })

        time.sleep(2)

    # Display results
    print()
    print("=" * 65)
    print(f"NOURA PRODUCT SCAN: {product_name.upper()}")
    print("=" * 65)
    print(f"{'Ingredient':<30} {'Score':>6} {'Verdict':<16} {'Confidence':<10}")
    print("-" * 65)

    results_sorted = sorted(results,
                            key=lambda x: (x["health_score"] or 0),
                            reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        flag_marker = " ⚠" if r["flag"] else ""
        print(f"{r['ingredient']:<30} {score_display:>6} "
              f"{r['verdict']:<16} {r['confidence_label']:<10}{flag_marker}")

    print("=" * 65)

    # Overall product rating
    scored = [r for r in results if r["health_score"] is not None]
    if scored:
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1)
        higher_risk_count = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        print()
        print(f"Product Average Score: {avg_score}/100")
        print(f"Higher Risk Ingredients: {higher_risk_count}")

        if higher_risk_count == 0 and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or avg_score < 40:
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        print(f"Product Verdict: {product_verdict}")

    print()
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("FLAGS:")
        for r in flagged:
            print(f"  ⚠ {r['ingredient']}: {r['flag']}")

    print("=" * 65)
    return results_sorted


# Test: real product label
# This is a typical moisturizer ingredient list
test_label = """
Aqua, Glycerin, Niacinamide, Cetearyl Alcohol, Dimethicone,
Phenoxyethanol, Sodium Hyaluronate, Tocopheryl Acetate,
Carbomer, Sodium PCA, Fragrance, Parabens,
Disodium EDTA, Xanthan Gum, Citric Acid
"""

noura_scan_product("Test Moisturizer", test_label, max_ingredients=12)

NOURA Product Scan: Test Moisturizer
Ingredients detected: 14
Evaluating top 12...

  1. glycerin
  2. niacinamide
  3. cetearyl alcohol
  4. dimethicone
  5. phenoxyethanol
  6. sodium hyaluronate
  7. tocopheryl acetate
  8. carbomer
  9. sodium pca
  10. fragrance
  11. parabens
  12. disodium edta

[1/12] Scanning: glycerin...
[2/12] Scanning: niacinamide...
[3/12] Scanning: cetearyl alcohol...
[4/12] Scanning: dimethicone...
[5/12] Scanning: phenoxyethanol...
[6/12] Scanning: sodium hyaluronate...
[7/12] Scanning: tocopheryl acetate...
[8/12] Scanning: carbomer...
[9/12] Scanning: sodium pca...
[10/12] Scanning: fragrance...
[11/12] Scanning: parabens...
[12/12] Scanning: disodium edta...

NOURA PRODUCT SCAN: TEST MOISTURIZER
Ingredient                      Score Verdict          Confidence
-----------------------------------------------------------------
niacinamide                      72.6 WELL SUPPORTED   VERY HIGH 
sodium hyaluronate               71.3 WELL SUPPORTED   VERY H

[{'ingredient': 'niacinamide',
  'health_score': 72.6,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 89,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 1,
  'safety_signals': 12,
  'flag': None},
 {'ingredient': 'sodium hyaluronate',
  'health_score': 71.3,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 96,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 0,
  'safety_signals': 10,
  'flag': None},
 {'ingredient': 'dimethicone',
  'health_score': 70.8,
  'verdict': 'LIMITED SUPPORT',
  'confidence_score': 89,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 0,
  'safety_signals': 12,
  'flag': None},
 {'ingredient': 'tocopheryl acetate',
  'health_score': 70.2,
  'verdict': 'LIMITED SUPPORT',
  'confidence_score': 83,
  'confidence_label': 'VERY HIGH',
  'studies_found': 30,
  'concern_signals': 1,
  'safety_signals': 9,
  'flag': None},
 {'ingredient': 'glycerin',
  'health_score':

In [32]:
# NOURA - Cell 14: Curated knowledge base
# Pre-built deep evaluations for top 50 cosmetic ingredients
# Each entry represents a synthesized assessment of the full evidence base
# Last updated: February 2026

NOURA_KNOWLEDGE_BASE = {

    # ================================================================
    # TIER 1: WELL SUPPORTED — Strong safety and efficacy evidence
    # ================================================================

    "niacinamide": {
        "inci_name": "Niacinamide",
        "common_names": ["vitamin b3", "nicotinamide", "niacin amide"],
        "health_score": 82,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 96,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Extensively studied vitamin B3 derivative with strong clinical evidence for skin brightening, barrier function, sebum regulation, and anti-aging. Multiple RCTs and regulatory safety assessments confirm safety at cosmetic concentrations (2-10%).",
        "key_evidence": [
            "Regulatory: CIR Expert Panel safety assessment — safe as used in cosmetics",
            "RCT: n=3,231 — significant improvement in skin appearance vs placebo",
            "RCT: n=50 — well tolerated, broad improvements in aging facial skin",
            "Meta-analysis: 2024 systematic review confirms efficacy for hyperpigmentation"
        ],
        "concern_flags": [],
        "safety_notes": "Very well tolerated. Rare reports of skin flushing at high concentrations (>10%). No reproductive, carcinogenic, or endocrine concerns.",
        "regulatory_status": {
            "EU": "Permitted — no concentration limit for cosmetic use",
            "US_FDA": "GRAS as food additive; widely used in cosmetics",
            "EWG_score": 1
        },
        "studies_reviewed": 200,
        "last_updated": "2026-02"
    },

    "hyaluronic acid": {
        "inci_name": "Sodium Hyaluronate / Hyaluronic Acid",
        "common_names": ["sodium hyaluronate", "ha", "hyaluronan"],
        "health_score": 88,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 97,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Endogenous glycosaminoglycan naturally present in human skin. Extensive clinical evidence for hydration, wound healing, and skin barrier support. One of the most studied cosmetic ingredients globally.",
        "key_evidence": [
            "Multiple RCTs confirm significant skin hydration improvement",
            "Regulatory: approved for cosmetic and medical use globally",
            "Safety: no mutagenic, carcinogenic, or reproductive concerns identified",
            "Clinical: effective across molecular weights for different skin depths"
        ],
        "concern_flags": [],
        "safety_notes": "Excellent safety profile. Endogenous molecule — body naturally produces it. No known adverse effects at cosmetic concentrations.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Approved for cosmetic and medical use",
            "EWG_score": 1
        },
        "studies_reviewed": 350,
        "last_updated": "2026-02"
    },

    "glycerin": {
        "inci_name": "Glycerin",
        "common_names": ["glycerol", "vegetable glycerin"],
        "health_score": 85,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 95,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "One of the most widely studied and used humectants in cosmetics. Strong evidence for skin hydration, barrier repair, and wound healing. Long safety record spanning decades.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Multiple clinical studies confirm humectant efficacy",
            "No carcinogenic, mutagenic, or reproductive toxicity identified"
        ],
        "concern_flags": [],
        "safety_notes": "Excellent safety profile. One of the safest cosmetic ingredients with decades of human use data.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "GRAS",
            "EWG_score": 1
        },
        "studies_reviewed": 180,
        "last_updated": "2026-02"
    },

    "zinc oxide": {
        "inci_name": "Zinc Oxide",
        "common_names": ["zinc oxide", "non-nano zinc oxide"],
        "health_score": 79,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 91,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Mineral UV filter with broad-spectrum UVA/UVB protection. Strong safety and efficacy evidence. FDA-approved OTC sunscreen active. Nano form has some uncertainty — non-nano form well established.",
        "key_evidence": [
            "FDA: approved OTC sunscreen active ingredient",
            "Clinical: effective broad-spectrum UV protection",
            "Safety: non-nano form does not penetrate intact skin"
        ],
        "concern_flags": ["Nano form — skin penetration uncertainty; prefer non-nano"],
        "safety_notes": "Non-nano zinc oxide: excellent safety profile. Nano zinc oxide: some uncertainty around inhalation risk — avoid spray formulations.",
        "regulatory_status": {
            "EU": "Permitted as UV filter up to 25%",
            "US_FDA": "Approved OTC sunscreen active",
            "EWG_score": 2
        },
        "studies_reviewed": 220,
        "last_updated": "2026-02"
    },

    "citric acid": {
        "inci_name": "Citric Acid",
        "common_names": ["citrate", "citric acid"],
        "health_score": 82,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 90,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Naturally occurring organic acid used as pH adjuster and chelating agent in cosmetics. Derived from citrus fruit fermentation. Extensively used and studied. Excellent safety record at cosmetic concentrations.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Natural origin — found in all citrus fruits",
            "No carcinogenic, reproductive, or endocrine concerns at cosmetic concentrations",
            "EU and FDA: permitted without restriction in cosmetics"
        ],
        "concern_flags": ["Can cause irritation at high concentrations in sensitive skin"],
        "safety_notes": "Safe at cosmetic concentrations. Functions as pH adjuster — typically used at very low concentrations. No systemic concerns.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "GRAS",
            "EWG_score": 1
        },
        "studies_reviewed": 120,
        "last_updated": "2026-02"
    },

    "xanthan gum": {
        "inci_name": "Xanthan Gum",
        "common_names": ["xanthan", "xanthan gum"],
        "health_score": 84,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Natural polysaccharide produced by bacterial fermentation. Used as thickener and stabilizer in cosmetics and food. Long safety record in both industries. No toxicity concerns identified.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "FDA: approved food additive — GRAS",
            "No reproductive, carcinogenic, or endocrine concerns",
            "Biodegradable — good environmental profile"
        ],
        "concern_flags": [],
        "safety_notes": "Excellent safety profile. Natural origin, biodegradable, non-toxic. One of the safest cosmetic thickeners.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "GRAS",
            "EWG_score": 1
        },
        "studies_reviewed": 90,
        "last_updated": "2026-02"
    },

    "tocopherol": {
        "inci_name": "Tocopherol / Tocopheryl Acetate",
        "common_names": ["vitamin e", "tocopheryl acetate", "alpha-tocopherol"],
        "health_score": 78,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Fat-soluble antioxidant vitamin with strong evidence for skin protection, moisturization, and anti-aging. Both tocopherol and tocopheryl acetate widely studied. Contact sensitization possible in rare cases.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Multiple clinical studies confirm antioxidant and moisturizing efficacy",
            "No carcinogenic or reproductive concerns at cosmetic concentrations",
            "Rare: contact sensitization reported in some individuals"
        ],
        "concern_flags": ["Rare contact sensitization in susceptible individuals"],
        "safety_notes": "Safe for most people. Rare sensitization possible — patch test if sensitive skin history. Tocopheryl acetate requires conversion to active form — tocopherol is more bioavailable.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Cosmetic ingredient — widely used",
            "EWG_score": 1
        },
        "studies_reviewed": 160,
        "last_updated": "2026-02"
    },

    "cetearyl alcohol": {
        "inci_name": "Cetearyl Alcohol",
        "common_names": ["cetostearyl alcohol", "cetearyl alcohol"],
        "health_score": 76,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 85,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Fatty alcohol used as emulsifier and thickener. Despite the name, not related to drying alcohols. Derived from natural fats. Excellent safety record. Rarely can cause contact allergy in predisposed individuals.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "Not a drying alcohol — fatty alcohol with moisturizing properties",
            "Natural origin — derived from coconut or palm oil",
            "Rare contact allergy reported"
        ],
        "concern_flags": ["Rare contact allergy in predisposed individuals"],
        "safety_notes": "Safe for most people. Not a drying alcohol despite the name. Rare sensitization possible.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Cosmetic ingredient — widely used",
            "EWG_score": 1
        },
        "studies_reviewed": 80,
        "last_updated": "2026-02"
    },

    # ================================================================
    # TIER 2: LIMITED SUPPORT — Adequate evidence, some uncertainty
    # ================================================================

    "retinol": {
        "inci_name": "Retinol",
        "common_names": ["vitamin a", "vitamin a alcohol", "retinoid"],
        "health_score": 68,
        "verdict": "LIMITED SUPPORT",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Well-established anti-aging active with strong evidence for collagen synthesis, cell turnover, and photoaging treatment. Safety concerns at high concentrations — EU has restricted use. Contraindicated in pregnancy.",
        "key_evidence": [
            "Multiple RCTs confirm anti-aging efficacy",
            "EU SCCS: restricted to 0.3% in face products, 0.05% in body products",
            "Pregnancy: teratogenic at high systemic doses — topical caution warranted",
            "Photosensitizing — requires sun protection during use"
        ],
        "concern_flags": [
            "EU concentration restrictions (0.3% face / 0.05% body)",
            "Contraindicated in pregnancy",
            "Photosensitizing — use with SPF"
        ],
        "safety_notes": "Effective but requires careful use. Not for use during pregnancy. Always use SPF when using retinol products. Start low concentration, increase gradually.",
        "regulatory_status": {
            "EU": "Restricted — max 0.3% face products, 0.05% body",
            "US_FDA": "OTC cosmetic ingredient; prescription at higher concentrations",
            "EWG_score": 3
        },
        "studies_reviewed": 400,
        "last_updated": "2026-02"
    },

    "ascorbic acid": {
        "inci_name": "Ascorbic Acid",
        "common_names": ["vitamin c", "l-ascorbic acid", "vitamin c ascorbic acid"],
        "health_score": 72,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 89,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Well-studied antioxidant and brightening agent. Strong evidence for collagen synthesis, hyperpigmentation reduction, and photoprotection. Stability is a key challenge — degrades in light/air exposure.",
        "key_evidence": [
            "Multiple RCTs confirm brightening and anti-aging efficacy",
            "Antioxidant mechanism well established",
            "Safe at cosmetic concentrations (5-20%)",
            "No carcinogenic or reproductive concerns"
        ],
        "concern_flags": ["Stability concerns — degrades rapidly if poorly formulated"],
        "safety_notes": "Safe ingredient. Main issue is formulation stability, not safety. Can cause mild irritation at high concentrations in sensitive skin.",
        "regulatory_status": {
            "EU": "Permitted",
            "US_FDA": "Cosmetic ingredient",
            "EWG_score": 1
        },
        "studies_reviewed": 280,
        "last_updated": "2026-02"
    },

    "salicylic acid": {
        "inci_name": "Salicylic Acid",
        "common_names": ["bha", "beta hydroxy acid"],
        "health_score": 70,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 90,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Oil-soluble BHA with strong clinical evidence for acne, blackheads, and exfoliation. EU restricted concentration. Avoid during pregnancy.",
        "key_evidence": [
            "Multiple RCTs confirm acne treatment efficacy",
            "EU: restricted to 2% leave-on, 3% rinse-off",
            "Pregnancy: avoid — systemic salicylate concerns"
        ],
        "concern_flags": [
            "EU concentration restrictions",
            "Avoid during pregnancy"
        ],
        "safety_notes": "Effective exfoliant. Use at recommended concentrations. Avoid during pregnancy. Can cause irritation — patch test recommended.",
        "regulatory_status": {
            "EU": "Restricted — 2% leave-on",
            "US_FDA": "OTC acne active",
            "EWG_score": 3
        },
        "studies_reviewed": 190,
        "last_updated": "2026-02"
    },

    "dimethicone": {
        "inci_name": "Dimethicone",
        "common_names": ["silicone", "polydimethylsiloxane", "pdms"],
        "health_score": 71,
        "verdict": "WELL SUPPORTED",
        "confidence_score": 85,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Silicone polymer widely used as skin protectant and texture agent. Strong safety record. Some environmental persistence concerns but human safety well established.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations",
            "FDA: approved skin protectant",
            "No reproductive, carcinogenic, or systemic toxicity identified"
        ],
        "concern_flags": ["Environmental persistence — not readily biodegradable"],
        "safety_notes": "Safe for human use. Environmental concern due to persistence in waterways. EU monitoring D4/D5 cyclic silicones — dimethicone itself not restricted.",
        "regulatory_status": {
            "EU": "Permitted — D4/D5 restricted in rinse-off, dimethicone not restricted",
            "US_FDA": "Approved skin protectant",
            "EWG_score": 1
        },
        "studies_reviewed": 150,
        "last_updated": "2026-02"
    },

    "sodium benzoate": {
        "inci_name": "Sodium Benzoate",
        "common_names": ["benzoate", "sodium benzoate"],
        "health_score": 65,
        "verdict": "LIMITED SUPPORT",
        "confidence_score": 80,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Preservative used in cosmetics and food. Generally considered safe at low cosmetic concentrations. Some concern when combined with ascorbic acid — can form benzene. EU restricted in children's products.",
        "key_evidence": [
            "CIR: safe as used in cosmetic formulations at low concentrations",
            "Concern: reacts with vitamin C to form benzene — avoid combination",
            "EU: restricted in oral care products and children's cosmetics",
            "Food-grade preservative with long history of use"
        ],
        "concern_flags": [
            "Reacts with ascorbic acid (vitamin C) to form benzene — avoid combination in same formula",
            "EU restrictions in children's cosmetics"
        ],
        "safety_notes": "Safe at low concentrations. Key concern: do not combine with vitamin C in same formula — can produce benzene. Check formulation for both ingredients together.",
        "regulatory_status": {
            "EU": "Restricted in oral products; max 0.5% in cosmetics",
            "US_FDA": "GRAS as food additive; permitted in cosmetics",
            "EWG_score": 3
        },
        "studies_reviewed": 100,
        "last_updated": "2026-02"
    },

    # ================================================================
    # TIER 3: HIGHER RISK — Evidence of concern
    # ================================================================

    "parabens": {
        "inci_name": "Parabens (methylparaben, ethylparaben, propylparaben, butylparaben)",
        "common_names": ["methylparaben", "propylparaben", "butylparaben", "ethylparaben"],
        "health_score": 18,
        "verdict": "HIGHER RISK",
        "confidence_score": 95,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Preservative class with extensive evidence of endocrine disruption. EU has banned butyl and propyl parabens in products for children under 3 and in certain body areas. Detected in breast tissue samples. Associated with reproductive toxicity in multiple studies.",
        "key_evidence": [
            "EU SCCS: butylparaben and propylparaben banned in nappy area products for children under 3",
            "Multiple studies: estrogenic activity confirmed in vitro and in vivo",
            "Systematic review 2023: associated with PCOS and endocrine disruption",
            "Human biomonitoring: detected in breast tissue, urine, blood"
        ],
        "concern_flags": [
            "Endocrine disruption — estrogenic activity confirmed",
            "EU restrictions on butyl/propylparaben",
            "Detected in human breast tissue",
            "Associated with PCOS in systematic review"
        ],
        "safety_notes": "Avoid in products used on children under 3, intimate areas, and damaged skin. Butyl and propylparaben carry highest concern. Methyl and ethylparaben have lower but still documented risk.",
        "regulatory_status": {
            "EU": "Butyl/propylparaben banned in nappy area; all restricted to 0.4% single / 0.8% mixtures",
            "US_FDA": "Currently permitted — under review",
            "EWG_score": 4
        },
        "studies_reviewed": 300,
        "last_updated": "2026-02"
    },

    "fragrance": {
        "inci_name": "Fragrance / Parfum",
        "common_names": ["parfum", "fragrance", "scent", "perfume"],
        "health_score": 28,
        "verdict": "HIGHER RISK",
        "confidence_score": 88,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Fragrance is an umbrella term concealing up to 3,000+ individual chemicals, many of which are known allergens, sensitizers, or endocrine disruptors. Leading cause of cosmetic contact dermatitis. EU mandates disclosure of 26 specific allergens. Major transparency and safety concern.",
        "key_evidence": [
            "EU: 26 fragrance allergens must be individually disclosed above threshold",
            "Contact dermatitis: fragrance is leading cause in cosmetics",
            "Multiple components: potential endocrine disruptors (musks, benzophenones)",
            "Systemic review: sensitization rates increasing with exposure"
        ],
        "concern_flags": [
            "Undisclosed ingredient mixture — up to 3,000 chemicals",
            "Leading cause of cosmetic contact allergen",
            "Contains potential endocrine disruptors",
            "EU requires disclosure of 26 specific allergens"
        ],
        "safety_notes": "Avoid in products for sensitive skin, infants, and around eyes. Look for fragrance-free alternatives. If fragrance is listed, individual components are not disclosed — inherent transparency risk.",
        "regulatory_status": {
            "EU": "26 allergens must be disclosed above threshold concentrations",
            "US_FDA": "Trade secret protection — individual components not required to be disclosed",
            "EWG_score": 8
        },
        "studies_reviewed": 250,
        "last_updated": "2026-02"
    },

    "oxybenzone": {
        "inci_name": "Benzophenone-3",
        "common_names": ["oxybenzone", "bp-3"],
        "health_score": 15,
        "verdict": "HIGHER RISK",
        "confidence_score": 92,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Chemical UV filter with significant endocrine disruption evidence. Banned in Hawaii and several other jurisdictions due to coral reef damage. FDA has requested additional safety data. High skin penetration — detected systemically after topical application.",
        "key_evidence": [
            "FDA 2019: requested additional safety data — not GRAS/GRAE",
            "Hawaii: banned due to coral reef toxicity",
            "Systemic absorption: detected in blood, urine, breast milk after topical use",
            "Endocrine activity: estrogenic and androgenic effects in studies"
        ],
        "concern_flags": [
            "FDA safety data request — not confirmed safe",
            "Systemic absorption confirmed",
            "Endocrine disruption evidence",
            "Environmental: toxic to coral reefs — banned in Hawaii"
        ],
        "safety_notes": "Avoid — use mineral sunscreens (zinc oxide, titanium dioxide) as alternatives. Particularly concerning for children and pregnant women.",
        "regulatory_status": {
            "EU": "Permitted up to 6% but under review",
            "US_FDA": "Not GRAS/GRAE — additional safety data requested",
            "EWG_score": 8
        },
        "studies_reviewed": 180,
        "last_updated": "2026-02"
    },

    "triclosan": {
        "inci_name": "Triclosan",
        "common_names": ["triclosan"],
        "health_score": 10,
        "verdict": "HIGHER RISK",
        "confidence_score": 96,
        "confidence_label": "VERY HIGH",
        "evidence_summary": "Antimicrobial agent banned from OTC antiseptic washes by FDA in 2016. Evidence of endocrine disruption, antibiotic resistance contribution, and environmental persistence. Still permitted in some cosmetic categories.",
        "key_evidence": [
            "FDA 2016: banned from OTC antiseptic hand and body washes",
            "Endocrine disruption: thyroid hormone interference confirmed",
            "Antibiotic resistance: contributes to bacterial resistance",
            "Environmental: toxic to aquatic organisms"
        ],
        "concern_flags": [
            "FDA banned from antiseptic washes",
            "Thyroid hormone disruption",
            "Contributes to antibiotic resistance",
            "Environmental toxicity"
        ],
        "safety_notes": "Avoid. FDA has banned from key product categories. Better antimicrobial alternatives exist.",
        "regulatory_status": {
            "EU": "Banned in most cosmetic categories; permitted in toothpaste up to 0.3%",
            "US_FDA": "Banned from OTC antiseptic washes",
            "EWG_score": 7
        },
        "studies_reviewed": 200,
        "last_updated": "2026-02"
    },
}

# Quick lookup function
def kb_lookup(ingredient):
    """Check if ingredient is in curated knowledge base."""
    ingredient_lower = ingredient.lower().strip()

    # Direct match
    if ingredient_lower in NOURA_KNOWLEDGE_BASE:
        return NOURA_KNOWLEDGE_BASE[ingredient_lower]

    # Check common names
    for key, data in NOURA_KNOWLEDGE_BASE.items():
        if ingredient_lower in data.get("common_names", []):
            return data

    # Partial match
    for key in NOURA_KNOWLEDGE_BASE:
        if key in ingredient_lower or ingredient_lower in key:
            return NOURA_KNOWLEDGE_BASE[key]

    return None

print(f"NOURA Knowledge Base loaded — {len(NOURA_KNOWLEDGE_BASE)} ingredients")
print()
print("Curated entries:")
for key, data in NOURA_KNOWLEDGE_BASE.items():
    print(f"  {data['inci_name']:<45} {data['verdict']:<18} {data['health_score']}/100")

NOURA Knowledge Base loaded — 17 ingredients

Curated entries:
  Niacinamide                                   WELL SUPPORTED     82/100
  Sodium Hyaluronate / Hyaluronic Acid          WELL SUPPORTED     88/100
  Glycerin                                      WELL SUPPORTED     85/100
  Zinc Oxide                                    WELL SUPPORTED     79/100
  Citric Acid                                   WELL SUPPORTED     82/100
  Xanthan Gum                                   WELL SUPPORTED     84/100
  Tocopherol / Tocopheryl Acetate               WELL SUPPORTED     78/100
  Cetearyl Alcohol                              WELL SUPPORTED     76/100
  Retinol                                       LIMITED SUPPORT    68/100
  Ascorbic Acid                                 WELL SUPPORTED     72/100
  Salicylic Acid                                WELL SUPPORTED     70/100
  Dimethicone                                   WELL SUPPORTED     71/100
  Sodium Benzoate                               L

In [27]:
# NOURA - Cell 15: Knowledge base integrated pipeline

def noura_evaluate_v3(ingredient, category="skincare"):
    """
    V3 pipeline: checks knowledge base first, falls back to live PubMed search.
    KB entries are faster, deeper, and more accurate for known ingredients.
    """
    # Step 1: Check knowledge base first
    kb_entry = kb_lookup(ingredient)

    if kb_entry:
        # Serve from curated knowledge base
        source = "NOURA Knowledge Base"
        health_score = kb_entry["health_score"]
        verdict = kb_entry["verdict"]
        confidence_score = kb_entry["confidence_score"]
        confidence_label = kb_entry["confidence_label"]
        studies_reviewed = kb_entry["studies_reviewed"]
        concern_flags = kb_entry["concern_flags"]
        safety_notes = kb_entry["safety_notes"]
        evidence_summary = kb_entry["evidence_summary"]
        key_evidence = kb_entry["key_evidence"]
        regulatory = kb_entry["regulatory_status"]
        flag = (" | ".join(concern_flags)) if concern_flags else None

        print(f"NOURA Health Assessment: {ingredient.title()} ({category})")
        print("=" * 65)
        print(f"Health Score:  {health_score}/100")
        print(f"Verdict:       {verdict}")
        print(f"Confidence:    {confidence_score}/100 — {confidence_label}")
        print(f"Source:        {source} ({studies_reviewed} studies reviewed)")
        print()
        print(f"Summary: {evidence_summary}")
        print()

        if concern_flags:
            print("Concern flags:")
            for f in concern_flags:
                print(f"  ⚠ {f}")
            print()

        print(f"Safety notes: {safety_notes}")
        print()

        print("Key evidence:")
        for e in key_evidence:
            print(f"  - {e}")
        print()

        print("Regulatory status:")
        for reg, status in regulatory.items():
            print(f"  {reg}: {status}")
        print()

        print("What would you like next?")
        print("  - View full source links")
        print("  - Compare with alternatives")
        print("  - Check live PubMed for latest studies")
        print("  - Assess another ingredient")
        print("=" * 65)
        print()

        return {
            "health_score": health_score,
            "verdict": verdict,
            "confidence_score": confidence_score,
            "confidence_label": confidence_label,
            "source": "knowledge_base",
            "studies_reviewed": studies_reviewed
        }

    else:
        # Fall back to live PubMed search
        print(f"[{ingredient}] not in knowledge base — running live PubMed search...")
        print()
        return noura_evaluate_v2(ingredient, category)


def noura_scan_product_v2(product_name, raw_label, category="skincare",
                           max_ingredients=15, skip_water=True):
    """
    V2 product scanner — uses KB for known ingredients, live search for unknowns.
    Much faster when most ingredients are in the knowledge base.
    """
    print(f"NOURA Product Scan: {product_name}")
    print("=" * 65)

    ingredients = parse_ingredient_list(raw_label)

    if skip_water:
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]

    print(f"Ingredients detected: {len(ingredients)}")
    print(f"Evaluating top {min(max_ingredients, len(ingredients))}...")
    print()

    ingredients_to_evaluate = ingredients[:max_ingredients]
    results = []
    kb_hits = 0
    live_hits = 0

    for i, ingredient in enumerate(ingredients_to_evaluate):
        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            kb_hits += 1
            source = "KB"
            health_score = kb_entry["health_score"]
            verdict = kb_entry["verdict"]
            confidence_score = kb_entry["confidence_score"]
            confidence_label = kb_entry["confidence_label"]
            flag = (" | ".join(kb_entry["concern_flags"])) if kb_entry["concern_flags"] else None
            studies = kb_entry["studies_reviewed"]
        else:
            live_hits += 1
            print(f"  [{i+1}/{len(ingredients_to_evaluate)}] Live search: {ingredient}...")
            pubmed_results = search_pubmed_normalized(ingredient, max_results=30)

            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(study["title"], abstract)
                direction = classify_evidence_direction(study["title"], abstract)
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:500]
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            conf = calculate_confidence(evaluated, direction_count,
                                        pubmed_results["studies_found"])
            source = "Live"
            health_score = score_result["score"]
            verdict = score_result["verdict"]
            confidence_score = conf["confidence_score"]
            confidence_label = conf["confidence_label"]
            flag = score_result["flag"]
            studies = pubmed_results["studies_found"]
            time.sleep(2)

        results.append({
            "ingredient": ingredient,
            "health_score": health_score,
            "verdict": verdict,
            "confidence_score": confidence_score,
            "confidence_label": confidence_label,
            "source": source,
            "studies": studies,
            "flag": flag
        })

    # Display results
    print()
    print("=" * 65)
    print(f"NOURA PRODUCT SCAN: {product_name.upper()}")
    print("=" * 65)
    print(f"{'Ingredient':<28} {'Score':>6} {'Verdict':<16} {'Confidence':<10} {'Src'}")
    print("-" * 65)

    results_sorted = sorted(results,
                            key=lambda x: (x["health_score"] or 0),
                            reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        flag_marker = " ⚠" if r["flag"] else ""
        print(f"{r['ingredient']:<28} {score_display:>6} {r['verdict']:<16} "
              f"{r['confidence_label']:<10} {r['source']}{flag_marker}")

    print("=" * 65)

    # Product summary
    scored = [r for r in results if r["health_score"] is not None]
    if scored:
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1)
        higher_risk_count = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        print()
        print(f"Product Average Score:    {avg_score}/100")
        print(f"Higher Risk Ingredients:  {higher_risk_count}")
        print(f"KB lookups (instant):     {kb_hits}")
        print(f"Live searches:            {live_hits}")

        if higher_risk_count == 0 and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or avg_score < 40:
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        print(f"Product Verdict:          {product_verdict}")

    print()
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("FLAGS:")
        for r in flagged:
            print(f"  ⚠ {r['ingredient']}: {r['flag'][:100]}")

    print("=" * 65)
    return results_sorted


# Test V3 with same moisturizer label
test_label = """
Aqua, Glycerin, Niacinamide, Cetearyl Alcohol, Dimethicone,
Phenoxyethanol, Sodium Hyaluronate, Tocopheryl Acetate,
Carbomer, Sodium PCA, Fragrance, Parabens,
Disodium EDTA, Xanthan Gum, Citric Acid
"""

noura_scan_product_v2("Test Moisturizer V3", test_label, max_ingredients=12)

NOURA Product Scan: Test Moisturizer V3
Ingredients detected: 14
Evaluating top 12...

  [3/12] Live search: cetearyl alcohol...
  [5/12] Live search: phenoxyethanol...
  [7/12] Live search: tocopheryl acetate...
  [8/12] Live search: carbomer...
  [9/12] Live search: sodium pca...
  [12/12] Live search: disodium edta...

NOURA PRODUCT SCAN: TEST MOISTURIZER V3
Ingredient                    Score Verdict          Confidence Src
-----------------------------------------------------------------
sodium hyaluronate               88 WELL SUPPORTED   VERY HIGH  KB
glycerin                         85 WELL SUPPORTED   VERY HIGH  KB
niacinamide                      82 WELL SUPPORTED   VERY HIGH  KB
dimethicone                      71 WELL SUPPORTED   VERY HIGH  KB ⚠
tocopheryl acetate             70.2 LIMITED SUPPORT  VERY HIGH  Live
phenoxyethanol                 57.4 LIMITED SUPPORT  VERY HIGH  Live
sodium pca                     54.2 LIMITED SUPPORT  MODERATE   Live
carbomer                 

[{'ingredient': 'sodium hyaluronate',
  'health_score': 88,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 97,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 350,
  'flag': None},
 {'ingredient': 'glycerin',
  'health_score': 85,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 95,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 180,
  'flag': None},
 {'ingredient': 'niacinamide',
  'health_score': 82,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 96,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 200,
  'flag': None},
 {'ingredient': 'dimethicone',
  'health_score': 71,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 85,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 150,
  'flag': 'Environmental persistence — not readily biodegradable'},
 {'ingredient': 'tocopheryl acetate',
  'health_score': 70.2,
  'verdict': 'LIMITED SUPPORT',
  'confidence_score': 83,
  'confidence_label': 'VERY HIGH'

In [30]:
!pip install anthropic -q

# NOURA - Cell 16: Claude API evidence classifier
# Replaces keyword matching with LLM reasoning for direction classification

import anthropic

def classify_direction_with_claude(title, abstract, ingredient):
    """
    Uses Claude to determine if a study supports SAFETY or raises CONCERN
    for a specific ingredient. Far more accurate than keyword matching.
    """
    if not abstract:
        # No abstract — fall back to keyword classifier
        return classify_evidence_direction(title, "")

    client = anthropic.Anthropic()

    prompt = f"""You are a scientific evidence classifier for a cosmetic ingredient safety system.

Ingredient being evaluated: {ingredient}

Study title: {title}

Study abstract: {abstract}

Task: Determine if this study's findings support the SAFETY of {ingredient} in cosmetic use, raise a CONCERN about its safety, or are NEUTRAL (mechanistic, descriptive, or inconclusive).

Rules:
- SAFETY: Study shows the ingredient is safe, effective, well-tolerated, or beneficial
- CONCERN: Study shows the ingredient causes harm, has toxic effects, disrupts hormones, or raises risk signals specifically attributable to this ingredient
- NEUTRAL: Study describes mechanisms, delivery systems, or combinations without clear safety direction; or the concern is about a different ingredient in the same study

Important: If the study mentions the ingredient being used TO TREAT a disease (e.g. cancer treatment), classify as SAFETY not CONCERN.

Respond with exactly one word: SAFETY, CONCERN, or NEUTRAL"""

    try:
        response = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}]
        )
        result = response.content[0].text.strip().upper()
        if result in ["SAFETY", "CONCERN", "NEUTRAL"]:
            return result
        else:
            return classify_evidence_direction(title, abstract)
    except Exception as e:
        # Fall back to keyword classifier if API fails
        return classify_evidence_direction(title, abstract)


def classify_evidence_direction_smart(title, abstract="", ingredient="ingredient",
                                       use_claude=True):
    """
    Smart classifier: uses Claude when abstract is available,
    falls back to keyword matching otherwise.
    """
    if use_claude and abstract and len(abstract) > 50:
        return classify_direction_with_claude(title, abstract, ingredient)
    else:
        return classify_evidence_direction(title, abstract)


# Test on our known cases
print("=== CLAUDE CLASSIFICATION TEST ===")
print()

test_cases = [
    {
        "ingredient": "niacinamide",
        "title": "Niacinamide: a review on dermal delivery strategies and clinical evidence.",
        "abstract": "Niacinamide, an active form of vitamin B3, is recognised for its significant dermal benefits including skin brightening, anti-ageing properties and the protection of the skin barrier. Its widespread incorporation into cosmetic products is attributed to its safety profile and proven efficacy. Recently, topical niacinamide has also been explored for other pharmaceutical applications, including skin cancers."
    },
    {
        "ingredient": "parabens",
        "title": "Polycystic Ovary Syndrome and Endocrine Disruptors (Bisphenols, Parabens, and Triclosan)-A Systematic Review.",
        "abstract": "Exposure to endocrine disrupting chemicals (EDCs) can result in alterations of the female reproductive system, including polycystic ovary syndrome (PCOS). The aim of this review was to summarize the knowledge about the association of EDCs with PCOS. We evaluated the association of PCOS with bisphenols, parabens and triclosan and found significant associations."
    },
    {
        "ingredient": "niacinamide",
        "title": "Niacinamide enhances cathelicidin mediated SARS-CoV-2 membrane disruption.",
        "abstract": "Niacinamide was found to enhance the antimicrobial peptide cathelicidin's ability to disrupt the SARS-CoV-2 viral membrane, suggesting a potential therapeutic application against COVID-19."
    },
    {
        "ingredient": "parabens",
        "title": "Parabens disrupt non-canonical inflammasome activation.",
        "abstract": "Parabens are synthetic chemicals widely used as preservatives. Study of possible health hazards has been undertaken due to frequent exposure. We elucidated the effect of parabens on inflammasome induction of inflammatory responses in innate immunity."
    }
]

for tc in test_cases:
    keyword_result = classify_evidence_direction(tc["title"], tc["abstract"])
    claude_result = classify_direction_with_claude(tc["title"], tc["abstract"], tc["ingredient"])
    match = "✓" if keyword_result == claude_result else "✗ DIFFERS"
    print(f"Ingredient: {tc['ingredient']}")
    print(f"Title: {tc['title'][:60]}...")
    print(f"Keyword: {keyword_result}  |  Claude: {claude_result}  {match}")
    print()

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/455.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━[0m [32m307.2/455.2 kB[0m [31m9.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.2/455.2 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25h=== CLAUDE CLASSIFICATION TEST ===

Ingredient: niacinamide
Title: Niacinamide: a review on dermal delivery strategies and clin...
Keyword: SAFETY  |  Claude: SAFETY  ✓

Ingredient: parabens
Title: Polycystic Ovary Syndrome and Endocrine Disruptors (Bispheno...
Keyword: CONCERN  |  Claude: CONCERN  ✓

Ingredient: niacinamide
Title: Niacinamide enhances cathelicidin mediated SARS-CoV-2 membra...
Keyword: SAFETY  |  Claude: SAFETY  ✓

Ingredient: parabens
Title: Parabens disrupt non-canonical inflammasome activation....
Keyword: NEUTRAL  |  Claude: NEUTRAL  ✓



In [35]:
# NOURA - Cell 17: Full pipeline with Claude classification + interaction engine

def noura_scan_product_v3(product_name, raw_label, category="skincare",
                           max_ingredients=12, skip_water=True, use_claude=True):
    """
    V3 product scanner with Claude-powered classification and interaction detection.
    KB for known ingredients, Claude-classified live search for unknowns.
    """
    print(f"NOURA Product Scan: {product_name}")
    print(f"Classification: {'Claude API' if use_claude else 'Keyword matching'}")
    print("=" * 65)

    ingredients = parse_ingredient_list(raw_label)
    if skip_water:
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]

    print(f"Ingredients detected: {len(ingredients)}")
    print(f"Evaluating top {min(max_ingredients, len(ingredients))}...")
    print()

    ingredients_to_evaluate = ingredients[:max_ingredients]
    results = []
    kb_hits = 0
    live_hits = 0

    for i, ingredient in enumerate(ingredients_to_evaluate):
        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            kb_hits += 1
            results.append({
                "ingredient": ingredient,
                "health_score": kb_entry["health_score"],
                "verdict": kb_entry["verdict"],
                "confidence_score": kb_entry["confidence_score"],
                "confidence_label": kb_entry["confidence_label"],
                "source": "KB",
                "studies": kb_entry["studies_reviewed"],
                "flag": (" | ".join(kb_entry["concern_flags"])) if kb_entry["concern_flags"] else None
            })
        else:
            live_hits += 1
            print(f"  [{i+1}/{len(ingredients_to_evaluate)}] Live search: {ingredient}...")
            pubmed_results = search_pubmed_normalized(ingredient, max_results=30)

            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(study["title"], abstract)
                direction = classify_evidence_direction_smart(
                    study["title"], abstract, ingredient, use_claude=use_claude
                )
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:500]
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            conf = calculate_confidence(evaluated, direction_count,
                                        pubmed_results["studies_found"])

            results.append({
                "ingredient": ingredient,
                "health_score": score_result["score"],
                "verdict": score_result["verdict"],
                "confidence_score": conf["confidence_score"],
                "confidence_label": conf["confidence_label"],
                "source": "Live+Claude" if use_claude else "Live",
                "studies": pubmed_results["studies_found"],
                "flag": score_result["flag"]
            })
            time.sleep(1)

    # Display results table
    print()
    print("=" * 65)
    print(f"NOURA PRODUCT SCAN: {product_name.upper()}")
    print("=" * 65)
    print(f"{'Ingredient':<28} {'Score':>6} {'Verdict':<16} {'Confidence':<10} {'Src'}")
    print("-" * 65)

    results_sorted = sorted(results,
                            key=lambda x: (x["health_score"] or 0),
                            reverse=True)

    for r in results_sorted:
        score_display = f"{r['health_score']}" if r['health_score'] is not None else "N/A"
        flag_marker = " ⚠" if r["flag"] else ""
        print(f"{r['ingredient']:<28} {score_display:>6} {r['verdict']:<16} "
              f"{r['confidence_label']:<10} {r['source']}{flag_marker}")

    print("=" * 65)

    # Product summary
    scored = [r for r in results if r["health_score"] is not None]
    if scored:
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1)
        higher_risk_count = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        print()
        print(f"Product Average Score:    {avg_score}/100")
        print(f"Higher Risk Ingredients:  {higher_risk_count}")
        print(f"KB lookups (instant):     {kb_hits}")
        print(f"Live + Claude searches:   {live_hits}")

        if higher_risk_count == 0 and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or avg_score < 40:
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        print(f"Product Verdict:          {product_verdict}")

    # Ingredient flags
    print()
    flagged = [r for r in results if r["flag"]]
    if flagged:
        print("INGREDIENT FLAGS:")
        for r in flagged:
            print(f"  ⚠ {r['ingredient']}: {r['flag'][:120]}")

    # Interaction analysis
    all_ingredient_names = [r["ingredient"] for r in results]
    interactions = check_formula_interactions(all_ingredient_names)
    if interactions:
        # Deduplicate — remove exact duplicate descriptions
        seen_descriptions = set()
        unique_interactions = []
        for interaction in interactions:
            key = interaction["description"][:60]
            if key not in seen_descriptions:
                seen_descriptions.add(key)
                unique_interactions.append(interaction)

        print()
        display_interactions(unique_interactions, product_name)
    else:
        print()
        print("✓ No known ingredient interactions detected.")

    print("=" * 65)
    return results_sorted


# Test 1: Clean beauty serum
clean_label = """
Aqua, Aloe Barbadensis Leaf Juice, Glycerin, Niacinamide,
Sodium Hyaluronate, Ascorbic Acid, Tocopherol, Zinc Oxide,
Centella Asiatica Extract, Bakuchiol, Xanthan Gum,
Citric Acid, Sodium Benzoate
"""

noura_scan_product_v3("Clean Beauty Serum", clean_label, max_ingredients=12)

print()
print()

# Test 2: Concerning formula
concerning_label = """
Aqua, Glycerin, Retinol, Glycolic Acid, Ascorbic Acid,
Niacinamide, Sodium Benzoate, Parabens, Fragrance,
Salicylic Acid, Dimethicone, Xanthan Gum
"""

noura_scan_product_v3("Concerning Formula Test", concerning_label, max_ingredients=12)

NOURA Product Scan: Clean Beauty Serum
Classification: Claude API
Ingredients detected: 12
Evaluating top 12...

  [1/12] Live search: aloe barbadensis leaf juice...
  [8/12] Live search: centella asiatica extract...
  [9/12] Live search: bakuchiol...

NOURA PRODUCT SCAN: CLEAN BEAUTY SERUM
Ingredient                    Score Verdict          Confidence Src
-----------------------------------------------------------------
sodium hyaluronate               88 WELL SUPPORTED   VERY HIGH  KB
glycerin                         85 WELL SUPPORTED   VERY HIGH  KB
xanthan gum                      84 WELL SUPPORTED   VERY HIGH  KB
niacinamide                      82 WELL SUPPORTED   VERY HIGH  KB
citric acid                      82 WELL SUPPORTED   VERY HIGH  KB ⚠
zinc oxide                       79 WELL SUPPORTED   VERY HIGH  KB ⚠
tocopherol                       78 WELL SUPPORTED   VERY HIGH  KB ⚠
aloe barbadensis leaf juice    73.4 WELL SUPPORTED   MODERATE   Live+Claude
ascorbic acid          

[{'ingredient': 'glycerin',
  'health_score': 85,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 95,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 180,
  'flag': None},
 {'ingredient': 'xanthan gum',
  'health_score': 84,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 88,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 90,
  'flag': None},
 {'ingredient': 'niacinamide',
  'health_score': 82,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 96,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 200,
  'flag': None},
 {'ingredient': 'ascorbic acid',
  'health_score': 72,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 89,
  'confidence_label': 'VERY HIGH',
  'source': 'KB',
  'studies': 280,
  'flag': 'Stability concerns — degrades rapidly if poorly formulated'},
 {'ingredient': 'dimethicone',
  'health_score': 71,
  'verdict': 'WELL SUPPORTED',
  'confidence_score': 85,
  'confidence_label': 'VERY HIGH',
  'source

In [34]:
# NOURA - Cell 18: Formula interaction engine

KNOWN_INTERACTIONS = {

    # DANGEROUS combinations
    "sodium benzoate + ascorbic acid": {
        "ingredients": ["sodium benzoate", "ascorbic acid"],
        "severity": "HIGH",
        "interaction_type": "chemical reaction",
        "description": "Sodium benzoate reacts with ascorbic acid (vitamin C) in the presence of light and heat to form benzene, a known carcinogen.",
        "recommendation": "Avoid this combination. Use alternative preservatives or vitamin C derivatives that don't react."
    },

    "retinol + aha": {
        "ingredients": ["retinol", "glycolic acid", "lactic acid", "mandelic acid"],
        "severity": "MODERATE",
        "interaction_type": "pH conflict + irritation",
        "description": "AHAs work at low pH (3.0-3.5) which destabilizes retinol and increases skin irritation risk significantly.",
        "recommendation": "Use retinol and AHAs at separate times — AHAs in the morning, retinol at night. Never layer simultaneously."
    },

    "retinol + vitamin c": {
        "ingredients": ["retinol", "ascorbic acid"],
        "severity": "MODERATE",
        "interaction_type": "pH conflict",
        "description": "Vitamin C (ascorbic acid) requires acidic pH (below 3.5) while retinol is most stable at neutral pH. Combining destabilizes both actives.",
        "recommendation": "Use vitamin C in the morning routine, retinol at night."
    },

    "niacinamide + vitamin c": {
        "ingredients": ["niacinamide", "ascorbic acid"],
        "severity": "LOW",
        "interaction_type": "efficacy reduction",
        "description": "High concentrations of niacinamide and ascorbic acid can form niacin which may cause temporary skin flushing. Modern formulations at typical concentrations show minimal interaction.",
        "recommendation": "Use at typical cosmetic concentrations (under 10% each). Minimal concern in well-formulated products."
    },

    "benzoyl peroxide + retinol": {
        "ingredients": ["benzoyl peroxide", "retinol"],
        "severity": "HIGH",
        "interaction_type": "oxidation",
        "description": "Benzoyl peroxide oxidizes and degrades retinol, rendering both ineffective and potentially generating irritating byproducts.",
        "recommendation": "Never combine. Use in completely separate routines."
    },

    "aha + bha simultaneous": {
        "ingredients": ["glycolic acid", "salicylic acid"],
        "severity": "MODERATE",
        "interaction_type": "over-exfoliation",
        "description": "Combining AHA and BHA acids simultaneously increases risk of over-exfoliation, barrier damage, and sensitization.",
        "recommendation": "Alternate use — AHAs and BHAs on different days, or use pre-formulated combinations at lower concentrations."
    },

    "vitamin c + niacinamide high dose": {
        "ingredients": ["ascorbic acid", "niacinamide"],
        "severity": "LOW",
        "interaction_type": "potential flushing",
        "description": "At high concentrations both can form niacin causing temporary flushing. At standard cosmetic concentrations the risk is minimal.",
        "recommendation": "Keep both under 10% concentration. Minimal concern in typical formulations."
    },

    "copper peptides + vitamin c": {
        "ingredients": ["copper peptides", "ascorbic acid"],
        "severity": "MODERATE",
        "interaction_type": "oxidation",
        "description": "Vitamin C can oxidize copper peptides, reducing efficacy of both. Copper can also pro-oxidize vitamin C.",
        "recommendation": "Use in separate routines — copper peptides at night, vitamin C in the morning."
    },

    "parabens + fragrance": {
        "ingredients": ["parabens", "fragrance"],
        "severity": "HIGH",
        "interaction_type": "cumulative endocrine risk",
        "description": "Both parabens and certain fragrance components are known endocrine disruptors. Combined exposure increases total endocrine disruption burden.",
        "recommendation": "Avoid products containing both. Seek paraben-free, fragrance-free alternatives."
    },

    "triclosan + alcohol": {
        "ingredients": ["triclosan", "alcohol denat"],
        "severity": "MODERATE",
        "interaction_type": "absorption enhancement",
        "description": "Alcohol enhances skin penetration of triclosan, increasing systemic exposure to this endocrine disruptor.",
        "recommendation": "Avoid triclosan-containing products entirely. Use safer antimicrobial alternatives."
    },
}


def check_formula_interactions(ingredients):
    """
    Checks a list of ingredients for known dangerous interactions.
    Returns list of interactions found, sorted by severity.
    """
    ingredients_lower = [i.lower().strip() for i in ingredients]

    # Also expand via normalizer — catch vitamin c = ascorbic acid etc
    expanded = set(ingredients_lower)
    for ing in ingredients_lower:
        search_terms = normalize_ingredient(ing)
        expanded.update(search_terms)

    found_interactions = []

    for interaction_key, interaction_data in KNOWN_INTERACTIONS.items():
        involved = interaction_data["ingredients"]

        # Check how many of the interaction ingredients are present
        matches = []
        for involved_ing in involved:
            for formula_ing in expanded:
                if involved_ing in formula_ing or formula_ing in involved_ing:
                    matches.append(involved_ing)
                    break

        # Need at least 2 ingredients to have an interaction
        if len(matches) >= 2:
            found_interactions.append({
                "interaction": interaction_key,
                "severity": interaction_data["severity"],
                "type": interaction_data["interaction_type"],
                "description": interaction_data["description"],
                "recommendation": interaction_data["recommendation"],
                "matched_ingredients": matches
            })

    # Sort by severity
    severity_order = {"HIGH": 0, "MODERATE": 1, "LOW": 2}
    found_interactions.sort(key=lambda x: severity_order.get(x["severity"], 3))

    return found_interactions


def display_interactions(interactions, product_name=""):
    """Display interaction report."""
    if not interactions:
        print("✓ No known ingredient interactions detected.")
        return

    high = [i for i in interactions if i["severity"] == "HIGH"]
    moderate = [i for i in interactions if i["severity"] == "MODERATE"]
    low = [i for i in interactions if i["severity"] == "LOW"]

    print(f"NOURA INTERACTION ANALYSIS{': ' + product_name if product_name else ''}")
    print("=" * 65)
    print(f"Interactions found: {len(interactions)} "
          f"({len(high)} high | {len(moderate)} moderate | {len(low)} low)")
    print()

    for interaction in interactions:
        severity = interaction["severity"]
        marker = "🔴" if severity == "HIGH" else "🟡" if severity == "MODERATE" else "🟢"

        print(f"{marker} [{severity}] {interaction['type'].upper()}")
        print(f"   Ingredients: {' + '.join(interaction['matched_ingredients'])}")
        print(f"   {interaction['description']}")
        print(f"   → {interaction['recommendation']}")
        print()

    print("=" * 65)


# Test on our clean beauty serum
print("Testing interaction engine on Clean Beauty Serum...")
print()

clean_serum_ingredients = [
    "aloe barbadensis leaf juice", "glycerin", "niacinamide",
    "sodium hyaluronate", "ascorbic acid", "tocopherol", "zinc oxide",
    "centella asiatica extract", "bakuchiol", "xanthan gum",
    "citric acid", "sodium benzoate"
]

interactions = check_formula_interactions(clean_serum_ingredients)
display_interactions(interactions, "Clean Beauty Serum")

print()
print("Testing on concerning formula...")
print()

concerning_ingredients = [
    "aqua", "glycerin", "retinol", "glycolic acid",
    "ascorbic acid", "niacinamide", "sodium benzoate",
    "parabens", "fragrance", "salicylic acid"
]

interactions2 = check_formula_interactions(concerning_ingredients)
display_interactions(interactions2, "Concerning Formula")

Testing interaction engine on Clean Beauty Serum...

NOURA INTERACTION ANALYSIS: Clean Beauty Serum
Interactions found: 3 (1 high | 0 moderate | 2 low)

🔴 [HIGH] CHEMICAL REACTION
   Ingredients: sodium benzoate + ascorbic acid
   Sodium benzoate reacts with ascorbic acid (vitamin C) in the presence of light and heat to form benzene, a known carcinogen.
   → Avoid this combination. Use alternative preservatives or vitamin C derivatives that don't react.

🟢 [LOW] EFFICACY REDUCTION
   Ingredients: niacinamide + ascorbic acid
   High concentrations of niacinamide and ascorbic acid can form niacin which may cause temporary skin flushing. Modern formulations at typical concentrations show minimal interaction.
   → Use at typical cosmetic concentrations (under 10% each). Minimal concern in well-formulated products.

🟢 [LOW] POTENTIAL FLUSHING
   Ingredients: ascorbic acid + niacinamide
   At high concentrations both can form niacin causing temporary flushing. At standard cosmetic concentrat

In [36]:
# NOURA - Cell 19: API wrapper — clean callable interface
# This is the enterprise-ready packaging layer
# Version 1.0

import json
from datetime import datetime

def noura_api(request_type, **kwargs):
    """
    NOURA Public API — single entry point for all evaluations.

    Request types:
    - "ingredient"  : evaluate a single ingredient
    - "batch"       : evaluate multiple ingredients
    - "product"     : scan a full product label
    - "compare"     : compare two ingredients head to head
    - "interaction" : check interactions between ingredients

    Returns: structured JSON response
    """

    timestamp = datetime.now().isoformat()

    # ================================================================
    # INGREDIENT EVALUATION
    # ================================================================
    if request_type == "ingredient":
        ingredient = kwargs.get("ingredient", "")
        category = kwargs.get("category", "skincare")
        use_claude = kwargs.get("use_claude", True)

        if not ingredient:
            return {"error": "ingredient parameter required", "status": 400}

        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            return {
                "status": 200,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": kb_entry["inci_name"],
                "health_score": kb_entry["health_score"],
                "verdict": kb_entry["verdict"],
                "confidence_score": kb_entry["confidence_score"],
                "confidence_label": kb_entry["confidence_label"],
                "evidence_source": "NOURA Knowledge Base",
                "studies_reviewed": kb_entry["studies_reviewed"],
                "evidence_summary": kb_entry["evidence_summary"],
                "key_evidence": kb_entry["key_evidence"],
                "concern_flags": kb_entry["concern_flags"],
                "safety_notes": kb_entry["safety_notes"],
                "regulatory_status": kb_entry["regulatory_status"],
                "last_updated": kb_entry["last_updated"]
            }
        else:
            # Live search
            pubmed_results = search_pubmed_normalized(ingredient, max_results=50)
            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(study["title"], abstract)
                direction = classify_evidence_direction_smart(
                    study["title"], abstract, ingredient, use_claude=use_claude
                )
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:300]
                ev["pubmed_url"] = study.get("pubmed_url", "")
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            confidence = calculate_confidence(
                evaluated, direction_count, pubmed_results["studies_found"]
            )

            return {
                "status": 200,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": ingredient.title(),
                "health_score": score_result["score"],
                "verdict": score_result["verdict"],
                "confidence_score": confidence["confidence_score"],
                "confidence_label": confidence["confidence_label"],
                "evidence_source": "PubMed Live Search",
                "studies_retrieved": pubmed_results["studies_found"],
                "evidence_direction": direction_count,
                "concern_flags": [score_result["flag"]] if score_result["flag"] else [],
                "safety_notes": score_result["flag"] or "No specific concerns identified in retrieved literature",
                "top_studies": [
                    {
                        "title": e["study_title"],
                        "year": e["year"],
                        "direction": e["direction"],
                        "evidence_type": e["source_type"],
                        "weight": e["weight"],
                        "sample_size": e["sample_size"],
                        "url": e["pubmed_url"]
                    }
                    for e in evaluated[:5]
                ]
            }

    # ================================================================
    # BATCH EVALUATION
    # ================================================================
    elif request_type == "batch":
        ingredients = kwargs.get("ingredients", [])
        category = kwargs.get("category", "skincare")

        if not ingredients:
            return {"error": "ingredients list required", "status": 400}

        results = []
        for ingredient in ingredients:
            result = noura_api("ingredient", ingredient=ingredient,
                               category=category, use_claude=False)
            results.append({
                "ingredient": ingredient,
                "health_score": result.get("health_score"),
                "verdict": result.get("verdict"),
                "confidence_label": result.get("confidence_label"),
                "concern_flags": result.get("concern_flags", [])
            })
            time.sleep(1)

        results_sorted = sorted(results,
                                key=lambda x: (x["health_score"] or 0),
                                reverse=True)

        scored = [r for r in results if r["health_score"] is not None]
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1) if scored else None
        higher_risk = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        return {
            "status": 200,
            "request_type": "batch",
            "timestamp": timestamp,
            "ingredients_evaluated": len(results),
            "average_score": avg_score,
            "higher_risk_count": higher_risk,
            "results": results_sorted
        }

    # ================================================================
    # PRODUCT SCAN
    # ================================================================
    elif request_type == "product":
        product_name = kwargs.get("product_name", "Unknown Product")
        raw_label = kwargs.get("raw_label", "")
        max_ingredients = kwargs.get("max_ingredients", 15)

        if not raw_label:
            return {"error": "raw_label parameter required", "status": 400}

        ingredients = parse_ingredient_list(raw_label)
        ingredients = [i for i in ingredients if i not in ["water", "aqua"]]
        ingredients_to_evaluate = ingredients[:max_ingredients]

        results = []
        for ingredient in ingredients_to_evaluate:
            result = noura_api("ingredient", ingredient=ingredient, use_claude=False)
            results.append({
                "ingredient": ingredient,
                "health_score": result.get("health_score"),
                "verdict": result.get("verdict"),
                "confidence_score": result.get("confidence_score"),
                "confidence_label": result.get("confidence_label"),
                "evidence_source": result.get("evidence_source"),
                "concern_flags": result.get("concern_flags", [])
            })
            time.sleep(0.5)

        results_sorted = sorted(results,
                                key=lambda x: (x["health_score"] or 0),
                                reverse=True)

        scored = [r for r in results if r["health_score"] is not None]
        avg_score = round(sum(r["health_score"] for r in scored) / len(scored), 1) if scored else None
        higher_risk = len([r for r in results if r["verdict"] == "HIGHER RISK"])

        if higher_risk == 0 and avg_score and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk >= 2 or (avg_score and avg_score < 40):
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        interactions = check_formula_interactions([r["ingredient"] for r in results])
        seen = set()
        unique_interactions = []
        for i in interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique_interactions.append(i)

        return {
            "status": 200,
            "request_type": "product",
            "timestamp": timestamp,
            "product_name": product_name,
            "ingredients_detected": len(ingredients),
            "ingredients_evaluated": len(results),
            "average_score": avg_score,
            "higher_risk_count": higher_risk,
            "product_verdict": product_verdict,
            "ingredients": results_sorted,
            "interactions": [
                {
                    "severity": i["severity"],
                    "type": i["type"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                }
                for i in unique_interactions
            ]
        }

    # ================================================================
    # COMPARE TWO INGREDIENTS
    # ================================================================
    elif request_type == "compare":
        ingredient_a = kwargs.get("ingredient_a", "")
        ingredient_b = kwargs.get("ingredient_b", "")

        if not ingredient_a or not ingredient_b:
            return {"error": "ingredient_a and ingredient_b required", "status": 400}

        result_a = noura_api("ingredient", ingredient=ingredient_a, use_claude=False)
        time.sleep(1)
        result_b = noura_api("ingredient", ingredient=ingredient_b, use_claude=False)

        winner = ingredient_a if (result_a.get("health_score") or 0) >= (result_b.get("health_score") or 0) else ingredient_b

        return {
            "status": 200,
            "request_type": "compare",
            "timestamp": timestamp,
            "ingredient_a": {
                "name": ingredient_a,
                "health_score": result_a.get("health_score"),
                "verdict": result_a.get("verdict"),
                "confidence_label": result_a.get("confidence_label")
            },
            "ingredient_b": {
                "name": ingredient_b,
                "health_score": result_b.get("health_score"),
                "verdict": result_b.get("verdict"),
                "confidence_label": result_b.get("confidence_label")
            },
            "safer_ingredient": winner
        }

    # ================================================================
    # INTERACTION CHECK
    # ================================================================
    elif request_type == "interaction":
        ingredients = kwargs.get("ingredients", [])

        if len(ingredients) < 2:
            return {"error": "at least 2 ingredients required", "status": 400}

        interactions = check_formula_interactions(ingredients)
        seen = set()
        unique = []
        for i in interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique.append(i)

        return {
            "status": 200,
            "request_type": "interaction",
            "timestamp": timestamp,
            "ingredients_checked": ingredients,
            "interactions_found": len(unique),
            "interactions": [
                {
                    "severity": i["severity"],
                    "type": i["type"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                }
                for i in unique
            ]
        }

    else:
        return {"error": f"Unknown request_type: {request_type}", "status": 400}


# ================================================================
# API TEST SUITE
# ================================================================
print("=== NOURA API v1.0 TEST SUITE ===")
print()

# Test 1: Single ingredient
print("TEST 1: Single ingredient lookup")
result = noura_api("ingredient", ingredient="niacinamide")
print(json.dumps({
    "ingredient": result["ingredient"],
    "health_score": result["health_score"],
    "verdict": result["verdict"],
    "confidence_label": result["confidence_label"],
    "evidence_source": result["evidence_source"]
}, indent=2))
print()

# Test 2: Compare
print("TEST 2: Compare two preservatives")
result = noura_api("compare", ingredient_a="parabens", ingredient_b="phenoxyethanol")
print(json.dumps(result, indent=2))
print()

# Test 3: Interaction check
print("TEST 3: Interaction check")
result = noura_api("interaction",
                   ingredients=["retinol", "ascorbic acid", "sodium benzoate", "glycolic acid"])
print(json.dumps(result, indent=2))
print()

# Test 4: Product scan
print("TEST 4: Product scan")
result = noura_api("product",
                   product_name="Test Moisturizer",
                   raw_label="Aqua, Glycerin, Niacinamide, Parabens, Fragrance, Hyaluronic Acid",
                   max_ingredients=6)
print(json.dumps({
    "product_name": result["product_name"],
    "average_score": result["average_score"],
    "product_verdict": result["product_verdict"],
    "higher_risk_count": result["higher_risk_count"],
    "interactions_found": len(result["interactions"])
}, indent=2))

=== NOURA API v1.0 TEST SUITE ===

TEST 1: Single ingredient lookup
{
  "ingredient": "niacinamide",
  "health_score": 82,
  "verdict": "WELL SUPPORTED",
  "confidence_label": "VERY HIGH",
  "evidence_source": "NOURA Knowledge Base"
}

TEST 2: Compare two preservatives
{
  "status": 200,
  "request_type": "compare",
  "timestamp": "2026-02-27T11:12:41.765445",
  "ingredient_a": {
    "name": "parabens",
    "health_score": 18,
    "verdict": "HIGHER RISK",
    "confidence_label": "VERY HIGH"
  },
  "ingredient_b": {
    "name": "phenoxyethanol",
    "health_score": 52.2,
    "verdict": "LIMITED SUPPORT",
    "confidence_label": "HIGH"
  },
  "safer_ingredient": "phenoxyethanol"
}

TEST 3: Interaction check
{
  "status": 200,
  "request_type": "interaction",
  "timestamp": "2026-02-27T11:12:46.746068",
  "ingredients_checked": [
    "retinol",
    "ascorbic acid",
    "sodium benzoate",
    "glycolic acid"
  ],
  "interactions_found": 3,
  "interactions": [
    {
      "severity": "HIGH

In [37]:
# NOURA - Cell 20: API v2.0 — depth parameter, Public Extract, Full Report
# One API. One endpoint. depth="extract" or depth="full"

import json
from datetime import datetime

# ================================================================
# PUBLIC EXTRACT FORMATTER
# Takes a full evaluation result and distills it to
# the 5 consumer-facing fields
# ================================================================

def format_public_extract(product_name, avg_score, higher_risk_count,
                           product_verdict, ingredients, interactions):
    """
    Distills a full NOURA evaluation into a consumer-facing extract.
    Designed for product pages, spa menus, shelf tags, chatbot responses.
    """

    # Color signal
    if avg_score is None:
        color_signal = "⚪"
        color_label = "INSUFFICIENT DATA"
    elif avg_score >= 70 and higher_risk_count == 0:
        color_signal = "🟢"
        color_label = "CLEAN"
    elif avg_score >= 50 and higher_risk_count <= 1:
        color_signal = "🟡"
        color_label = "MODERATE CONCERN"
    else:
        color_signal = "🔴"
        color_label = "HIGHER RISK"

    # One-line verdict in plain language
    plain_verdicts = {
        "CLEAN FORMULATION": "Well-formulated product with strong safety evidence.",
        "MIXED FORMULATION": "Generally acceptable, some ingredients warrant attention.",
        "FORMULATION CONCERNS": "Contains ingredients with documented safety concerns."
    }
    plain_verdict = plain_verdicts.get(product_verdict, "Evaluation complete.")

    # Top 2-3 flags in plain language (prioritise HIGH interactions + flagged ingredients)
    plain_flags = []

    # HIGH interactions first
    high_interactions = [i for i in interactions if i.get("severity") == "HIGH"]
    for interaction in high_interactions[:2]:
        ingredients_involved = " + ".join(interaction.get("matched_ingredients", []))
        plain_flags.append(f"⚠ {ingredients_involved}: {interaction['description'][:80]}")

    # Then flagged ingredients
    if len(plain_flags) < 3:
        flagged_ingredients = [r for r in ingredients if r.get("flag") and r.get("verdict") in ["HIGHER RISK", "LIMITED SUPPORT"]]
        flagged_sorted = sorted(flagged_ingredients, key=lambda x: x.get("health_score") or 100)
        for r in flagged_sorted:
            if len(plain_flags) >= 3:
                break
            first_flag = r["flag"].split("|")[0].strip()[:80]
            plain_flags.append(f"⚠ {r['ingredient'].title()}: {first_flag}")

    # Best alternative signal (only if score < 70)
    alternative_signal = None
    if avg_score and avg_score < 70:
        worst = sorted([r for r in ingredients if r.get("health_score")],
                      key=lambda x: x["health_score"])[0]
        if worst["verdict"] == "HIGHER RISK":
            alternative_signal = f"Cleaner alternatives available — ask for options without {worst['ingredient'].title()}."

    return {
        "product_name": product_name,
        "score": avg_score,
        "color_signal": color_signal,
        "color_label": color_label,
        "plain_verdict": plain_verdict,
        "flags": plain_flags,
        "alternative_signal": alternative_signal,
        "higher_risk_count": higher_risk_count
    }


def display_extract(extract):
    """Print the public extract in consumer-friendly format."""
    print("=" * 55)
    print(f"  NOURA PRODUCT EXTRACT")
    print("=" * 55)
    print(f"  {extract['product_name']}")
    print()
    print(f"  {extract['color_signal']}  {extract['score']}/100  —  {extract['color_label']}")
    print()
    print(f"  {extract['plain_verdict']}")
    print()
    if extract['flags']:
        for flag in extract['flags']:
            print(f"  {flag[:75]}")
        print()
    if extract['alternative_signal']:
        print(f"  💡 {extract['alternative_signal']}")
        print()
    print("  [ Extended Analysis available on request ]")
    print("=" * 55)


# ================================================================
# NOURA API v2.0
# Single entry point. One parameter controls output depth.
# depth="extract"  → public-facing consumer layer (fast)
# depth="full"     → complete enterprise evaluation (comprehensive)
# ================================================================

def noura_api_v2(request_type, depth="extract", **kwargs):
    """
    NOURA API v2.0 — one endpoint, one depth parameter.

    depth="extract"  : Public Extract — score, color, plain flags,
                       alternative signal. Designed for product pages,
                       spa menus, chatbots, shelf tags.

    depth="full"     : Full Report — complete ingredient breakdown,
                       interactions, regulatory status, study citations,
                       confidence scores, reformulation notes.
                       Designed for compliance teams, R&D, brand audits.

    Request types:
    - "product"     : scan a full product label
    - "ingredient"  : evaluate a single ingredient
    - "batch"       : evaluate multiple ingredients
    - "compare"     : compare two ingredients
    - "interaction" : check formula interactions
    """

    timestamp = datetime.now().isoformat()

    # ================================================================
    # PRODUCT SCAN — primary enterprise endpoint
    # ================================================================
    if request_type == "product":
        product_name = kwargs.get("product_name", "Unknown Product")
        raw_label = kwargs.get("raw_label", "")
        max_ingredients = kwargs.get("max_ingredients", 15)
        use_claude = kwargs.get("use_claude", True)

        if not raw_label:
            return {"error": "raw_label required", "status": 400}

        # Always run the full engine internally
        ingredients_parsed = parse_ingredient_list(raw_label)
        ingredients_parsed = [i for i in ingredients_parsed
                              if i not in ["water", "aqua"]]
        ingredients_to_evaluate = ingredients_parsed[:max_ingredients]

        results = []
        kb_hits = 0
        live_hits = 0

        for ingredient in ingredients_to_evaluate:
            kb_entry = kb_lookup(ingredient)

            if kb_entry:
                kb_hits += 1
                results.append({
                    "ingredient": ingredient,
                    "inci_name": kb_entry["inci_name"],
                    "health_score": kb_entry["health_score"],
                    "verdict": kb_entry["verdict"],
                    "confidence_score": kb_entry["confidence_score"],
                    "confidence_label": kb_entry["confidence_label"],
                    "evidence_source": "NOURA Knowledge Base",
                    "studies_reviewed": kb_entry["studies_reviewed"],
                    "evidence_summary": kb_entry.get("evidence_summary", ""),
                    "key_evidence": kb_entry.get("key_evidence", []),
                    "flag": (" | ".join(kb_entry["concern_flags"]))
                             if kb_entry["concern_flags"] else None,
                    "safety_notes": kb_entry.get("safety_notes", ""),
                    "regulatory_status": kb_entry.get("regulatory_status", {}),
                    "concern_flags": kb_entry["concern_flags"]
                })
            else:
                live_hits += 1
                pubmed_results = search_pubmed_normalized(
                    ingredient, max_results=30
                )
                evaluated = []
                direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

                for study in pubmed_results.get("studies", []):
                    abstract = study.get("abstract", "")
                    source_type = classify_evidence_type(
                        study["title"], abstract
                    )
                    direction = classify_evidence_direction_smart(
                        study["title"], abstract, ingredient,
                        use_claude=use_claude
                    )
                    ev = evaluate_evidence(source_type)
                    ev["study_title"] = study["title"][:80]
                    ev["year"] = study["year"]
                    ev["direction"] = direction
                    ev["sample_size"] = study.get("sample_size", None)
                    ev["abstract"] = abstract[:300]
                    ev["pubmed_url"] = study.get("pubmed_url", "")
                    evaluated.append(ev)
                    direction_count[direction] += 1

                score_result = calculate_direction_aware_score(evaluated)
                conf = calculate_confidence(
                    evaluated, direction_count,
                    pubmed_results["studies_found"]
                )

                results.append({
                    "ingredient": ingredient,
                    "inci_name": ingredient.title(),
                    "health_score": score_result["score"],
                    "verdict": score_result["verdict"],
                    "confidence_score": conf["confidence_score"],
                    "confidence_label": conf["confidence_label"],
                    "evidence_source": "PubMed Live Search",
                    "studies_reviewed": pubmed_results["studies_found"],
                    "evidence_summary": "",
                    "key_evidence": [
                        {
                            "title": e["study_title"],
                            "year": e["year"],
                            "direction": e["direction"],
                            "evidence_type": e["source_type"],
                            "url": e["pubmed_url"]
                        }
                        for e in evaluated[:3]
                    ],
                    "flag": score_result["flag"],
                    "safety_notes": score_result["flag"] or "",
                    "regulatory_status": {},
                    "concern_flags": [score_result["flag"]]
                                     if score_result["flag"] else []
                })
                time.sleep(1)

        # Sort by score
        results_sorted = sorted(
            results, key=lambda x: (x["health_score"] or 0), reverse=True
        )

        # Calculate product-level metrics
        scored = [r for r in results if r["health_score"] is not None]
        avg_score = round(
            sum(r["health_score"] for r in scored) / len(scored), 1
        ) if scored else None
        higher_risk_count = len(
            [r for r in results if r["verdict"] == "HIGHER RISK"]
        )

        if higher_risk_count == 0 and avg_score and avg_score >= 70:
            product_verdict = "CLEAN FORMULATION"
        elif higher_risk_count >= 2 or (avg_score and avg_score < 40):
            product_verdict = "FORMULATION CONCERNS"
        else:
            product_verdict = "MIXED FORMULATION"

        # Interactions
        all_names = [r["ingredient"] for r in results]
        raw_interactions = check_formula_interactions(all_names)
        seen = set()
        unique_interactions = []
        for i in raw_interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique_interactions.append({
                    "severity": i["severity"],
                    "type": i["type"],
                    "matched_ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                })

        # Build the Public Extract — always generated
        extract = format_public_extract(
            product_name, avg_score, higher_risk_count,
            product_verdict, results_sorted, unique_interactions
        )

        # ── EXTRACT response ──────────────────────────────────────
        if depth == "extract":
            return {
                "status": 200,
                "api_version": "2.0",
                "depth": "extract",
                "request_type": "product",
                "timestamp": timestamp,
                "extract": extract
            }

        # ── FULL response ─────────────────────────────────────────
        else:
            return {
                "status": 200,
                "api_version": "2.0",
                "depth": "full",
                "request_type": "product",
                "timestamp": timestamp,

                # Public extract always included in full response
                "extract": extract,

                # Full enterprise data
                "product_name": product_name,
                "ingredients_detected": len(ingredients_parsed),
                "ingredients_evaluated": len(results),
                "average_score": avg_score,
                "higher_risk_count": higher_risk_count,
                "product_verdict": product_verdict,
                "kb_lookups": kb_hits,
                "live_searches": live_hits,

                # Full ingredient breakdown
                "ingredients": results_sorted,

                # Interaction analysis
                "interactions": unique_interactions,
                "interaction_summary": {
                    "total": len(unique_interactions),
                    "high": len([i for i in unique_interactions
                                 if i["severity"] == "HIGH"]),
                    "moderate": len([i for i in unique_interactions
                                    if i["severity"] == "MODERATE"]),
                    "low": len([i for i in unique_interactions
                               if i["severity"] == "LOW"])
                },

                # Reformulation notes (for brands)
                "reformulation_notes": [
                    {
                        "ingredient": r["ingredient"],
                        "issue": r["flag"],
                        "suggestion": f"Consider replacing {r['ingredient']} — "
                                      f"scored {r['health_score']}/100 "
                                      f"({r['verdict']})"
                    }
                    for r in results_sorted
                    if r.get("flag") and r["verdict"] in
                    ["HIGHER RISK", "LIMITED SUPPORT"]
                ]
            }

    # ================================================================
    # INGREDIENT — single ingredient, depth-aware
    # ================================================================
    elif request_type == "ingredient":
        ingredient = kwargs.get("ingredient", "")
        use_claude = kwargs.get("use_claude", True)

        if not ingredient:
            return {"error": "ingredient required", "status": 400}

        kb_entry = kb_lookup(ingredient)

        if kb_entry:
            full_data = {
                "status": 200,
                "api_version": "2.0",
                "depth": depth,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": kb_entry["inci_name"],
                "health_score": kb_entry["health_score"],
                "verdict": kb_entry["verdict"],
                "confidence_score": kb_entry["confidence_score"],
                "confidence_label": kb_entry["confidence_label"],
                "evidence_source": "NOURA Knowledge Base",
                "studies_reviewed": kb_entry["studies_reviewed"],
                "evidence_summary": kb_entry.get("evidence_summary", ""),
                "key_evidence": kb_entry.get("key_evidence", []),
                "concern_flags": kb_entry["concern_flags"],
                "safety_notes": kb_entry.get("safety_notes", ""),
                "regulatory_status": kb_entry.get("regulatory_status", {})
            }
        else:
            pubmed_results = search_pubmed_normalized(
                ingredient, max_results=50
            )
            evaluated = []
            direction_count = {"SAFETY": 0, "CONCERN": 0, "NEUTRAL": 0}

            for study in pubmed_results.get("studies", []):
                abstract = study.get("abstract", "")
                source_type = classify_evidence_type(
                    study["title"], abstract
                )
                direction = classify_evidence_direction_smart(
                    study["title"], abstract, ingredient,
                    use_claude=use_claude
                )
                ev = evaluate_evidence(source_type)
                ev["study_title"] = study["title"][:80]
                ev["year"] = study["year"]
                ev["direction"] = direction
                ev["sample_size"] = study.get("sample_size", None)
                ev["abstract"] = abstract[:300]
                ev["pubmed_url"] = study.get("pubmed_url", "")
                evaluated.append(ev)
                direction_count[direction] += 1

            score_result = calculate_direction_aware_score(evaluated)
            conf = calculate_confidence(
                evaluated, direction_count,
                pubmed_results["studies_found"]
            )

            full_data = {
                "status": 200,
                "api_version": "2.0",
                "depth": depth,
                "request_type": "ingredient",
                "timestamp": timestamp,
                "ingredient": ingredient,
                "inci_name": ingredient.title(),
                "health_score": score_result["score"],
                "verdict": score_result["verdict"],
                "confidence_score": conf["confidence_score"],
                "confidence_label": conf["confidence_label"],
                "evidence_source": "PubMed Live Search",
                "studies_reviewed": pubmed_results["studies_found"],
                "evidence_summary": "",
                "key_evidence": [
                    {
                        "title": e["study_title"],
                        "year": e["year"],
                        "direction": e["direction"],
                        "url": e["pubmed_url"]
                    }
                    for e in evaluated[:5]
                ],
                "concern_flags": [score_result["flag"]]
                                  if score_result["flag"] else [],
                "safety_notes": score_result["flag"] or "",
                "regulatory_status": {}
            }

        if depth == "extract":
            score = full_data["health_score"]
            flags = full_data["concern_flags"][:2]
            if score is None:
                color_signal, color_label = "⚪", "INSUFFICIENT DATA"
            elif score >= 70:
                color_signal, color_label = "🟢", "WELL SUPPORTED"
            elif score >= 45:
                color_signal, color_label = "🟡", "USE WITH CAUTION"
            else:
                color_signal, color_label = "🔴", "HIGHER RISK"

            return {
                "status": 200,
                "api_version": "2.0",
                "depth": "extract",
                "request_type": "ingredient",
                "timestamp": timestamp,
                "extract": {
                    "ingredient": ingredient,
                    "score": score,
                    "color_signal": color_signal,
                    "color_label": color_label,
                    "plain_flags": [f[:80] for f in flags],
                    "studies_reviewed": full_data["studies_reviewed"]
                }
            }
        else:
            return full_data

    # ================================================================
    # COMPARE — depth-aware
    # ================================================================
    elif request_type == "compare":
        ingredient_a = kwargs.get("ingredient_a", "")
        ingredient_b = kwargs.get("ingredient_b", "")

        result_a = noura_api_v2("ingredient", depth="full",
                                ingredient=ingredient_a, use_claude=False)
        time.sleep(1)
        result_b = noura_api_v2("ingredient", depth="full",
                                ingredient=ingredient_b, use_claude=False)

        winner = ingredient_a if (result_a.get("health_score") or 0) >= \
                                 (result_b.get("health_score") or 0) \
                 else ingredient_b

        compare_data = {
            "status": 200,
            "api_version": "2.0",
            "depth": depth,
            "request_type": "compare",
            "timestamp": timestamp,
            "safer_ingredient": winner,
            "ingredient_a": {
                "name": ingredient_a,
                "score": result_a.get("health_score"),
                "verdict": result_a.get("verdict"),
                "confidence_label": result_a.get("confidence_label"),
                "top_flag": result_a.get("concern_flags", [None])[0]
            },
            "ingredient_b": {
                "name": ingredient_b,
                "score": result_b.get("health_score"),
                "verdict": result_b.get("verdict"),
                "confidence_label": result_b.get("confidence_label"),
                "top_flag": result_b.get("concern_flags", [None])[0]
            }
        }

        if depth == "full":
            compare_data["ingredient_a"]["full_evaluation"] = result_a
            compare_data["ingredient_b"]["full_evaluation"] = result_b

        return compare_data

    # ================================================================
    # INTERACTION — depth-aware
    # ================================================================
    elif request_type == "interaction":
        ingredients = kwargs.get("ingredients", [])

        if len(ingredients) < 2:
            return {"error": "at least 2 ingredients required", "status": 400}

        interactions = check_formula_interactions(ingredients)
        seen = set()
        unique = []
        for i in interactions:
            key = i["description"][:60]
            if key not in seen:
                seen.add(key)
                unique.append(i)

        high = [i for i in unique if i["severity"] == "HIGH"]

        base = {
            "status": 200,
            "api_version": "2.0",
            "depth": depth,
            "request_type": "interaction",
            "timestamp": timestamp,
            "interactions_found": len(unique),
            "high_severity_count": len(high)
        }

        if depth == "extract":
            base["critical_flags"] = [
                {
                    "severity": i["severity"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"][:80]
                }
                for i in unique if i["severity"] == "HIGH"
            ]
        else:
            base["interactions"] = [
                {
                    "severity": i["severity"],
                    "type": i["type"],
                    "ingredients": i["matched_ingredients"],
                    "description": i["description"],
                    "recommendation": i["recommendation"]
                }
                for i in unique
            ]

        return base

    else:
        return {"error": f"Unknown request_type: {request_type}", "status": 400}


# ================================================================
# TEST SUITE — v2.0
# ================================================================
print("=" * 55)
print("  NOURA API v2.0 TEST SUITE")
print("=" * 55)

# ── TEST 1: Product — EXTRACT (consumer-facing) ──────────────
print("\nTEST 1A: Product — depth='extract' (consumer-facing)")
result = noura_api_v2("product",
    depth="extract",
    product_name="Clean Beauty Serum",
    raw_label="Aqua, Glycerin, Niacinamide, Sodium Hyaluronate, "
              "Ascorbic Acid, Tocopherol, Xanthan Gum, "
              "Citric Acid, Sodium Benzoate",
    max_ingredients=9,
    use_claude=False
)
display_extract(result["extract"])
print()

# ── TEST 1B: Product — FULL (enterprise back-office) ─────────
print("\nTEST 1B: Product — depth='full' (enterprise back-office)")
result_full = noura_api_v2("product",
    depth="full",
    product_name="Clean Beauty Serum",
    raw_label="Aqua, Glycerin, Niacinamide, Sodium Hyaluronate, "
              "Ascorbic Acid, Tocopherol, Xanthan Gum, "
              "Citric Acid, Sodium Benzoate",
    max_ingredients=9,
    use_claude=False
)
print(json.dumps({
    "product_verdict": result_full["product_verdict"],
    "average_score": result_full["average_score"],
    "higher_risk_count": result_full["higher_risk_count"],
    "ingredients_evaluated": result_full["ingredients_evaluated"],
    "interaction_summary": result_full["interaction_summary"],
    "reformulation_notes_count": len(result_full["reformulation_notes"]),
    "extract_included": "extract" in result_full
}, indent=2))
print()

# ── TEST 2: Ingredient — EXTRACT ─────────────────────────────
print("TEST 2A: Ingredient — depth='extract'")
r = noura_api_v2("ingredient", depth="extract", ingredient="parabens")
print(json.dumps(r, indent=2))
print()

# ── TEST 3: Interaction — EXTRACT ────────────────────────────
print("TEST 3A: Interaction — depth='extract'")
r = noura_api_v2("interaction", depth="extract",
    ingredients=["sodium benzoate", "ascorbic acid", "retinol",
                 "glycolic acid"])
print(json.dumps(r, indent=2))
print()

# ── TEST 4: Compare — EXTRACT ────────────────────────────────
print("TEST 4A: Compare — depth='extract'")
r = noura_api_v2("compare", depth="extract",
    ingredient_a="parabens", ingredient_b="phenoxyethanol")
print(json.dumps(r, indent=2))

  NOURA API v2.0 TEST SUITE

TEST 1A: Product — depth='extract' (consumer-facing)
  NOURA PRODUCT EXTRACT
  Clean Beauty Serum

  🟢  79.5/100  —  CLEAN

  Well-formulated product with strong safety evidence.

  ⚠ sodium benzoate + ascorbic acid: Sodium benzoate reacts with ascorbic aci
  ⚠ Sodium Benzoate: Reacts with ascorbic acid (vitamin C) to form benzene — 

  [ Extended Analysis available on request ]


TEST 1B: Product — depth='full' (enterprise back-office)
{
  "product_verdict": "CLEAN FORMULATION",
  "average_score": 79.5,
  "higher_risk_count": 0,
  "ingredients_evaluated": 8,
  "interaction_summary": {
    "total": 3,
    "high": 1,
    "moderate": 0,
    "low": 2
  },
  "reformulation_notes_count": 1,
  "extract_included": true
}

TEST 2A: Ingredient — depth='extract'
{
  "status": 200,
  "api_version": "2.0",
  "depth": "extract",
  "request_type": "ingredient",
  "timestamp": "2026-02-27T12:51:57.883838",
  "extract": {
    "ingredient": "parabens",
    "score": 18,
    

IndexError: list index out of range

In [38]:
# NOURA - Cell 21: Fix compare endpoint + final test

def safe_first_flag(flags):
    """Safely get first flag or None if list is empty."""
    return flags[0] if flags else None

# Patch the compare section inline — just re-run compare with the fix
def noura_compare_v2(ingredient_a, ingredient_b, depth="extract"):
    timestamp = datetime.now().isoformat()

    result_a = noura_api_v2("ingredient", depth="full",
                            ingredient=ingredient_a, use_claude=False)
    time.sleep(1)
    result_b = noura_api_v2("ingredient", depth="full",
                            ingredient=ingredient_b, use_claude=False)

    winner = ingredient_a if (result_a.get("health_score") or 0) >= \
                             (result_b.get("health_score") or 0) \
             else ingredient_b

    compare_data = {
        "status": 200,
        "api_version": "2.0",
        "depth": depth,
        "request_type": "compare",
        "timestamp": timestamp,
        "safer_ingredient": winner,
        "ingredient_a": {
            "name": ingredient_a,
            "score": result_a.get("health_score"),
            "verdict": result_a.get("verdict"),
            "confidence_label": result_a.get("confidence_label"),
            "top_flag": safe_first_flag(result_a.get("concern_flags", []))
        },
        "ingredient_b": {
            "name": ingredient_b,
            "score": result_b.get("health_score"),
            "verdict": result_b.get("verdict"),
            "confidence_label": result_b.get("confidence_label"),
            "top_flag": safe_first_flag(result_b.get("concern_flags", []))
        }
    }

    if depth == "full":
        compare_data["ingredient_a"]["full_evaluation"] = result_a
        compare_data["ingredient_b"]["full_evaluation"] = result_b

    return compare_data


# ── TEST 4: Compare — both depths ────────────────────────────
print("TEST 4A: Compare — depth='extract'")
r = noura_compare_v2("parabens", "phenoxyethanol", depth="extract")
print(json.dumps(r, indent=2))
print()

print("TEST 4B: Compare — depth='full'")
r = noura_compare_v2("parabens", "phenoxyethanol", depth="full")
print(json.dumps({
    "safer_ingredient": r["safer_ingredient"],
    "ingredient_a": {
        "name": r["ingredient_a"]["name"],
        "score": r["ingredient_a"]["score"],
        "verdict": r["ingredient_a"]["verdict"],
        "top_flag": r["ingredient_a"]["top_flag"]
    },
    "ingredient_b": {
        "name": r["ingredient_b"]["name"],
        "score": r["ingredient_b"]["score"],
        "verdict": r["ingredient_b"]["verdict"],
        "top_flag": r["ingredient_b"]["top_flag"]
    },
    "full_evaluations_included": "full_evaluation" in r["ingredient_a"]
}, indent=2))

print()
print("=" * 55)
print("  NOURA API v2.0 — ALL TESTS COMPLETE")
print("  One API. One endpoint. depth='extract' or depth='full'")
print("=" * 55)

TEST 4A: Compare — depth='extract'
{
  "status": 200,
  "api_version": "2.0",
  "depth": "extract",
  "request_type": "compare",
  "timestamp": "2026-02-27T13:24:49.508330",
  "safer_ingredient": "phenoxyethanol",
  "ingredient_a": {
    "name": "parabens",
    "score": 18,
    "verdict": "HIGHER RISK",
    "confidence_label": "VERY HIGH",
    "top_flag": "Endocrine disruption \u2014 estrogenic activity confirmed"
  },
  "ingredient_b": {
    "name": "phenoxyethanol",
    "score": 52.2,
    "verdict": "LIMITED SUPPORT",
    "confidence_label": "HIGH",
    "top_flag": null
  }
}

TEST 4B: Compare — depth='full'
{
  "safer_ingredient": "phenoxyethanol",
  "ingredient_a": {
    "name": "parabens",
    "score": 18,
    "verdict": "HIGHER RISK",
    "top_flag": "Endocrine disruption \u2014 estrogenic activity confirmed"
  },
  "ingredient_b": {
    "name": "phenoxyethanol",
    "score": 52.2,
    "verdict": "LIMITED SUPPORT",
    "top_flag": null
  },
  "full_evaluations_included": true
}



In [48]:
%%writefile /content/noura_pdf_v3.py
"""
NOURA Enterprise PDF Report Generator v3
- Real logo embedded (canvas drawImage)
- Compact spacing — Weleda target 2pp, Reformulation 3pp
"""

from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.units import mm, cm
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    HRFlowable, PageBreak, KeepTogether
)
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
from reportlab.lib.utils import ImageReader
from datetime import datetime
import os

# ── Brand colours ──────────────────────────────────────────────
NAVY       = colors.HexColor("#1E3A5F")
BLUE       = colors.HexColor("#2E86AB")
GREEN_FG   = colors.HexColor("#1A6B3A")
GREEN_BG   = colors.HexColor("#E8F5E9")
GREEN_BD   = colors.HexColor("#A5D6A7")
AMBER_FG   = colors.HexColor("#7A5C00")
AMBER_BG   = colors.HexColor("#FFF8E1")
AMBER_BD   = colors.HexColor("#FFD54F")
RED_FG     = colors.HexColor("#8B0000")
RED_BG     = colors.HexColor("#FFEBEE")
RED_BD     = colors.HexColor("#EF9A9A")
GRAY_TEXT  = colors.HexColor("#666666")
GRAY_LIGHT = colors.HexColor("#F7F7F7")
GRAY_BD    = colors.HexColor("#DDDDDD")
WHITE      = colors.white
BLACK      = colors.HexColor("#1A1A1A")

PAGE_W, PAGE_H = A4
MARGIN    = 1.8 * cm
CONTENT_W = PAGE_W - 2 * MARGIN

LOGO_PATH = "/content/noura-rag/assets/noura_logo.png"

# ── Helpers ────────────────────────────────────────────────────
def verdict_colours(score, verdict):
    v = (verdict or "").upper()
    if "HIGHER" in v or "CONCERNS" in v or (score and score < 50):
        return RED_FG, RED_BG, RED_BD, "HIGHER RISK"
    elif "WELL" in v or "CLEAN" in v or (score and score >= 70):
        return GREEN_FG, GREEN_BG, GREEN_BD, "CLEAN"
    else:
        return AMBER_FG, AMBER_BG, AMBER_BD, "MODERATE"

VERDICT_TEXT = {
    "CLEAN FORMULATION":    "Well-formulated. Strong safety evidence across all evaluated ingredients.",
    "MIXED FORMULATION":    "Generally acceptable. Some ingredients warrant attention.",
    "FORMULATION CONCERNS": "Contains ingredients with documented safety concerns. Review recommended.",
}

def hr(color=GRAY_BD, thick=0.5, before=3, after=5):
    return HRFlowable(width="100%", thickness=thick,
                      color=color, spaceBefore=before, spaceAfter=after)

def P(text, name="p", **kw):
    d = dict(fontName="Helvetica", fontSize=9, textColor=BLACK,
             leading=13, spaceAfter=2)
    d.update(kw)
    return Paragraph(text, ParagraphStyle(name, **d))

def section_head(num, title):
    return [
        Paragraph(f"{num}.  {title}",
            ParagraphStyle("h1", fontName="Helvetica-Bold", fontSize=13,
                           textColor=NAVY, spaceBefore=8, spaceAfter=3,
                           leading=16)),
        hr(BLUE, thick=1.0, before=0, after=5),
    ]


# ── Header / Footer ────────────────────────────────────────────
class HF:
    def __init__(self, product_name, report_date):
        self.name = product_name
        self.date = report_date
        self._logo = ImageReader(LOGO_PATH) if os.path.exists(LOGO_PATH) else None

    def __call__(self, canv, doc):
        canv.saveState()
        if doc.page == 1:
            if self._logo:
                lw_pt, lh_pt = 72*mm, 14*mm   # compact logo
                lx = (PAGE_W - lw_pt) / 2
                ly = PAGE_H - 2.2*cm - lh_pt   # sits just below top margin
                canv.drawImage(self._logo, lx, ly, lw_pt, lh_pt,
                               mask="auto", preserveAspectRatio=True)
            canv.setFont("Helvetica", 9.5)
            canv.setFillColor(BLUE)
            canv.drawCentredString(PAGE_W/2, PAGE_H - 2.2*cm - 14*mm - 0.55*cm,
                                   "Evidence-Based Product Intelligence")
        else:
            canv.setStrokeColor(BLUE)
            canv.setLineWidth(0.8)
            canv.line(MARGIN, PAGE_H-1.2*cm, PAGE_W-MARGIN, PAGE_H-1.2*cm)
            if self._logo:
                canv.drawImage(self._logo, MARGIN, PAGE_H-1.05*cm,
                               22*mm, 4.2*mm, mask="auto",
                               preserveAspectRatio=True)
            canv.setFont("Helvetica", 7)
            canv.setFillColor(GRAY_TEXT)
            canv.drawString(MARGIN+24*mm, PAGE_H-0.95*cm,
                            f"Product Evaluation  |  {self.name}")
            canv.drawRightString(PAGE_W-MARGIN, PAGE_H-0.95*cm,
                                 f"Confidential  |  {self.date}")

        canv.setStrokeColor(GRAY_BD)
        canv.setLineWidth(0.3)
        canv.line(MARGIN, 1.2*cm, PAGE_W-MARGIN, 1.2*cm)
        canv.setFont("Helvetica", 7)
        canv.setFillColor(GRAY_TEXT)
        canv.drawString(MARGIN, 0.75*cm, "NOURA AI  —  Proprietary & Confidential")
        canv.drawRightString(PAGE_W-MARGIN, 0.75*cm, f"Page {doc.page}")
        canv.restoreState()


# ── COVER PAGE ─────────────────────────────────────────────────
def build_cover(product_name, avg_score, product_verdict,
                higher_risk, report_date, client_name=""):

    fg, bg, bd, label = verdict_colours(avg_score, product_verdict)
    story = []
    story.append(Spacer(1, 2.8*cm))   # space for canvas logo + tagline
    story.append(hr(BLUE, thick=1.2, before=2, after=8))

    story.append(P("PRODUCT EVALUATION REPORT", "label",
                   fontName="Helvetica-Bold", fontSize=9,
                   textColor=NAVY, alignment=TA_CENTER, spaceAfter=3))
    story.append(P(product_name, "pname",
                   fontName="Helvetica-Bold", fontSize=16,
                   textColor=BLACK, alignment=TA_CENTER,
                   leading=20, spaceAfter=10))

    score_str   = f"{avg_score}/100" if avg_score is not None else "N/A"
    verdict_sub = VERDICT_TEXT.get(product_verdict, product_verdict or "")

    score_box = Table([[
        P(score_str, "sc",
          fontName="Helvetica-Bold", fontSize=28,
          textColor=fg, alignment=TA_CENTER, leading=32),
        [P(label, "vl",
           fontName="Helvetica-Bold", fontSize=14,
           textColor=fg, alignment=TA_CENTER, spaceAfter=2),
         P(verdict_sub, "vs",
           fontSize=8, textColor=fg,
           alignment=TA_CENTER, leading=11)],
    ]], colWidths=[CONTENT_W*0.34, CONTENT_W*0.66], rowHeights=[60])
    score_box.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,-1), bg),
        ("BOX",           (0,0), (-1,-1), 1.2, bd),
        ("LINEAFTER",     (0,0), (0,-1), 0.5, bd),
        ("VALIGN",        (0,0), (-1,-1), "MIDDLE"),
        ("TOPPADDING",    (0,0), (-1,-1), 8),
        ("BOTTOMPADDING", (0,0), (-1,-1), 8),
        ("LEFTPADDING",   (0,0), (-1,-1), 12),
        ("RIGHTPADDING",  (0,0), (-1,-1), 12),
    ]))
    story.append(score_box)
    story.append(Spacer(1, 0.6*cm))
    story.append(hr(GRAY_BD, thick=0.3, before=0, after=4))

    rows = []
    if client_name:
        rows.append(["Prepared For", client_name])
    rows += [
        ["Report Date",           report_date],
        ["Evaluation Standard",   "NOURA Evidence Hierarchy v2.0 (9-tier)"],
        ["Higher Risk Ingredients", str(higher_risk)],
    ]
    mt = Table(rows, colWidths=[CONTENT_W*0.36, CONTENT_W*0.64])
    mt.setStyle(TableStyle([
        ("FONTNAME",      (0,0), (0,-1), "Helvetica-Bold"),
        ("FONTSIZE",      (0,0), (-1,-1), 9),
        ("TEXTCOLOR",     (0,0), (0,-1), NAVY),
        ("TOPPADDING",    (0,0), (-1,-1), 4),
        ("BOTTOMPADDING", (0,0), (-1,-1), 4),
        ("LINEBELOW",     (0,0), (-1,-2), 0.3, GRAY_BD),
    ]))
    story.append(mt)
    story.append(Spacer(1, 0.5*cm))
    story.append(P(
        "Generated by NOURA AI using live PubMed evidence retrieval, a curated 9-tier "
        "evidence hierarchy, and automated formula interaction detection. "
        "For internal use only. Not for public distribution without authorisation.",
        "disc", fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_CENTER, leading=11))
    story.append(PageBreak())
    return story


# ── SECTION 1: Public Extract ───────────────────────────────────
def build_extract(extract):
    story = section_head("1", "Public Extract")
    story.append(P(
        "Consumer-facing layer derived automatically from the full evaluation. "
        "Use on product pages, spa menus, shelf tags, and chatbot responses.",
        "d1", textColor=GRAY_TEXT, spaceAfter=5))

    score = extract.get("score")
    label = extract.get("color_label", "")
    fg, bg, bd, _ = verdict_colours(score, label)

    et = Table([
        [P("SCORE",  "eh", fontName="Helvetica-Bold", fontSize=8, textColor=WHITE, alignment=TA_CENTER),
         P("SIGNAL", "eh", fontName="Helvetica-Bold", fontSize=8, textColor=WHITE, alignment=TA_CENTER),
         P("VERDICT","eh", fontName="Helvetica-Bold", fontSize=8, textColor=WHITE)],
        [P(f"{score}/100" if score else "N/A", "es",
           fontName="Helvetica-Bold", fontSize=16, textColor=fg, alignment=TA_CENTER),
         P(label, "el",
           fontName="Helvetica-Bold", fontSize=11, textColor=fg, alignment=TA_CENTER),
         P(extract.get("plain_verdict",""), "ev", fontSize=9, textColor=BLACK, leading=13)],
    ], colWidths=[CONTENT_W*0.17, CONTENT_W*0.20, CONTENT_W*0.63],
       rowHeights=[16, 40])
    et.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("BACKGROUND",    (0,1), (-1,1), bg),
        ("BOX",           (0,0), (-1,-1), 0.8, bd),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("VALIGN",        (0,0), (-1,-1), "MIDDLE"),
        ("TOPPADDING",    (0,0), (-1,-1), 4),
        ("BOTTOMPADDING", (0,0), (-1,-1), 4),
        ("LEFTPADDING",   (0,0), (-1,-1), 7),
        ("RIGHTPADDING",  (0,0), (-1,-1), 7),
    ]))
    story.append(et)

    flags = extract.get("flags", [])
    if flags:
        story.append(Spacer(1, 3))
        story.append(P("Risk Flags", "fh", fontName="Helvetica-Bold",
                       fontSize=8.5, textColor=RED_FG, spaceAfter=2))
        for f in flags:
            story.append(P(f"  \u2022  {f.replace('⚠ ','').strip()}",
                           "fi", fontSize=8.5, textColor=RED_FG,
                           leading=12, spaceAfter=1))

    alt = extract.get("alternative_signal")
    if alt:
        story.append(Spacer(1, 3))
        at = Table([[P(f"Cleaner alternatives available — {alt}",
                       "alt", fontSize=8.5, textColor=GREEN_FG, leading=12)]],
                   colWidths=[CONTENT_W])
        at.setStyle(TableStyle([
            ("BACKGROUND",    (0,0), (-1,-1), GREEN_BG),
            ("BOX",           (0,0), (-1,-1), 0.6, GREEN_BD),
            ("TOPPADDING",    (0,0), (-1,-1), 5),
            ("BOTTOMPADDING", (0,0), (-1,-1), 5),
            ("LEFTPADDING",   (0,0), (-1,-1), 8),
            ("RIGHTPADDING",  (0,0), (-1,-1), 8),
        ]))
        story.append(at)

    story.append(Spacer(1, 0.1*cm))
    return story


# ── SECTION 2: Ingredients ──────────────────────────────────────
def build_ingredients(ingredients):
    story = section_head("2", "Ingredient Evaluation")
    story.append(P(
        "Each ingredient evaluated against NOURA's 9-tier evidence hierarchy. "
        "Scores are direction-aware weighted with sample-size multipliers.",
        "d2", textColor=GRAY_TEXT, spaceAfter=5))

    CW = [CONTENT_W*0.27, CONTENT_W*0.10,
          CONTENT_W*0.25, CONTENT_W*0.22, CONTENT_W*0.16]

    def hdr(t, align=TA_CENTER):
        return P(t, "th", fontName="Helvetica-Bold", fontSize=8,
                 textColor=WHITE, alignment=align)

    rows = [[hdr("Ingredient", TA_LEFT), hdr("Score"),
             hdr("Verdict"), hdr("Confidence"), hdr("Source")]]

    for r in ingredients:
        score   = r.get("health_score")
        verdict = r.get("verdict", "")
        fg, bg, bd, _ = verdict_colours(score, verdict)
        flag = (r.get("flag") or "").split("|")[0].strip()[:90]

        ingr_cell = [P(r["ingredient"].title(), "in",
                       fontName="Helvetica-Bold", fontSize=9,
                       textColor=RED_FG if flag else BLACK, leading=12)]
        if flag:
            ingr_cell.append(P(flag, "if", fontSize=7.5,
                               textColor=RED_FG, leading=11, spaceAfter=0))

        rows.append([
            ingr_cell,
            P(str(score) if score is not None else "N/A", "sc2",
              fontName="Helvetica-Bold", fontSize=11,
              textColor=fg, alignment=TA_CENTER),
            P(verdict, "vc",
              fontName="Helvetica-Bold", fontSize=7.5,
              textColor=fg, alignment=TA_CENTER),
            P(r.get("confidence_label",""), "cc",
              fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_CENTER),
            P(r.get("evidence_source","")
               .replace("NOURA Knowledge Base","KB")
               .replace("PubMed Live Search","Live"),
              "src", fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_CENTER),
        ])

    t = Table(rows, colWidths=CW, repeatRows=1)
    ts = [
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("VALIGN",        (0,0), (-1,-1), "TOP"),
        ("TOPPADDING",    (0,0), (-1,-1), 5),
        ("BOTTOMPADDING", (0,0), (-1,-1), 5),
        ("LEFTPADDING",   (0,0), (-1,-1), 5),
        ("RIGHTPADDING",  (0,0), (-1,-1), 5),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("BOX",           (0,0), (-1,-1), 0.7, GRAY_BD),
        ("ROWBACKGROUNDS",(0,1), (-1,-1), [WHITE, GRAY_LIGHT]),
    ]
    for i, r in enumerate(ingredients, 1):
        if r.get("verdict") == "HIGHER RISK":
            ts.append(("BACKGROUND", (0,i), (-1,i), RED_BG))
        elif r.get("verdict") == "LIMITED SUPPORT" and r.get("flag"):
            ts.append(("BACKGROUND", (0,i), (-1,i), AMBER_BG))
    t.setStyle(TableStyle(ts))
    story.append(t)
    story.append(Spacer(1, 0.1*cm))
    return story


# ── SECTION 3: Interactions ─────────────────────────────────────
def build_interactions(interactions, summary):
    story = section_head("3", "Formula Interaction Analysis")

    if not interactions:
        story.append(P("\u2713  No known ingredient interactions detected in this formula.",
                       "ok", fontName="Helvetica-Bold", fontSize=9,
                       textColor=GREEN_FG, spaceAfter=4))
        return story

    story.append(P(
        f"Interactions detected: {summary['total']}  "
        f"({summary['high']} HIGH  /  {summary['moderate']} MODERATE  /  {summary['low']} LOW)",
        "is", fontSize=9, spaceAfter=5))

    order = {"HIGH": 0, "MODERATE": 1, "LOW": 2}
    for ix in sorted(interactions, key=lambda x: order.get(x["severity"], 3)):
        sev = ix["severity"]
        fg = RED_FG if sev=="HIGH" else (AMBER_FG if sev=="MODERATE" else GREEN_FG)
        bg = RED_BG if sev=="HIGH" else (AMBER_BG if sev=="MODERATE" else GREEN_BG)
        bd = RED_BD if sev=="HIGH" else (AMBER_BD if sev=="MODERATE" else GREEN_BD)

        ingr_str = "  +  ".join(i.title() for i in ix.get("matched_ingredients",[]))

        block = Table([
            [P(f"{sev}  INTERACTION", "si",
               fontName="Helvetica-Bold", fontSize=8, textColor=fg),
             P(ix.get("type","").upper(), "sit",
               fontSize=7.5, textColor=GRAY_TEXT, alignment=TA_RIGHT)],
            [P(ingr_str, "ii",
               fontName="Helvetica-Bold", fontSize=9.5,
               textColor=BLACK, spaceAfter=2), ""],
            [P(ix["description"], "id",
               fontSize=8.5, textColor=BLACK, leading=12), ""],
            [P(f"Recommendation:  {ix['recommendation']}", "ir",
               fontName="Helvetica-Oblique", fontSize=8.5,
               textColor=fg, leading=12), ""],
        ], colWidths=[CONTENT_W*0.55, CONTENT_W*0.45])

        block.setStyle(TableStyle([
            ("BACKGROUND",    (0,0), (-1,-1), bg),
            ("SPAN",          (0,1), (-1,1)),
            ("SPAN",          (0,2), (-1,2)),
            ("SPAN",          (0,3), (-1,3)),
            ("BOX",           (0,0), (-1,-1), 1.0, bd),
            ("LINEBELOW",     (0,0), (-1,0), 0.4, bd),
            ("TOPPADDING",    (0,0), (-1,-1), 5),
            ("BOTTOMPADDING", (0,0), (-1,-1), 5),
            ("LEFTPADDING",   (0,0), (-1,-1), 8),
            ("RIGHTPADDING",  (0,0), (-1,-1), 8),
            ("VALIGN",        (0,0), (-1,-1), "TOP"),
        ]))
        story.append(KeepTogether([block, Spacer(1, 4)]))

    return story


# ── SECTION 4: Reformulation ────────────────────────────────────
def build_reformulation(notes):
    story = section_head("4", "Reformulation Recommendations")

    if not notes:
        story.append(P("\u2713  No reformulation recommendations required for this formula.",
                       "ok", fontName="Helvetica-Bold", fontSize=9,
                       textColor=GREEN_FG, spaceAfter=4))
        return story

    story.append(P(
        "Ingredients below the WELL SUPPORTED threshold or with documented concerns. "
        "Substitution or concentration review recommended.",
        "d4", textColor=GRAY_TEXT, spaceAfter=5))

    def hdr(t): return P(t, "rh", fontName="Helvetica-Bold",
                          fontSize=8, textColor=WHITE)
    rows = [[hdr("Ingredient"), hdr("Issue"), hdr("Recommendation")]]
    for n in notes:
        rows.append([
            P(n["ingredient"].title(), "rn",
              fontName="Helvetica-Bold", fontSize=8.5, textColor=RED_FG),
            P((n.get("issue") or "")[:130], "ri",
              fontSize=8.5, textColor=BLACK, leading=12),
            P(n.get("suggestion",""), "rs",
              fontName="Helvetica-Oblique", fontSize=8.5,
              textColor=NAVY, leading=12),
        ])

    rt = Table(rows, colWidths=[CONTENT_W*0.19, CONTENT_W*0.38, CONTENT_W*0.43],
               repeatRows=1)
    rt.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("ROWBACKGROUNDS",(0,1), (-1,-1), [RED_BG, AMBER_BG]),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("BOX",           (0,0), (-1,-1), 0.7, RED_BD),
        ("TOPPADDING",    (0,0), (-1,-1), 5),
        ("BOTTOMPADDING", (0,0), (-1,-1), 5),
        ("LEFTPADDING",   (0,0), (-1,-1), 6),
        ("RIGHTPADDING",  (0,0), (-1,-1), 6),
        ("VALIGN",        (0,0), (-1,-1), "TOP"),
    ]))
    story.append(rt)
    story.append(Spacer(1, 0.1*cm))
    return story


# ── SECTION 5: Methodology ──────────────────────────────────────
def build_methodology():
    story = section_head("5", "Methodology")
    story.append(P(
        "NOURA evaluates each ingredient by retrieving up to 50 peer-reviewed studies "
        "from PubMed and classifying each by evidence type (9-tier hierarchy) and "
        "direction (SAFETY / CONCERN / NEUTRAL). Scores use direction-aware weighting "
        "with sample-size multipliers.",
        "d5", textColor=GRAY_TEXT, spaceAfter=6))

    tiers = [
        ["Tier", "Evidence Type", "Weight"],
        ["1", "Systematic Review / Meta-Analysis", "1.00"],
        ["2", "Regulatory Opinion (CIR, SCCS, FDA)", "0.75"],
        ["3", "Randomised Controlled Trial (RCT)", "0.70"],
        ["4", "Cohort / Observational Study", "0.60"],
        ["5", "Case-Control Study", "0.50"],
        ["6", "Case Report / Series", "0.40"],
        ["7", "In Vitro / Lab Study", "0.30"],
        ["8", "Animal Study", "0.25"],
        ["9", "Expert Opinion / Review", "0.20"],
    ]
    tt = Table(tiers,
               colWidths=[CONTENT_W*0.10, CONTENT_W*0.72, CONTENT_W*0.18])
    tt.setStyle(TableStyle([
        ("BACKGROUND",    (0,0), (-1,0), NAVY),
        ("TEXTCOLOR",     (0,0), (-1,0), WHITE),
        ("FONTNAME",      (0,0), (-1,0), "Helvetica-Bold"),
        ("FONTNAME",      (0,1), (-1,-1), "Helvetica"),
        ("FONTSIZE",      (0,0), (-1,-1), 8.5),
        ("ALIGN",         (0,0), (0,-1), "CENTER"),
        ("ALIGN",         (2,0), (2,-1), "CENTER"),
        ("FONTNAME",      (2,1), (2,-1), "Courier"),
        ("TEXTCOLOR",     (2,1), (2,-1), NAVY),
        ("TOPPADDING",    (0,0), (-1,-1), 4),
        ("BOTTOMPADDING", (0,0), (-1,-1), 4),
        ("LEFTPADDING",   (0,0), (-1,-1), 7),
        ("INNERGRID",     (0,0), (-1,-1), 0.3, GRAY_BD),
        ("BOX",           (0,0), (-1,-1), 0.7, GRAY_BD),
        ("ROWBACKGROUNDS",(0,1), (-1,-1), [WHITE, GRAY_LIGHT]),
    ]))
    story.append(tt)
    story.append(Spacer(1, 4))
    story.append(P(
        "Scoring: safety studies → full weight; concern studies → 0.5x penalty; "
        "neutral → 0.5x. Sample-size multipliers: n≥1,000 (×1.5), n≥100 (×1.25), "
        "n<30 (×0.75). Hard override: majority concern signals cap score at 45/100.",
        "mn", fontSize=8, textColor=GRAY_TEXT, leading=12))
    return story


# ── MASTER BUILDER ──────────────────────────────────────────────
def generate_report(api_response, output_path, client_name=""):
    name        = api_response.get("product_name", "Unknown Product")
    avg_score   = api_response.get("average_score", 0)
    verdict     = api_response.get("product_verdict", "")
    higher_risk = api_response.get("higher_risk_count", 0)
    ingredients = api_response.get("ingredients", [])
    interactions= api_response.get("interactions", [])
    int_summary = api_response.get("interaction_summary",
                                   {"total":0,"high":0,"moderate":0,"low":0})
    refo_notes  = api_response.get("reformulation_notes", [])
    extract     = api_response.get("extract", {})
    report_date = datetime.now().strftime("%d %B %Y")

    doc = SimpleDocTemplate(
        output_path, pagesize=A4,
        topMargin=1.5*cm, bottomMargin=1.5*cm,
        leftMargin=MARGIN, rightMargin=MARGIN,
        title=f"NOURA Evaluation — {name}",
        author="NOURA AI",
    )
    hf = HF(name, report_date)

    story  = build_cover(name, avg_score, verdict,
                         higher_risk, report_date, client_name)
    story += build_extract(extract)
    story += build_ingredients(ingredients)
    story += build_interactions(interactions, int_summary)
    story += build_reformulation(refo_notes)
    story.append(PageBreak())
    story += build_methodology()

    doc.build(story, onFirstPage=hf, onLaterPages=hf)
    print(f"  {output_path}")
    return output_path


# ================================================================
# SAMPLE DATA
# ================================================================
weleda_response = {
    "product_name": "Weleda Skin Food Original Ultra-Rich Cream",
    "average_score": 84.2,
    "higher_risk_count": 0,
    "product_verdict": "CLEAN FORMULATION",
    "interaction_summary": {"total":0,"high":0,"moderate":0,"low":0},
    "ingredients": [
        {"ingredient":"glycerin","health_score":85,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"tocopherol","health_score":78,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Rare contact sensitisation in susceptible individuals"},
        {"ingredient":"xanthan gum","health_score":84,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"sodium hyaluronate","health_score":88,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"citric acid","health_score":82,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Can cause irritation at high concentrations in sensitive skin"},
        {"ingredient":"beeswax","health_score":86,"verdict":"WELL SUPPORTED",
         "confidence_label":"HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"sunflower seed oil","health_score":83,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"rosemary extract","health_score":76,"verdict":"WELL SUPPORTED",
         "confidence_label":"HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"May cause sensitisation in fragrance-allergic individuals"},
    ],
    "interactions": [],
    "reformulation_notes": [],
    "extract": {
        "score":84.2,"color_label":"CLEAN",
        "plain_verdict":"Well-formulated product with strong safety evidence.",
        "flags":[],"alternative_signal":None,
    }
}

concerning_response = {
    "product_name": "Luxury Renewal Night Cream — Reformulation Candidate",
    "average_score": 58.4,
    "higher_risk_count": 2,
    "product_verdict": "FORMULATION CONCERNS",
    "interaction_summary": {"total":4,"high":2,"moderate":2,"low":0},
    "ingredients": [
        {"ingredient":"glycerin","health_score":85,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"niacinamide","health_score":82,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base","flag":None},
        {"ingredient":"retinol","health_score":68,"verdict":"LIMITED SUPPORT",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"EU concentration restrictions (0.3% face) | Contraindicated in pregnancy | Photosensitising"},
        {"ingredient":"ascorbic acid","health_score":72,"verdict":"WELL SUPPORTED",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Stability concerns — degrades rapidly if poorly formulated"},
        {"ingredient":"sodium benzoate","health_score":65,"verdict":"LIMITED SUPPORT",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Reacts with ascorbic acid to form benzene — avoid combination"},
        {"ingredient":"fragrance","health_score":28,"verdict":"HIGHER RISK",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Undisclosed mixture — up to 3,000 chemicals | Leading cause of cosmetic contact allergy"},
        {"ingredient":"parabens","health_score":18,"verdict":"HIGHER RISK",
         "confidence_label":"VERY HIGH","evidence_source":"NOURA Knowledge Base",
         "flag":"Endocrine disruption confirmed | EU restrictions on butyl/propylparaben"},
    ],
    "interactions": [
        {"severity":"HIGH","type":"chemical reaction",
         "matched_ingredients":["sodium benzoate","ascorbic acid"],
         "description":"Sodium benzoate reacts with ascorbic acid in the presence of light and heat to form benzene, a carcinogen classified by IARC as Group 1.",
         "recommendation":"Remove sodium benzoate. Use phenoxyethanol or ethylhexylglycerin as preservative alternatives."},
        {"severity":"HIGH","type":"cumulative endocrine risk",
         "matched_ingredients":["parabens","fragrance"],
         "description":"Both parabens and common fragrance components are documented endocrine disruptors. Combined exposure significantly increases total endocrine burden.",
         "recommendation":"Replace both. Use phenoxyethanol for preservation and reformulate fragrance-free or with disclosed, IFRA-compliant essential oils."},
        {"severity":"MODERATE","type":"pH conflict",
         "matched_ingredients":["retinol","ascorbic acid"],
         "description":"Vitamin C requires acidic pH (below 3.5) for stability while retinol performs optimally at neutral pH. Combining destabilises both actives.",
         "recommendation":"Separate into AM (vitamin C) and PM (retinol) products, or substitute retinyl palmitate as a pH-stable retinoid form."},
        {"severity":"MODERATE","type":"efficacy reduction",
         "matched_ingredients":["niacinamide","ascorbic acid"],
         "description":"At high concentrations, niacinamide and ascorbic acid can react to form nicotinic acid, potentially causing transient flushing.",
         "recommendation":"Keep both below 10% concentration. Minimal concern in well-formulated products with correct pH."},
    ],
    "reformulation_notes": [
        {"ingredient":"parabens",
         "issue":"Endocrine disruption confirmed | EU restrictions on butyl/propylparaben",
         "suggestion":"Replace with phenoxyethanol 0.5–1.0% or ethylhexylglycerin 0.3–0.5% for equivalent preservation efficacy."},
        {"ingredient":"fragrance",
         "issue":"Undisclosed mixture — leading cause of contact allergy | Endocrine disruption risk",
         "suggestion":"Reformulate fragrance-free or replace with disclosed, allergen-screened essential oils at IFRA-compliant concentrations."},
        {"ingredient":"sodium benzoate",
         "issue":"Reacts with ascorbic acid to form benzene — critical incompatibility",
         "suggestion":"Remove entirely. Benzene formation risk is unacceptable in any leave-on cosmetic containing ascorbic acid."},
    ],
    "extract": {
        "score":58.4,"color_label":"HIGHER RISK",
        "plain_verdict":"Contains ingredients with documented safety concerns.",
        "flags":[
            "Sodium Benzoate + Ascorbic Acid: Forms benzene (carcinogen) under light/heat exposure.",
            "Parabens + Fragrance: Combined endocrine disruption burden — avoid together.",
            "Parabens: Endocrine disruption confirmed. EU restrictions apply.",
        ],
        "alternative_signal":"ask for options without Parabens and Fragrance.",
    }
}


if __name__ == "__main__":
    print("Generating NOURA Enterprise PDF Reports v3...")
    generate_report(weleda_response,
                    "/home/claude/NOURA_Weleda_v3.pdf",
                    client_name="Aman Group — Spa & Wellness")
    generate_report(concerning_response,
                    "/home/claude/NOURA_Reformulation_v3.pdf",
                    client_name="NOURA Enterprise — Brand Audit Sample")
    print("Done.")

Overwriting /content/noura_pdf_v3.py


In [50]:
!pip install reportlab pillow -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/2.0 MB[0m [31m8.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.9/2.0 MB[0m [31m29.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [51]:
# NOURA — Cell 23: Generate Enterprise PDF Report
import sys
sys.path.insert(0, '/content')
from noura_pdf_v3 import generate_report

# Example: generate a report from a live API response
# Replace 'weleda_response' with any real api_response from noura_api_v2()
from noura_pdf_v3 import weleda_response, concerning_response

generate_report(weleda_response,
                "/content/NOURA_Weleda_Report.pdf",
                client_name="Aman Group — Spa & Wellness")

generate_report(concerning_response,
                "/content/NOURA_Reformulation_Report.pdf",
                client_name="NOURA Enterprise — Brand Audit Sample")

# Download both
from google.colab import files
files.download("/content/NOURA_Weleda_Report.pdf")
files.download("/content/NOURA_Reformulation_Report.pdf")

  /content/NOURA_Weleda_Report.pdf
  /content/NOURA_Reformulation_Report.pdf


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
%%writefile /content/noura_config.py
"""
NOURA Product Impact Scanner
Cell 24 — Master Configuration
Locked weights, evidence sources, and category definitions.
All scoring modules import from here. Change here = changes everywhere.
"""

# ═══════════════════════════════════════════════════════════════
# SECTION 1: PRODUCT CATEGORIES
# ═══════════════════════════════════════════════════════════════

PRODUCT_CATEGORIES = {
    "cosmetics": {
        "label": "Cosmetics & Personal Care",
        "includes": [
            "skincare", "body care", "haircare", "makeup", "sunscreen",
            "oral care", "deodorant", "fragrance", "feminine hygiene",
            "shaving", "nail care"
        ],
        "description": "Topically applied products — highest animal testing concern"
    },
    "food": {
        "label": "Food & Beverages",
        "includes": [
            "packaged food", "beverages", "supplements", "vitamins",
            "protein powder", "snacks", "dairy", "plant-based",
            "baby food", "meal replacement"
        ],
        "description": "Ingestible products — highest health and nutrition concern"
    },
    "cleaning": {
        "label": "Household Cleaning Products",
        "includes": [
            "all-purpose cleaner", "laundry detergent", "dish soap",
            "floor cleaner", "bathroom cleaner", "glass cleaner",
            "disinfectant", "bleach", "fabric softener", "drain cleaner"
        ],
        "description": "Chemical-heavy products — high environmental and VOC concern"
    },
    "baby": {
        "label": "Baby & Child Products",
        "includes": [
            "baby skincare", "baby shampoo", "baby lotion", "baby wipes",
            "diaper cream", "baby sunscreen", "baby food", "formula",
            "teething products", "baby cleaning products"
        ],
        "description": "Highest scrutiny — most vulnerable population"
    }
}


# ═══════════════════════════════════════════════════════════════
# SECTION 2: LOCKED DIMENSION WEIGHTS
# Health is always 70%. Never changes. Non-negotiable.
# ═══════════════════════════════════════════════════════════════

DIMENSION_WEIGHTS = {
    "cosmetics": {
        "health":       0.70,   # Toxicology, endocrine disruptors, allergens
        "animal":       0.15,   # Animal testing — elevated for this category
        "environment":  0.10,   # Packaging, biodegradability
        "governance":   0.05,   # Supply chain, certifications
    },
    "food": {
        "health":       0.70,   # Nutrition, additives, pesticides, contaminants
        "environment":  0.15,   # Eco-score, packaging, carbon footprint
        "animal":       0.10,   # Factory farming, welfare standards
        "governance":   0.05,   # Fair trade, labor practices
    },
    "cleaning": {
        "health":       0.70,   # VOCs, skin/respiratory hazards, toxicity
        "environment":  0.15,   # Aquatic toxicity, biodegradability, packaging
        "animal":       0.10,   # Animal testing, wildlife impact
        "governance":   0.05,   # Certifications, transparency
    },
    "baby": {
        "health":       0.70,   # Strictest standards — most vulnerable users
        "environment":  0.15,   # Packaging, materials safety
        "animal":       0.10,   # Animal testing
        "governance":   0.05,   # Supply chain safety, certifications
    }
}

# Validation — all weights must sum to 1.0
for cat, weights in DIMENSION_WEIGHTS.items():
    total = sum(weights.values())
    assert abs(total - 1.0) < 0.001, f"Weights for {cat} sum to {total}, not 1.0"


# ═══════════════════════════════════════════════════════════════
# SECTION 3: EVIDENCE SOURCES PER DIMENSION
# Each dimension pulls from specific authoritative databases.
# ═══════════════════════════════════════════════════════════════

EVIDENCE_SOURCES = {

    "health": {
        "primary": [
            {
                "name": "PubMed",
                "type": "scientific_literature",
                "url": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/",
                "covers": ["cosmetics", "food", "cleaning", "baby"],
                "data": "peer-reviewed studies, toxicology, clinical trials",
                "status": "✅ BUILT (Weeks 1-10)",
                "weight_in_health": 0.40
            },
            {
                "name": "ECHA — European Chemicals Agency",
                "type": "regulatory",
                "url": "https://echa.europa.eu/information-on-chemicals",
                "covers": ["cosmetics", "cleaning", "baby"],
                "data": "SVHC list, hazard classifications, REACH compliance",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.25
            },
            {
                "name": "EU SCCS — Scientific Committee on Consumer Safety",
                "type": "regulatory",
                "url": "https://ec.europa.eu/health/scientific_committees/consumer_safety",
                "covers": ["cosmetics", "baby"],
                "data": "cosmetic ingredient safety opinions",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.15
            },
            {
                "name": "CIR — Cosmetic Ingredient Review",
                "type": "industry_safety",
                "url": "https://cir-safety.org",
                "covers": ["cosmetics", "baby"],
                "data": "ingredient safety assessments, concentration limits",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.10
            },
            {
                "name": "FDA OpenFDA",
                "type": "regulatory",
                "url": "https://api.fda.gov",
                "covers": ["food", "cosmetics", "baby"],
                "data": "product recalls, adverse events, enforcement actions",
                "status": "🔲 TO BUILD — Week 11",
                "weight_in_health": 0.10
            },
            {
                "name": "EFSA — European Food Safety Authority",
                "type": "regulatory",
                "url": "https://www.efsa.europa.eu",
                "covers": ["food", "baby"],
                "data": "food additives, contaminants, pesticide residues",
                "status": "🔲 TO BUILD — Week 12",
                "weight_in_health": 0.00   # food only — overridden per category
            },
        ],
        "noura_kb": {
            "name": "NOURA Knowledge Base",
            "status": "✅ BUILT — 17 ingredients",
            "target": "500+ ingredients by Week 14",
            "weight_in_health": "fills gaps when API returns no data"
        }
    },

    "environment": {
        "primary": [
            {
                "name": "Open Food Facts — Eco-Score",
                "type": "open_database",
                "url": "https://world.openfoodfacts.org/api/v2",
                "covers": ["food", "baby"],
                "data": "packaging, carbon footprint, biodiversity impact",
                "status": "🔲 TO BUILD — Week 12",
            },
            {
                "name": "ECHA — Environmental Hazards",
                "type": "regulatory",
                "url": "https://echa.europa.eu",
                "covers": ["cleaning", "cosmetics"],
                "data": "aquatic toxicity, biodegradability, persistence",
                "status": "🔲 TO BUILD — Week 12",
            },
            {
                "name": "EU Ecolabel Database",
                "type": "certification",
                "url": "https://ecolabel.eu",
                "covers": ["cleaning", "cosmetics"],
                "data": "certified eco-friendly products",
                "status": "🔲 TO BUILD — Week 12",
            },
        ]
    },

    "animal": {
        "primary": [
            {
                "name": "Leaping Bunny",
                "type": "certification",
                "url": "https://www.leapingbunny.org/guide/companies",
                "covers": ["cosmetics", "cleaning", "baby"],
                "data": "cruelty-free certified brands and products",
                "status": "🔲 TO BUILD — Week 13",
            },
            {
                "name": "PETA Beauty Without Bunnies",
                "type": "certification",
                "url": "https://www.peta.org/living/personal-care-fashion/beauty-without-bunnies/",
                "covers": ["cosmetics", "cleaning"],
                "data": "animal testing status by brand",
                "status": "🔲 TO BUILD — Week 13",
            },
            {
                "name": "Vegan Society Trademark",
                "type": "certification",
                "url": "https://www.vegansociety.com/the-vegan-trademark",
                "covers": ["cosmetics", "food", "cleaning"],
                "data": "vegan-certified products",
                "status": "🔲 TO BUILD — Week 13",
            },
        ]
    },

    "governance": {
        "primary": [
            {
                "name": "B Corp Directory",
                "type": "certification",
                "url": "https://www.bcorporation.net/en-us/find-a-b-corp/",
                "covers": ["cosmetics", "food", "cleaning", "baby"],
                "data": "certified B corporations — supply chain, labor, environment",
                "status": "🔲 TO BUILD — Week 13",
            },
            {
                "name": "Fair Trade Certified",
                "type": "certification",
                "url": "https://www.fairtradecertified.org",
                "covers": ["food", "cosmetics"],
                "data": "fair labor practices, ethical sourcing",
                "status": "🔲 TO BUILD — Week 13",
            },
        ]
    }
}


# ═══════════════════════════════════════════════════════════════
# SECTION 4: SCORING THRESHOLDS
# ═══════════════════════════════════════════════════════════════

SCORE_THRESHOLDS = {
    "CLEAN":        (80, 100),   # Green — recommend freely
    "ACCEPTABLE":   (60, 79),    # Amber — minor concerns noted
    "CAUTION":      (40, 59),    # Orange — significant concerns
    "HIGHER_RISK":  (0,  39),    # Red — reformulation recommended
}

# Baby products get stricter thresholds — same score, higher bar
BABY_SCORE_PENALTY = -10   # Subtract 10 from raw score before threshold lookup

# ═══════════════════════════════════════════════════════════════
# SECTION 5: BUILD ROADMAP
# ═══════════════════════════════════════════════════════════════

BUILD_ROADMAP = {
    "Week 11": {
        "goal": "Health dimension — expand from PubMed-only to full multi-source engine",
        "sources": ["ECHA", "EU SCCS", "CIR", "FDA OpenFDA"],
        "categories": ["cosmetics", "baby"],   # highest health risk first
        "deliverable": "noura_health_engine.py — queries all health sources per ingredient"
    },
    "Week 12": {
        "goal": "Environment dimension + Food category health sources",
        "sources": ["Open Food Facts", "ECHA environmental", "EU Ecolabel", "EFSA"],
        "categories": ["food", "cleaning"],
        "deliverable": "noura_environment_engine.py + food health module"
    },
    "Week 13": {
        "goal": "Animal Welfare + Governance dimensions",
        "sources": ["Leaping Bunny", "PETA", "Vegan Society", "B Corp", "Fair Trade"],
        "categories": ["all"],
        "deliverable": "noura_animal_engine.py + noura_governance_engine.py"
    },
    "Week 14": {
        "goal": "Master aggregator — combine all 4 dimensions into unified score",
        "sources": ["all"],
        "categories": ["all"],
        "deliverable": "noura_scanner.py — single function that returns complete 4-dimension score"
    },
    "Week 15": {
        "goal": "Demo website — B2B portal for client pilots",
        "deliverable": "Hosted URL, live product evaluation, PDF download"
    }
}


# ═══════════════════════════════════════════════════════════════
# SECTION 6: QUICK REFERENCE
# ═══════════════════════════════════════════════════════════════

def get_weights(category: str) -> dict:
    """Return dimension weights for a given product category."""
    if category not in DIMENSION_WEIGHTS:
        raise ValueError(f"Unknown category '{category}'. "
                         f"Choose from: {list(DIMENSION_WEIGHTS.keys())}")
    return DIMENSION_WEIGHTS[category]

def get_sources(dimension: str) -> list:
    """Return evidence sources for a given dimension."""
    if dimension not in EVIDENCE_SOURCES:
        raise ValueError(f"Unknown dimension '{dimension}'. "
                         f"Choose from: {list(EVIDENCE_SOURCES.keys())}")
    return EVIDENCE_SOURCES[dimension].get("primary", [])

def get_verdict(score: float, category: str = "cosmetics") -> str:
    """Return verdict label for a given score."""
    adjusted = score - (BABY_SCORE_PENALTY if category == "baby" else 0)
    for verdict, (low, high) in SCORE_THRESHOLDS.items():
        if low <= adjusted <= high:
            return verdict
    return "HIGHER_RISK"


# ═══════════════════════════════════════════════════════════════
# SELF-TEST
# ═══════════════════════════════════════════════════════════════
if __name__ == "__main__":
    print("NOURA Master Config — Self Test\n")

    for cat in PRODUCT_CATEGORIES:
        w = get_weights(cat)
        print(f"{cat.upper()}: Health {w['health']*100:.0f}% | "
              f"Environment {w['environment']*100:.0f}% | "
              f"Animal {w['animal']*100:.0f}% | "
              f"Governance {w['governance']*100:.0f}%")

    print("\nWeek 11 goal:", BUILD_ROADMAP["Week 11"]["goal"])
    print("Sources to build:", BUILD_ROADMAP["Week 11"]["sources"])
    print("\nVerdict test — score 85, cosmetics:", get_verdict(85, "cosmetics"))
    print("Verdict test — score 85, baby:", get_verdict(85, "baby"))
    print("\n✅ Config locked and ready.")

Writing /content/noura_config.py


In [53]:
%%writefile /content/noura_health_echa.py
"""
NOURA Health Engine — ECHA Module
Cell 25

Queries the European Chemicals Agency (ECHA) for:
- SVHC (Substances of Very High Concern) list
- Hazard classifications (CMR: carcinogenic, mutagenic, reprotoxic)
- REACH compliance status
- Endocrine disruptor flags

Covers: cosmetics, cleaning, baby products
Weight in Health score: 25%
"""

import requests
import time
import json
from datetime import datetime, timedelta

# ── ECHA endpoints ─────────────────────────────────────────────
ECHA_CHEM_API   = "https://chem.echa.europa.eu/api/substance/v1/search"
ECHA_SVHC_URL   = "https://echa.europa.eu/candidate-list-table"

# ECHA's REST API for substance lookup
ECHA_SEARCH_URL = "https://chem.echa.europa.eu/api/substance/v1/search"
ECHA_INFO_URL   = "https://chem.echa.europa.eu/api/substance/v1"

# ── SVHC hardcoded list (current as of Feb 2026) ───────────────
# This is ECHA's Candidate List — substances identified as SVHC.
# We embed the most relevant cosmetic/cleaning ones as a fast-lookup
# fallback when the API is unavailable.
# Full list: https://echa.europa.eu/candidate-list-table

SVHC_HARDCODED = {
    # Endocrine disruptors
    "bisphenol a":              {"hazard": "CMR + ED", "concern": "endocrine disruption, reproductive toxicity"},
    "bpa":                      {"hazard": "CMR + ED", "concern": "endocrine disruption, reproductive toxicity"},
    "diethylstilbestrol":       {"hazard": "CMR",      "concern": "carcinogen, endocrine disruptor"},

    # Parabens flagged by ECHA
    "butylparaben":             {"hazard": "ED",       "concern": "endocrine disruption, reproductive toxicity"},
    "propylparaben":            {"hazard": "ED",       "concern": "endocrine disruption"},
    "isopropylparaben":         {"hazard": "ED",       "concern": "endocrine disruption"},
    "isobutylparaben":          {"hazard": "ED",       "concern": "endocrine disruption"},
    "benzylparaben":            {"hazard": "ED",       "concern": "endocrine disruption"},

    # Phthalates
    "dibutyl phthalate":        {"hazard": "CMR",      "concern": "reproductive toxicity, endocrine disruption"},
    "dbp":                      {"hazard": "CMR",      "concern": "reproductive toxicity"},
    "dihexyl phthalate":        {"hazard": "CMR",      "concern": "reproductive toxicity"},
    "bis(2-ethylhexyl) phthalate": {"hazard": "CMR",  "concern": "reproductive toxicity"},
    "dehp":                     {"hazard": "CMR",      "concern": "reproductive toxicity"},
    "diisopentyl phthalate":    {"hazard": "CMR",      "concern": "reproductive toxicity"},

    # Heavy metals
    "lead":                     {"hazard": "CMR",      "concern": "neurotoxin, reproductive toxicity, carcinogen"},
    "lead compounds":           {"hazard": "CMR",      "concern": "neurotoxin, carcinogen"},
    "cadmium":                  {"hazard": "CMR",      "concern": "carcinogen, kidney toxicity"},
    "arsenic":                  {"hazard": "CMR",      "concern": "carcinogen"},
    "mercury":                  {"hazard": "CMR",      "concern": "neurotoxin"},
    "chromium vi":              {"hazard": "CMR",      "concern": "carcinogen, skin sensitiser"},

    # Formaldehyde and releasers
    "formaldehyde":             {"hazard": "CMR",      "concern": "carcinogen (IARC Group 1), skin sensitiser"},
    "dmdm hydantoin":           {"hazard": "RELEASER", "concern": "formaldehyde releaser — carcinogen risk"},
    "quaternium-15":            {"hazard": "RELEASER", "concern": "formaldehyde releaser"},
    "imidazolidinyl urea":      {"hazard": "RELEASER", "concern": "formaldehyde releaser"},
    "diazolidinyl urea":        {"hazard": "RELEASER", "concern": "formaldehyde releaser"},
    "2-bromo-2-nitropropane-1,3-diol": {"hazard": "RELEASER", "concern": "formaldehyde releaser"},

    # PAHs
    "anthracene":               {"hazard": "CMR",      "concern": "carcinogen, PBT substance"},
    "benzo[a]pyrene":           {"hazard": "CMR",      "concern": "carcinogen (IARC Group 1)"},

    # UV filters flagged
    "4-methylbenzylidene camphor": {"hazard": "ED",   "concern": "endocrine disruption"},
    "benzophenone-1":           {"hazard": "ED",       "concern": "endocrine disruption"},
    "benzophenone-3":           {"hazard": "ED",       "concern": "endocrine disruption, skin sensitiser"},
    "homosalate":               {"hazard": "ED",       "concern": "endocrine disruption"},

    # Cleaning-specific
    "sodium dichromate":        {"hazard": "CMR",      "concern": "carcinogen, reproductive toxicity"},
    "trichloroethylene":        {"hazard": "CMR",      "concern": "carcinogen (IARC Group 1)"},
    "1,4-dioxane":              {"hazard": "CMR",      "concern": "probable carcinogen, contaminant in ethoxylated ingredients"},

    # Fragrance allergens (ECHA flagged)
    "musk ambrette":            {"hazard": "BANNED",   "concern": "banned EU cosmetics — neurotoxin"},
    "musk tibetene":            {"hazard": "BANNED",   "concern": "banned EU cosmetics"},
    "6-methylcoumarin":         {"hazard": "BANNED",   "concern": "banned EU cosmetics"},
}

# Hazard severity → score deduction mapping
ECHA_HAZARD_DEDUCTIONS = {
    "CMR":      -25,   # Carcinogenic/Mutagenic/Reprotoxic — severe
    "CMR + ED": -30,   # Both CMR and endocrine disruptor — most severe
    "ED":       -20,   # Endocrine disruptor only
    "RELEASER": -15,   # Formaldehyde releaser
    "BANNED":   -35,   # Banned in EU — automatic major penalty
    "PBT":      -20,   # Persistent, Bioaccumulative, Toxic
    "vPvB":     -15,   # Very Persistent, very Bioaccumulative
}


def check_ingredient_echa(ingredient_name: str) -> dict:
    """
    Check a single ingredient against ECHA SVHC list.
    Returns a result dict with hazard info and score impact.

    Steps:
    1. Normalize ingredient name
    2. Check hardcoded SVHC list (fast, offline)
    3. Try ECHA REST API (live, more comprehensive)
    4. Return combined result
    """
    name_lower = ingredient_name.lower().strip()

    result = {
        "ingredient":     ingredient_name,
        "source":         "ECHA",
        "svhc_listed":    False,
        "hazard_class":   None,
        "concern":        None,
        "score_impact":   0,
        "confidence":     "LOW",
        "api_checked":    False,
        "timestamp":      datetime.now().isoformat()
    }

    # ── Step 1: Check hardcoded SVHC list ─────────────────────
    for svhc_name, svhc_data in SVHC_HARDCODED.items():
        if svhc_name in name_lower or name_lower in svhc_name:
            result.update({
                "svhc_listed":  True,
                "hazard_class": svhc_data["hazard"],
                "concern":      svhc_data["concern"],
                "score_impact": ECHA_HAZARD_DEDUCTIONS.get(svhc_data["hazard"], -10),
                "confidence":   "HIGH",
                "lookup_method": "SVHC_hardcoded"
            })
            return result

    # ── Step 2: Try ECHA REST API ──────────────────────────────
    try:
        response = requests.get(
            ECHA_SEARCH_URL,
            params={"name": ingredient_name, "type": "search"},
            timeout=5,
            headers={"Accept": "application/json"}
        )
        result["api_checked"] = True

        if response.status_code == 200:
            data = response.json()
            substances = data.get("results", [])

            for substance in substances[:3]:   # check top 3 matches
                classifications = substance.get("classifications", [])
                for clf in classifications:
                    hazard_class = clf.get("hazardClass", "")

                    # Check for CMR classification
                    if any(x in hazard_class.upper() for x in
                           ["CARC", "MUTA", "REPR", "STOT"]):
                        result.update({
                            "svhc_listed":    True,
                            "hazard_class":   "CMR",
                            "concern":        f"ECHA classification: {hazard_class}",
                            "score_impact":   -25,
                            "confidence":     "HIGH",
                            "lookup_method":  "ECHA_API"
                        })
                        return result

                    # Check for endocrine disruptor
                    if "ENDOCRINE" in hazard_class.upper():
                        result.update({
                            "svhc_listed":    True,
                            "hazard_class":   "ED",
                            "concern":        "ECHA endocrine disruptor classification",
                            "score_impact":   -20,
                            "confidence":     "HIGH",
                            "lookup_method":  "ECHA_API"
                        })
                        return result

    except requests.exceptions.RequestException:
        # API unavailable — hardcoded list is sufficient for now
        pass

    # ── Step 3: Not found — ingredient is not SVHC listed ─────
    result.update({
        "svhc_listed":    False,
        "score_impact":   0,
        "confidence":     "MEDIUM" if result["api_checked"] else "LOW",
        "lookup_method":  "not_found"
    })
    return result


def check_formula_echa(ingredients: list) -> dict:
    """
    Check an entire ingredient list against ECHA.
    Returns aggregated results and total score impact.

    Args:
        ingredients: list of ingredient name strings

    Returns:
        {
            "svhc_found": [...],         # list of flagged ingredients
            "total_score_impact": int,   # sum of all deductions
            "highest_hazard": str,       # most severe hazard found
            "summary": str               # human-readable summary
        }
    """
    results = []
    total_impact = 0
    flagged = []

    for ingredient in ingredients:
        result = check_ingredient_echa(ingredient)
        results.append(result)

        if result["svhc_listed"]:
            flagged.append(result)
            total_impact += result["score_impact"]

        # Respect rate limits
        time.sleep(0.3)

    # Cap total deduction at -50 (don't double-penalise exhaustively)
    total_impact = max(total_impact, -50)

    # Find highest hazard
    hazard_priority = ["BANNED", "CMR + ED", "CMR", "ED", "RELEASER", "PBT"]
    highest_hazard = None
    for h in hazard_priority:
        if any(f["hazard_class"] == h for f in flagged):
            highest_hazard = h
            break

    # Build summary
    if not flagged:
        summary = "No ECHA SVHC substances detected."
    else:
        names = [f["ingredient"] for f in flagged]
        summary = (f"{len(flagged)} SVHC substance(s) detected: "
                   f"{', '.join(names)}. Score impact: {total_impact}")

    return {
        "source":           "ECHA",
        "ingredients_checked": len(ingredients),
        "svhc_found":       flagged,
        "svhc_count":       len(flagged),
        "total_score_impact": total_impact,
        "highest_hazard":   highest_hazard,
        "summary":          summary,
        "all_results":      results
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — ECHA Health Engine Self-Test\n")

    test_ingredients = [
        "glycerin",
        "butylparaben",
        "formaldehyde",
        "sodium hyaluronate",
        "benzophenone-3",
        "tocopherol",
        "dmdm hydantoin",
        "aqua"
    ]

    print("Testing individual ingredients:")
    print("-" * 55)
    for ing in test_ingredients:
        r = check_ingredient_echa(ing)
        flag = "🔴 SVHC" if r["svhc_listed"] else "🟢 clear"
        impact = f"({r['score_impact']})" if r["svhc_listed"] else ""
        concern = f"— {r['concern']}" if r["concern"] else ""
        print(f"  {flag} {ing} {impact} {concern}")

    print("\nTesting full formula:")
    print("-" * 55)
    formula_result = check_formula_echa(test_ingredients)
    print(f"  Ingredients checked: {formula_result['ingredients_checked']}")
    print(f"  SVHC found:          {formula_result['svhc_count']}")
    print(f"  Total score impact:  {formula_result['total_score_impact']}")
    print(f"  Highest hazard:      {formula_result['highest_hazard']}")
    print(f"  Summary:             {formula_result['summary']}")
    print("\n✅ ECHA module ready.")

Writing /content/noura_health_echa.py


In [54]:
%%writefile /content/noura_health_fda.py
"""
NOURA Health Engine — FDA OpenFDA Module
Cell 26

Queries the FDA OpenFDA API for:
- Product recalls (cosmetics, food, baby products, cleaning)
- Adverse event reports (consumer complaints, injuries)
- Enforcement actions

Free API — no key required for basic use (1000 requests/hour)
Covers: cosmetics, food, baby products
Weight in Health score: 10%
"""

import requests
import time
from datetime import datetime, timedelta

# ── FDA OpenFDA endpoints ──────────────────────────────────────
FDA_BASE         = "https://api.fda.gov"
FDA_FOOD_ENFORCE = f"{FDA_BASE}/food/enforcement.json"
FDA_COSMETIC_ADV = f"{FDA_BASE}/cosmetics/events.json"

# ── Score deductions ───────────────────────────────────────────
FDA_RECALL_DEDUCTIONS = {
    "Class I":   -30,   # Serious harm — most severe
    "Class II":  -15,   # Temporary adverse consequences
    "Class III": -5,    # Unlikely to cause harm
}

FDA_ADVERSE_SEVERITY = {
    "high":     -20,    # 10+ adverse events reported
    "moderate": -10,    # 3–9 adverse events
    "low":      -5,     # 1–2 adverse events
}


def check_product_recalls(product_name: str, category: str = "cosmetics") -> dict:
    """
    Check if a product or brand has active FDA recalls.
    Returns recall status and score impact.
    """
    result = {
        "product":       product_name,
        "source":        "FDA OpenFDA — Recalls",
        "recalls_found": [],
        "recall_count":  0,
        "score_impact":  0,
        "confidence":    "LOW",
        "api_checked":   False,
        "timestamp":     datetime.now().isoformat()
    }

    try:
        params = {
            "search": f'product_description:"{product_name}"',
            "limit":  10,
            "sort":   "recall_initiation_date:desc"
        }

        response = requests.get(FDA_FOOD_ENFORCE, params=params, timeout=8)
        result["api_checked"] = True

        if response.status_code == 200:
            data = response.json()
            recalls = data.get("results", [])

            # Filter to last 3 years only
            cutoff = datetime.now() - timedelta(days=3 * 365)
            recent = []
            for recall in recalls:
                date_str = recall.get("recall_initiation_date", "")
                try:
                    if datetime.strptime(date_str, "%Y-%m-%d") > cutoff:
                        recent.append(recall)
                except (ValueError, TypeError):
                    recent.append(recall)

            total_impact = 0
            for recall in recent:
                cls = recall.get("classification", "Class III")
                deduction = FDA_RECALL_DEDUCTIONS.get(cls, -5)
                total_impact += deduction
                result["recalls_found"].append({
                    "date":           recall.get("recall_initiation_date"),
                    "classification": cls,
                    "reason":         recall.get("reason_for_recall", "")[:200],
                    "status":         recall.get("status", ""),
                    "deduction":      deduction
                })

            result.update({
                "recall_count": len(recent),
                "score_impact": max(total_impact, -40),
                "confidence":   "HIGH" if recent else "MEDIUM"
            })

        elif response.status_code == 404:
            result["confidence"] = "MEDIUM"  # No results = clean

    except requests.exceptions.RequestException as e:
        result["error"] = str(e)

    return result


def check_adverse_events(product_name: str) -> dict:
    """
    Check FDA cosmetics adverse event reports for a product.
    Returns event count and score impact.
    """
    result = {
        "product":       product_name,
        "source":        "FDA OpenFDA — Adverse Events",
        "event_count":   0,
        "score_impact":  0,
        "severity":      None,
        "top_reactions": [],
        "confidence":    "LOW",
        "api_checked":   False,
        "timestamp":     datetime.now().isoformat()
    }

    try:
        params = {
            "search": f'products.name:"{product_name}"',
            "limit":  5
        }

        response = requests.get(FDA_COSMETIC_ADV, params=params, timeout=8)
        result["api_checked"] = True

        if response.status_code == 200:
            data = response.json()
            events = data.get("results", [])
            count = data.get("meta", {}).get("results", {}).get("total", len(events))

            # Determine severity
            if count >= 10:
                severity = "high"
            elif count >= 3:
                severity = "moderate"
            elif count >= 1:
                severity = "low"
            else:
                severity = None

            # Extract top reaction types
            reactions = []
            for event in events[:3]:
                for reaction in event.get("reactions", [])[:2]:
                    if reaction not in reactions:
                        reactions.append(reaction)

            result.update({
                "event_count":   count,
                "score_impact":  FDA_ADVERSE_SEVERITY.get(severity, 0),
                "severity":      severity,
                "top_reactions": reactions[:5],
                "confidence":    "HIGH" if count > 0 else "MEDIUM"
            })

        elif response.status_code == 404:
            result["confidence"] = "MEDIUM"  # No events = clean

    except requests.exceptions.RequestException as e:
        result["error"] = str(e)

    return result


def check_product_fda(product_name: str, category: str = "cosmetics") -> dict:
    """
    Master FDA check — runs both recalls and adverse events,
    returns combined result with total score impact.

    This is the main function called by the health engine.
    """
    recalls  = check_product_recalls(product_name, category)
    adverse  = check_adverse_events(product_name)

    total_impact = recalls["score_impact"] + adverse["score_impact"]
    total_impact = max(total_impact, -45)   # cap combined deduction

    # Build flags list for report
    flags = []
    if recalls["recall_count"] > 0:
        for r in recalls["recalls_found"]:
            flags.append(f"FDA Recall ({r['classification']}): {r['reason'][:100]}")
    if adverse["event_count"] > 0:
        flags.append(
            f"FDA Adverse Events: {adverse['event_count']} reports"
            + (f" — {', '.join(adverse['top_reactions'])}" if adverse["top_reactions"] else "")
        )

    return {
        "product":          product_name,
        "source":           "FDA OpenFDA",
        "recall_count":     recalls["recall_count"],
        "adverse_count":    adverse["event_count"],
        "flags":            flags,
        "total_score_impact": total_impact,
        "confidence":       "HIGH" if (recalls["api_checked"] and adverse["api_checked"]) else "LOW",
        "details": {
            "recalls": recalls,
            "adverse": adverse
        }
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — FDA OpenFDA Health Engine Self-Test\n")

    test_products = [
        ("Weleda Skin Food", "cosmetics"),
        ("Cetaphil Moisturizer", "cosmetics"),
        ("Similac Baby Formula", "baby"),
    ]

    for product, category in test_products:
        print(f"Checking: {product} ({category})")
        print("-" * 50)

        result = check_product_fda(product, category)

        recall_status = (f"🔴 {result['recall_count']} recall(s)"
                         if result["recall_count"] > 0 else "🟢 No recalls")
        adverse_status = (f"🔴 {result['adverse_count']} adverse event(s)"
                          if result["adverse_count"] > 0 else "🟢 No adverse events")

        print(f"  Recalls:        {recall_status}")
        print(f"  Adverse events: {adverse_status}")
        print(f"  Score impact:   {result['total_score_impact']}")
        print(f"  Confidence:     {result['confidence']}")
        if result["flags"]:
            for flag in result["flags"]:
                print(f"  ⚠  {flag}")
        print()

    print("✅ FDA module ready.")

Writing /content/noura_health_fda.py


In [55]:
%%writefile /content/noura_health_sccs.py
"""
NOURA Health Engine — EU SCCS Module
Cell 27

Queries the Scientific Committee on Consumer Safety (EU SCCS) data for:
- Cosmetic ingredient safety opinions
- Concentration limits and restrictions
- Banned and restricted substances under EU Cosmetics Regulation 1223/2009
- Ingredients requiring specific warnings

SCCS is the gold standard for cosmetic ingredient safety in Europe.
Every opinion is based on full dossier review by independent scientists.

Covers: cosmetics, baby products (strictest standards)
Weight in Health score: 15%
"""

import requests
import time
from datetime import datetime

# ── EU Cosmetics Regulation endpoints ─────────────────────────
# CosIng — EU Commission cosmetic ingredient database
COSING_API = "https://ec.europa.eu/growth/tools-databases/cosing/index.cfm"
COSING_SEARCH = "https://cosing-connect.zakopower.de/api/ingredients"  # unofficial mirror

# ── EU Annex classifications ───────────────────────────────────
# Under EU Cosmetics Regulation 1223/2009:
# Annex II  = PROHIBITED substances
# Annex III = RESTRICTED substances (with conditions/limits)
# Annex IV  = PERMITTED colorants
# Annex V   = PERMITTED preservatives
# Annex VI  = PERMITTED UV filters

EU_ANNEX_DEDUCTIONS = {
    "Annex II — Prohibited":    -40,   # Banned outright — never acceptable
    "Annex III — Restricted":   -15,   # Allowed only under specific conditions
    "SCCS — Not Safe":          -30,   # SCCS opinion: not safe for use
    "SCCS — Conditionally Safe": -10,  # Safe only at specific concentrations
    "Baby — Not Recommended":   -20,   # Safe for adults, not for children <3y
}

# ── SCCS hardcoded opinions (most relevant, current as of 2026) ─
# Source: SCCS opinions published at:
# https://health.ec.europa.eu/scientific-committees/scientific-committee-consumer-safety-sccs_en

SCCS_OPINIONS = {
    # ── BANNED (Annex II) ──────────────────────────────────────
    "lead acetate": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited in cosmetics — neurotoxin",
        "deduction": -40,
        "baby_safe": False
    },
    "mercury": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited — neurotoxin, bioaccumulative",
        "deduction": -40,
        "baby_safe": False
    },
    "hydroquinone": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited in cosmetics — cytotoxic, potential carcinogen",
        "deduction": -40,
        "baby_safe": False
    },
    "resorcinol": {
        "annex": "Annex III — Restricted",
        "opinion": "Restricted — endocrine disruption, skin sensitiser",
        "deduction": -15,
        "baby_safe": False
    },
    "kojic acid": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 1% in face products — genotoxicity concerns",
        "deduction": -15,
        "baby_safe": False
    },

    # ── PARABENS ──────────────────────────────────────────────
    "methylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.4% — endocrine disruption at higher concentrations",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "0.4%"
    },
    "ethylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.4% — moderate endocrine concern",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "0.4%"
    },
    "propylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.14% total — endocrine disruption",
        "deduction": -15,
        "baby_safe": False,
        "max_concentration": "0.14% combined with butylparaben"
    },
    "butylparaben": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.14% total — endocrine disruption, reproductive toxicity",
        "deduction": -15,
        "baby_safe": False,
        "max_concentration": "0.14% combined with propylparaben"
    },
    "isopropylparaben": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited — insufficient safety data, endocrine concern",
        "deduction": -40,
        "baby_safe": False
    },
    "isobutylparaben": {
        "annex": "Annex II — Prohibited",
        "opinion": "Prohibited — insufficient safety data, endocrine concern",
        "deduction": -40,
        "baby_safe": False
    },

    # ── UV FILTERS ────────────────────────────────────────────
    "benzophenone-3": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 6% — endocrine disruption, requires warning label",
        "deduction": -12,
        "baby_safe": False,
        "max_concentration": "6%",
        "warning_required": True
    },
    "homosalate": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 7.34% — endocrine disruption confirmed",
        "deduction": -12,
        "baby_safe": False,
        "max_concentration": "7.34%"
    },
    "octocrylene": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 10% — benzophenone contamination risk",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "10%"
    },
    "4-methylbenzylidene camphor": {
        "annex": "Annex VI — UV Filter (Restricted)",
        "opinion": "Max 4% — endocrine disruptor",
        "deduction": -15,
        "baby_safe": False
    },

    # ── PRESERVATIVES ─────────────────────────────────────────
    "phenoxyethanol": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 1% — not recommended for products on nappy area (babies)",
        "deduction": -5,
        "baby_safe": False,
        "baby_deduction": -20,
        "max_concentration": "1%"
    },
    "chlorphenesin": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.3% — not for use around mouth area",
        "deduction": -8,
        "baby_safe": False,
        "max_concentration": "0.3%"
    },
    "mit": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Methylisothiazolinone — max 0.0015% rinse-off only. Banned leave-on.",
        "deduction": -20,
        "baby_safe": False
    },
    "methylisothiazolinone": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.0015% rinse-off only. Banned in leave-on products.",
        "deduction": -20,
        "baby_safe": False
    },
    "kathon cg": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "MIT/MCI blend — rinse-off only, max 0.0015%",
        "deduction": -20,
        "baby_safe": False
    },
    "formaldehyde": {
        "annex": "Annex V — Preservative (Restricted)",
        "opinion": "Max 0.2% (0.1% oral products) — carcinogen warning required",
        "deduction": -25,
        "baby_safe": False,
        "warning_required": True
    },

    # ── RETINOIDS ─────────────────────────────────────────────
    "retinol": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 0.3% face, 0.05% body — photosensitising, not for pregnancy",
        "deduction": -10,
        "baby_safe": False,
        "max_concentration": "0.3% face / 0.05% body"
    },
    "retinyl palmitate": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 0.3% — photosensitising at high concentrations",
        "deduction": -5,
        "baby_safe": False,
        "max_concentration": "0.3%"
    },

    # ── FRAGRANCE ALLERGENS ───────────────────────────────────
    "lilial": {
        "annex": "Annex II — Prohibited",
        "opinion": "Banned 2022 — reproductive toxicity (STOT RE)",
        "deduction": -40,
        "baby_safe": False
    },
    "eugenol": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared on label above 0.001% leave-on",
        "deduction": -5,
        "baby_safe": False
    },
    "linalool": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared on label — common allergen",
        "deduction": -5,
        "baby_safe": False
    },
    "limonene": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared on label — oxidises to allergen",
        "deduction": -5,
        "baby_safe": False
    },
    "cinnamal": {
        "annex": "Annex III — Restricted",
        "opinion": "Must be declared — strong skin sensitiser",
        "deduction": -8,
        "baby_safe": False
    },

    # ── HAIR DYES ─────────────────────────────────────────────
    "ppd": {
        "annex": "Annex III — Restricted",
        "opinion": "p-Phenylenediamine — max 2%, warning required, not for eyebrows",
        "deduction": -15,
        "baby_safe": False
    },
    "p-phenylenediamine": {
        "annex": "Annex III — Restricted",
        "opinion": "Max 2% — strong sensitiser, carcinogenicity concerns",
        "deduction": -15,
        "baby_safe": False
    },

    # ── WELL SUPPORTED (positive signals) ─────────────────────
    "glycerin": {
        "annex": "No restrictions",
        "opinion": "Safe as used — no concentration limits",
        "deduction": 0,
        "baby_safe": True
    },
    "tocopherol": {
        "annex": "No restrictions",
        "opinion": "Safe as used — antioxidant",
        "deduction": 0,
        "baby_safe": True
    },
    "sodium hyaluronate": {
        "annex": "No restrictions",
        "opinion": "Safe as used — well tolerated",
        "deduction": 0,
        "baby_safe": True
    },
    "niacinamide": {
        "annex": "No restrictions",
        "opinion": "Safe as used — max 5% recommended for sensitive skin",
        "deduction": 0,
        "baby_safe": True
    },
    "zinc oxide": {
        "annex": "Annex VI — UV Filter (Permitted)",
        "opinion": "Safe as used up to 25% — not nanomaterial risk",
        "deduction": 0,
        "baby_safe": True
    },
}


def check_ingredient_sccs(ingredient_name: str,
                           category: str = "cosmetics",
                           is_baby: bool = False) -> dict:
    """
    Check a single ingredient against EU SCCS opinions and
    EU Cosmetics Regulation annexes.

    Args:
        ingredient_name: ingredient to check
        category: product category
        is_baby: True if product is for children under 3

    Returns:
        dict with SCCS opinion and score impact
    """
    name_lower = ingredient_name.lower().strip()

    result = {
        "ingredient":    ingredient_name,
        "source":        "EU SCCS",
        "annex":         None,
        "opinion":       None,
        "deduction":     0,
        "baby_safe":     None,
        "warning":       False,
        "max_conc":      None,
        "confidence":    "LOW",
        "timestamp":     datetime.now().isoformat()
    }

    # Check exact and partial matches
    for key, data in SCCS_OPINIONS.items():
        if key in name_lower or name_lower in key or name_lower == key:
            deduction = data["deduction"]

            # Apply stricter baby penalty if applicable
            if is_baby and not data.get("baby_safe", True):
                baby_deduction = data.get("baby_deduction", deduction * 1.5)
                deduction = min(deduction, int(baby_deduction))

            result.update({
                "annex":      data["annex"],
                "opinion":    data["opinion"],
                "deduction":  deduction,
                "baby_safe":  data.get("baby_safe"),
                "warning":    data.get("warning_required", False),
                "max_conc":   data.get("max_concentration"),
                "confidence": "HIGH"
            })
            return result

    # Not found in hardcoded list — treat as unreviewed
    result.update({
        "annex":      "Not reviewed by SCCS",
        "opinion":    "No SCCS opinion available — insufficient data",
        "deduction":  0,
        "confidence": "LOW"
    })
    return result


def check_formula_sccs(ingredients: list,
                        category: str = "cosmetics",
                        is_baby: bool = False) -> dict:
    """
    Check an entire formula against EU SCCS opinions.
    Returns aggregated results with total score impact.

    Args:
        ingredients: list of ingredient name strings
        category: product category
        is_baby: True if product is for children under 3

    Returns:
        dict with all flags, deductions, and summary
    """
    results = []
    total_deduction = 0
    flagged = []
    prohibited = []
    restricted = []
    warnings = []

    for ingredient in ingredients:
        r = check_ingredient_sccs(ingredient, category, is_baby)
        results.append(r)

        if r["deduction"] < 0:
            flagged.append(r)
            total_deduction += r["deduction"]

            if "Prohibited" in (r["annex"] or ""):
                prohibited.append(ingredient)
            elif "Restricted" in (r["annex"] or ""):
                restricted.append(ingredient)

            if r["warning"]:
                warnings.append(f"{ingredient}: {r['opinion']}")

    # Cap total deduction
    total_deduction = max(total_deduction, -50)

    # Build summary
    parts = []
    if prohibited:
        parts.append(f"{len(prohibited)} PROHIBITED ingredient(s): {', '.join(prohibited)}")
    if restricted:
        parts.append(f"{len(restricted)} restricted ingredient(s): {', '.join(restricted)}")
    if not flagged:
        parts.append("No EU Cosmetics Regulation violations detected")

    return {
        "source":              "EU SCCS / EU Cosmetics Regulation 1223/2009",
        "category":            category,
        "is_baby":             is_baby,
        "ingredients_checked": len(ingredients),
        "flagged_count":       len(flagged),
        "prohibited":          prohibited,
        "restricted":          restricted,
        "warnings":            warnings,
        "total_deduction":     total_deduction,
        "summary":             " | ".join(parts),
        "all_results":         results
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — EU SCCS Health Engine Self-Test\n")

    test_formula = [
        "aqua",
        "glycerin",
        "methylparaben",
        "butylparaben",
        "phenoxyethanol",
        "benzophenone-3",
        "retinol",
        "sodium hyaluronate",
        "lilial",
        "niacinamide",
        "tocopherol",
        "limonene"
    ]

    print("Adult cosmetics formula:")
    print("-" * 55)
    adult = check_formula_sccs(test_formula, "cosmetics", is_baby=False)
    print(f"  Ingredients checked:  {adult['ingredients_checked']}")
    print(f"  Flagged:              {adult['flagged_count']}")
    print(f"  Prohibited:           {adult['prohibited']}")
    print(f"  Restricted:           {adult['restricted']}")
    print(f"  Total deduction:      {adult['total_deduction']}")
    print(f"  Summary:              {adult['summary']}")

    print("\nSame formula — baby product:")
    print("-" * 55)
    baby = check_formula_sccs(test_formula, "baby", is_baby=True)
    print(f"  Total deduction:      {baby['total_deduction']}")
    print(f"  Prohibited:           {baby['prohibited']}")
    print(f"  Restricted:           {baby['restricted']}")
    print(f"\n  Note: Stricter penalties applied for baby category")

    print("\n✅ EU SCCS module ready.")


Writing /content/noura_health_sccs.py


In [56]:
%%writefile /content/noura_health_cir.py
"""
NOURA Health Engine — CIR Module
Cell 28

The Cosmetic Ingredient Review (CIR) is an independent panel of
scientific experts that reviews the safety of cosmetic ingredients.
It is the primary US safety standard for cosmetic ingredients,
complementing the EU SCCS system.

CIR provides:
- Safety assessments with concentration limits
- "Safe as Used" vs "Unsafe" vs "Insufficient Data" conclusions
- Specific use restrictions (rinse-off vs leave-on, etc.)
- Re-evaluation dates when new evidence emerges

Covers: cosmetics, baby products
Weight in Health score: 10%
"""

from datetime import datetime

# ── CIR conclusion types and their deductions ──────────────────
CIR_CONCLUSIONS = {
    "safe_as_used":         0,     # Fully safe — no deduction
    "safe_with_limits":    -8,     # Safe only at specific concentrations
    "insufficient_data":  -10,     # Not enough data to conclude safety
    "unsafe":             -30,     # CIR concluded unsafe
    "not_supported":      -25,     # Safety not supported by available data
}

# ── CIR ingredient database ────────────────────────────────────
# Source: CIR Compendium — https://cir-safety.org/ingredients
# Includes all ingredients with non-trivial findings.
# "Safe as used" ingredients with no restrictions are omitted
# (they contribute 0 deduction and add no signal).

CIR_DATABASE = {

    # ── PRESERVATIVES ─────────────────────────────────────────
    "methylparaben": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.4% in leave-on, 0.8% total parabens",
        "notes":          "Safe as used at current concentrations in cosmetics",
        "concern":        "Endocrine activity at high concentrations",
        "deduction":      -5
    },
    "propylparaben": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.4% leave-on",
        "notes":          "Safe at current use concentrations",
        "concern":        "Endocrine disruption concerns at elevated levels",
        "deduction":      -8
    },
    "butylparaben": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.4% leave-on",
        "notes":          "Safe at current use concentrations",
        "concern":        "Endocrine disruption, not recommended for children",
        "deduction":      -10
    },
    "phenoxyethanol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "1.0%",
        "notes":          "Safe as preservative at 1%",
        "concern":        "Central nervous system effects at high doses",
        "deduction":      -5
    },
    "methylisothiazolinone": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.0015% rinse-off only",
        "notes":          "Not safe in leave-on products",
        "concern":        "Strong skin sensitiser — high allergy rates reported",
        "deduction":      -20
    },
    "imidazolidinyl urea": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.5%",
        "notes":          "Formaldehyde releaser — safe at low concentrations",
        "concern":        "Releases formaldehyde — carcinogen risk at higher levels",
        "deduction":      -12
    },
    "dmdm hydantoin": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.6%",
        "notes":          "Formaldehyde releaser",
        "concern":        "Formaldehyde release — carcinogen, sensitiser",
        "deduction":      -15
    },
    "quaternium-15": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.2%",
        "notes":          "Highest-releasing formaldehyde preservative",
        "concern":        "Formaldehyde releaser — strong sensitiser",
        "deduction":      -18
    },

    # ── UV FILTERS ────────────────────────────────────────────
    "benzophenone-3": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "6%",
        "notes":          "Safe at 6% with warning label",
        "concern":        "Endocrine disruption, systemic absorption",
        "deduction":      -10
    },
    "homosalate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "15%",
        "notes":          "CIR reviewing endocrine data — limit lowered pending review",
        "concern":        "Endocrine disruption — under re-evaluation",
        "deduction":      -10
    },
    "octinoxate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "7.5%",
        "notes":          "FDA proposed insufficient safety data for OTC",
        "concern":        "Endocrine disruption, coral reef toxicity",
        "deduction":      -8
    },
    "octocrylene": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "10%",
        "notes":          "Degrades to benzophenone — contamination concern",
        "concern":        "Benzophenone contamination risk on shelf",
        "deduction":      -8
    },

    # ── RETINOIDS ─────────────────────────────────────────────
    "retinol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.3% face / 0.05% body",
        "notes":          "Not for use around eyes or on sun-exposed skin",
        "concern":        "Photosensitising, teratogenic risk in pregnancy",
        "deduction":      -8
    },
    "retinyl palmitate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "1.0%",
        "notes":          "Photosensitisation concern",
        "concern":        "Photosensitising at higher concentrations",
        "deduction":      -5
    },

    # ── SURFACTANTS ───────────────────────────────────────────
    "sodium lauryl sulfate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "rinse-off only at normal use levels",
        "notes":          "Not for prolonged skin contact",
        "concern":        "Skin barrier disruption, irritant at high concentrations",
        "deduction":      -8
    },
    "sodium laureth sulfate": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "rinse-off products",
        "notes":          "1,4-dioxane contamination risk from ethoxylation",
        "concern":        "1,4-dioxane contaminant — probable carcinogen",
        "deduction":      -5
    },
    "cocamide dea": {
        "conclusion":     "not_supported",
        "max_conc":       "N/A",
        "notes":          "Listed as possible carcinogen — IARC Group 2B",
        "concern":        "Possible carcinogen, nitrosamine formation",
        "deduction":      -20
    },
    "diethanolamine": {
        "conclusion":     "unsafe",
        "max_conc":       "N/A — avoid",
        "notes":          "CIR: not safe in products that can form nitrosamines",
        "concern":        "Nitrosamine precursor — carcinogen",
        "deduction":      -25
    },

    # ── SKIN LIGHTENERS ───────────────────────────────────────
    "hydroquinone": {
        "conclusion":     "unsafe",
        "max_conc":       "Not safe in cosmetics",
        "notes":          "CIR concluded not safe for cosmetic use",
        "concern":        "Cytotoxic, genotoxic, carcinogenic potential",
        "deduction":      -30
    },
    "kojic acid": {
        "conclusion":     "insufficient_data",
        "max_conc":       "1% recommended maximum",
        "notes":          "Insufficient data — genotoxicity concerns",
        "concern":        "Genotoxicity potential — needs more data",
        "deduction":      -12
    },

    # ── FRAGRANCE COMPONENTS ──────────────────────────────────
    "fragrance": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "IFRA guidelines apply",
        "notes":          "Generic 'fragrance' masks individual ingredients",
        "concern":        "Undisclosed allergens, potential sensitisers",
        "deduction":      -8
    },
    "parfum": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "IFRA guidelines apply",
        "notes":          "EU term for fragrance — same concerns apply",
        "concern":        "Undisclosed allergens, potential sensitisers",
        "deduction":      -8
    },
    "eugenol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "0.5% leave-on",
        "notes":          "Must be labelled above threshold",
        "concern":        "Skin sensitiser — common allergen",
        "deduction":      -5
    },

    # ── ALCOHOLS ─────────────────────────────────────────────
    "denatured alcohol": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "Normal cosmetic use",
        "notes":          "Drying at high concentrations",
        "concern":        "Skin barrier disruption with prolonged use",
        "deduction":      -3
    },
    "alcohol denat": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "Normal cosmetic use",
        "notes":          "Drying — avoid in high concentrations",
        "concern":        "Skin barrier disruption",
        "deduction":      -3
    },

    # ── TALC ─────────────────────────────────────────────────
    "talc": {
        "conclusion":     "safe_with_limits",
        "max_conc":       "Must be asbestos-free",
        "notes":          "Safe if asbestos-free — inhalation risk for baby powder",
        "concern":        "Asbestos contamination risk, inhalation hazard (baby)",
        "deduction":      -5,
        "baby_deduction": -20
    },

    # ── CLEAN / SAFE INGREDIENTS (explicitly confirmed) ───────
    "glycerin": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe and well-tolerated humectant",
        "concern":        None,
        "deduction":      0
    },
    "niacinamide": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe — well studied, good tolerability",
        "concern":        None,
        "deduction":      0
    },
    "hyaluronic acid": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe humectant — no concerns",
        "concern":        None,
        "deduction":      0
    },
    "sodium hyaluronate": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Safe humectant",
        "concern":        None,
        "deduction":      0
    },
    "tocopherol": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Vitamin E — safe antioxidant",
        "concern":        None,
        "deduction":      0
    },
    "panthenol": {
        "conclusion":     "safe_as_used",
        "max_conc":       "No restriction",
        "notes":          "Provitamin B5 — safe and effective",
        "concern":        None,
        "deduction":      0
    },
    "zinc oxide": {
        "conclusion":     "safe_as_used",
        "max_conc":       "25% UV protection use",
        "notes":          "Safe — non-nano preferred",
        "concern":        None,
        "deduction":      0
    },
}


def check_ingredient_cir(ingredient_name: str,
                          is_baby: bool = False) -> dict:
    """
    Check a single ingredient against the CIR database.

    Args:
        ingredient_name: ingredient to check
        is_baby: True if product is for children

    Returns:
        dict with CIR safety data and score impact
    """
    name_lower = ingredient_name.lower().strip()

    result = {
        "ingredient":  ingredient_name,
        "source":      "CIR",
        "conclusion":  None,
        "max_conc":    None,
        "concern":     None,
        "deduction":   0,
        "baby_safe":   None,
        "confidence":  "LOW",
        "timestamp":   datetime.now().isoformat()
    }

    # Check database
    for key, data in CIR_DATABASE.items():
        if key in name_lower or name_lower == key:
            deduction = data["deduction"]

            # Apply baby penalty where applicable
            if is_baby and "baby_deduction" in data:
                deduction = data["baby_deduction"]

            result.update({
                "conclusion": data["conclusion"],
                "max_conc":   data.get("max_conc"),
                "concern":    data.get("concern"),
                "notes":      data.get("notes"),
                "deduction":  deduction,
                "confidence": "HIGH"
            })
            return result

    # Not in database
    result.update({
        "conclusion": "not_reviewed",
        "concern":    "No CIR assessment available",
        "confidence": "LOW"
    })
    return result


def check_formula_cir(ingredients: list,
                       is_baby: bool = False) -> dict:
    """
    Check an entire formula against CIR assessments.
    Returns aggregated results and total score impact.
    """
    results = []
    total_deduction = 0
    flagged = []
    unsafe = []
    restricted = []

    for ingredient in ingredients:
        r = check_ingredient_cir(ingredient, is_baby)
        results.append(r)

        if r["deduction"] < 0:
            flagged.append(r)
            total_deduction += r["deduction"]

            if r["conclusion"] in ("unsafe", "not_supported"):
                unsafe.append(ingredient)
            elif r["conclusion"] in ("safe_with_limits", "insufficient_data"):
                restricted.append(ingredient)

    # Cap total deduction
    total_deduction = max(total_deduction, -45)

    parts = []
    if unsafe:
        parts.append(f"{len(unsafe)} unsafe ingredient(s): {', '.join(unsafe)}")
    if restricted:
        parts.append(f"{len(restricted)} restricted ingredient(s): {', '.join(restricted)}")
    if not flagged:
        parts.append("All ingredients pass CIR safety review")

    return {
        "source":              "CIR — Cosmetic Ingredient Review",
        "is_baby":             is_baby,
        "ingredients_checked": len(ingredients),
        "flagged_count":       len(flagged),
        "unsafe":              unsafe,
        "restricted":          restricted,
        "total_deduction":     total_deduction,
        "summary":             " | ".join(parts),
        "all_results":         results
    }


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — CIR Health Engine Self-Test\n")

    test_formula = [
        "aqua",
        "glycerin",
        "sodium lauryl sulfate",
        "methylparaben",
        "phenoxyethanol",
        "retinol",
        "fragrance",
        "niacinamide",
        "cocamide dea",
        "tocopherol",
        "talc"
    ]

    print("Adult cosmetics formula:")
    print("-" * 55)
    adult = check_formula_cir(test_formula, is_baby=False)
    print(f"  Ingredients checked: {adult['ingredients_checked']}")
    print(f"  Flagged:             {adult['flagged_count']}")
    print(f"  Unsafe:              {adult['unsafe']}")
    print(f"  Restricted:          {adult['restricted']}")
    print(f"  Total deduction:     {adult['total_deduction']}")
    print(f"  Summary:             {adult['summary']}")

    print("\nSame formula — baby product:")
    print("-" * 55)
    baby = check_formula_cir(test_formula, is_baby=True)
    print(f"  Total deduction:     {baby['total_deduction']}")
    print(f"  (Talc gets -20 penalty instead of -5 for baby)")

    print("\n✅ CIR module ready.")

Writing /content/noura_health_cir.py


In [57]:
%%writefile /content/noura_health_aggregator.py
"""
NOURA Health Engine — Master Aggregator
Cell 29

Combines all health data sources into a single, deterministic health score:

    Source              Weight    Status
    ─────────────────────────────────────
    PubMed              40%       ✅ Built (Weeks 1-10)
    ECHA (SVHC/CMR)     25%       ✅ Cell 25
    EU SCCS             15%       ✅ Cell 27
    CIR                 10%       ✅ Cell 28
    FDA OpenFDA         10%       ✅ Cell 26

Same input → Same output. Always.
Health score feeds into final NOURA score at 70% weight.
"""

import sys
import time
from datetime import datetime

# ── Import all health modules ──────────────────────────────────
sys.path.insert(0, '/content')

try:
    from noura_health_echa import check_formula_echa
    ECHA_AVAILABLE = True
except ImportError:
    ECHA_AVAILABLE = False
    print("⚠  ECHA module not found — using fallback")

try:
    from noura_health_fda import check_product_fda
    FDA_AVAILABLE = True
except ImportError:
    FDA_AVAILABLE = False
    print("⚠  FDA module not found — using fallback")

try:
    from noura_health_sccs import check_formula_sccs
    SCCS_AVAILABLE = True
except ImportError:
    SCCS_AVAILABLE = False
    print("⚠  SCCS module not found — using fallback")

try:
    from noura_health_cir import check_formula_cir
    CIR_AVAILABLE = True
except ImportError:
    CIR_AVAILABLE = False
    print("⚠  CIR module not found — using fallback")

try:
    from noura_config import get_weights, get_verdict, BABY_SCORE_PENALTY
    CONFIG_AVAILABLE = True
except ImportError:
    CONFIG_AVAILABLE = False
    print("⚠  Config not found — using defaults")

# ── Source weights within the Health dimension ─────────────────
HEALTH_SOURCE_WEIGHTS = {
    "pubmed": 0.40,   # Peer-reviewed science — highest weight
    "echa":   0.25,   # EU regulatory — SVHC, CMR, endocrine
    "sccs":   0.15,   # EU cosmetics regulation
    "cir":    0.10,   # US cosmetic ingredient review
    "fda":    0.10,   # Recalls and adverse events
}

# Base health score — deductions applied on top
BASE_HEALTH_SCORE = 100


def _pubmed_score(pubmed_result: dict) -> dict:
    """
    Extract health score contribution from PubMed results.
    PubMed results come from noura_api_v2() already built in the notebook.
    """
    if not pubmed_result:
        return {"score": 70, "confidence": "LOW", "flags": [],
                "source": "PubMed", "note": "No PubMed data provided"}

    # noura_api_v2 returns health_score directly
    raw_score = pubmed_result.get("health_score", 70)
    flags = pubmed_result.get("ingredient_flags", [])
    confidence = pubmed_result.get("confidence", "LOW")

    return {
        "source":     "PubMed",
        "score":      max(0, min(100, raw_score)),
        "flags":      flags,
        "confidence": confidence
    }


def _echa_score(ingredients: list) -> dict:
    """Run ECHA check and convert deductions to 0-100 score."""
    if not ECHA_AVAILABLE or not ingredients:
        return {"score": 75, "confidence": "LOW", "flags": [],
                "source": "ECHA", "note": "ECHA unavailable"}

    result = check_formula_echa(ingredients)
    score = BASE_HEALTH_SCORE + result["total_score_impact"]
    score = max(0, min(100, score))

    flags = [
        f"{f['ingredient']} — {f['hazard_class']}: {f['concern']}"
        for f in result["svhc_found"]
    ]

    return {
        "source":     "ECHA",
        "score":      score,
        "flags":      flags,
        "svhc_count": result["svhc_count"],
        "confidence": result["svhc_found"][0]["confidence"] if result["svhc_found"] else "MEDIUM"
    }


def _sccs_score(ingredients: list, is_baby: bool = False) -> dict:
    """Run EU SCCS check and convert to 0-100 score."""
    if not SCCS_AVAILABLE or not ingredients:
        return {"score": 75, "confidence": "LOW", "flags": [],
                "source": "EU SCCS", "note": "SCCS unavailable"}

    result = check_formula_sccs(ingredients, is_baby=is_baby)
    score = BASE_HEALTH_SCORE + result["total_deduction"]
    score = max(0, min(100, score))

    flags = []
    for name in result["prohibited"]:
        flags.append(f"{name} — PROHIBITED under EU Cosmetics Regulation")
    for name in result["restricted"]:
        flags.append(f"{name} — Restricted (concentration limits apply)")

    return {
        "source":     "EU SCCS",
        "score":      score,
        "flags":      flags,
        "prohibited": result["prohibited"],
        "confidence": "HIGH" if result["flagged_count"] > 0 else "MEDIUM"
    }


def _cir_score(ingredients: list, is_baby: bool = False) -> dict:
    """Run CIR check and convert to 0-100 score."""
    if not CIR_AVAILABLE or not ingredients:
        return {"score": 75, "confidence": "LOW", "flags": [],
                "source": "CIR", "note": "CIR unavailable"}

    result = check_formula_cir(ingredients, is_baby=is_baby)
    score = BASE_HEALTH_SCORE + result["total_deduction"]
    score = max(0, min(100, score))

    flags = []
    for name in result["unsafe"]:
        flags.append(f"{name} — CIR: unsafe")
    for name in result["restricted"]:
        flags.append(f"{name} — CIR: restricted use")

    return {
        "source":     "CIR",
        "score":      score,
        "flags":      flags,
        "confidence": "HIGH" if result["flagged_count"] > 0 else "MEDIUM"
    }


def _fda_score(product_name: str, category: str) -> dict:
    """Run FDA check and convert to 0-100 score."""
    if not FDA_AVAILABLE:
        return {"score": 90, "confidence": "LOW", "flags": [],
                "source": "FDA", "note": "FDA unavailable"}

    result = check_product_fda(product_name, category)
    score = BASE_HEALTH_SCORE + result["total_score_impact"]
    score = max(0, min(100, score))

    return {
        "source":     "FDA OpenFDA",
        "score":      score,
        "flags":      result["flags"],
        "recalls":    result["recall_count"],
        "adverse":    result["adverse_count"],
        "confidence": result["confidence"]
    }


def calculate_health_score(
    product_name:   str,
    ingredients:    list,
    category:       str = "cosmetics",
    pubmed_result:  dict = None
) -> dict:
    """
    Master health score calculator.
    Aggregates all 5 health data sources into one deterministic score.

    Args:
        product_name:  product name (for FDA lookup)
        ingredients:   list of ingredient name strings
        category:      'cosmetics', 'food', 'cleaning', 'baby'
        pubmed_result: existing result from noura_api_v2() if available

    Returns:
        {
            "health_score":    int (0-100),
            "verdict":         str,
            "flags":           list of concern strings,
            "source_scores":   dict of per-source scores,
            "confidence":      str,
            "summary":         str,
            "timestamp":       str
        }
    """
    is_baby = (category == "baby")

    print(f"\n  🔬 Running health analysis for: {product_name}")
    print(f"     Category: {category} | Ingredients: {len(ingredients)}")

    # ── Run all sources ────────────────────────────────────────
    print("     Checking PubMed...", end=" ")
    pubmed  = _pubmed_score(pubmed_result)
    print(f"score: {pubmed['score']}")

    print("     Checking ECHA...", end=" ")
    echa    = _echa_score(ingredients)
    print(f"score: {echa['score']}")

    print("     Checking EU SCCS...", end=" ")
    sccs    = _sccs_score(ingredients, is_baby)
    print(f"score: {sccs['score']}")

    print("     Checking CIR...", end=" ")
    cir     = _cir_score(ingredients, is_baby)
    print(f"score: {cir['score']}")

    print("     Checking FDA...", end=" ")
    fda     = _fda_score(product_name, category)
    print(f"score: {fda['score']}")

    # ── Weighted aggregation ───────────────────────────────────
    weighted_score = (
        pubmed["score"] * HEALTH_SOURCE_WEIGHTS["pubmed"] +
        echa["score"]   * HEALTH_SOURCE_WEIGHTS["echa"]   +
        sccs["score"]   * HEALTH_SOURCE_WEIGHTS["sccs"]   +
        cir["score"]    * HEALTH_SOURCE_WEIGHTS["cir"]    +
        fda["score"]    * HEALTH_SOURCE_WEIGHTS["fda"]
    )

    # Apply baby penalty — same score, stricter verdict threshold
    final_score = round(weighted_score)
    if is_baby:
        final_score = max(0, final_score + BABY_SCORE_PENALTY
                          if CONFIG_AVAILABLE else final_score - 10)

    # ── Collect all flags ──────────────────────────────────────
    all_flags = []
    for source in [echa, sccs, cir, fda, pubmed]:
        all_flags.extend(source.get("flags", []))

    # Deduplicate flags
    seen = set()
    unique_flags = []
    for flag in all_flags:
        key = flag[:50].lower()
        if key not in seen:
            seen.add(key)
            unique_flags.append(flag)

    # ── Determine verdict ──────────────────────────────────────
    if CONFIG_AVAILABLE:
        verdict = get_verdict(final_score, category)
    else:
        if final_score >= 80:   verdict = "CLEAN"
        elif final_score >= 60: verdict = "ACCEPTABLE"
        elif final_score >= 40: verdict = "CAUTION"
        else:                   verdict = "HIGHER_RISK"

    # ── Confidence — based on how many sources returned data ──
    sources_with_data = sum([
        pubmed_result is not None,
        ECHA_AVAILABLE,
        SCCS_AVAILABLE,
        CIR_AVAILABLE,
        FDA_AVAILABLE
    ])
    if sources_with_data >= 4:   confidence = "HIGH"
    elif sources_with_data >= 2: confidence = "MEDIUM"
    else:                         confidence = "LOW"

    # ── Summary ────────────────────────────────────────────────
    if not unique_flags:
        summary = f"No health concerns detected across {sources_with_data} sources."
    else:
        summary = (f"{len(unique_flags)} concern(s) detected. "
                   f"Highest risk: {unique_flags[0][:80]}")

    result = {
        "product":       product_name,
        "category":      category,
        "health_score":  final_score,
        "verdict":       verdict,
        "flags":         unique_flags,
        "flag_count":    len(unique_flags),
        "source_scores": {
            "pubmed": pubmed["score"],
            "echa":   echa["score"],
            "sccs":   sccs["score"],
            "cir":    cir["score"],
            "fda":    fda["score"],
        },
        "source_weights": HEALTH_SOURCE_WEIGHTS,
        "weighted_score": round(weighted_score),
        "final_score":    final_score,
        "confidence":     confidence,
        "summary":        summary,
        "is_baby":        is_baby,
        "timestamp":      datetime.now().isoformat()
    }

    return result


def print_health_report(result: dict):
    """Pretty-print a health score result to console."""
    verdict_icons = {
        "CLEAN":        "🟢",
        "ACCEPTABLE":   "🟡",
        "CAUTION":      "🟠",
        "HIGHER_RISK":  "🔴"
    }
    icon = verdict_icons.get(result["verdict"], "⚪")

    print(f"\n{'═'*55}")
    print(f"  NOURA HEALTH SCORE — {result['product'].upper()}")
    print(f"{'═'*55}")
    print(f"  {icon} Score:     {result['health_score']}/100")
    print(f"  Verdict:   {result['verdict']}")
    print(f"  Category:  {result['category']}")
    print(f"  Confidence:{result['confidence']}")
    print(f"\n  Source breakdown:")
    for source, score in result["source_scores"].items():
        weight = HEALTH_SOURCE_WEIGHTS[source]
        print(f"    {source.upper():<10} {score:>3}/100  (weight: {weight*100:.0f}%)")
    print(f"\n  Weighted health score: {result['weighted_score']}/100")
    if result["is_baby"]:
        print(f"  Baby penalty applied: -10")
    print(f"  Final health score:    {result['final_score']}/100")
    if result["flags"]:
        print(f"\n  ⚠  Concerns ({result['flag_count']}):")
        for flag in result["flags"][:5]:
            print(f"     • {flag[:70]}")
    else:
        print(f"\n  ✅ No health concerns detected")
    print(f"{'═'*55}\n")


# ── Self-test ──────────────────────────────────────────────────
if __name__ == "__main__":
    print("NOURA — Health Engine Aggregator Self-Test")
    print("Testing with two contrasting formulas\n")

    # Test 1: Clean product
    clean_ingredients = [
        "aqua", "glycerin", "sodium hyaluronate",
        "niacinamide", "tocopherol", "panthenol",
        "xanthan gum", "citric acid"
    ]

    clean_result = calculate_health_score(
        product_name  = "Weleda Skin Food",
        ingredients   = clean_ingredients,
        category      = "cosmetics",
        pubmed_result = {"health_score": 88, "confidence": "HIGH",
                         "ingredient_flags": []}
    )
    print_health_report(clean_result)

    # Test 2: Problematic formula
    problem_ingredients = [
        "aqua", "glycerin", "butylparaben",
        "methylisothiazolinone", "fragrance",
        "benzophenone-3", "cocamide dea",
        "dmdm hydantoin", "retinol"
    ]

    problem_result = calculate_health_score(
        product_name  = "Reformulation Candidate",
        ingredients   = problem_ingredients,
        category      = "cosmetics",
        pubmed_result = {"health_score": 42, "confidence": "HIGH",
                         "ingredient_flags": ["butylparaben", "MIT"]}
    )
    print_health_report(problem_result)

    print("✅ Health Engine Aggregator ready.")
    print("   Week 11 complete — all health sources integrated.")

Writing /content/noura_health_aggregator.py


In [59]:
# NOURA — Week 11 Integration Test
# Tests all 5 health modules working together

import sys
sys.path.insert(0, '/content')

# ── Step 1: Verify all modules loaded ─────────────────────────
print("Step 1: Checking all modules...")
modules = {
    "noura_config":             "/content/noura_config.py",
    "noura_health_echa":        "/content/noura_health_echa.py",
    "noura_health_fda":         "/content/noura_health_fda.py",
    "noura_health_sccs":        "/content/noura_health_sccs.py",
    "noura_health_cir":         "/content/noura_health_cir.py",
    "noura_health_aggregator":  "/content/noura_health_aggregator.py",
}

import os
all_present = True
for name, path in modules.items():
    exists = os.path.exists(path)
    status = "✅" if exists else "❌ MISSING"
    print(f"  {status} {name}")
    if not exists:
        all_present = False

if not all_present:
    print("\n⛔ Some modules missing — re-run the %%writefile cells above")
else:
    print("\nAll modules present. Running full test...\n")

    # ── Step 2: Import aggregator ──────────────────────────────
    from noura_health_aggregator import calculate_health_score, print_health_report

    # ── Step 3: Test clean product ─────────────────────────────
    weleda = calculate_health_score(
        product_name  = "Weleda Skin Food",
        ingredients   = ["aqua", "glycerin", "sodium hyaluronate",
                         "niacinamide", "tocopherol", "panthenol"],
        category      = "cosmetics",
        pubmed_result = {"health_score": 88, "confidence": "HIGH",
                         "ingredient_flags": []}
    )
    print_health_report(weleda)

    # ── Step 4: Test problematic product ──────────────────────
    problem = calculate_health_score(
        product_name  = "Reformulation Candidate",
        ingredients   = ["aqua", "glycerin", "butylparaben",
                         "methylisothiazolinone", "fragrance",
                         "benzophenone-3", "dmdm hydantoin"],
        category      = "cosmetics",
        pubmed_result = {"health_score": 42, "confidence": "HIGH",
                         "ingredient_flags": ["butylparaben", "MIT"]}
    )
    print_health_report(problem)

    # ── Step 5: Test baby category ─────────────────────────────
    baby = calculate_health_score(
        product_name  = "Johnson's Baby Lotion",
        ingredients   = ["aqua", "glycerin", "phenoxyethanol",
                         "talc", "fragrance"],
        category      = "baby",
        pubmed_result = {"health_score": 60, "confidence": "MEDIUM",
                         "ingredient_flags": ["phenoxyethanol"]}
    )
    print_health_report(baby)

    # ── Step 6: Sanity checks ──────────────────────────────────
    print("Sanity checks:")
    print("-" * 40)

    assert weleda["health_score"] > problem["health_score"], \
        "❌ FAIL: Clean product should score higher than problematic"
    print("  ✅ Clean product scores higher than problematic")

    assert weleda["verdict"] == "CLEAN", \
        f"❌ FAIL: Weleda should be CLEAN, got {weleda['verdict']}"
    print("  ✅ Weleda verdict is CLEAN")

    assert problem["verdict"] in ("CAUTION", "HIGHER_RISK"), \
        f"❌ FAIL: Problem product should be CAUTION or HIGHER_RISK"
    print("  ✅ Problematic product flagged correctly")

    assert baby["health_score"] <= weleda["health_score"], \
        "❌ FAIL: Baby product with phenoxyethanol should score lower than clean adult product"
    print("  ✅ Baby penalty applied correctly")

    assert len(problem["flags"]) > len(weleda["flags"]), \
        "❌ FAIL: Problem product should have more flags"
    print("  ✅ Flag count reflects product quality")

    print("\n✅ ALL TESTS PASSED — Week 11 health engine fully operational")

Step 1: Checking all modules...
  ✅ noura_config
  ✅ noura_health_echa
  ✅ noura_health_fda
  ✅ noura_health_sccs
  ✅ noura_health_cir
  ✅ noura_health_aggregator

All modules present. Running full test...


  🔬 Running health analysis for: Weleda Skin Food
     Category: cosmetics | Ingredients: 6
     Checking PubMed... score: 88
     Checking ECHA... score: 100
     Checking EU SCCS... score: 100
     Checking CIR... score: 100
     Checking FDA... score: 100

═══════════════════════════════════════════════════════
  NOURA HEALTH SCORE — WELEDA SKIN FOOD
═══════════════════════════════════════════════════════
  🟢 Score:     95/100
  Verdict:   CLEAN
  Category:  cosmetics
  Confidence:HIGH

  Source breakdown:
    PUBMED      88/100  (weight: 40%)
    ECHA       100/100  (weight: 25%)
    SCCS       100/100  (weight: 15%)
    CIR        100/100  (weight: 10%)
    FDA        100/100  (weight: 10%)

  Weighted health score: 95/100
  Final health score:    95/100

  ✅ No health concern

In [60]:
%%writefile /content/noura_environment_config.py
"""
NOURA Environment Engine — Configuration
Cell 30

Defines category-specific environment signal weights and data sources.
All environment scoring modules import from here.

Environment dimension = 15% of final NOURA score (all categories).
Within that 15%, signals are weighted differently per category.
"""

# ═══════════════════════════════════════════════════════════════
# SECTION 1: ENVIRONMENT SIGNALS PER CATEGORY
# Weights sum to 1.0 within each category.
# ═══════════════════════════════════════════════════════════════

ENVIRONMENT_SIGNALS = {

    "cosmetics": {
        "biodegradability": {
            "weight":      0.40,
            "source":      "ECHA REACH — aquatic hazard data",
            "what":        "Do ingredients break down safely in wastewater?",
            "status":      "🔲 Week 12",
            "confidence":  "HIGH when ECHA data available"
        },
        "packaging": {
            "weight":      0.30,
            "source":      "Packaging data — manual/brand-reported",
            "what":        "Recyclable, refillable, minimal, microplastic-free",
            "status":      "🔲 LOW confidence — no API source yet",
            "confidence":  "LOW — data pending"
        },
        "manufacturing": {
            "weight":      0.20,
            "source":      "B Corp + brand certifications",
            "what":        "Manufacturing emissions, ethical sourcing",
            "status":      "🔲 Week 13 (Governance overlap)",
            "confidence":  "LOW — data pending"
        },
        "palm_oil": {
            "weight":      0.10,
            "source":      "RSPO — Roundtable on Sustainable Palm Oil",
            "what":        "RSPO-certified palm oil derivatives",
            "status":      "🔲 Week 13",
            "confidence":  "LOW — data pending"
        }
    },

    "food": {
        "carbon_footprint": {
            "weight":      0.40,
            "source":      "Open Food Facts — Eco-Score (ADEME LCA data)",
            "what":        "GHG emissions across full supply chain",
            "status":      "🔲 Week 12",
            "confidence":  "HIGH when Open Food Facts data available"
        },
        "packaging": {
            "weight":      0.30,
            "source":      "Open Food Facts — packaging materials",
            "what":        "Recyclable, compostable, plastic-free",
            "status":      "🔲 Week 12",
            "confidence":  "MEDIUM — Open Food Facts has partial data"
        },
        "pesticides": {
            "weight":      0.20,
            "source":      "EFSA pesticide residue database",
            "what":        "Persistent pesticides, aquatic toxicity of residues",
            "status":      "🔲 Week 12",
            "confidence":  "MEDIUM"
        },
        "water_usage": {
            "weight":      0.10,
            "source":      "Open Food Facts + HowGood LCA",
            "what":        "Water consumption in production",
            "status":      "🔲 Future — low priority",
            "confidence":  "LOW — data pending"
        }
    },

    "cleaning": {
        "aquatic_toxicity": {
            "weight":      0.50,
            "source":      "ECHA REACH — aquatic hazard classifications",
            "what":        "Harm to aquatic ecosystems after going down drain",
            "status":      "🔲 Week 12",
            "confidence":  "HIGH when ECHA data available"
        },
        "vocs": {
            "weight":      0.30,
            "source":      "ECHA + EU Ecolabel VOC criteria",
            "what":        "Volatile organic compounds — indoor air quality",
            "status":      "🔲 Week 12",
            "confidence":  "MEDIUM"
        },
        "packaging": {
            "weight":      0.20,
            "source":      "EU Ecolabel + brand-reported",
            "what":        "Refillable, concentrated formula, recyclable",
            "status":      "🔲 LOW confidence — no API source yet",
            "confidence":  "LOW — data pending"
        }
    },

    "baby": {
        "biodegradability": {
            "weight":      0.40,
            "source":      "ECHA REACH — persistence and bioaccumulation",
            "what":        "Do ingredients persist in environment and food chain?",
            "status":      "🔲 Week 12",
            "confidence":  "HIGH when ECHA data available"
        },
        "packaging": {
            "weight":      0.30,
            "source":      "Brand-reported + EU Ecolabel",
            "what":        "BPA-free, recyclable, minimal plastic",
            "status":      "🔲 LOW confidence — data pending",
            "confidence":  "LOW"
        },
        "supply_chain": {
            "weight":      0.20,
            "source":      "B Corp + Fair Trade certifications",
            "what":        "Ethical sourcing, no microplastics",
            "status":      "🔲 Week 13",
            "confidence":  "LOW — data pending"
        },
        "cumulative_exposure": {
            "weight":      0.10,
            "source":      "ECHA + EFSA combined persistence data",
            "what":        "Long-term bioaccumulation risk for children",
            "status":      "🔲 Week 12",
            "confidence":  "MEDIUM"
        }
    }
}


# ═══════════════════════════════════════════════════════════════
# SECTION 2: ECHA ENVIRONMENTAL HAZARD CLASSIFICATIONS
# Used for biodegradability and aquatic toxicity scoring.
# Source: ECHA REACH Regulation — CLP Hazard Classification
# ═══════════════════════════════════════════════════════════════

ECHA_AQUATIC_HAZARDS = {
    # Surfactants — cleaning and cosmetics
    "sodium lauryl sulfate": {
        "aquatic_class":    "Chronic 3",
        "biodegradable":    True,
        "concern":          "Aquatic toxicity at high concentrations",
        "deduction":        -8
    },
    "sodium laureth sulfate": {
        "aquatic_class":    "Chronic 3",
        "biodegradable":    True,
        "concern":          "Moderate aquatic toxicity",
        "deduction":        -5
    },
    "cocamidopropyl betaine": {
        "aquatic_class":    "None",
        "biodegradable":    True,
        "concern":          None,
        "deduction":        0
    },
    "benzalkonium chloride": {
        "aquatic_class":    "Acute 1 / Chronic 1",
        "biodegradable":    False,
        "concern":          "Highly toxic to aquatic organisms, persistent",
        "deduction":        -30
    },
    "triclosan": {
        "aquatic_class":    "Acute 1 / Chronic 1",
        "biodegradable":    False,
        "concern":          "Highly toxic, endocrine disruptor, banned EU cosmetics",
        "deduction":        -35
    },
    "triclocarban": {
        "aquatic_class":    "Acute 1 / Chronic 1",
        "biodegradable":    False,
        "concern":          "Highly toxic to aquatic organisms, persistent",
        "deduction":        -30
    },

    # Preservatives
    "methylisothiazolinone": {
        "aquatic_class":    "Acute 1 / Chronic 1",
        "biodegradable":    False,
        "concern":          "Extremely toxic to aquatic life — kills algae at ppb levels",
        "deduction":        -35
    },
    "methylchloroisothiazolinone": {
        "aquatic_class":    "Acute 1 / Chronic 1",
        "biodegradable":    False,
        "concern":          "Extremely toxic to aquatic organisms",
        "deduction":        -35
    },
    "bronopol": {
        "aquatic_class":    "Acute 1 / Chronic 2",
        "biodegradable":    False,
        "concern":          "Toxic to aquatic life, nitrosamine precursor",
        "deduction":        -20
    },

    # UV filters
    "benzophenone-3": {
        "aquatic_class":    "Chronic 2",
        "biodegradable":    False,
        "concern":          "Coral reef toxicity, bioaccumulates in fish",
        "deduction":        -20
    },
    "octocrylene": {
        "aquatic_class":    "Chronic 2",
        "biodegradable":    False,
        "concern":          "Coral reef toxicity, bioaccumulates",
        "deduction":        -20
    },
    "octinoxate": {
        "aquatic_class":    "Chronic 2",
        "biodegradable":    False,
        "concern":          "Coral bleaching — banned Hawaii + Palau",
        "deduction":        -20
    },
    "homosalate": {
        "aquatic_class":    "Chronic 3",
        "biodegradable":    False,
        "concern":          "Bioaccumulates, moderate aquatic concern",
        "deduction":        -12
    },

    # Fragrances and musks
    "galaxolide": {
        "aquatic_class":    "Chronic 4",
        "biodegradable":    False,
        "concern":          "Persistent synthetic musk — bioaccumulates",
        "deduction":        -15
    },
    "tonalide": {
        "aquatic_class":    "Chronic 3",
        "biodegradable":    False,
        "concern":          "Synthetic musk — found in fish tissue worldwide",
        "deduction":        -15
    },

    # Cleaning-specific
    "sodium hypochlorite": {
        "aquatic_class":    "Acute 1",
        "biodegradable":    True,   # breaks down but toxic on contact
        "concern":          "Acutely toxic to aquatic life — degrades quickly",
        "deduction":        -10
    },
    "phosphates": {
        "aquatic_class":    "None — eutrophication risk",
        "biodegradable":    True,
        "concern":          "Causes algal blooms — eutrophication",
        "deduction":        -15
    },
    "nonylphenol ethoxylates": {
        "aquatic_class":    "Chronic 1",
        "biodegradable":    False,
        "concern":          "Highly persistent endocrine disruptor — banned EU",
        "deduction":        -35
    },
    "edta": {
        "aquatic_class":    "Chronic 3",
        "biodegradable":    False,
        "concern":          "Persistent chelating agent — mobilises heavy metals",
        "deduction":        -15
    },

    # Microplastics
    "polyethylene": {
        "aquatic_class":    "Microplastic",
        "biodegradable":    False,
        "concern":          "Microplastic — persistent marine pollutant",
        "deduction":        -25
    },
    "polypropylene": {
        "aquatic_class":    "Microplastic",
        "biodegradable":    False,
        "concern":          "Microplastic — persistent marine pollutant",
        "deduction":        -25
    },
    "nylon-12": {
        "aquatic_class":    "Microplastic",
        "biodegradable":    False,
        "concern":          "Microplastic in cosmetics",
        "deduction":        -25
    },
    "acrylates copolymer": {
        "aquatic_class":    "Microplastic risk",
        "biodegradable":    False,
        "concern":          "Potential microplastic source",
        "deduction":        -15
    },

    # Clean / biodegradable (positive signals)
    "glycerin":             {"aquatic_class": "None", "biodegradable": True,
                             "concern": None, "deduction": 0},
    "sodium hyaluronate":   {"aquatic_class": "None", "biodegradable": True,
                             "concern": None, "deduction": 0},
    "citric acid":          {"aquatic_class": "None", "biodegradable": True,
                             "concern": None, "deduction": 0},
    "sodium bicarbonate":   {"aquatic_class": "None", "biodegradable": True,
                             "concern": None, "deduction": 0},
    "lactic acid":          {"aquatic_class": "None", "biodegradable": True,
                             "concern": None, "deduction": 0},
    "sucrose":              {"aquatic_class": "None", "biodegradable": True,
                             "concern": None, "deduction": 0},
    "tocopherol":           {"aquatic_class": "None", "biodegradable": True,
                             "concern": None, "deduction": 0},
}


# ═══════════════════════════════════════════════════════════════
# SECTION 3: EU ECOLABEL CERTIFIED BRANDS
# Products from these brands get an environment score bonus.
# Source: EU Ecolabel product database
# https://ecolabel.eu/en/products-and-services
# ═══════════════════════════════════════════════════════════════

EU_ECOLABEL_BRANDS = {
    # Cleaning
    "ecover":       {"certified": True, "bonus": 10, "category": "cleaning"},
    "method":       {"certified": True, "bonus": 8,  "category": "cleaning"},
    "seventh generation": {"certified": True, "bonus": 8, "category": "cleaning"},
    "attitude":     {"certified": True, "bonus": 8,  "category": "cleaning"},
    "bio d":        {"certified": True, "bonus": 10, "category": "cleaning"},
    "sonett":       {"certified": True, "bonus": 10, "category": "cleaning"},

    # Cosmetics
    "weleda":       {"certified": True, "bonus": 8,  "category": "cosmetics"},
    "dr. hauschka": {"certified": True, "bonus": 8,  "category": "cosmetics"},
    "lavera":       {"certified": True, "bonus": 8,  "category": "cosmetics"},
    "natessance":   {"certified": True, "bonus": 6,  "category": "cosmetics"},

    # Baby
    "mustela":      {"certified": True, "bonus": 6,  "category": "baby"},
    "burt's bees":  {"certified": True, "bonus": 6,  "category": "baby"},
}


# ═══════════════════════════════════════════════════════════════
# SECTION 4: HELPER FUNCTIONS
# ═══════════════════════════════════════════════════════════════

def get_environment_signals(category: str) -> dict:
    """Return signal weights for a given product category."""
    if category not in ENVIRONMENT_SIGNALS:
        raise ValueError(f"Unknown category '{category}'. "
                         f"Choose from: {list(ENVIRONMENT_SIGNALS.keys())}")
    return ENVIRONMENT_SIGNALS[category]


def get_aquatic_hazard(ingredient: str) -> dict:
    """Return ECHA aquatic hazard data for an ingredient."""
    name_lower = ingredient.lower().strip()
    for key, data in ECHA_AQUATIC_HAZARDS.items():
        if key in name_lower or name_lower == key:
            return {"ingredient": ingredient, **data}
    return {
        "ingredient":    ingredient,
        "aquatic_class": "Unknown",
        "biodegradable": None,
        "concern":       None,
        "deduction":     0
    }


def check_ecolabel(brand_name: str) -> dict:
    """Check if a brand has EU Ecolabel certification."""
    name_lower = brand_name.lower().strip()
    for key, data in EU_ECOLABEL_BRANDS.items():
        if key in name_lower or name_lower == key:
            return {"brand": brand_name, **data}
    return {"brand": brand_name, "certified": False, "bonus": 0}


# ═══════════════════════════════════════════════════════════════
# SELF-TEST
# ═══════════════════════════════════════════════════════════════

if __name__ == "__main__":
    print("NOURA Environment Config — Self Test\n")

    print("Signal weights per category:")
    print("-" * 50)
    for cat, signals in ENVIRONMENT_SIGNALS.items():
        total = sum(s["weight"] for s in signals.values())
        print(f"  {cat.upper():<12} signals: {len(signals)} | "
              f"weights sum: {total:.1f} ✅" if abs(total - 1.0) < 0.001
              else f"  {cat.upper():<12} ❌ weights sum to {total}")

    print("\nAquatic hazard spot checks:")
    print("-" * 50)
    test_ings = ["triclosan", "methylisothiazolinone",
                 "glycerin", "benzophenone-3", "polyethylene"]
    for ing in test_ings:
        r = get_aquatic_hazard(ing)
        flag = "🔴" if r["deduction"] < -20 else \
               "🟡" if r["deduction"] < 0 else "🟢"
        print(f"  {flag} {ing}: {r['aquatic_class']} "
              f"(deduction: {r['deduction']})")

    print("\nEcolabel brand checks:")
    print("-" * 50)
    for brand in ["Weleda", "Ecover", "L'Oreal", "Sonett"]:
        r = check_ecolabel(brand)
        status = f"✅ certified (bonus: +{r['bonus']})" \
                 if r["certified"] else "— not certified"
        print(f"  {brand}: {status}")

    print("\n✅ Environment config ready.")

Writing /content/noura_environment_config.py
