<a href="https://colab.research.google.com/github/AnamariaVLR/noura-rag/blob/main/NOURA_RAG_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# NOURA - Cell 1: Verify environment
# Smoke test: seeing both lines means the kernel is executing cells.
print("NOURA is starting...", "Python ready", sep="\n")

NOURA is starting...
Python ready


In [8]:
# NOURA - Cell 2: Scoring methodology (NOURA core IP)

# Evidence source types, listed by descending base_weight. "base_weight" is the
# starting weight evaluate_evidence() gives a study of that type.
# "requires_independence_check" is read by evaluate_evidence() to decide whether
# the industry-funding penalty applies. "requires_dose_adjustment" is not read
# by any code in the visible cells (the dose rule there keys on the type name).
EVIDENCE_HIERARCHY = {
    "systematic_review_meta_analysis": dict(base_weight=1.00, requires_independence_check=True),
    "rct": dict(base_weight=0.85, requires_independence_check=True),
    "regulatory_opinion": dict(base_weight=0.75, requires_independence_check=False),
    "observational_cohort": dict(base_weight=0.60, requires_independence_check=True),
    "ewg_hazard": dict(base_weight=0.50, requires_dose_adjustment=True),
    "cosing_regional": dict(base_weight=0.45, requires_independence_check=False),
    "in_vitro": dict(base_weight=0.30, requires_independence_check=False),
    "clinical_case": dict(base_weight=0.15, requires_independence_check=False),
    "expert_opinion": dict(base_weight=0.10, requires_independence_check=True),
}

# Fraction removed from a study's weight when it is industry funded and its
# source type requires an independence check.
INDUSTRY_FUNDING_PENALTY = 0.20
# Scores strictly below this value receive the "HIGHER RISK" verdict.
HEALTH_HARD_BLOCK = 50
# NOTE(review): not referenced by the visible cells — presumably used by a
# planet/environment score defined elsewhere; confirm.
PLANET_FLAG_THRESHOLD = 40

# Upper bounds on the score when the evidence mix is too thin.
# calculate_health_score() applies "only_in_vitro_or_case" and
# "only_regulatory_strong"; the other two keys are not referenced in the
# visible cells.
SUFFICIENCY_CAPS = dict(
    only_in_vitro_or_case=60,
    only_regulatory_strong=70,
    only_regulatory_weak=50,
    single_rct=80,
)

# Minimum evidence per product category and claim.
# NOTE(review): not referenced by the visible cells.
CATEGORY_CLAIM_REQUIREMENTS = dict(
    skincare=dict(
        hydration=dict(min_evidence="rct", min_studies=1),
        anti_aging=dict(min_evidence="rct", min_studies=2),
        brightening=dict(min_evidence="observational_cohort", min_studies=1),
        acne=dict(min_evidence="rct", min_studies=2),
    ),
)

print(
    "Scoring methodology loaded",
    f"Evidence sources defined: {len(EVIDENCE_HIERARCHY)}",
    f"Health hard block threshold: {HEALTH_HARD_BLOCK}",
    sep="\n",
)

Scoring methodology loaded
Evidence sources defined: 9
Health hard block threshold: 50


In [9]:
# NOURA - Cell 3: Scoring engine (100% English)

def evaluate_evidence(source_type, industry_funded=False, dose_adjusted=True):
    """Turn one piece of evidence into a weight plus explanatory flags.

    Args:
        source_type: Key into EVIDENCE_HIERARCHY (e.g. "rct", "in_vitro").
        industry_funded: If True and the source type requires an independence
            check, the weight is reduced by INDUSTRY_FUNDING_PENALTY.
        dose_adjusted: Only consulted for "ewg_hazard"; when False the EWG
            entry gets weight 0.

    Returns:
        dict with "source_type", "weight" (rounded to 3 decimals; 0 for an
        unknown source type) and "flags" (list of human-readable notes).
    """
    spec = EVIDENCE_HIERARCHY.get(source_type)
    if spec is None:
        return {"weight": 0, "source_type": source_type, "flags": [f"Unknown source type: {source_type}"]}

    weight = spec["base_weight"]
    flags = []

    if industry_funded and spec.get("requires_independence_check"):
        weight *= 1 - INDUSTRY_FUNDING_PENALTY
        # Report the percentage from the constant so the message cannot drift
        # from the penalty that was actually applied.
        flags.append(f"Industry-funded study: weight reduced {INDUSTRY_FUNDING_PENALTY:.0%}")

    # NOTE(review): the hierarchy also carries a "requires_dose_adjustment"
    # switch, but this rule keys on the type name; kept as-is because the flag
    # text below is EWG-specific.
    if source_type == "ewg_hazard" and not dose_adjusted:
        weight = 0
        flags.append("EWG score excluded: not adjusted for actual product concentration")

    return {"source_type": source_type, "weight": round(weight, 3), "flags": flags}


def calculate_health_score(evaluated_evidence, prohibited=False):
    """Aggregate evaluated evidence into a 0-100 score and a verdict.

    Args:
        evaluated_evidence: Iterable of dicts as produced by
            evaluate_evidence(); each needs "source_type" and "weight".
        prohibited: True when a regulator prohibits the ingredient; this
            short-circuits to score 0 / "HIGHER RISK".

    Returns:
        dict with:
          "score": float rounded to 1 decimal, 0 when prohibited, or None
              when there is nothing to score.
          "verdict": "WELL SUPPORTED", "LIMITED SUPPORT", "HIGHER RISK" or
              "INSUFFICIENT DATA".
          "flag": explanatory note or None.
          "evidence_situation": "regulatory_block", "no_evidence", "only_lab",
              "only_regulatory" or "sufficient".
    """
    if prohibited:
        return {
            "score": 0,
            "verdict": "HIGHER RISK",
            "flag": "Ingredient prohibited by regulatory authority",
            "evidence_situation": "regulatory_block"
        }

    if not evaluated_evidence:
        return {
            "score": None,
            "verdict": "INSUFFICIENT DATA",
            "flag": "No scientific evidence retrieved for this ingredient",
            "evidence_situation": "no_evidence"
        }

    # Entries whose weight was zeroed upstream (EWG score flagged as excluded,
    # unknown source type) carry no evidential value. Averaging them in would
    # silently lower the score, so they are dropped before scoring.
    usable = [e for e in evaluated_evidence if e["weight"] > 0]
    if not usable:
        return {
            "score": None,
            "verdict": "INSUFFICIENT DATA",
            "flag": "All retrieved evidence was excluded or of unknown type; nothing left to score",
            "evidence_situation": "no_evidence"
        }

    source_types = {e["source_type"] for e in usable}
    only_lab = source_types <= {"in_vitro", "clinical_case"}
    only_regulatory = source_types <= {"regulatory_opinion", "cosing_regional"}

    avg_weight = sum(e["weight"] for e in usable) / len(usable)
    raw_score = round(avg_weight * 100, 1)

    # Apply sufficiency caps; the cap value in the message is read from
    # SUFFICIENCY_CAPS so the text always matches the configured limit.
    if only_lab:
        cap = SUFFICIENCY_CAPS["only_in_vitro_or_case"]
        score = min(raw_score, cap)
        flag = f"Health score capped at {cap} — only lab-based evidence retrieved; human clinical data insufficient"
        situation = "only_lab"
    elif only_regulatory:
        cap = SUFFICIENCY_CAPS["only_regulatory_strong"]
        score = min(raw_score, cap)
        flag = f"Health score capped at {cap} — regulatory approval present but no clinical studies retrieved"
        situation = "only_regulatory"
    else:
        score = raw_score
        flag = None
        situation = "sufficient"

    # Assign verdict. The hard block is checked first, so with the default
    # HEALTH_HARD_BLOCK of 50 the 41-49 band is reported as HIGHER RISK.
    well_supported_min = 71
    limited_support_min = 41
    if score < HEALTH_HARD_BLOCK:
        verdict = "HIGHER RISK"
    elif score >= well_supported_min:
        verdict = "WELL SUPPORTED"
    elif score >= limited_support_min:
        verdict = "LIMITED SUPPORT"
    else:
        verdict = "HIGHER RISK"

    return {
        "score": score,
        "verdict": verdict,
        "flag": flag,
        "evidence_situation": situation
    }

# Status line printed after the scoring functions in this cell are defined.
print("Scoring engine loaded")

Scoring engine loaded


In [10]:
# NOURA - Cell 4: PubMed connection
import requests

def search_pubmed(ingredient, max_results=10, timeout=10):
    """Look up studies about an ingredient through the NCBI E-utilities API.

    Runs an ESearch for "<ingredient> safety skin human", then an ESummary
    for the returned ids.

    Args:
        ingredient: Ingredient name; it is embedded in the search term as-is.
        max_results: Value sent as ESearch "retmax".
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        dict with "ingredient", "studies_found" (int) and "studies", a list of
        {"id", "title", "year", "source"} dicts. "year" is the first four
        characters of the record's "pubdate".

    Raises:
        requests.HTTPError: if either endpoint answers with an error status.
        requests.Timeout: if a request exceeds ``timeout``.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    # Without a timeout, requests waits indefinitely on a stalled connection.
    search_response = requests.get(
        f"{base_url}esearch.fcgi",
        params={"db": "pubmed", "term": f"{ingredient} safety skin human",
                "retmax": max_results, "retmode": "json"},
        timeout=timeout,
    )
    # Surface HTTP failures explicitly instead of reporting "no studies".
    search_response.raise_for_status()
    # An error payload may lack "esearchresult"; treat that as no ids found.
    ids = search_response.json().get("esearchresult", {}).get("idlist", [])

    if not ids:
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    summary_response = requests.get(
        f"{base_url}esummary.fcgi",
        params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"},
        timeout=timeout,
    )
    summary_response.raise_for_status()
    data = summary_response.json()

    if "result" not in data:
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    studies = []
    for uid in ids:
        article = data["result"].get(uid, {})
        # Skip ids whose summary is missing or not a titled record.
        if isinstance(article, dict) and "title" in article:
            studies.append({
                "id": uid,
                "title": article.get("title", ""),
                "year": (article.get("pubdate") or "")[:4],
                "source": "PubMed"
            })

    return {"ingredient": ingredient, "studies_found": len(studies), "studies": studies}

# Status line only: search_pubmed() has been defined; no request is made here.
print("PubMed connection ready")

PubMed connection ready


In [11]:
# NOURA - Cell 5: Evidence classifier

def classify_evidence_type(title):
    """Guess an evidence source type from a study title using keyword rules.

    Rules are tried in order and the first match wins, so a title mentioning
    both "systematic review" and "randomized" is a systematic review.

    Args:
        title: Study title; None or "" is treated as an empty title.

    Returns:
        One of "systematic_review_meta_analysis", "rct",
        "observational_cohort", "regulatory_opinion", "in_vitro" or
        "clinical_case". Titles matching no rule fall back to "in_vitro".
    """
    import re  # local import keeps this cell runnable on its own

    title_lower = (title or "").lower()

    # (evidence type, phrases matched as substrings, acronyms matched as whole words)
    # Short acronyms need word boundaries: as a bare substring "rct" also
    # matches inside ordinary words such as "infarction".
    rules = (
        ("systematic_review_meta_analysis",
         ("meta-analysis", "systematic review", "cochrane"), ()),
        ("rct",
         ("randomized", "randomised", "controlled trial", "double-blind"), ("rct", "rcts")),
        ("observational_cohort",
         ("cohort", "observational", "prospective", "retrospective"), ()),
        ("regulatory_opinion",
         ("guideline", "regulatory", "safety assessment"), ("efsa", "fda")),
        ("in_vitro",
         ("in vitro", "cell culture", "in-vitro", "mechanistic"), ()),
        ("clinical_case",
         ("case report", "case study"), ()),
    )

    for evidence_type, phrases, acronyms in rules:
        if any(phrase in title_lower for phrase in phrases):
            return evidence_type
        if any(re.search(rf"\b{re.escape(acronym)}\b", title_lower) for acronym in acronyms):
            return evidence_type

    # Unrecognised titles are treated as in-vitro evidence.
    return "in_vitro"

# Status line printed after classify_evidence_type() is defined.
print("Evidence classifier loaded")

Evidence classifier loaded


In [12]:
# NOURA - Cell 6: Full evaluation pipeline

def noura_evaluate(ingredient, category="skincare"):
    """Run the full pipeline for one ingredient and print a text report.

    Steps: search PubMed, classify each title, weight it, aggregate a score,
    then print the assessment.

    Args:
        ingredient: Ingredient name passed to search_pubmed().
        category: Product category; only shown in the report header.

    Returns:
        The dict returned by calculate_health_score().
    """
    title_width = 80  # titles longer than this are shortened in the breakdown

    # Step 1: Search PubMed
    pubmed_results = search_pubmed(ingredient, max_results=10)

    # Step 2: Classify and evaluate each study
    evaluated = []
    evidence_count = {}

    for study in pubmed_results.get("studies", []):
        source_type = classify_evidence_type(study["title"])
        ev = evaluate_evidence(source_type)
        full_title = study["title"]
        # Only mark a title with an ellipsis when it was actually cut.
        ev["study_title"] = full_title[:title_width] + ("..." if len(full_title) > title_width else "")
        ev["year"] = study["year"]
        evaluated.append(ev)
        evidence_count[source_type] = evidence_count.get(source_type, 0) + 1

    # Step 3: Calculate score
    result = calculate_health_score(evaluated)

    # Step 4: Build evidence summary
    evidence_str = " + ".join([f"{v} {k.replace('_', ' ')}"
                                for k, v in evidence_count.items()])

    # A missing score is shown as N/A rather than "None/100".
    score_text = "N/A" if result['score'] is None else f"{result['score']}/100"

    # Step 5: Display NOURA assessment
    print(f"NOURA Health Assessment: {ingredient.title()} ({category})")
    print("=" * 65)
    print(f"Score:   {score_text}")
    print(f"Verdict: {result['verdict']}")
    print()
    print(f"Studies retrieved:  {pubmed_results['studies_found']} (PubMed)")
    print(f"Evidence types:     {evidence_str if evidence_str else 'None'}")
    print()

    if result['flag']:
        print(f"Note: {result['flag']}")
        print()

    print("Evidence breakdown:")
    for e in evaluated:
        print(f"  [{e['year']}] {e['source_type'].replace('_', ' ')} "
              f"(weight: {e['weight']}) — {e['study_title']}")

    print()
    print("What would you like next?")
    print("  - View full source links")
    print("  - Compare with alternatives")
    print("  - Check regulatory status")
    print("  - Assess another ingredient")
    print("=" * 65)
    print()

    return result


# Test
# Both calls query PubMed over the network; only the value of the last call is
# displayed as the cell result.
noura_evaluate("niacinamide")
# NOTE(review): this argument is a search phrase rather than an ingredient name.
# search_pubmed() appends " safety skin human" to whatever it is given, so the
# PubMed term becomes "parabens safety cosmetics safety skin human". The
# recorded output ends with an INSUFFICIENT DATA result, presumably from this
# call — confirm whether "parabens" alone was intended.
noura_evaluate("parabens safety cosmetics")

NOURA Health Assessment: Niacinamide (skincare)
Score:   48.0/100
Verdict: HIGHER RISK

Studies retrieved:  10 (PubMed)
Evidence types:     2 rct + 7 in vitro + 1 systematic review meta analysis

Evidence breakdown:
  [2025] rct (weight: 0.85) — Safety and efficacy of niosomal and conventional tranexamic acid/niacinamide vs....
  [2025] in vitro (weight: 0.3) — Emerging topical therapies for melasma: a comparative analysis of efficacy and s...
  [2025] in vitro (weight: 0.3) — Exploring the Cardiovascular Impacts of Oral Nicotinamide: A Comprehensive Narra...
  [2025] systematic review meta analysis (weight: 1.0) — Axillary Hyperpigmentation Treatment: A Systematic Review of the Literature....
  [2025] in vitro (weight: 0.3) — Clinical Efficacy of a Novel Topical Formulation on Periorbital Dark Circles: An...
  [2025] in vitro (weight: 0.3) — Pbserum Specific Acne Scars®: a cutting-edge approach utilizing triple enzymatic...
  [2025] in vitro (weight: 0.3) — Benefits of a Multifunction

{'score': None,
 'verdict': 'INSUFFICIENT DATA',
 'flag': 'No scientific evidence retrieved for this ingredient',
 'evidence_situation': 'no_evidence'}