<a href="https://colab.research.google.com/github/AnamariaVLR/noura-rag/blob/main/NOURA_RAG_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# NOURA - Cell 1: Verify environment
# Prints two fixed status lines; nothing about the environment is inspected.
print("NOURA is starting...", "Python ready", sep="\n")

NOURA is starting...
Python ready


In [13]:
# NOURA - Cell 2: Scoring methodology (NOURA IP)

# Evidence sources and the weight each one carries.
#   base_weight                  weight of the source before any adjustment
#   requires_independence_check  True -> the weight is reduced when the source
#                                is industry-funded (see evaluar_evidencia)
#   requires_dose_adjustment     only set for EWG hazard ratings
# Every entry carries "requires_independence_check" so readers of the table
# never have to guess a default for a missing key.
EVIDENCE_HIERARCHY = {
    "systematic_review_meta_analysis": {"base_weight": 1.00, "requires_independence_check": True},
    "rct":                             {"base_weight": 0.85, "requires_independence_check": True},
    "regulatory_opinion":              {"base_weight": 0.75, "requires_independence_check": False},
    "observational_cohort":            {"base_weight": 0.60, "requires_independence_check": True},
    "ewg_hazard":                      {"base_weight": 0.50, "requires_independence_check": False,
                                        "requires_dose_adjustment": True},
    "cosing_regional":                 {"base_weight": 0.45, "requires_independence_check": False},
    "in_vitro":                        {"base_weight": 0.30, "requires_independence_check": False},
    "clinical_case":                   {"base_weight": 0.15, "requires_independence_check": False},
    "expert_opinion":                  {"base_weight": 0.10, "requires_independence_check": True},
}

# Fraction of the weight removed from an industry-funded source that is
# subject to the independence check.
INDUSTRY_FUNDING_PENALTY = 0.20

HEALTH_HARD_BLOCK = 50      # If score < 50, the verdict is always "EVITAR" (avoid)
PLANET_FLAG_THRESHOLD = 40  # If score < 40, environmental flag

# Upper bounds applied to the score when the evidence base is narrow.
SUFFICIENCY_CAPS = {
    "only_in_vitro_or_case": 60,
    "only_regulatory_strong": 70,
    "only_regulatory_weak": 50,
    "single_rct": 80,
}

# Per product category and claim: the minimum evidence type and the minimum
# number of studies required.
CATEGORY_CLAIM_REQUIREMENTS = {
    "skincare": {
        "hydration":  {"min_evidence": "rct", "min_studies": 1},
        "anti_aging": {"min_evidence": "rct", "min_studies": 2},
        "brightening":{"min_evidence": "observational_cohort", "min_studies": 1},
        "acne":       {"min_evidence": "rct", "min_studies": 2},
    }
}

# Visible confirmation that the configuration was loaded.
print("Metodología NOURA cargada ✓")
print(f"Fuentes de evidencia definidas: {len(EVIDENCE_HIERARCHY)}")
print(f"Umbral de bloqueo Health Score: {HEALTH_HARD_BLOCK}")
print(f"Categorías con reglas de claim: {list(CATEGORY_CLAIM_REQUIREMENTS.keys())}")

Metodología NOURA cargada ✓
Fuentes de evidencia definidas: 9
Umbral de bloqueo Health Score: 50
Categorías con reglas de claim: ['skincare']


In [14]:
# NOURA - Cell 3: Scoring engine

def evaluar_evidencia(tipo, financiado_industria=False, ajustado_dosis=True):
    """Weigh a single piece of evidence using ``EVIDENCE_HIERARCHY``.

    Args:
        tipo: Key of ``EVIDENCE_HIERARCHY`` naming the evidence source.
        financiado_industria: True when the source was industry-funded. The
            weight is only reduced for sources whose hierarchy entry sets
            ``requires_independence_check``.
        ajustado_dosis: True when an EWG hazard rating was adjusted for the
            real concentration. Only consulted when ``tipo == "ewg_hazard"``.

    Returns:
        A dict that always contains ``tipo``, ``peso`` (rounded to three
        decimals) and ``flags`` (list of human-readable notes). For an
        unknown ``tipo`` the weight is 0, ``flags`` holds the explanation and
        the legacy ``flag`` key is kept as well so existing readers of that
        key keep working.
    """
    fuente = EVIDENCE_HIERARCHY.get(tipo)
    if fuente is None:
        # Same shape as the regular result: downstream code indexes "tipo"
        # and "peso" on every entry and must not crash on unknown sources.
        mensaje = f"Fuente desconocida: {tipo}"
        return {"tipo": tipo, "peso": 0, "flags": [mensaje], "flag": mensaje}

    peso = fuente["base_weight"]
    flags = []

    if financiado_industria and fuente.get("requires_independence_check"):
        peso = peso * (1 - INDUSTRY_FUNDING_PENALTY)
        # The percentage is derived from the constant so the message cannot
        # drift from the penalty actually applied.
        flags.append(
            f"Financiado por industria: peso reducido {INDUSTRY_FUNDING_PENALTY:.0%}"
        )

    if tipo == "ewg_hazard" and not ajustado_dosis:
        # A hazard rating that ignores the real concentration is discarded.
        peso = 0
        flags.append("EWG rechazado: no ajustado por concentración real")

    return {"tipo": tipo, "peso": round(peso, 3), "flags": flags}


def calcular_health_score(evidencias_evaluadas, prohibido=False):
    """Combine weighted evidence into a Health Score and a verdict.

    The score is the mean of the ``peso`` values scaled to 0-100 and rounded
    to one decimal, then limited by a sufficiency cap when the evidence is
    exclusively laboratory/case-report or exclusively regulatory.

    Args:
        evidencias_evaluadas: Sequence of dicts as produced by
            ``evaluar_evidencia``; ``peso`` is required on every entry,
            ``tipo`` is read when present.
        prohibido: True when the ingredient is banned by regulation; the
            evidence is then ignored.

    Returns:
        A dict that always contains ``score``, ``veredicto`` and ``flag``
        (``None`` when no cap note applies). The two early exits (banned
        ingredient, no evidence) additionally carry ``razon``.
    """
    # Verdict bands: >= 71 is recommended, 41-70 is caution, anything lower
    # (or below the hard block) is avoid.
    umbral_recomendado = 71
    umbral_precaucion = 41

    if prohibido:
        return {
            "score": 0,
            "veredicto": "EVITAR",
            "razon": "Ingrediente prohibido por regulación",
            "flag": None,
        }

    if not evidencias_evaluadas:
        return {
            "score": None,
            "veredicto": "DATOS INSUFICIENTES",
            "razon": "Sin evidencia científica disponible",
            "flag": None,
        }

    # .get(): an entry without "tipo" still counts toward the average with
    # its weight but can never satisfy the "only ..." checks below.
    tipos = [e.get("tipo") for e in evidencias_evaluadas]
    solo_invitro = all(t in {"in_vitro", "clinical_case"} for t in tipos)
    solo_regulatorio = all(t in {"regulatory_opinion", "cosing_regional"} for t in tipos)

    pesos = [e["peso"] for e in evidencias_evaluadas]
    score_raw = round(sum(pesos) / len(pesos) * 100, 1)

    # Sufficiency caps; the note quotes the configured cap value so it stays
    # correct if SUFFICIENCY_CAPS is tuned.
    if solo_invitro:
        cap = SUFFICIENCY_CAPS["only_in_vitro_or_case"]
        flag = f"Solo evidencia de laboratorio — cap aplicado en {cap}"
    elif solo_regulatorio:
        cap = SUFFICIENCY_CAPS["only_regulatory_strong"]
        flag = f"Solo aprobación regulatoria — cap aplicado en {cap}"
    else:
        cap = None
        flag = None
    score = score_raw if cap is None else min(score_raw, cap)

    # The hard block wins over the bands whenever it is the stricter limit.
    if score < max(HEALTH_HARD_BLOCK, umbral_precaucion):
        veredicto = "EVITAR"
    elif score >= umbral_recomendado:
        veredicto = "RECOMENDADO"
    else:
        veredicto = "PRECAUCIÓN"

    return {"score": score, "veredicto": veredicto, "flag": flag}


# Confirmation message so the cell shows output once the scoring functions are defined.
print("Motor de scoring cargado ✓")

Motor de scoring cargado ✓


In [15]:
# NOURA - Cell 4: First real evaluation
# Product: Retinol serum 0.5%

# Weigh each evidence source on its own. The keyword options state that the
# systematic review and the RCT are not industry-funded and that the EWG
# rating is dose-adjusted.
evidencias_retinol = [
    evaluar_evidencia(tipo, **opciones)
    for tipo, opciones in (
        ("systematic_review_meta_analysis", {"financiado_industria": False}),
        ("rct", {"financiado_industria": False}),
        ("regulatory_opinion", {}),
        ("ewg_hazard", {"ajustado_dosis": True}),
    )
]

# Fold the weighted sources into one score and verdict.
resultado = calcular_health_score(evidencias_retinol)

# Report: headline figures first, then the evidence behind them.
print("=== NOURA EVALÚA: Retinol Serum 0.5% ===")
print("Health Score:  {}".format(resultado["score"]))
print("Veredicto:     {}".format(resultado["veredicto"]))
print("Flag:          {}".format(resultado["flag"]))
print()
print("Evidencia utilizada:")
for e in evidencias_retinol:
    print("  - {tipo}: peso {peso}".format(**e))

=== NOURA EVALÚA: Retinol Serum 0.5% ===
Health Score:  77.5
Veredicto:     RECOMENDADO
Flag:          None

Evidencia utilizada:
  - systematic_review_meta_analysis: peso 1.0
  - rct: peso 0.85
  - regulatory_opinion: peso 0.75
  - ewg_hazard: peso 0.5


In [16]:
# NOURA - Cell 5: Connect to PubMed
import requests

def search_pubmed(ingredient, max_results=5, timeout=10):
    """Search PubMed for studies about an ingredient.

    Two NCBI E-utilities calls are made: ``esearch`` to obtain matching
    article IDs for "<ingredient> safety skin human", then ``esummary`` to
    fetch the title and publication date of those IDs.

    Args:
        ingredient: Ingredient name to search for.
        max_results: Maximum number of article IDs requested (``retmax``).
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        A dict with ``ingredient``, ``studies_found`` and ``studies``; each
        study is a dict with ``id``, ``title``, ``year`` (first four
        characters of ``pubdate``) and ``source``. A blank ingredient or a
        search without hits yields ``studies_found == 0`` and an empty list.

    Raises:
        requests.exceptions.RequestException: On a network failure, a
            timeout, or a non-2xx HTTP status.
        KeyError: If a response lacks the expected ``esearchresult`` /
            ``result`` structure.
    """
    # A blank ingredient would query just " safety skin human" and attribute
    # unrelated papers to it, so answer "nothing found" without a request.
    if not ingredient or not str(ingredient).strip():
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    # Step 1: Search for relevant study IDs
    search_response = requests.get(
        f"{base_url}esearch.fcgi",
        params={
            "db": "pubmed",
            "term": f"{ingredient} safety skin human",
            "retmax": max_results,
            "retmode": "json",
        },
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    search_response.raise_for_status()
    ids = search_response.json()["esearchresult"]["idlist"]

    if not ids:
        return {"ingredient": ingredient, "studies_found": 0, "studies": []}

    # Step 2: Fetch titles for those IDs
    summary_response = requests.get(
        f"{base_url}esummary.fcgi",
        params={
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "json",
        },
        timeout=timeout,
    )
    summary_response.raise_for_status()
    summaries = summary_response.json()["result"]

    studies = []
    for uid in ids:
        # An ID missing from the summary still yields a placeholder entry.
        article = summaries.get(uid, {})
        studies.append({
            "id": uid,
            "title": article.get("title", "Title not available"),
            "year": article.get("pubdate", "")[:4],
            "source": "PubMed",
        })

    return {
        "ingredient": ingredient,
        "studies_found": len(studies),
        "studies": studies,
    }

# Test: search for retinol
results = search_pubmed("retinol")

# Header and hit count, a blank separator, then one "[year] title" line per study.
print("=== PUBMED SEARCH: retinol ===")
print("Studies found: {}".format(results["studies_found"]))
print()
for s in results["studies"]:
    print("[{year}] {title}".format(**s))

=== PUBMED SEARCH: retinol ===
Studies found: 5

[2026] Hydroxypinacolone 9-cis Retinoate Mitigates UV-Induced Photoaging by Modulating ECM, Fibroblasts, Inflammation, and Melanogenesis.
[2026] Development of Efficient Supramolecular Photostabilizer for Carotenoids and Retinoids: Analyses and Application Research.
[2026] Dual targeting of human and bacterial hyaluronidases by skincare bioactives: Mechanistic basis and functional evidence.
[2026] Chitosan/β-glucan/cystine multifunctional hydrogel loading retinol liposomes for efficiently treating UV-induced skin damage and aging.
[2025] Evaluation of a Retinol Nanoemulsion Gel Enriched with Black Seed and Rosehip Oils for Acne Management.
