phase3/phase3_score.py

In [None]:
import pandas as pd

In [None]:
try:
    from .config import MODEL_WEIGHTS
except ImportError:
    from config import MODEL_WEIGHTS

----------------------------------<br>
Research-tool / anesthetic penalties<br>
----------------------------------

In [None]:
# Lower-case substrings that mark a compound as an anesthetic/sedative or a
# research tool. apply_tool_penalty() looks for each of them inside the
# lower-cased drug name.
TOOL_PENALTY_TERMS = [
    "anesthetic",
    "anaesthetic",
    "barbiturate",
    "sedative",
    "research tool",
    "experimental tool",
    "nmda antagonist",
    "dizocilpine",
    "mk-801",
    "thiopental",
    "ketamine",
    "propofol",
]

In [None]:
def apply_tool_penalty(drug_name: str, score: float) -> float:
    """
    Penalize compounds that are likely research tools or anesthetics
    rather than disease-modifying therapies.

    Args:
        drug_name: Compound name. It is lower-cased and checked for any
            substring listed in TOOL_PENALTY_TERMS. ``None`` or any other
            non-string value (for example the float NaN that stands in for
            a missing cell) is treated as "no name" and is never penalized.
        score: Score to adjust.

    Returns:
        ``score`` unchanged when it is <= 0 or when no penalty term occurs
        in the name; otherwise ``score * 0.2``.
    """
    # Nothing to scale down for zero or negative scores.
    if score <= 0:
        return score
    # A missing name cannot match any term. Guarding on the type (rather than
    # truthiness) matters because NaN is truthy and has no .lower().
    if not isinstance(drug_name, str):
        return score
    name = drug_name.lower()
    if any(term in name for term in TOOL_PENALTY_TERMS):
        return score * 0.2
    return score

In [None]:
def _is_missing(value) -> bool:
    """Return True for ``None`` or a float NaN."""
    # NaN is the only float that does not compare equal to itself.
    return value is None or (isinstance(value, float) and value != value)


def _hit_count(row, key) -> float:
    """Read a hit count from *row*, treating absent/None/NaN/empty as 0."""
    value = row.get(key, 0)
    if _is_missing(value):
        return 0.0
    return float(value or 0)


def paper_score(row):
    """
    Per-paper score:
    - rewards positive net signal (pos_hits - neg_hits)
    - caps signal so long abstracts don't dominate
    - adds outcome diversity bonus

    Args:
        row: Mapping-like object exposing ``.get(key, default)``. The keys
            read are "model", "pos_hits", "neg_hits" and "outcomes"
            ("outcomes" is split on ";").

    Returns:
        0.0 when the net signal is not positive. Otherwise
        ``weight * min(signal, 6.0) + 0.3 * n_outcomes``, where ``weight``
        is ``MODEL_WEIGHTS[model]`` (0.2 if the model is not listed) and
        ``n_outcomes`` is the number of non-blank ";"-separated entries.

    Missing values (``None`` or NaN) count as zero hits and as no outcomes,
    so they can neither produce a NaN score nor earn an outcome bonus.
    """
    signal = _hit_count(row, "pos_hits") - _hit_count(row, "neg_hits")
    if signal <= 0:
        return 0.0
    capped = min(signal, 6.0)

    # Looked up only once we know the paper contributes a score.
    base = MODEL_WEIGHTS.get(row.get("model", "unknown"), 0.2)

    outcomes = row.get("outcomes", "")
    # Without this check a NaN would be stringified to "nan" and counted as
    # one outcome.
    outcome_text = "" if _is_missing(outcomes) else str(outcomes or "")
    outcome_count = sum(1 for part in outcome_text.split(";") if part.strip())
    outcome_bonus = 0.3 * outcome_count
    return base * capped + outcome_bonus