In [1]:
import sys
from pathlib import Path

# The project root is taken to be the parent of the current working directory
# (assumes the notebook is launched from a first-level subfolder — TODO confirm).
PROJECT_ROOT = Path().resolve().parent

# Make the project's packages importable. The membership check keeps this cell
# idempotent: re-running it no longer stacks duplicate entries onto sys.path.
_project_root_str = str(PROJECT_ROOT)
if _project_root_str not in sys.path:
    sys.path.append(_project_root_str)

print("Project root:", PROJECT_ROOT)

Project root: /shared/crollo/faers-radar


In [2]:
import duckdb
import pandas as pd

from faers_signals.config import WAREHOUSE_DB_PATH
from faers_signals.embeddings import (
    EmbeddingConfig,
    compute_embedding_novelty,
)

# Open a connection to the DuckDB warehouse file; later cells pass this same
# handle to the analysis helpers and use it for ad-hoc queries.
con = duckdb.connect(database=str(WAREHOUSE_DB_PATH))
con

<_duckdb.DuckDBPyConnection at 0x7f57472a8c70>

In [3]:
# Settings handed to the embedding step: minimum-count thresholds, the number
# of components, and a pinned random_state (presumably for reproducibility —
# confirm against EmbeddingConfig).
cfg = EmbeddingConfig(
    min_pair_n11=5,
    min_drug_n11=100,
    min_event_n11=100,
    n_components=50,
    random_state=0,
)

# The helper returns four tabular results; novelty_df is the one reused by the
# cells further down.
cooc, drug_emb, event_emb, novelty_df = compute_embedding_novelty(cfg=cfg, con=con)

# Preview each result with its own rich rendering. Ending the cell with a bare
# tuple instead produces a single plain-text repr in which the wide embedding
# frames wrap and run into each other.
display(cooc.head(), drug_emb.head(), event_emb.head(), novelty_df.head())


(  drugname_norm                        pt  n11_total
 0  AZATHIOPRINE       Enterococcal sepsis        6.0
 1   MONTELUKAST                     Rales       87.0
 2    PREDNISONE  Pneumothorax spontaneous       37.0
 3      BYDUREON                   Fatigue       86.0
 4    DICLOFENAC  Patent ductus arteriosus       23.0,
                           drugname_norm      dim_0     dim_1     dim_2  \
 0  .ALPHA.-PYRROLIDINOVALEROTHIOPHENONE   0.707298 -0.557516  0.190433   
 1                    .ALPHA.-TOCOPHEROL   9.368548 -2.292316  5.298416   
 2            .ALPHA.-TOCOPHEROL ACETATE   4.142402 -0.967946  2.226891   
 3         .ALPHA.-TOCOPHEROL ACETATE, D  10.023363 -1.095214  5.231878   
 4        .ALPHA.-TOCOPHEROL ACETATE, DL   8.251019 -1.878489  4.720150   
 
       dim_3     dim_4      dim_5     dim_6     dim_7     dim_8  ...    dim_40  \
 0 -1.504412  1.422772  -0.746404  0.595651  0.174575 -0.677973  ... -0.025112   
 1  4.596232  3.452407   8.530148  9.060537 -1.939250  4.56

In [4]:
# Preview the first 20 rows of the per-pair novelty table, in its existing order.
novelty_df.iloc[:20]

Unnamed: 0,drugname_norm,pt,n11_total,observed_log,predicted_log,residual,structural_z
0,OXBRYTA,Sickle cell anaemia with crisis,10916.0,9.298076,0.4368,8.861276,7.63239
1,ELIGARD,Intercepted product preparation error,8435.0,9.040264,0.539511,8.500753,7.280495
2,PAXLOVID,Disease recurrence,20038.0,9.905436,1.560606,8.34483,7.128305
3,NEULASTA,Device adhesion issue,11121.0,9.31668,1.004449,8.312231,7.096486
4,PARAGARD T 380A,Foreign body in reproductive tract,6961.0,8.848222,0.613962,8.23426,7.020381
5,ETHINYL ESTRADIOL\NORELGESTROMIN,Product adhesion issue,2763.0,7.924434,0.209507,7.714927,6.513478
6,GENOTROPIN,Device information output issue,5191.0,8.554874,0.860876,7.693998,6.49305
7,PARAGARD T 380A,Reproductive complication associated with device,3651.0,8.20303,0.535988,7.667043,6.46674
8,PLUVICTO,Ill-defined disorder,3596.0,8.187855,0.522621,7.665235,6.464975
9,SPRAVATO,Dissociation,3071.0,8.030084,0.379546,7.650538,6.450631


In [5]:
# Restrict attention to associations that are both well supported and
# structurally surprising.
has_enough_reports = novelty_df["n11_total"].ge(10)
is_structurally_surprising = novelty_df["structural_z"].gt(2.0)  # > 2 SD above model expectation

# .copy() detaches the subset so later edits to it never touch novelty_df.
strong_structural = novelty_df.loc[has_enough_reports & is_structurally_surprising].copy()

strong_structural.head(20)


Unnamed: 0,drugname_norm,pt,n11_total,observed_log,predicted_log,residual,structural_z
0,OXBRYTA,Sickle cell anaemia with crisis,10916.0,9.298076,0.4368,8.861276,7.63239
1,ELIGARD,Intercepted product preparation error,8435.0,9.040264,0.539511,8.500753,7.280495
2,PAXLOVID,Disease recurrence,20038.0,9.905436,1.560606,8.34483,7.128305
3,NEULASTA,Device adhesion issue,11121.0,9.31668,1.004449,8.312231,7.096486
4,PARAGARD T 380A,Foreign body in reproductive tract,6961.0,8.848222,0.613962,8.23426,7.020381
5,ETHINYL ESTRADIOL\NORELGESTROMIN,Product adhesion issue,2763.0,7.924434,0.209507,7.714927,6.513478
6,GENOTROPIN,Device information output issue,5191.0,8.554874,0.860876,7.693998,6.49305
7,PARAGARD T 380A,Reproductive complication associated with device,3651.0,8.20303,0.535988,7.667043,6.46674
8,PLUVICTO,Ill-defined disorder,3596.0,8.187855,0.522621,7.665235,6.464975
9,SPRAVATO,Dissociation,3071.0,8.030084,0.379546,7.650538,6.450631


In [6]:
# Take the first filtered pair (all of its columns) as the worked example for
# the drill-down query in the next cell.
row = strong_structural.iloc[0, :]
row


drugname_norm                            OXBRYTA
pt               Sickle cell anaemia with crisis
n11_total                                10916.0
observed_log                            9.298076
predicted_log                             0.4368
residual                                8.861276
structural_z                             7.63239
Name: 0, dtype: object

In [7]:
# Identify the drug/event pair carried by the selected row.
drug, pt = row["drugname_norm"], row["pt"]

# Fetch every quarterly record for that pair, ordered by quarter_idx. The two
# values are bound as query parameters rather than spliced into the SQL text.
quarterly_sql = """
    SELECT *
    FROM signals_quarterly
    WHERE drugname_norm = ?
      AND pt = ?
    ORDER BY quarter_idx
"""
ts = con.execute(quarterly_sql, [drug, pt]).fetchdf()

# Show only the period, count, and ror / confidence-bound columns for the
# earliest rows.
preview_columns = ["year", "quarter", "n11", "ror", "ror_ci_low", "ror_ci_high"]
ts[preview_columns].head()

Unnamed: 0,year,quarter,n11,ror,ror_ci_low,ror_ci_high
0,2020,Q1,51,3882.179807,2521.647088,5976.776101
1,2020,Q2,291,2957.753754,2274.861099,3845.644588
2,2020,Q3,444,6068.318179,4602.958408,8000.177767
3,2020,Q4,470,6067.443025,4530.770859,8125.298323
4,2021,Q1,419,8152.200812,6001.513098,11073.603773


### Temporal + Embeddings

In [11]:
from faers_signals.emergence import (
    EmergenceConfig,
    compute_global_emergence_scores,
)

# 1. Global temporal novelty
cfg_em = EmergenceConfig(min_points=4, min_n11_latest=5)
global_em = compute_global_emergence_scores(
    cfg=cfg_em,
    min_total_drug_reports=500,
    max_drugs=None,
    con=con,
)

# 2. Embedding novelty (novelty_df from above)
# Left join: every emergence row is kept, and pairs that have no embedding
# score end up with a missing structural_z.
# validate="many_to_one" makes pandas raise if (drugname_norm, pt) is not
# unique in novelty_df, instead of silently duplicating emergence rows.
combined = global_em.merge(
    novelty_df[["drugname_norm", "pt", "structural_z"]],
    on=["drugname_norm", "pt"],
    how="left",
    validate="many_to_one",
)

combined.head(20)


[info] Computing global emergence for 3985 drugs (min_total_drug_reports=500)
[info] [1/3985] DUPIXENT (n_reports=364758)
[info] [2/3985] ZANTAC (n_reports=325161)
[info] [3/3985] PREDNISONE (n_reports=291409)
[info] [4/3985] HUMIRA (n_reports=277769)
[info] [5/3985] METHOTREXATE (n_reports=203080)
[info] [6/3985] ASPIRIN (n_reports=202115)
[info] [7/3985] REVLIMID (n_reports=192365)
[info] [8/3985] DEXAMETHASONE (n_reports=169924)
[info] [9/3985] OMEPRAZOLE (n_reports=164992)
[info] [10/3985] ATORVASTATIN (n_reports=161809)
[info] [11/3985] ACETAMINOPHEN (n_reports=161668)
[info] [12/3985] GABAPENTIN (n_reports=161385)
[info] [13/3985] RANITIDINE (n_reports=159490)
[info] [14/3985] METFORMIN (n_reports=149996)
[info] [15/3985] PROACTIV MD ADAPALENE ACNE TREATMENT (n_reports=149530)
[info] [16/3985] ELIQUIS (n_reports=146119)
[info] [17/3985] AMLODIPINE (n_reports=144950)
[info] [18/3985] FUROSEMIDE (n_reports=132868)
[info] [19/3985] PANTOPRAZOLE (n_reports=126032)
[info] [20/3985] EN

Unnamed: 0,drugname_norm,pt,signal_score,emergence_z,slope_log_ror,latest_ror,latest_ror_ci_low,latest_n11,n_points,structural_z
0,EUMOVATE,Pyrexia,1259.480377,93.432873,2.338708,317.5851,53.792267,6,4,0.578844
1,TRIMETAZIDINE,Left ventricular hypertrophy,985.904996,48.736033,1.86286,52018.26,8904.212478,8,4,0.861261
2,DOXIL,Acute myeloid leukaemia,759.527772,35.924692,3.332449,568.2665,211.215537,5,4,0.259369
3,PROMETHAZINE,Suspected suicide,733.825938,14.712282,5.10868,17385.89,3939.917413,6,4,0.270575
4,CLOZAPINE,Investigation noncompliance,708.321371,59.486525,1.362013,6261.905,375.520079,16,4,
5,TRIMETAZIDINE,Pulmonary valve disease,674.495534,19.54661,2.273648,3901789.0,148217.559804,8,4,1.626762
6,MEPOLIZUMAB,Mycobacterium avium complex infection,477.583693,79.113968,1.035764,338.7513,154.950946,8,4,0.140996
7,COUMADIN,Sopor,471.610709,15.576601,4.319534,1105.869,421.173108,5,6,1.639785
8,AZITHROMYCIN ANHYDROUS,Deafness neurosensory,465.769039,17.867825,3.724746,1093.942,456.114213,6,4,1.553089
9,RELYVRIO,Abdominal discomfort,455.449565,11.322415,5.488734,1522.453,85.742695,6,6,1.140441


In [12]:
# Boost the temporal signal_score by how structurally surprising the pair is.
# Non-positive z-scores are clipped to 0, so they leave the score unchanged
# (multiplier of 1). Pairs whose structural_z is missing after the left merge
# are treated the same way: without fillna(0) the NaN would propagate into the
# combined score and sort_values would push those pairs to the very bottom,
# however strong their temporal signal is.
structural_boost = combined["structural_z"].fillna(0).clip(lower=0)
combined["global_novelty_score"] = combined["signal_score"] * (1 + structural_boost)
combined.sort_values("global_novelty_score", ascending=False).head(20)

Unnamed: 0,drugname_norm,pt,signal_score,emergence_z,slope_log_ror,latest_ror,latest_ror_ci_low,latest_n11,n_points,structural_z,global_novelty_score
0,EUMOVATE,Pyrexia,1259.480377,93.432873,2.338708,317.5851,53.792267,6,4,0.578844,1988.523601
1,TRIMETAZIDINE,Left ventricular hypertrophy,985.904996,48.736033,1.86286,52018.26,8904.212478,8,4,0.861261,1835.026813
5,TRIMETAZIDINE,Pulmonary valve disease,674.495534,19.54661,2.273648,3901789.0,148217.559804,8,4,1.626762,1771.739002
14,PLAN B ONE?STEP,Pregnancy after post coital contraception,340.099769,10.721595,2.746124,103900.0,6201.864519,16,4,3.145462,1409.870718
7,COUMADIN,Sopor,471.610709,15.576601,4.319534,1105.869,421.173108,5,6,1.639785,1244.951045
8,AZITHROMYCIN ANHYDROUS,Deafness neurosensory,465.769039,17.867825,3.724746,1093.942,456.114213,6,4,1.553089,1189.149646
10,PROMETHAZINE HYDROCHLORIDE,Viral test positive,373.67902,35.345812,1.106323,14129.08,5013.751843,7,4,1.8287,1057.025705
9,RELYVRIO,Abdominal discomfort,455.449565,11.322415,5.488734,1522.453,85.742695,6,6,1.140441,974.862774
2,DOXIL,Acute myeloid leukaemia,759.527772,35.924692,3.332449,568.2665,211.215537,5,4,0.259369,956.525513
23,RAPAFLO,Reversible airways obstruction,267.954565,3.544982,4.981377,3889991.0,147769.387503,8,7,2.526901,945.049147
