In [8]:
import pandas as pd

# === Load CSVs with dtype normalization and cleaning ===

def _normalize_id_column(series):
    """Return *series* as ID strings with any decimal suffix removed.

    Numeric ID columns that contain blanks are parsed by pandas as floats, so
    an ID can arrive as ``"10040560.0"``; everything from the first ``.`` on
    is dropped. Missing values stay missing instead of becoming the literal
    string ``"nan"``, so ``isna()`` / ``dropna()`` keep working downstream.
    """
    missing = series.isna()
    as_text = series.astype(str).str.split(".").str[0]
    return as_text.where(~missing)


def load_clean_csvs():
    """Load the CSV tables from the working directory and normalize ID columns.

    Returns:
        dict: table name -> DataFrame, with keys ``label``, ``to_rxnorm``,
        ``rx_product``, ``rx_ingredient``, ``ing_to_prod``, ``effect``,
        ``meddra`` and ``high_conf``. The ID columns listed below are
        converted to strings without decimal suffixes so they can be used as
        merge keys against each other.

    Raises:
        FileNotFoundError: if one of the CSV files is not present.
        KeyError: if one of the ID columns listed below is missing.
    """
    def read(file, **kwargs):
        # low_memory=False makes pandas infer each column's dtype from the
        # whole file rather than chunk by chunk.
        return pd.read_csv(file, low_memory=False, **kwargs)

    df_label = read("product_label.csv")
    df_to_rxnorm = read("product_to_rxnorm.csv")
    df_rxnorm_product = read("vocab_rxnorm_product.csv")
    df_rxnorm_ingredient = read("vocab_rxnorm_ingredient.csv")
    df_ing_to_prod = read("vocab_rxnorm_ingredient_to_product.csv")
    df_effect = read("product_adverse_effect.csv", dtype={"effect_meddra_id": str})
    df_meddra = read("vocab_meddra_adverse_effect.csv")
    df_high_conf = read("high_confidence.csv")

    # Normalize ID types as strings and strip trailing decimals
    id_columns = [
        (df_effect, "effect_meddra_id"),
        (df_meddra, "meddra_id"),
        (df_high_conf, "ingredient_id"),
        (df_high_conf, "effect_meddra_id"),
        (df_ing_to_prod, "ingredient_id"),
        (df_ing_to_prod, "product_id"),
        (df_rxnorm_ingredient, "rxnorm_id"),
        (df_rxnorm_product, "rxnorm_id"),
    ]
    for df, col in id_columns:
        df[col] = _normalize_id_column(df[col])

    return {
        "label": df_label,
        "to_rxnorm": df_to_rxnorm,
        "rx_product": df_rxnorm_product,
        "rx_ingredient": df_rxnorm_ingredient,
        "ing_to_prod": df_ing_to_prod,
        "effect": df_effect,
        "meddra": df_meddra,
        "high_conf": df_high_conf,
    }

# === Query Function ===

def search_label_info(query):
    """Print label, RxNorm, adverse-effect and ingredient details for a query.

    Args:
        query: An ``int`` is matched exactly against ``label_id``. Any other
            value is treated as literal text (not a regular expression) and
            matched case-insensitively as a substring of
            ``source_product_name``.

    Returns:
        None. Every result is written to stdout; nothing is returned.
    """
    data = load_clean_csvs()
    df_label = data["label"]

    # Search
    if isinstance(query, int):
        label_match = df_label[df_label["label_id"] == query]
    else:
        # regex=False: the query is a product name, not a pattern. Without it
        # a query containing "(", "+" or "[" raises or matches wrongly.
        name_hits = df_label["source_product_name"].str.contains(
            query, case=False, na=False, regex=False
        )
        label_match = df_label[name_hits]

    if label_match.empty:
        print("❌ No matching product label found.")
        return

    df_effect = data["effect"]
    df_meddra = data["meddra"]
    df_ing_to_prod = data["ing_to_prod"]
    df_rx_ing = data["rx_ingredient"]
    df_high_conf = data["high_conf"]

    # The label -> RxNorm product join does not depend on the label being
    # shown, so build it once instead of once per matched label.
    rx_catalog = pd.merge(
        data["to_rxnorm"],
        data["rx_product"],
        left_on="rxnorm_product_id",
        right_on="rxnorm_id",
        how="left",
    )

    for _, label_row in label_match.iterrows():
        label_id = label_row["label_id"]
        print(f"\n🔎 Found label_id: {label_id}")
        print(label_row.to_string())

        # === RxNorm Product Info ===
        rx_info = rx_catalog[rx_catalog["label_id"] == label_id]

        if not rx_info.empty:
            print("\n💊 RxNorm Product Info:")
            print(rx_info[["rxnorm_id", "rxnorm_name", "rxnorm_term_type"]])
        else:
            print("\n💊 No RxNorm info found.")

        # === Adverse Effects ===
        effects = df_effect[df_effect["product_label_id"] == label_id]

        if effects.empty:
            print("\n⚠️ No adverse effects found.")
        else:
            # ID columns were already normalized to strings by load_clean_csvs.
            effects = pd.merge(effects, df_meddra, left_on="effect_meddra_id", right_on="meddra_id", how="left")
            effects = effects.dropna(subset=["meddra_id"])  # drop unmatched
            print("\n⚠️ Adverse Effects:")
            print(effects[["effect_id", "meddra_id", "meddra_name", "meddra_term_type"]].reset_index(drop=True))

        # === Linked Ingredients ===
        if rx_info.empty:
            print("\n🧬 No RxNorm product to search ingredients.")
            continue

        # Use every RxNorm product linked to the label, not just the first
        # row. Products the left join could not resolve have a null rxnorm_id
        # and are skipped rather than looked up as the string "nan".
        product_ids = rx_info["rxnorm_id"].dropna().astype(str).unique()
        ing_links = df_ing_to_prod[df_ing_to_prod["product_id"].isin(product_ids)]
        # An ingredient shared by several products is listed once.
        ing_links = ing_links.drop_duplicates(subset="ingredient_id")
        ingredient_info = pd.merge(ing_links, df_rx_ing, left_on="ingredient_id", right_on="rxnorm_id", how="left")

        if ingredient_info.empty:
            print("\n🧬 No linked ingredients found.")
            continue

        print("\n🧬 Linked Ingredients:")
        print(ingredient_info[["ingredient_id", "rxnorm_name"]])

        # === High Confidence Ingredient-Effect Links ===
        # Both ingredient_id columns are already strings, so the loaded table
        # is merged as-is instead of being re-cast (and mutated) in place.
        high_conf = pd.merge(df_high_conf, ingredient_info, on="ingredient_id", how="inner")
        high_conf = pd.merge(high_conf, df_meddra, left_on="effect_meddra_id", right_on="meddra_id", how="left")

        if not high_conf.empty:
            print("\n✅ High Confidence Ingredient–Effect Links:")
            print(high_conf[["ingredient_id", "rxnorm_name", "meddra_id", "meddra_name"]])
        else:
            print("\n✅ No high-confidence effects found.")


# Example lookup: a str query takes the name-search branch of
# search_label_info (case-insensitive match on source_product_name).
search_label_info('ブイフェンド200mg静注用')


🔎 Found label_id: 1
label_id                                                               1
source                                                                JP
source_product_name                                       ブイフェンド200mg静注用
source_product_id                                               00050557
source_label_url       https://www.kegg.jp/medicus-bin/japic_med?japi...

💊 RxNorm Product Info:
     rxnorm_id                                    rxnorm_name rxnorm_term_type
1486    546624  voriconazole 40 MG/ML Oral Suspension [Vfend]              SBD

⚠️ Adverse Effects:
     effect_id meddra_id         meddra_name meddra_term_type
0       483707  10040560               Shock               PT
1       483708  10002218         Anaphylaxis              LLT
2       483709  10061355           Poisoning               PT
3       483710  10059284  Epidermal necrosis               PT
4       483711  10028851            Necrosis               PT
..         ...       ...               