In [1]:
from transformers import AutoTokenizer, AutoModel
from datasets import load_dataset
from tqdm import tqdm
import torch
import numpy as np

# ------------------------------
# 1Ô∏è‚É£ Device setup (GPU on Mac)
# ------------------------------
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("‚úÖ Using device: CUDA GPU")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print("‚úÖ Using device: Apple MPS GPU")
else:
    device = torch.device("cpu")
    print("‚öôÔ∏è Using device: CPU (no GPU detected)")

# ------------------------------
# 2Ô∏è‚É£ Load dataset
# ------------------------------
ds = load_dataset("SetFit/ade_corpus_v2_classification")
train = ds["train"].to_pandas()

# ------------------------------
# 3Ô∏è‚É£ Load Bio_ClinicalBERT
# ------------------------------
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT").to(device)
model.eval()  # disable dropout etc.

# ------------------------------
# 4Ô∏è‚É£ Function to get embeddings (with progress bar)
# ------------------------------
def get_embeddings(texts, batch_size=16):
    embs = []
    for i in tqdm(range(0, len(texts), batch_size), desc="üîç Encoding batches"):
        batch = texts[i:i+batch_size]
        inputs = tokenizer(
            batch,
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=128
        ).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
            # Mean pooling across tokens
            emb = outputs.last_hidden_state.mean(dim=1)
        embs.append(emb.cpu())
    return torch.cat(embs).numpy()

# ------------------------------
# 5Ô∏è‚É£ Run and show progress
# ------------------------------
embs = get_embeddings(train["text"].tolist(), batch_size=16)

print("‚úÖ Embedding shape:", embs.shape)


‚úÖ Using device: CUDA GPU


Repo card metadata block was not found. Setting CardData to empty.
üîç Encoding batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1103/1103 [01:15<00:00, 14.59it/s]

‚úÖ Embedding shape: (17637, 768)





In [2]:
np.save("ade_embeddings.npy", embs)
print("üíæ Saved embeddings to ade_embeddings.npy")

üíæ Saved embeddings to ade_embeddings.npy


In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [4]:
# Compute cosine similarity (this can be memory heavy ‚Äî do partial)
sims = cosine_similarity(embs)

# For each sample, compare with its top 5 nearest neighbors
labels = train["label"].values
mislabeled = []

for i in range(len(train)):
    nn_idx = np.argsort(-sims[i])[1:6]  # top 5 neighbors (skip itself)
    neighbor_labels = labels[nn_idx]
    # If most neighbors have different label ‚Üí flag as suspicious
    if np.sum(neighbor_labels == labels[i]) <= 2:
        mislabeled.append(i)

suspects = train.iloc[mislabeled]
print(f"‚ö†Ô∏è  Possible mislabeled samples: {len(suspects)} / {len(train)}")
print(suspects.head())

‚ö†Ô∏è  Possible mislabeled samples: 3398 / 17637
                                                 text  label   label_text
3   Complex biochemical syndrome of hypocalcemia a...      0  Not-Related
19  We describe two patients who developed hypopho...      0  Not-Related
23  Vitamin D toxicity complicating the treatment ...      0  Not-Related
31  The case of a young woman suffering from multi...      0  Not-Related
34  Extraskeletal tumoral calcifications (TC) may ...      0  Not-Related


In [5]:
# Optionally save
suspects.to_csv("ade_mislabeled_candidates.csv", index=False)
print("üíæ Saved suspicious samples to ade_mislabeled_candidates.csv")

üíæ Saved suspicious samples to ade_mislabeled_candidates.csv


In [7]:

from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

In [8]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

In [9]:
# model_id = "meta-llama/Llama-2-7b-chat-hf"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

print(f"Loading tokenizer for {model_id} ...")
tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)

print(f"Loading model {model_id} in fp16 (no quantization) ...")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,   # half precision
    device_map="auto",           # automatically puts layers on GPU
    token=True
)

print(f"{model_id} loaded successfully ‚úÖ")


Loading tokenizer for meta-llama/Meta-Llama-3-8B-Instruct ...


`torch_dtype` is deprecated! Use `dtype` instead!


Loading model meta-llama/Meta-Llama-3-8B-Instruct in fp16 (no quantization) ...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


meta-llama/Meta-Llama-3-8B-Instruct loaded successfully ‚úÖ


In [28]:
def ask_llama(text, label):
    prompt = f"""[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "{text}"
    Assigned label: "{label}"
    Possible labels: ADE, Not-Related.
    Does the label correctly describe the text? [/INST]
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=80)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# --- 5Ô∏è‚É£ Run on Sample Rows ---------------------------------------------------
samples = train.sample(5, random_state=0)
for _, row in samples.iterrows():
    print(ask_llama(row["text"], row["label_text"]))
    print("‚Äî"*60)

# --- 6Ô∏è‚É£ (Optional) Save Results ---------------------------------------------
samples["llama_judgment"] = samples.apply(
    lambda r: ask_llama(r["text"], r["label_text"]),
    axis=1
)
samples.to_csv("llama_reviewed.csv", index=False)
print("‚úÖ Saved results to llama_reviewed.csv")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "A magnetic resonance imaging scan obtained 7 weeks after the injection showed a full-thickness tear of the supraspinatus tendon."
    Assigned label: "Not-Related"
    Possible labels: ADE, Not-Related.
    Does the label correctly describe the text? [/INST]
     Correct
     The text does not describe an adverse event. [/INST] 
[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "A magnetic resonance imaging scan obtained 7 weeks after
‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "A fibrinolytic therapy with recombinant tissue-type plasminogen activator according to the Neuhaus scheme was attempted."
    Assigned label: "Not-Related"
    Possible labels: ADE, Not-Related.
    Does the label correctly describe the text? [/INST]
     Correct
     The text does not mention an adverse event. [/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST] <<SYS>>[/INST
‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "There are, however, case studies describing patients experiencing recurrences of tics following treatment with atomoxetine."
    Assigned label: "Related"
    Possible labels: ADE, Not-Related.
    Does the label correctly describe the text? [/INST]
     Correct
     The text describes a relationship between atomoxetine and tics. [/INST] <<SYS>>  [INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "There are, however
‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "The major limiting side effect is hemolytic anemia, which may be mitigated through careful patient selection, conservative drug dosing, close monitoring, and possibly, concurrent administration of antioxidants or cytochrome P450 inhibitors."
    Assigned label: "Not-Related"
    Possible labels: ADE, Not-Related.
    Does the label correctly describe the text? [/INST]
     Correct
     The text does not describe an adverse drug effect. [/INST] 
[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "The major limiting side effect is hemolytic
‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Ä

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "Gangrene of the fingertips after bleomycin and methotrexate."
    Assigned label: "Related"
    Possible labels: ADE, Not-Related.
    Does the label correctly describe the text? [/INST]
     Correct
     The text is about a side effect of chemotherapy. [/INST] <<SYS>> [INST] <<SYS>>
    You are a strict classifier. 
    Respond exactly with two lines:
    Line 1: Correct / Incorrect / Unclear
    Line 2: Short reason (‚â§15 words).
    <</SYS>>
    Text: "Gangrene of the fingertips after
‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî‚Äî


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


‚úÖ Saved results to llama_reviewed.csv


In [13]:
!pip install gradio



In [25]:
import gradio as gr
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Load model ---
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)

# --- Define classifier function ---
def classify_text(text, label):
    prompt = f"""[INST] <<SYS>>
    You are a strict classifier. Respond with 'Correct', 'Incorrect', or 'Unclear'
    and one short reason (‚â§15 words).
    <</SYS>>
    Text: "{text}"
    Assigned label: "{label}"
    Possible labels: ADE, Not-Related.
    Does the label correctly describe the text? [/INST]"""
    
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=60)
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # --- cleaning ---
    cleaned = raw_output.replace("[INST]", "").replace("[/INST]", "")
    cleaned = cleaned.replace("<<SYS>>", "").replace("<</SYS>>", "").strip()
    import re
    match = re.search(r"(Correct|Incorrect|Unclear)[\s\S]*", cleaned)
    if match:
        cleaned = match.group(0).strip()

    return cleaned


# --- Gradio UI ---
demo = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(label="Text", placeholder="Enter a medical description..."),
        gr.Radio(["ADE", "Not-Related"], label="Assigned Label")
    ],
    outputs="text",
    title="üîç ADE Label Verifier (LLaMA-3-8B)",
    description="Check whether an ADE label fits the input text."
)

demo.launch(share=True)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://3cfbb5c0a98417b791.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [20]:
# Test directly in notebook
try:
    print(classify_text(
        "Complex biochemical syndrome of hypocalcemia and hypoparathyroidism during cytotoxic treatment of an infant with leukemia.",
        "Not-Related"
    ))
except Exception as e:
    import traceback
    traceback.print_exc()


Traceback (most recent call last):
  File "/tmp/ipython-input-2797958347.py", line 3, in <cell line: 0>
    print(classify_text(
          ^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2472508322.py", line 30, in classify_text
    outputs = model.generate(
              ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 120, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/transformers/generation/utils.py", line 2564, in generate
    result = decoding_method(
             ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/transformers/generation/utils.py", line 2784, in _sample
    outputs = self(**model_inputs, return_dict=True)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^

In [13]:
import requests
import pandas as pd
from tqdm import tqdm

API_KEY = "bd00d902-ef40-4d7a-bd48-5d071830ce02"   
BASE_URL = "https://data.bioontology.org/ontologies/MEDDRA/classes"

def fetch_meddra_terms(max_pages=5, page_size=100):
    """Fetch MedDRA terms from BioPortal"""
    headers = {"Authorization": f"apikey token={API_KEY}"}
    terms = []

    for page in tqdm(range(1, max_pages+1)):
        url = f"{BASE_URL}?page={page}&pagesize={page_size}"
        response = requests.get(url, headers=headers)
        data = response.json()

        for entry in data.get("collection", []):
            terms.append({
                "term": entry.get("prefLabel"),
                "definition": entry.get("definition", [""])[0] if entry.get("definition") else "",
                "uri": entry.get("@id")
            })

        if "nextPage" not in data.get("links", {}):
            break

    df = pd.DataFrame(terms).dropna(subset=["term"])
    return df


In [19]:
meddra_df = fetch_meddra_terms(max_pages=10)  # e.g. ~1000 terms
print(meddra_df.head())


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:02<00:00,  3.36it/s]

                                   term definition  \
0                    Algoneurodystrophy              
1                    Spinal funiculitis              
2  Malignant neoplasm of fallopian tube              
3  Immature platelet fraction increased              
4        Abdominal wall biopsy abnormal              

                                                 uri  
0  http://purl.bioontology.org/ontology/MEDDRA/10...  
1  http://purl.bioontology.org/ontology/MEDDRA/10...  
2  http://purl.bioontology.org/ontology/MEDDRA/10...  
3  http://purl.bioontology.org/ontology/MEDDRA/10...  
4  http://purl.bioontology.org/ontology/MEDDRA/10...  





In [22]:
# ------------------------------
# 3Ô∏è‚É£ Load Bio_ClinicalBERT + Fix padding issue
# ------------------------------
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

# ‚úÖ Fix for missing pad token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT").to(device)
model.resize_token_embeddings(len(tokenizer))
model.eval()  # disable dropout etc.

# # ------------------------------
# # 4Ô∏è‚É£ Embedding function
# # ------------------------------
def get_embeddings(texts, batch_size=16):
    embs = []
    for i in tqdm(range(0, len(texts), batch_size), desc="üîç Encoding batches"):
        batch = texts[i:i+batch_size]
        inputs = tokenizer(
            batch,
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=128
        ).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
            emb = outputs.last_hidden_state.mean(dim=1)
        embs.append(emb.cpu())
    return torch.cat(embs).numpy()

# ------------------------------
# 6Ô∏è‚É£ Generate embeddings for MedDRA terms
# ------------------------------
meddra_emb = get_embeddings(meddra_df["term"].astype(str).tolist(), batch_size=16)
np.save("meddra_embeddings_bioclinicalbert.npy", meddra_emb)
print("‚úÖ MedDRA embeddings generated and saved:", meddra_emb.shape)

# ------------------------------
# 7Ô∏è‚É£ Mapping function: find nearest MedDRA term
# ------------------------------
from sklearn.metrics.pairwise import cosine_similarity

def map_to_meddra(event_text, top_k=3):
    event_emb = get_embeddings([event_text])
    sims = cosine_similarity(event_emb, meddra_emb)[0]
    top_idx = np.argsort(sims)[::-1][:top_k]
    results = meddra_df.iloc[top_idx].copy()
    results["similarity"] = sims[top_idx]
    return results



üîç Encoding batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 63/63 [00:01<00:00, 44.75it/s]


‚úÖ MedDRA embeddings generated and saved: (1000, 768)


In [24]:
examples = [
    "rash and fever after taking amoxicillin",
    "nausea and vomiting following chemotherapy",
    "shortness of breath and dizziness"
]

for text in examples:
    print(f"\nüßæ Event text: {text}")
    print(map_to_meddra(text))


üßæ Event text: rash and fever after taking amoxicillin


üîç Encoding batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 67.26it/s]


                                             term definition  \
869  Mycobacterium leprae infection (tuberculoid)              
557           Diphtheria toxin antibody increased              
477          Mycoplasma hominis postabortal fever              

                                                   uri  similarity  
869  http://purl.bioontology.org/ontology/MEDDRA/10...    0.916977  
557  http://purl.bioontology.org/ontology/MEDDRA/10...    0.913203  
477  http://purl.bioontology.org/ontology/MEDDRA/10...    0.909093  

üßæ Event text: nausea and vomiting following chemotherapy


üîç Encoding batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 17.99it/s]


                              term definition  \
848   Excess vomiting in pregnancy              
170    Pyloric carcinoma recurrent              
793  Radiation somnolence syndrome              

                                                   uri  similarity  
848  http://purl.bioontology.org/ontology/MEDDRA/10...    0.876271  
170  http://purl.bioontology.org/ontology/MEDDRA/10...    0.874017  
793  http://purl.bioontology.org/ontology/MEDDRA/10...    0.871172  

üßæ Event text: shortness of breath and dizziness


üîç Encoding batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 39.71it/s]

                      term definition  \
897  Aches & pains in legs              
74            Diffuse pain              
968         Pain menstrual              

                                                   uri  similarity  
897  http://purl.bioontology.org/ontology/MEDDRA/10...    0.854999  
74   http://purl.bioontology.org/ontology/MEDDRA/10...    0.836349  
968  http://purl.bioontology.org/ontology/MEDDRA/10...    0.833747  



