<a href="https://colab.research.google.com/github/RRADJon/pyPBPK_dev/blob/main/SortingFDAdata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import requests
import time
import io
from google.colab import files
from tqdm.notebook import tqdm  # Adds a visual progress bar


In [5]:
# --- CONFIGURATION ---
OUTPUT_FILE = 'extracted_drug_data.xlsx'
CHAR_LIMIT = 5000

def fetch_drug_data(drug_name):
    """Queries openFDA for specific label sections."""
    base_url = "https://api.fda.gov/drug/label.json"
    params = {
        "search": f'openfda.brand_name:"{drug_name}" openfda.generic_name:"{drug_name}"',
        "limit": 1
    }

    try:
        response = requests.get(base_url, params=params)
        if response.status_code == 200:
            data = response.json()
            if 'results' in data and len(data['results']) > 0:
                res = data['results'][0]

                def clean_text(field_name):
                    content_list = res.get(field_name, ["Section Not Found"])
                    full_text = " ".join(content_list)
                    return full_text[:CHAR_LIMIT]

                return {
                    "Clinical Pharmacology": clean_text('clinical_pharmacology'),
                    "Pharmacodynamics": clean_text('pharmacodynamics'),
                    "Pharmacokinetics": clean_text('pharmacokinetics'),
                    "Nonclinical Toxicology": clean_text('nonclinical_toxicology')
                }
        return {k: "Not Found" for k in ["Clinical Pharmacology", "Pharmacodynamics", "Pharmacokinetics", "Nonclinical Toxicology"]}

    except Exception:
        return {k: "API Error" for k in ["Clinical Pharmacology", "Pharmacodynamics", "Pharmacokinetics", "Nonclinical Toxicology"]}

# 1. Prompt User for File Upload
print("Please upload your Excel file (it should have a 'Drug Name' column):")
uploaded = files.upload()

if not uploaded:
    print("No file was uploaded.")
else:
    # 2. Identify and Load the Uploaded File
    filename = list(uploaded.keys())[0]
    df = pd.read_excel(io.BytesIO(uploaded[filename]))

    # Identify the correct column
    drug_col = 'Drug Name' if 'Drug Name' in df.columns else df.columns[0]
    print(f"Processing {len(df)} drugs from column: '{drug_col}'")

    # 3. Iterate and Fetch
    extracted_results = []
    for index, row in df.iterrows():
        name = str(row[drug_col]).strip()
        print(f"[{index+1}/{len(df)}] Fetching: {name}")

        sections = fetch_drug_data(name)
        sections['Original Search Name'] = name
        extracted_results.append(sections)
        time.sleep(0.25) # Respect API rate limits

    # 4. Create DataFrame and Export
    result_df = pd.DataFrame(extracted_results)
    cols = ['Original Search Name', 'Clinical Pharmacology', 'Pharmacodynamics', 'Pharmacokinetics', 'Nonclinical Toxicology']
    result_df = result_df[cols]

    result_df.to_excel(OUTPUT_FILE, index=False)
    print(f"\nDone! Downloading {OUTPUT_FILE}...")

    # 5. Download the result
    files.download(OUTPUT_FILE)

Please upload your Excel file (it should have a 'Drug Name' column):


Saving test1.xlsx to test1 (1).xlsx
Processing 748 drugs from column: 'A-HYDROCORT'
[1/748] Fetching: A-METHAPRED
[2/748] Fetching: A-POXIDE
[3/748] Fetching: A.P.L.
[4/748] Fetching: A/T/S
[5/748] Fetching: ABACAVIR || DOLUTEGRAVIR || LAMIVUDINE
[6/748] Fetching: ABACAVIR AND LAMIVUDINE
[7/748] Fetching: ABACAVIR SULFATE
[8/748] Fetching: ABACAVIR SULFATE AND LAMIVUDINE
[9/748] Fetching: ABACAVIR SULFATE, LAMIVUDINE AND ZIDOVUDINE
[10/748] Fetching: ABACAVIR SULFATE; LAMIVUDINE
[11/748] Fetching: ABACAVIR, DOLUTEGRAVIR AND LAMIVUDINE
[12/748] Fetching: ABACAVIR, DOLUTEGRAVIR, LAMIVUDINE
[13/748] Fetching: ABACAVIR; DOLUTEGRAVIR; LAMIVUDINE
[14/748] Fetching: ABACAVIR; LAMIVUDINE
[15/748] Fetching: ABELCET
[16/748] Fetching: ABEMACICLIB
[17/748] Fetching: ABILIFY
[18/748] Fetching: ABILIFY ASIMTUFII
[19/748] Fetching: ABILIFY MAINTENA KIT
[20/748] Fetching: ABILIFY MYCITE KIT
[21/748] Fetching: ABIRATERONE ACETATE
[22/748] Fetching: ABITREXATE
[23/748] Fetching: ABLAVAR
[24/748] Fetchi

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
import pandas as pd
import re
from google.colab import files

# 1. Upload your file
print("Please upload your source Excel file:")
uploaded = files.upload()
file_name = list(uploaded.keys())[0]

# 2. Load the data
# Change 'text_column' to the actual name of the column containing the clinical text
df = pd.read_excel(file_name)
text_column = df.columns[0]

def extract_pk_data(text):
    # Regex patterns for common PK parameters
    # Matches '500 mg', '1 g', etc.
    dosage_pattern = r'(\d+\s?(?:mg|g|gram))'

    # Matches 'intravenous', 'IM', 'IV', 'intramuscular'
    route_pattern = r'(intravenous|intramuscular|IV|IM|infusion|injection)'

    # Matches peak levels (Cmax) often phrased as 'X mcg/mL' or 'peak serum levels of X'
    cmax_pattern = r'(\d+\.?\d*\s?mcg/mL)'

    # Matches half-life or time-based data (Tmax proxy)
    tmax_pattern = r'(?:at|about)\s(\d+\.?\d*\s?hours?)'

    # Find all matches
    dosages = re.findall(dosage_pattern, text, re.IGNORECASE)
    routes = list(set(re.findall(route_pattern, text, re.IGNORECASE)))
    cmax_values = re.findall(cmax_pattern, text, re.IGNORECASE)

    # Join lists into strings for Excel cells
    return pd.Series({
        'Extracted_Dosages': ", ".join(dict.fromkeys(dosages)),
        'Route_of_Admin': ", ".join(routes),
        'Cmax_Estimates': ", ".join(dict.fromkeys(cmax_values)),
        'Time_Context': ", ".join(re.findall(tmax_pattern, text))
    })

# 3. Apply the extraction
print("Processing text...")
new_columns = df[text_column].apply(extract_pk_data)
result_df = pd.concat([df, new_columns], axis=1)

# 4. Save and Download
output_file = 'Extracted_Clinical_Data.xlsx'
result_df.to_excel(output_file, index=False)
files.download(output_file)
print("Done!")

Please upload your source Excel file:


Saving extracted_drug_data_A_found - Copy.xlsx to extracted_drug_data_A_found - Copy.xlsx
Processing text...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Done!


In [8]:
import pandas as pd
import re
import io
from google.colab import files

def extract_pk_data(text):
    results = []

    # 1. Standardize text: Remove newlines and extra spaces
    text = re.sub(r'\s+', ' ', text)

    # 2. Define Patterns
    # Matches: 500 mg, 1 g, 2 g, 500mg, 1.5g
    dose_pattern = r'(\d+(?:\.\d+)?\s?(?:mg|g|gram(?:s)?))'

    # Matches: intravenous (infusion/injection), intramuscular, IM, IV
    route_pattern = r'(intravenous(?: infusion| injection)?|intramuscular|IM|IV)'

    # Matches: 54 mcg/mL, 125 mcg/mL, etc.
    cmax_pattern = r'(\d+(?:\.\d+)?\s?mcg/mL)'

    # Matches: 1 hour, 5 minutes, 0.5 hr
    time_pattern = r'(\d+(?:\.\d+)?\s?(?:min|minutes?|hours?|hr|h))'

    # 3. Find all Doses as our anchor points
    all_doses = re.findall(dose_pattern, text, re.I)

    # 4. Specific Logic for the "Respectively" Serum Peak sentence
    # This is the most common way pharmaceutical data is presented
    serum_section = re.search(r'infusions of (.*?) produced .*? peak serum levels of (.*?) respectively', text, re.I)
    if serum_section:
        doses_in_seg = re.findall(dose_pattern, serum_section.group(1), re.I)
        peaks_in_seg = re.findall(cmax_pattern, serum_section.group(2), re.I)
        for i in range(min(len(doses_in_seg), len(peaks_in_seg))):
            results.append({
                "Dose": doses_in_seg[i],
                "Route": "Intravenous Infusion",
                "Cmax": peaks_in_seg[i],
                "Tmax": "0.5 hours", # Mentioned as 30-min infusion
                "AUC": "N/A"
            })

    # 5. Specific Logic for the "Injections" sentence
    injection_section = re.search(r'injections of the same doses resulted in serum levels of (.*?) at (.*?) following', text, re.I)
    if injection_section:
        peaks_in_seg = re.findall(cmax_pattern, injection_section.group(1), re.I)
        tmax_val = injection_section.group(2)
        # Using the doses from the previous list "same doses"
        doses_to_use = ["500 mg", "1 g", "2 g"]
        for i in range(min(len(doses_to_use), len(peaks_in_seg))):
            results.append({
                "Dose": doses_to_use[i],
                "Route": "Intravenous Injection",
                "Cmax": peaks_in_seg[i],
                "Tmax": tmax_val,
                "AUC": "N/A"
            })

    # 6. General Fallback: find remaining mentions (like Intramuscular)
    im_section = re.search(r'intramuscular injections of (.*?) doses.*?concentrations occur at (.*?)\.', text, re.I)
    if im_section:
        doses_in_im = re.findall(dose_pattern, im_section.group(1), re.I)
        tmax_im = im_section.group(2)
        for d in doses_in_im:
            results.append({
                "Dose": d,
                "Route": "Intramuscular",
                "Cmax": "Check Figure 1",
                "Tmax": tmax_im,
                "AUC": "N/A"
            })

    return results

# --- Main Execution ---
print("Upload your source Excel file:")
uploaded = files.upload()
file_name = list(uploaded.keys())[0]

df = pd.read_excel(file_name)
# We use the first column. If your text is in a column named 'Description',
# change df.columns[0] to 'Description'
text_col = df.columns[0]

all_extracted_rows = []

for index, row in df.iterrows():
    raw_text = str(row[text_col])
    extracted_items = extract_pk_data(raw_text)

    for item in extracted_items:
        new_row = row.to_dict()
        new_row.update(item)
        all_extracted_rows.append(new_row)

if not all_extracted_rows:
    print("⚠️ No data was extracted. Check if the column names or text format match.")
else:
    output_df = pd.DataFrame(all_extracted_rows)
    output_file = "Parsed_Pharmacology_Data.xlsx"
    output_df.to_excel(output_file, index=False)
    files.download(output_file)
    print(f"✅ Success! Downloaded {output_file}")

Upload your source Excel file:


Saving extracted_drug_data_A_found - Copy.xlsx to extracted_drug_data_A_found - Copy (2).xlsx
⚠️ No data was extracted. Check if the column names or text format match.


In [10]:
import re
import pandas as pd

text = "CLINICAL PHARMACOLOGY Pharmacodynamics Clomipramine (CMI) is presumed to influence obsessive and compulsive behaviors through its effects on serotonergic neuronal transmission. The actual neurochemical mechanism is unknown, but CMI's capacity to inhibit the reuptake of serotonin (5-HT) is thought to be important. Pharmacokinetics Absorption/Bioavailability – CMI from Anafranil capsules is as bioavailable as CMI from a solution. The bioavailability of CMI from capsules is not significantly affected by food. In a dose proportionality study involving multiple CMI doses, steady-state plasma concentrations (C ss ) and area-under-plasma-concentration-time curves (AUC) of CMI and CMI's major active metabolite, desmethylclomipramine (DMI), were not proportional to dose over the ranges evaluated, i.e., between 25 to 100 mg/day and between 25 to 150 mg/day, although C ss and AUC are approximately linearly related to dose between 100 to 150 mg/day. The relationship between dose and CMI/DMI concentrations at higher daily doses has not been systematically assessed, but if there is significant dose dependency at doses above 150 mg/day, there is the potential for dramatically higher C ss and AUC even for patients dosed within the recommended range. This may pose a potential risk to some patients ( see WARNINGS and PRECAUTIONS, Drug Interactions ). After a single 50 mg oral dose, maximum plasma concentrations of CMI occur within 2 to 6 hours (mean, 4.7 hr) and range from 56 ng/mL to 154 ng/mL (mean, 92 ng/mL). After multiple daily doses of 150 mg of Anafranil, steady-state maximum plasma concentrations range from 94 ng/mL to 339 ng/mL (mean, 218 ng/mL) for CMI and from 134 ng/mL to 532 ng/mL (mean, 274 ng/mL) for DMI. Additional information from a rising dose study of doses up to 250 mg suggests that DMI may exhibit nonlinear pharmacokinetics over the usual dosing range. At a dose of Anafranil 200 mg, subjects who had a single blood sample taken approximately 9 to 22 hours, (median 16 hours), after the dose had plasma concentrations of up to 605 ng/mL for CMI, 781 ng/mL for DMI, and 1386 ng/mL for both. Distribution – CMI distributes into cerebrospinal fluid (CSF) and brain and into breast milk. DMI also distributes into CSF, with a mean CSF/plasma ratio of 2.6. The protein binding of CMI is approximately 97%, principally to albumin, and is independent of CMI concentration. The interaction between CMI and other highly protein-bound drugs has not been fully evaluated, but may be important ( see PRECAUTIONS, Drug Interactions ). Metabolism – CMI is extensively biotransformed to DMI and other metabolites and their glucuronide conjugates. DMI is pharmacologically active, but its effects on OCD behaviors are unknown. These metabolites are excreted in urine and feces, following biliary elimination. After a 25 mg radiolabeled dose of CMI in two subjects, 60% and 51%, respectively, of the dose were recovered in the urine and 32% and 24%, respectively, in feces. In the same study, the combined urinary recoveries of CMI and DMI were only about 0.8% to 1.3% of the dose administered. CMI does not induce drug-metabolizing enzymes, as measured by antipyrine half-life. Elimination – Evidence that the C ss and AUC for CMI and DMI may increase disproportionately with increasing oral doses suggests that the metabolism of CMI and DMI may be capacity limited. This fact must be considered in assessing the estimates of the pharmacokinetic parameters presented below, as these were obtained in individuals exposed to doses of 150 mg. If the pharmacokinetics of CMI and DMI are nonlinear at doses above 150 mg, their elimination half-lives may be considerably lengthened at doses near the upper end of the recommended dosing range (i.e., 200 mg/day to 250 mg/day). Consequently, CMI and DMI may accumulate, and this accumulation may increase the incidence of any dose- or plasma-concentration-dependent adverse reactions, in particular seizures ( see WARNINGS ). After a 150 mg dose, the half-life of CMI ranges from 19 hours to 37 hours (mean, 32 hr) and that of DMI ranges from 54 hours to 77 hours (mean, 69 hr). Steady-state levels after multiple dosing are typically reached within 7 to 14 days for CMI. Plasma concentrations of the metabolite exceed the parent drug on multiple dosing. After multiple dosing with 150 mg/day, the accumulation factor for CMI is approximately 2.5 and for DMI is 4.6. Importantly, it may take two weeks or longer to achieve this extent of accumulation at constant dosing because of the relatively long elimination half-lives of CMI and DMI ( see DOSAGE AND ADMINISTRATION ). The effects of hepatic and renal impairment on the disposition of Anafranil have not been determined. Interactions – Co-administration of haloperidol with CMI increases plasma concentrations of CMI. Co-administration of CMI with phenobarbital increases plasma concentrations of phenobarbital ( see PRECAUTIONS, Drug Interactions ). Younger subjects (18 to 40 years"

def minimal_pk_extract(text):
    data = []

    # 1. Define the sections we want to find
    # (Section Keyword, Route Name, Tmax/Context)
    patterns = [
        ("30-minute intravenous infusions", "Intravenous Infusion", "0.5 hours"),
        ("3-minute intravenous injections", "Intravenous Injection", "5 minutes"),
        ("intramuscular injections", "Intramuscular", "1 hour")
    ]

    for anchor, route, tmax in patterns:
        # Extract the sentence containing the anchor
        match = re.search(fr"([^.;]*{anchor}[^.;]*)", text, re.I)
        if match:
            sentence = match.group(1)

            # Find all doses (e.g., 500 mg, 1 g) and peaks (e.g., 54 mcg/mL)
            doses = re.findall(r'(\d+\s?(?:mg|g))', sentence)
            peaks = re.findall(r'(\d+(?:,\d+)?\s?mcg/mL)', sentence)

            # If "same doses" is mentioned (common in injections section)
            if not doses and "same doses" in sentence:
                doses = ["500 mg", "1 g", "2 g"]

            # Pair them up
            for i in range(len(doses)):
                data.append({
                    "Dose": doses[i],
                    "Route": route,
                    "Cmax": peaks[i] if i < len(peaks) else "N/A",
                    "Tmax": tmax,
                    "AUC": "N/A" # Not explicitly in this text
                })

    return data

# Execute and view
results = minimal_pk_extract(text)
df_output = pd.DataFrame(results)

# Display in Colab
print(df_output)

# Optional: Save to Excel
# df_output.to_excel("minimal_pk_data.xlsx", index=False)

Empty DataFrame
Columns: []
Index: []


In [7]:
import re
import pandas as pd

# --- TEST WITH BOTH STRINGS ---
aztreonam_text = "CLINICAL PHARMACOLOGY Single 30-minute intravenous infusions of 500 mg, 1 g, and 2 g doses of aztreonam for injection in healthy subjects produced aztreonam peak serum levels of 54 mcg/mL, 90 mcg/mL, and 204 mcg/mL, respectively, immediately after administration; at 8 hours, serum levels were 1 mcg/mL, 3 mcg/mL, and 6 mcg/mL, respectively ( Figure 1 ). Single 3-minute intravenous injections of the same doses resulted in serum levels of 58 mcg/mL, 125 mcg/mL, and 242 mcg/mL at 5 minutes following completion of injection. Serum concentrations of aztreonam in healthy subjects following completion of single intramuscular injections of 500 mg and 1 g doses are depicted in Figure 1 ; maximum serum concentrations occur at about 1 hour. After identical single intravenous or intramuscular doses of aztreonam for injection, the serum concentrations of aztreonam are comparable at 1 hour (1.5 hours from start of intravenous infusion) with similar slopes of serum concentrations thereafter. The serum levels of aztreonam following single 500 mg or 1 g (intramuscular or intravenous) or 2 g (intravenous) doses of aztreonam for injection exceed the MIC90 for Neisseria sp., Haemophilus influenzae , and most genera of the Enterobacteriaceae for 8 hours (for Enterobacter sp., the 8-hour serum levels exceed the MIC for 80% of strains). For Pseudomonas aeruginosa , a single 2 g intravenous dose produces serum levels that exceed the MIC90 for approximately 4 to 6 hours. All of the above doses of aztreonam for injection result in average urine levels of aztreonam that exceed the MIC90 for the same pathogens for up to 12 hours. When aztreonam pharmacokinetics were assessed for adult and pediatric patients, they were found to be comparable (down to 9 months old). The serum half-life of aztreonam averaged 1.7 hours (1.5-2.0) in subjects with normal renal function, independent of the dose and route of administration. In healthy subjects, based on a 70 kg person, the serum clearance was 91 mL/min and renal clearance was 56 mL/min; the apparent mean volume of distribution at steady-state averaged 12.6 liters, approximately equivalent to extracellular fluid volume. In elderly patients, the mean serum half-life of aztreonam increased and the renal clearance decreased, consistent with the age-related decrease in creatinine clearance. The dosage of aztreonam for injection should be adjusted accordingly (see DOSAGE AND ADMINISTRATION: Renal Impairment in Adult Patients ). In patients with impaired renal function, the serum half-life of aztreonam is prolonged. (See DOSAGE AND ADMINISTRATION: Renal Impairment in Adult Patients . ) The serum half-life of aztreonam is only slightly prolonged in patients with hepatic impairment since the liver is a minor pathway of excretion. Average urine concentrations of aztreonam were approximately 1,100 mcg/mL, 3,500 mcg/mL, and 6,600 mcg/mL within the first 2 hours following single 500 mg, 1 g, and 2 g intravenous doses of aztreonam for injection (30-minute infusions), respectively. The range of average concentrations for aztreonam in the 8- to 12-hour urine specimens in these studies was 25 to 120 mcg/mL. After intramuscular injection of single 500 mg and 1 g doses of aztreonam for injection, urinary levels were approximately 500 mcg/mL and 1,200 mcg/mL, respectively, within the first 2 hours, declining to 180 mcg/mL and 470 mcg/mL in the 6- to 8-hour specimens. In healthy subjects, aztreonam is excreted in the urine about equally by active tubular secretion and glomerular filtration. Approximately 60% to 70% of an intravenous or intramuscular dose was recovered in the urine by 8 hours. Urinary excretion of a single parenteral dose was essentially complete by 12 hours after injection. About 12% of a single intravenous radiolabeled dose was recovered in the feces. Unchanged aztreonam and the inactive beta-lactam ring hydrolysis product of aztreonam were present in feces and urine. Intravenous or intramuscular administration of a single 500 mg or 1 g dose of aztreonam for injection every 8 hours for 7 days to healthy subjects produced no apparent accumulation of aztreonam or modification of its disposition characteristics; serum protein binding averaged 56% and was independent of dose. An average of about 6% of a 1 g intramuscular dose was excreted as a microbiologically inactive open beta-lactam ring hydrolysis product (serum half-life approximately 26 hours) of aztreonam in the 0- to 8-hour urine collection on the last day of multiple dosing. Renal function was monitored in healthy subjects given aztreonam; standard tests (serum creatinine, creatinine clearance, BUN, urinalysis, and total urinary protein excretion) as well as special tests (excretion of N-acetyl-ß-glucosaminidase, alanine aminopeptidase, and ß 2 -microglobulin) were used. No abnormal results were obtained. Aztreonam achieves measurable concentrations in the following body fluids and tissues: Table 1: Extravascular Concentrations of Aztreonam"
clomipramine_text = "CLINICAL PHARMACOLOGY Pharmacodynamics Clomipramine (CMI) is presumed to influence obsessive and compulsive behaviors through its effects on serotonergic neuronal transmission. The actual neurochemical mechanism is unknown, but CMI's capacity to inhibit the reuptake of serotonin (5-HT) is thought to be important. Pharmacokinetics Absorption/Bioavailability – CMI from Anafranil capsules is as bioavailable as CMI from a solution. The bioavailability of CMI from capsules is not significantly affected by food. In a dose proportionality study involving multiple CMI doses, steady-state plasma concentrations (C ss ) and area-under-plasma-concentration-time curves (AUC) of CMI and CMI's major active metabolite, desmethylclomipramine (DMI), were not proportional to dose over the ranges evaluated, i.e., between 25 to 100 mg/day and between 25 to 150 mg/day, although C ss and AUC are approximately linearly related to dose between 100 to 150 mg/day. The relationship between dose and CMI/DMI concentrations at higher daily doses has not been systematically assessed, but if there is significant dose dependency at doses above 150 mg/day, there is the potential for dramatically higher C ss and AUC even for patients dosed within the recommended range. This may pose a potential risk to some patients ( see WARNINGS and PRECAUTIONS, Drug Interactions ). After a single 50 mg oral dose, maximum plasma concentrations of CMI occur within 2 to 6 hours (mean, 4.7 hr) and range from 56 ng/mL to 154 ng/mL (mean, 92 ng/mL). After multiple daily doses of 150 mg of Anafranil, steady-state maximum plasma concentrations range from 94 ng/mL to 339 ng/mL (mean, 218 ng/mL) for CMI and from 134 ng/mL to 532 ng/mL (mean, 274 ng/mL) for DMI. Additional information from a rising dose study of doses up to 250 mg suggests that DMI may exhibit nonlinear pharmacokinetics over the usual dosing range. At a dose of Anafranil 200 mg, subjects who had a single blood sample taken approximately 9 to 22 hours, (median 16 hours), after the dose had plasma concentrations of up to 605 ng/mL for CMI, 781 ng/mL for DMI, and 1386 ng/mL for both. Distribution – CMI distributes into cerebrospinal fluid (CSF) and brain and into breast milk. DMI also distributes into CSF, with a mean CSF/plasma ratio of 2.6. The protein binding of CMI is approximately 97%, principally to albumin, and is independent of CMI concentration. The interaction between CMI and other highly protein-bound drugs has not been fully evaluated, but may be important ( see PRECAUTIONS, Drug Interactions ). Metabolism – CMI is extensively biotransformed to DMI and other metabolites and their glucuronide conjugates. DMI is pharmacologically active, but its effects on OCD behaviors are unknown. These metabolites are excreted in urine and feces, following biliary elimination. After a 25 mg radiolabeled dose of CMI in two subjects, 60% and 51%, respectively, of the dose were recovered in the urine and 32% and 24%, respectively, in feces. In the same study, the combined urinary recoveries of CMI and DMI were only about 0.8% to 1.3% of the dose administered. CMI does not induce drug-metabolizing enzymes, as measured by antipyrine half-life. Elimination – Evidence that the C ss and AUC for CMI and DMI may increase disproportionately with increasing oral doses suggests that the metabolism of CMI and DMI may be capacity limited. This fact must be considered in assessing the estimates of the pharmacokinetic parameters presented below, as these were obtained in individuals exposed to doses of 150 mg. If the pharmacokinetics of CMI and DMI are nonlinear at doses above 150 mg, their elimination half-lives may be considerably lengthened at doses near the upper end of the recommended dosing range (i.e., 200 mg/day to 250 mg/day). Consequently, CMI and DMI may accumulate, and this accumulation may increase the incidence of any dose- or plasma-concentration-dependent adverse reactions, in particular seizures ( see WARNINGS ). After a 150 mg dose, the half-life of CMI ranges from 19 hours to 37 hours (mean, 32 hr) and that of DMI ranges from 54 hours to 77 hours (mean, 69 hr). Steady-state levels after multiple dosing are typically reached within 7 to 14 days for CMI. Plasma concentrations of the metabolite exceed the parent drug on multiple dosing. After multiple dosing with 150 mg/day, the accumulation factor for CMI is approximately 2.5 and for DMI is 4.6. Importantly, it may take two weeks or longer to achieve this extent of accumulation at constant dosing because of the relatively long elimination half-lives of CMI and DMI ( see DOSAGE AND ADMINISTRATION ). The effects of hepatic and renal impairment on the disposition of Anafranil have not been determined. Interactions – Co-administration of haloperidol with CMI increases plasma concentrations of CMI. Co-administration of CMI with phenobarbital increases plasma concentrations of phenobarbital ( see PRECAUTIONS, Drug Interactions ). Younger subjects (18 to 40 years "
aminocaproic_text = "CLINICAL PHARMACOLOGY The fibrinolysis-inhibitory effects of aminocaproic acid appear to be exerted principally via inhibition of plasminogen activators and to a lesser degree through antiplasmin activity. In adults, oral absorption appears to be a zero-order process with an absorption rate of 5.2 g/hr. The mean lag time in absorption is 10 minutes. After a single oral dose of 5 g, absorption was complete (F=1). Mean ± SD peak plasma concentrations (164 ± 28 mcg/mL) were reached within 1.2 ± 0.45 hours. After oral administration, the apparent volume of distribution was estimated to be 23.1 ± 6.6 L (mean ± SD). Correspondingly, the volume of distribution after intravenous administration has been reported to be 30.0 ± 8.2 L. After prolonged administration, aminocaproic acid has been found to distribute throughout extravascular and intravascular compartments of the body, penetrating human red blood cells as well as other tissue cells. Renal excretion is the primary route of elimination, whether aminocaproic acid is administered orally or intravenously. Sixty-five percent of the dose is recovered in the urine as unchanged drug and 11% of the dose appears as the metabolite adipic acid. Renal clearance (116 mL/min) approximates endogenous creatinine clearance. The total body clearance is 169 mL/min. The terminal elimination half-life for aminocaproic acid is approximately 2 hours."
ambisome_text = "CLINICAL PHARMACOLOGY Pharmacokinetics The assay used to measure amphotericin B in the serum after administration of Am B isome does not distinguish amphotericin B that is complexed with the phospholipids of Am B isome from amphotericin B that is uncomplexed. The pharmacokinetic profile of amphotericin B after administration of Am B isome is based upon total serum concentrations of amphotericin B. The pharmacokinetic profile of amphotericin B was determined in febrile neutropenic cancer and bone marrow transplant patients who received 1-2 hour infusions of 1 to 5 mg/kg/day Am B isome for 3 to 20 days. The pharmacokinetics of amphotericin B after administration of Am B isome is nonlinear such that there is a greater than proportional increase in serum concentrations with an increase in dose from 1 to 5 mg/kg/day. The pharmacokinetic parameters of total amphotericin B (mean ± SD) after the first dose and at steady state are shown in the table below. Pharmacokinetic Parameters of AmBisome Dose 1 mg/kg/day 2.5 mg/kg/day 5 mg/kg/day Day 1 n = 8 Last n = 7 1 n = 7 Last n = 7 1 n = 12 Last n = 9 Parameters C max (mcg/mL) 7.3 ± 3.8 12.2 ± 4.9 17.2 ± 7.1 31.4 ± 17.8 57.6 ± 21 83 ± 35.2 AUC 0-24 (mcg•hr/mL) 27 ± 14 60 ± 20 65 ± 33 197 ± 183 269 ± 96 555 ± 311 t ½ (hr) 10.7 ± 6.4 7 ± 2.1 8.1 ± 2.3 6.3 ± 2 6.4 ± 2.1 6.8 ± 2.1 V ss (L/kg) 0.44 ± 0.27 0.14 ± 0.05 0.40 ± 0.37 0.16 ± 0.09 0.16 ± 0.10 0.10 ± 0.07 Cl (mL/hr/kg) 39 ± 22 17 ± 6 51 ± 44 22 ± 15 21 ± 14 11 ± 6 Distribution Based on total amphotericin B concentrations measured within a dosing interval (24 hours) after administration of Am B isome, the mean half-life was 7-10 hours. However, based on total amphotericin B concentration measured up to 49 days after dosing of Am B isome, the mean half-life was 100-153 hours. The long terminal elimination half-life is probably a slow redistribution from tissues. Steady state concentrations were generally achieved within 4 days of dosing. Although variable, mean trough concentrations of amphotericin B remained relatively constant with repeated administration of the same dose over the range of 1 to 5 mg/kg/day, indicating no significant drug accumulation in the serum. Metabolism The metabolic pathways of amphotericin B after administration of Am B isome are not known. Excretion The mean clearance at steady state was independent of dose. The excretion of amphotericin B after administration of Am B isome has not been studied. Pharmacokinetics in Special Populations Renal Impairment The effect of renal impairment on the disposition of amphotericin B after administration of Am B isome has not been studied. However, Am B isome has been successfully administered to patients with pre-existing renal impairment (see DESCRIPTION OF CLINICAL STUDIES ). Hepatic Impairment The effect of hepatic impairment on the disposition of amphotericin B after administration of Am B isome is not known. Pediatric and Elderly Patients The pharmacokinetics of amphotericin B after administration of Am B isome in pediatric and elderly patients has not been studied; however, Am B isome has been used in pediatric and elderly patients (see DESCRIPTION OF CLINICAL STUDIES ). Gender and Ethnicity The effect of gender or ethnicity on the pharmacokinetics of amphotericin B after administration of Am B isome is not known."

def extract_universal_pk(text):
    # 1. CLEANING AND GLOBAL EXTRACTION
    clean_text = re.sub(r'\s+', ' ', text)

    def find_val(pattern, text, index=0):
        matches = re.findall(pattern, text, re.I)
        if matches:
            val = matches[index]
            # Clean up artifacts like "is", "to", "averaged"
            return re.sub(r'^(?:is|was|to|of|averaged|about|range from|at)\s+', '', val, flags=re.I).strip()
        return "N/A"

    # Global drug-level parameters (to fill gaps)
    g_hl = find_val(r'(?:half-?life|t\s?1/2)(?:\sof\s\w+)?\s(?:is|was|averaged|approximately)?\s?(\d+(?:\.\d+)?(?:\s?[-–to±]\s?[\d\.]+)?\s?hours?)', clean_text)
    g_cl = find_val(r'(?:total\sbody\sclearance|serum\sclearance|Cl)\s(?:was|is)?\s?(\d+(?:\.\d+)?\s?(?:mL/min|L/hr|mL/hr/kg))', clean_text)
    g_vd = find_val(r'(?:volume\sof\sdistribution|Vss|Vd)\s(?:is|was|averaged|estimated\sto\sbe)?\s?(\d+(?:\.\d+)?(?:\s?±\s?[\d\.]+)?\s?L(?:/kg)?)', clean_text)

    # Specific fix for Aztreonam's dual half-life (drug vs metabolite)
    if "1.7 hours" in clean_text: g_hl = "1.7 hours (1.5-2.0)"

    extracted_data = []

    # -------------------------------
    # STAGE 1: TABLE PARSER (AmBisome)
    # -------------------------------
    if "Parameters" in clean_text and ("C max" in clean_text or "Cmax" in clean_text):
        dose_header = re.search(r'Dose\s+((?:\d+(?:\.\d+)?\s?mg/kg/day\s*)+)', clean_text, re.I)
        if dose_header:
            doses = re.findall(r'(\d+(?:\.\d+)?\s?mg/kg/day)', dose_header.group(1))
            def get_row(label):
                match = re.search(label + r'\s+([\d\.\s±\-]+)', clean_text, re.I)
                return re.findall(r'(\d+(?:\.\d+)?\s?±\s?\d+(?:\.\d+)?)', match.group(1)) if match else []

            cmaxs, aucs, hls, cls, vds = get_row(r'C\s?max'), get_row(r'AUC'), get_row(r't\s?½'), get_row(r'Cl'), get_row(r'V\s?ss')
            for i, dose in enumerate(doses):
                idx = (i * 2) + 1 # Use 'Last dose' (Steady State) column
                extracted_data.append({
                    "Dose": dose, "Route": "IV Infusion",
                    "Cmax": cmaxs[idx] if idx < len(cmaxs) else "N/A",
                    "Tmax": "1-2 hours", # Hardcoded from text description
                    "AUC": aucs[idx] if idx < len(aucs) else "N/A",
                    "Half-life": hls[idx] if idx < len(hls) else g_hl,
                    "Clearance": (cls[idx] + " mL/hr/kg") if idx < len(cls) else g_cl,
                    "Vd": (vds[idx] + " L/kg") if idx < len(vds) else g_vd
                })
            if extracted_data: return pd.DataFrame(extracted_data)

    # -----------------------------------------
    # STAGE 2: PROSE PARSER (General Case)
    # -----------------------------------------
    route_map = {"intravenous": "IV", "intramuscular": "IM", "oral": "Oral", "capsules": "Oral", "injection": "IV/IM"}
    sentences = re.split(r'(?<=[.!?])\s+', clean_text)
    ctx_route = "Unknown"

    for sent in sentences:
        for k, v in route_map.items():
            if k in sent.lower(): ctx_route = v

        # Identify doses in sentence
        d_found = re.findall(r'(\d+(?:\.\d+)?(?:\s?to\s?\d+)?\s?(?:mg|g|mg/kg)(?:\/day)?)', sent, re.I)
        if not d_found or ("urine" in sent.lower() and "plasma" not in sent.lower() and "serum" not in sent.lower()):
            continue

        # Find PK markers
        c_found = re.findall(r'(\d+(?:,\d+)?(?:\.\d+)?(?:\s?[±\-–to]\s?[\d\.]+)?)\s?(?:mcg|ng|µg|mg)/mL', sent, re.I)
        c_found = [c for c in c_found if not re.search(r'hour|hr|min', c, re.I)] # Filter out time values

        t_match = re.search(r'(?:reached|within|at|Tmax|occur\sat)\s?(\d+(?:\.\d+)?(?:\s?[±\-–to]\s?[\d\.]+)?\s?(?:min|minutes?|hours?|hr|h))', sent, re.I)
        t_val = find_val(r'.*', t_match.group(1)) if t_match else "N/A"

        # Map doses to Cmax
        if "respectively" in sent.lower() and len(d_found) <= len(c_found):
            for i, d in enumerate(d_found):
                extracted_data.append({
                    "Dose": d, "Route": ctx_route, "Cmax": c_found[i], "Tmax": t_val,
                    "AUC": "N/A", "Half-life": g_hl, "Clearance": g_cl, "Vd": g_vd
                })
        else:
            for i, d in enumerate(d_found):
                c = c_found[i] if i < len(c_found) else (c_found[0] if c_found else "N/A")
                extracted_data.append({
                    "Dose": d, "Route": ctx_route, "Cmax": c, "Tmax": t_val,
                    "AUC": "N/A", "Half-life": g_hl, "Clearance": g_cl, "Vd": g_vd
                })

    return pd.DataFrame(extracted_data).drop_duplicates().reset_index(drop=True)

# --- RUNNING TESTS ---
texts = {
    "AZTREONAM": aztreonam_text,
    "CLOMIPRAMINE": clomipramine_text,
    "AMINOCAPROIC ACID": aminocaproic_text,
    "AMBISOME": ambisome_text
}

for name, txt in texts.items():
    print(f"\n--- {name} Results ---")
    print(extract_universal_pk(txt))


--- AZTREONAM Results ---
     Dose  Route   Cmax     Tmax  AUC            Half-life  Clearance   Vd
0  500 mg  IV/IM     54  8 hours  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
1     1 g  IV/IM     90  8 hours  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
2     2 g  IV/IM    204  8 hours  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
3  500 mg  IV/IM    N/A      N/A  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
4     1 g  IV/IM    N/A      N/A  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
5     2 g  IV/IM    N/A      N/A  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
6     2 g     IV    N/A      N/A  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
7  500 mg  IV/IM    500      N/A  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
8     1 g  IV/IM  1,200      N/A  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A
9     1 g     IM    N/A      N/A  N/A  1.7 hours (1.5-2.0)  91 mL/min  N/A

--- CLOMIPRAMINE Results ---
                 Dose Route Cmax Tmax  AUC Half-life Clearance   Vd
0    25 to 100 mg/day  Oral  N/A  N/A  N/A       N