# In [2]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Neat ChEMBL Bioactivity Report (Jupyter Enhanced with Aggregation)

Help:
- Pulls all human bioactivities (IC50, Ki, KA, etc.) for a given compound.
- Looks up each target’s preferred name.
- Builds a DataFrame:
  * Deduplicates by Target + Activity type + Units using the median value.
  * Shows count of how many measurements were aggregated.
- Outputs:
  * Styled HTML table with widgets (JupyterLab).
  * Markdown-style table in CLI.

Usage (CLI):
    python chembl_bioactivity_fixed.py <compound>

Requirements:
    pip install chembl-webresource-client pandas tabulate ipywidgets
    # optional, for the interactive table in Jupyter:
    pip install itables
"""
import sys
import pandas as pd
from chembl_webresource_client.new_client import new_client
from tabulate import tabulate


def get_chembl_id(compound: str) -> str:
    """Resolve a compound's preferred name to its ChEMBL molecule ID.

    The molecule endpoint is filtered with a case-insensitive exact match on
    ``pref_name`` and the first hit is used.

    Args:
        compound: Preferred name of the compound to look up.

    Returns:
        The ``molecule_chembl_id`` of the first matching record.

    Raises:
        ValueError: If the query returns no match for *compound*.
    """
    matches = new_client.molecule.filter(pref_name__iexact=compound)
    if matches:
        return matches[0]['molecule_chembl_id']
    raise ValueError(f"No ChEMBL entry for '{compound}'")


def fetch_activities(chembl_id: str) -> list[dict]:
    """Return the Homo sapiens activity records for *chembl_id* as a list.

    The query is restricted to the target ID plus the standardised
    type / value / units fields, and the result set is materialised
    into a plain list before returning.

    Args:
        chembl_id: ChEMBL molecule ID to fetch activities for.

    Returns:
        A list with one entry per activity record.
    """
    wanted_fields = [
        'target_chembl_id',
        'standard_type',
        'standard_value',
        'standard_units',
    ]
    query = new_client.activity.filter(
        molecule_chembl_id=chembl_id,
        target_organism__iexact='Homo sapiens',
    )
    return list(query.only(wanted_fields))


def fetch_target_names(target_ids: set[str]) -> dict[str, str]:
    """Map each ChEMBL target ID to its preferred name.

    One lookup is issued per ID. Every ID in *target_ids* becomes a key of the
    result; when the target is not found, or its record carries no preferred
    name, the ID itself is used as the value so callers never receive ``None``.

    Args:
        target_ids: ChEMBL target IDs to resolve.

    Returns:
        Dict of target ID -> preferred name (or the ID as a fallback).
    """
    tgt_client = new_client.target
    names = {}
    for tid in target_ids:
        rec = tgt_client.filter(target_chembl_id=tid).only(['pref_name'])
        # `or tid` also covers a found record whose pref_name is null/empty,
        # which would otherwise surface downstream as a missing target label.
        names[tid] = (rec[0].get('pref_name') or tid) if rec else tid
    return names


def build_activity_df(acts: list[dict]) -> pd.DataFrame:
    """Build a tidy, aggregated DataFrame from raw activity records.

    Each record contributes its target ID, activity type, numeric value and
    units. For ``KA`` records expressed in inverse-molar units, a dissociation
    constant in nM is derived as ``1 / KA * 1e9`` (rounded to 2 decimals).
    Records without a usable numeric value are dropped, target IDs are replaced
    by their preferred names, and duplicates are collapsed per
    (target, activity, units) using the median.

    Args:
        acts: Activity dicts with the keys ``target_chembl_id``,
            ``standard_type``, ``standard_value`` and ``standard_units``
            (missing keys are tolerated).

    Returns:
        DataFrame with columns ``Target (ChEMBL)``, ``Activity``, ``Value``,
        ``Units``, ``Kd (nM)`` and ``Count`` (number of measurements that were
        aggregated). An empty frame with these columns is returned when there
        is nothing to report.
    """
    row_columns = ['Target (ChEMBL)', 'Activity', 'Value', 'Units', 'Kd (nM)']
    out_columns = row_columns + ['Count']
    group_keys = ['Target (ChEMBL)', 'Activity', 'Units']
    ka_units = {'M^-1', 'M-1', '1/M'}

    rows = []
    for act in acts:
        typ = act.get('standard_type') or ''
        unit = act.get('standard_units') or ''
        raw = act.get('standard_value')

        # Parse the value once; anything non-numeric counts as "no value".
        try:
            value = float(raw) if raw not in (None, '') else None
        except (TypeError, ValueError):
            value = None

        # Kd is only defined for a non-zero association constant in M^-1.
        kd_nm = None
        if value and typ.upper() == 'KA' and unit.strip() in ka_units:
            kd_nm = round((1.0 / value) * 1e9, 2)

        rows.append({
            'Target (ChEMBL)': act.get('target_chembl_id') or 'Unknown',
            'Activity': typ,
            'Value': value,
            'Units': unit,
            'Kd (nM)': kd_nm,
        })

    # Explicit columns keep the frame well-formed even when `acts` is empty.
    df = pd.DataFrame(rows, columns=row_columns)

    # Force numeric dtype: a column holding only None would otherwise be
    # object-typed and is not a safe input for a median.
    df['Value'] = pd.to_numeric(df['Value'], errors='coerce')
    df['Kd (nM)'] = pd.to_numeric(df['Kd (nM)'], errors='coerce')

    # Drop rows without usable values *before* the name lookup, so no request
    # is spent on targets that will not appear in the report.
    df = df.dropna(subset=['Value']).copy()
    if df.empty:
        return pd.DataFrame(columns=out_columns)

    # Replace ChEMBL IDs with human-readable names; keep the ID when the
    # lookup yields nothing so the grouping key is never missing.
    target_ids = df['Target (ChEMBL)']
    name_map = fetch_target_names(set(target_ids))
    df['Target (ChEMBL)'] = target_ids.map(name_map).fillna(target_ids)

    # Single pass: medians and the per-group measurement count come from the
    # same groupby, so the count can never disagree with the aggregated rows.
    agg_df = (
        df.groupby(group_keys, dropna=False)
        .agg(**{
            'Value': ('Value', 'median'),
            'Kd (nM)': ('Kd (nM)', 'median'),
            'Count': ('Value', 'size'),
        })
        .reset_index()
    )

    return agg_df[out_columns].reset_index(drop=True)


def display_df(df: pd.DataFrame):
    """Show *df* as an interactive table in Jupyter, or as Markdown in a CLI.

    Inside a Jupyter kernel (detected the same way ``main`` does, via
    ``'ipykernel' in sys.modules``) the table is rendered with ``itables`` when
    that package is installed. In every other case -- plain CLI run, or a
    notebook without ``itables`` -- a GitHub-flavoured Markdown table is
    printed to stdout.

    Args:
        df: The DataFrame to display.
    """
    if 'ipykernel' in sys.modules:
        try:
            from itables import show
        except ImportError:
            # itables is optional; fall through to the Markdown rendering.
            pass
        else:
            # Interactive, sortable, searchable DataFrame
            show(df, classes="display compact cell-border", maxBytes=0)
            return

    # CLI (or no itables): an installed itables must not hijack terminal
    # output, since there is no notebook frontend to render its HTML.
    from tabulate import tabulate
    print(tabulate(df, headers='keys', tablefmt='github', showindex=False))


def interactive_mode():
    """Show a compound search form (text box, button, output area) in Jupyter.

    Clicking the button clears the output area, resolves the entered compound
    name, fetches and aggregates its human bioactivities, and displays the
    resulting table. Any error raised during a search is printed inside the
    output area. If an ImportError occurs while setting the form up, an
    install hint for ipywidgets is printed instead.
    """
    try:
        import ipywidgets as widgets
        from IPython.display import display, clear_output

        out_area = widgets.Output()
        search_btn = widgets.Button(description="Search")
        compound_box = widgets.Text(
            value='scopolamine',
            description='Compound:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px'),
        )

        def _run_search(_clicked):
            # Everything printed or displayed here lands in the output widget.
            with out_area:
                clear_output()
                try:
                    compound = compound_box.value
                    print(f"\n🔍 Looking up '{compound}' in ChEMBL…")
                    chembl_id = get_chembl_id(compound)
                    print(f"   → Found ChEMBL ID: {chembl_id}\n")

                    print("📋 Fetching human bioactivities…")
                    report = build_activity_df(fetch_activities(chembl_id))
                    print(f"\n🏷  Retrieved {len(report)} aggregated records:")
                    display_df(report)
                except Exception as e:
                    # UI boundary: report the failure instead of raising.
                    print(f"❌ Error: {e}")

        search_btn.on_click(_run_search)
        form = widgets.VBox([compound_box, search_btn, out_area])
        display(form)
    except ImportError:
        print("ipywidgets is not installed. Run: pip install ipywidgets")


def main():
    """Entry point: widget UI inside a Jupyter kernel, text report otherwise.

    In CLI mode the compound name is taken from the first command-line
    argument and defaults to 'scopolamine' when none is given.
    """
    if 'ipykernel' in sys.modules:
        interactive_mode()
        return

    cli_args = sys.argv[1:]
    compound = cli_args[0] if cli_args else 'scopolamine'

    print(f"\n🔍 Looking up '{compound}' in ChEMBL…")
    chembl_id = get_chembl_id(compound)
    print(f"   → Found ChEMBL ID: {chembl_id}\n")

    print("📋 Fetching human bioactivities…")
    report = build_activity_df(fetch_activities(chembl_id))
    print(f"\n🏷  Retrieved {len(report)} aggregated records:\n")
    display_df(report)


# Run the report only when this module is executed directly, not when imported.
if __name__ == '__main__':  # pragma: no cover
    main()


# Notebook cell output: VBox(children=(Text(value='scopolamine', description='Compound:', layout=Layout(width='400px'), style=TextStyl…