In [None]:
# Shell escape: list the files available in ../data_prep (the input directory read below).
!ls ../data_prep

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import rich
import seaborn as sns
from plotly.subplots import make_subplots

import hts_utils

# Dose Response Curve Fit Parameters from syn5522627 (from qhts-protocol-dump-headers.txt)

Note that our pre-processing converts all concentrations to micromolar (uM) units.

* ZERO: asymptote of response curve at zero concentration
* INF: asymptote of response curve at infinite concentration
* MAXR: response at max concentration
* AC50: concentration at half max response in uM
* LAC50: log AC50
* HILL: hill slope from curve fit
* DATA0: response at first conc (as % of DMSO control, so 100 == DMSO)
* DATA1: response at second conc
* ...
* CONC0: first conc in uM
* CONC1: second conc in uM
* ...

# Compound Scoring

Method reference https://molpharm.aspetjournals.org/content/92/4/414.long

## Effectiveness

We measure the effectiveness $E$ of a compound as the difference between the response at zero concentration (${\rm ZERO}$) and infinite concentration (${\rm INF}$).

$$
E = {\rm ZERO} - {\rm INF}
$$

The relative effectiveness of a compound exposed to two different cell lines (a reference line and a test line) is, 

$$
\Delta E = \frac{
E_{ref}}{E_{test}}
$$

## Potency

We measure the potency of a compound with ${\rm AC50}$ 
(the concentration at half-maximum response).
Note that a higher ${\rm AC50}$ indicates a lower potency and vice versa.
We define the relative AC50 ($\Delta {\rm AC50}$) and relative potency ($\Delta P$) of a compound exposed to two cell lines as, 

$$
\Delta {\rm AC50} 
= \left[ \log {\rm AC50 }_{ref} -  \log {\rm AC50 }_{test} \right]
= \log \frac{{\rm AC50 }_{ref}}{{\rm AC50 }_{test}} 
$$

$$
\Delta P
= - \Delta {\rm AC50}
= -\left[ \log \frac{{\rm AC50 }_{ref}}{{\rm AC50 }_{test}} \right]
= \log \frac{{\rm AC50 }_{test}}{{\rm AC50 }_{ref}}
$$

## Drug Score v0

$\Delta P$ is one way to score compounds.

### Sensitivity

When ${\rm AC50}_{test}$ < ${\rm AC50}_{ref}$,
the compound was effective in the test cell line at a lower concentration than was needed in the ref cell line.
In this case, we say the test cell line is sensitive to the compound. 

$$
{\rm AC50}_{test} < {\rm AC50}_{ref}, 
\quad \frac{{\rm AC50}_{test}}{{\rm AC50}_{ref}} < 1, 
\quad \Delta P = \log \frac{{\rm AC50}_{test}}{{\rm AC50}_{ref}} < 0
$$

### Resistance

When ${\rm AC50}_{test}$ > ${\rm AC50}_{ref}$,
the compound needed to be present at a higher concentration in the test cell line than in the ref cell line to be effective.
In this case, we say the test cell line is resistant to the compound. 

$$
{\rm AC50}_{test} > {\rm AC50}_{ref}, 
\quad \frac{{\rm AC50}_{test}}{{\rm AC50}_{ref}} > 1, 
\quad \Delta P = \log \frac{{\rm AC50}_{test}}{{\rm AC50}_{ref}} > 0
$$

## Drug Score v1

One way to create a single-number score for a compound is to combine its effectiveness and its AC50:

$$
S = \log \frac{E}{{\rm AC50}}
$$

$$
\Delta S = S_{ref} - S_{test} = 
\log \left( \frac{E}{{\rm AC50}} \right)_{ref}
- \log \left( \frac{E}{{\rm AC50}} \right)_{test}
$$

$$
\Delta S = 
\log \left[ 
\left( \frac{E_{ref}}{E_{test}} \right)
\left( \frac{{\rm AC50}_{test}}{{\rm AC50}_{ref}} \right)
\right]
= 
\log \left( \frac{E_{ref}}{E_{test}} \right) + 
\log \left( \frac{{\rm AC50}_{test}}{{\rm AC50}_{ref}} \right)
=
\log \Delta E + \Delta P
$$

# Read in Data

In [None]:
# Read the cleaned screening export (with filter_curve_cols=True) and pass the
# result straight through hts_add_vars.
# NOTE(review): hts_add_vars presumably adds derived columns such as EFF used
# later in this notebook -- confirm in hts_utils.
file_path = "../data_prep/syn5522627-clean.csv"
df_hts = hts_utils.hts_add_vars(
    hts_utils.hts_read(file_path, filter_curve_cols=True)
)

In [None]:
# First row of the loaded table, to see which columns are available.
df_hts.iloc[0]

In [None]:
# (rows, columns) of the loaded table.
df_hts.shape

In [None]:
# Number of rows recorded for each cell line.
df_hts["Cell line"].value_counts()

In [None]:
# Alphabetically sorted list of the distinct cell-line names in the table.
cell_lines = sorted(df_hts["Cell line"].unique())
cell_lines

In [None]:
# Pretty-print the CELL_LINE_META object provided by hts_utils.
rich.print(hts_utils.CELL_LINE_META)

# Reproduce Figure 1

In [None]:
# Reference line and the single tumor line compared in this section.
ref_line = "ipnNF95.11C"
tumor_line = "ipNF95.11b C/T"
print(ref_line, tumor_line, sep="\n")

# Pairwise comparison of the two lines over the unfiltered table.
df_ratios = hts_utils.hts_compare(df_hts, ref_line, tumor_line)

In [None]:
# Spot-check subset: three named compounds, restricted to the per-line fit
# values and the two relative scores.
check_names = ["Doxorubicin", "Mitoxantrone", "Aclarubicin"]
check_cols = [
    "name",
    "ref_line",
    "ref_LAC50",
    "ref_EFF",
    "ref_log(EFF/AC50)",
    "tumor_line",
    "tumor_LAC50",
    "tumor_EFF",
    "tumor_log(EFF/AC50)",
    "log(AC50_t/r)",
    "log(EFF/AC50_r/t)",
]
df_check = df_ratios.loc[df_ratios["name"].isin(check_names), check_cols]

In [None]:
# Second row (by position) of the spot-check subset.
df_check.iloc[1]

In [None]:
# First row (by position) of the spot-check subset.
df_check.iloc[0]

In [None]:
# Third row (by position) of the spot-check subset.
df_check.iloc[2]

In [None]:
# Column names of the comparison table returned by hts_compare.
df_ratios.columns

In [None]:
# Display the loaded table for reference.
df_hts

# Reproduce Drug List

In [None]:
# Quality cuts applied before scoring: keep rows whose R2 exceeds r2_thresh
# and whose EFF exceeds eff_thresh.
r2_thresh = 0.8
eff_thresh = 0.0
df_ana = df_hts[(df_hts["R2"] > r2_thresh) & (df_hts["EFF"] > eff_thresh)]

ref_line = "ipnNF95.11C"
tumor_lines = ["ipNF05.5 Mixed Clones", "ipNF06.2A", "ipNF95.11b C/T", "ipNF95.6"]

print(ref_line)
print(tumor_lines)

# Compare the reference line against every tumor line, then stack the results
# with a single concat. Collecting the frames first avoids re-copying the
# accumulated table on every iteration and avoids seeding the concat with an
# empty DataFrame (which newer pandas versions warn about).
ratio_frames = []
for tumor_line in tumor_lines:
    ratio_frames.append(hts_utils.hts_compare(df_ana, ref_line, tumor_line))
df_ratios = pd.concat(ratio_frames)

In [None]:
# Preview the first ten rows of the stacked comparison table.
df_ratios.head(10)

In [None]:
# Short key per tumor line: the text before the first space in its name.
tlkeys = [line_name.partition(" ")[0] for line_name in tumor_lines]

In [None]:
# Show the derived short keys.
tlkeys

In [None]:
# Per-compound score table, one row per NCGC SID taken from the reference-line
# rows of the filtered data.
score_col = "log(EFF/AC50_r/t)"
is_ref_row = df_ana["Cell line"] == ref_line
df_scores = df_ana.loc[is_ref_row, ["NCGC SID", "name", "target", "MoA"]].set_index(
    "NCGC SID"
)

# One score column per tumor line.
# NOTE(review): this assignment aligns on index labels, so it assumes the
# frames returned by hts_compare are indexed by NCGC SID -- confirm.
for line, key in zip(tumor_lines, tlkeys):
    df_scores[f"ds_{key}"] = df_ratios.loc[df_ratios["tumor_line"] == line, score_col]

# Summary statistics across tumor lines for each compound.
scores_by_sid = df_ratios.groupby("NCGC SID")[score_col]
df_scores["ds_mean"] = scores_by_sid.mean()
df_scores["ds_var"] = scores_by_sid.var()
df_scores["num_lines"] = scores_by_sid.size()

# Compounds absent from df_ratios get a count of 0 and are then dropped.
df_scores["num_lines"] = df_scores["num_lines"].fillna(0).astype(int)
has_comparison = df_scores["num_lines"] > 0
df_scores = df_scores[has_comparison]

In [None]:
# Display the per-compound score table.
df_scores

In [None]:
# Score rows for three named compounds (matched on the `name` column).
df_scores[df_scores["name"].isin(["Doxorubicin", "Mitoxantrone", "Aclarubicin"])]

In [None]:
# Compounds with a mean score above 0.5 that were scored in at least three
# tumor lines, grouped by target with the highest mean score first.
high_mean = df_scores["ds_mean"] > 0.5
enough_lines = df_scores["num_lines"] >= 3
df_resist = df_scores.loc[high_mean & enough_lines].sort_values(
    by=["target", "ds_mean"], ascending=[True, False]
)

In [None]:
# Compounds with a mean score below -0.5 that were scored in at least three
# tumor lines, grouped by target with the lowest mean score first.
low_mean = df_scores["ds_mean"] < -0.5
enough_lines = df_scores["num_lines"] >= 3
df_sensi = df_scores.loc[low_mean & enough_lines].sort_values(
    by=["target", "ds_mean"], ascending=[True, True]
)

In [None]:
# First ten rows of the high-score (df_resist) list.
df_resist.head(10)

In [None]:
# First ten rows of the low-score (df_sensi) list.
df_sensi.head(10)

In [None]:
# Display the whole df_resist list.
df_resist

# Lines of Constant Score

In [None]:
# Grid for the constant-score curves: 11 score levels (ds) evaluated over
# npts relative-potency values (dp).
npts = 50
dss = np.linspace(-3.0, 3.0, 11)

# One colour per score level, sampled at evenly spaced positions along the
# diverging "balance" scale. Sampling (rather than indexing the 12-entry
# palette list by position) keeps ds = 0 at the palette midpoint, makes the
# two extremes symmetric, and works for any number of levels.
color_discrete_map = dict(
    zip(dss, px.colors.sample_colorscale(px.colors.diverging.balance, len(dss)))
)
dps = np.linspace(-4.0, 4.0, npts)

In [None]:
# For every (score, relative potency) pair, lde = ds - dp; `de` is then
# 10 raised to that value.
rows = [{"ds": ds, "dp": dp, "lde": ds - dp} for ds in dss for dp in dps]
df_plt = pd.DataFrame(rows)
df_plt["de"] = np.power(10, df_plt["lde"])

In [None]:
# Display the curve table built in the previous cell.
df_plt

In [None]:
# Lines of constant score: one curve per ds level in the (dp, de) plane.
# `labels` replaces the terse column names on the axes, legend and hover text
# with the quantities defined in the "Compound Scoring" section
# (ds = log(de) + dp, matching Delta S = log Delta E + Delta P).
fig = px.line(
    df_plt,
    x="dp",
    y="de",
    color="ds",
    color_discrete_map=color_discrete_map,
    labels={
        "dp": "\u0394P = log(AC50_test / AC50_ref)",
        "de": "\u0394E = E_ref / E_test",
        "ds": "\u0394S",
    },
    width=600,
    height=500,
)
# Restrict the y axis to 0-5; the curves grow exponentially outside this window.
fig.update_yaxes(range=[0, 5])
fig.show()

In [None]:
# First colour entry of the "balance" diverging palette.
px.colors.diverging.balance[0]

In [None]:
# Copy the combined score into a column named "Δ s" so the colour bar carries
# that title.
cbtitle = "\u0394 s"
df_ratios[cbtitle] = df_ratios["log(EFF/AC50_r/t)"]

# MathJax axis titles.
xtitle = r"""
$
\Large{
\Delta p = 
\log \frac{\rm AC50_t}{\rm AC50_r}
}
$
"""

ytitle = r"""
$\Large{
\Delta {\rm EFF} = 
\frac{\rm EFF_r}{\rm EFF_t}}
$
"""

# Relative effectiveness against relative potency, coloured by the score.
scatter_kwargs = {
    "x": "log(AC50_t/r)",
    "y": "EFF_r/t",
    "color": cbtitle,
    "color_continuous_scale": px.colors.diverging.balance,
    "width": 800,
    "height": 700,
}
fig = px.scatter(df_ratios, **scatter_kwargs)

fig.update_xaxes(title=xtitle)
fig.update_yaxes(title=ytitle)
fig.update_layout(
    font={"size": 20},
    margin={"l": 90, "r": 20, "t": 20, "b": 80},
)

fig.show()

In [None]:
# Joint density of relative potency (x) and relative effectiveness (y), with
# marginal histograms on both axes. Axis titles are left at the plotly express
# defaults (the column names).
fig = px.density_contour(
    df_ratios,
    x="log(AC50_t/r)",
    y="EFF_r/t",
    marginal_x="histogram",
    marginal_y="histogram",
    width=800,
    height=700,
)

fig.update_layout(
    font=dict(size=20),
    margin=dict(l=90, r=20, t=20, b=80),
)

fig.show()

In [None]:
# Combined score against relative potency for every compound / tumor-line pair.
fig = px.scatter(
    df_ratios,
    x="log(AC50_t/r)",
    y="log(EFF/AC50_r/t)",
    width=700,
    height=700,
)

xtitle = r"""
$
\Large{
\Delta p = 
\log \frac{\rm AC50_t}{\rm AC50_r}
}
$
"""

# The logarithm applies to the product of both ratios, so the product is
# bracketed and the whole expression sits inside the \Large group.
ytitle = r"""
$\Large{
\Delta s =
\log \left(
\frac{\rm EFF_r}{\rm EFF_t}
\frac{\rm AC50_t}{\rm AC50_r}
\right)}
$
"""

fig.update_xaxes(title=xtitle)
fig.update_yaxes(title=ytitle)
fig.update_layout(font=dict(size=20), margin=dict(l=90, r=20, t=20, b=80))
fig.show()