In [None]:
# List the contents of ../data_prep, the directory the input CSV is read from below.
!ls ../data_prep

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
from plotly.subplots import make_subplots

import hts_utils

# Overview

## Dose Response Curve Fit Parameters from syn5522627 (from qhts-protocol-dump-headers.txt)

Note that our pre-processing puts everything in micromolar units.

* ZERO: asymptote of response curve at zero concentration
* INF: asymptote of response curve at max concentration
* MAXR: response at max concentration
* AC50: concentration at half max response in uM
* LAC50: log AC50
* HILL: hill slope from curve fit
* DATA0: response at first conc (as % of DMSO control, so 100 == DMSO)
* DATA1: response at second conc
* ...
* CONC0: first conc in uM
* CONC1: second conc in uM
* ...

## Parameters added following https://molpharm.aspetjournals.org/content/92/4/414.long


We measure the effectiveness ${\rm EFF}$ of a compound as the difference between the response at zero concentration (${\rm ZERO}$) and infinite concentration (${\rm INF}$).

$$
{\rm EFF} = {\rm ZERO} - {\rm INF}
$$

The relative effectiveness of a compound exposed to two different cell lines (a reference line and a test line) is, 

$$
\Delta {\rm EFF} = \frac{
{\rm EFF}_{ref}
}{
{\rm EFF}_{test}
}
$$

We measure the potency of a compound with ${\rm AC50}$ (the concentration at half-maximum response).
Note that a higher ${\rm AC50}$ concentration indicates a lower potency and vice versa. 
The relative potency of a compound exposed to two cell lines is, 

$$
\Delta {\rm AC50} 
= \left[ \log {\rm AC50 }_{ref} -  \log {\rm AC50 }_{test} \right]
= \log \frac{{\rm AC50 }_{ref}}{{\rm AC50 }_{test}} 
$$

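Equivalently, writing ${\rm pAC50} = -\log {\rm AC50}$ (so that a larger value means a more potent compound), the same comparison with the opposite sign is,

$$
\Delta {\rm p AC50} 
= -\left[ \log {\rm AC50 }_{ref} -  \log {\rm AC50 }_{test} \right]
= -\left[ \log \frac{{\rm AC50 }_{ref}}{{\rm AC50 }_{test}} \right]
$$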

One way to create a single-number score for a compound is to combine its effectiveness and its potency, and then compare that score between the reference and test cell lines, 

$$
s = \log \frac{{\rm EFF}}{{\rm AC50}}
$$

$$
\Delta s = s_{ref} - s_{test} = 
\log \left( \frac{{\rm EFF}}{{\rm AC50}} \right)_{ref}
- \log \left( \frac{{\rm EFF}}{{\rm AC50}} \right)_{test}
$$

$$
\Delta s = 
\log \left[ 
\left( \frac{{\rm EFF}_{ref}}{{\rm EFF}_{test}} \right)
\left( \frac{{\rm AC50}_{test}}{{\rm AC50}_{ref}} \right)
\right]
$$

# Read in Data

In [None]:
# Read the cleaned screen CSV and pass the result straight through
# hts_utils.hts_add_vars, so `df_hts` is only ever bound to the final frame.
file_path = "../data_prep/syn5522627-clean.csv"
df_hts = hts_utils.hts_add_vars(hts_utils.hts_read(file_path))

In [None]:
# Show the column labels of the loaded frame.
df_hts.columns

In [None]:
# Preview the first rows of the loaded frame.
df_hts.head()

# Plot Single Curve

In [None]:
# Plotly theme applied to every figure created after this cell.
# `pio` (plotly.io) is imported in the notebook's import cell.
# The print lists every registered template; alternatives considered here were
# "plotly", "presentation", "simple_white", "ggplot2" and "none".
# Later cells draw axes and curves in white, so keep a dark template unless
# those colors are changed as well.
print(pio.templates)

template = "plotly_dark"
pio.templates.default = template

In [None]:
def get_good_ref_curves(
    df_hts,
    cell_line=None,
    name=None,
    min_r2=0.9,
    hill_tol=0.1,
    eff_tol=20,
):
    """Return the rows of ``df_hts`` whose curve fit passes three quality cuts.

    A row is kept only when all of the following hold:

    * ``R2 > min_r2``
    * ``|1 - HILL| < hill_tol``
    * ``|100 - (ZERO - INF)| < eff_tol``

    Rows with a missing value in any of these columns fail the comparison
    and are therefore dropped.

    Parameters
    ----------
    df_hts : pandas.DataFrame
        Must contain the columns ``R2``, ``HILL``, ``ZERO`` and ``INF``;
        ``Cell line`` / ``name`` are only read when the matching filter is used.
    cell_line : optional
        If not None, keep only rows whose ``Cell line`` equals this value.
    name : optional
        If not None, keep only rows whose ``name`` equals this value.
    min_r2 : float, default 0.9
        Strict lower bound on ``R2``.
    hill_tol : float, default 0.1
        Strict upper bound on the distance of ``HILL`` from 1.
    eff_tol : float, default 20
        Strict upper bound on the distance of ``ZERO - INF`` from 100.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows, original index preserved.
    """
    keep = (
        (df_hts["R2"] > min_r2)
        & (np.abs(1 - df_hts["HILL"]) < hill_tol)
        & (np.abs(100 - (df_hts["ZERO"] - df_hts["INF"])) < eff_tol)
    )
    # Optional filters are folded into the same mask so the frame is sliced once.
    if cell_line is not None:
        keep = keep & (df_hts["Cell line"] == cell_line)
    if name is not None:
        keep = keep & (df_hts["name"] == name)
    # Always hand back an explicit copy so callers can modify the result
    # without touching (or getting chained-assignment warnings about) df_hts.
    return df_hts[keep].copy()

In [None]:
# Draw one record (row `ii` of the selection) on its own figure.
df_plt = df_hts.loc[df_hts["name"] == "Orantinib"]

ii = 0  # positional index into df_plt
color = "white"
row = df_plt.iloc[ii]
print(ii, row[hts_utils.SHOW_COLS].to_dict())

fig = hts_utils.fig_add_compound(go.Figure(), row, color=color)
title = hts_utils.get_single_cellline_single_compound_title(row)
margin = {"r": 100, "t": 150}
fig = hts_utils.fig_update_layout(
    fig,
    margin,
    title=title,
    axes_color=color,
    global_font_size=16,
    title_font_size=16,
)
fig.show()

# Plot Two Curves

In [None]:
# Overlay two records from the quality-filtered curves of one cell line.
cell_line = "ipnNF95.11C"
df_plt = get_good_ref_curves(df_hts, cell_line=cell_line)

fig = go.Figure()

# Each pair is (positional row in df_plt, index into hts_utils.COLORS).
for ii, color_idx in ((3, 0), (8, 2)):
    row = df_plt.iloc[ii]
    color = hts_utils.COLORS[color_idx]
    fig = hts_utils.fig_add_compound(
        fig,
        row,
        color=color,
        add_measured=False,
        showlegend=True,
        legend_name=row["name"],
    )
    print(ii, row[hts_utils.SHOW_COLS].to_dict())

# `row` is the last record drawn; it supplies the cell-line label for the title.
meta = hts_utils.CELL_LINE_META[cell_line]
title = "Cell Line: {} ({}, {})".format(
    row["Cell line"],
    meta["source"],
    meta["status"],
)
margin = {"r": 100, "t": 150}
fig = hts_utils.fig_update_layout(
    fig,
    margin,
    title=title,
    axes_color="white",
    ymin=-10,
    width=600,
    height=650,
    global_font_size=16,
    title_font_size=16,
)
# Horizontal legend, centered above the plotting area.
legend_layout = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.1,
    "xanchor": "center",
    "x": 0.5,
}
fig.update_layout(legend=legend_layout)
fig.show()

# Plot All Cell Lines For Compound

In [None]:
# Overlay the fitted curve of one compound for every row (cell line) it appears in.
compound_name = "Orantinib"
df_plt = df_hts[df_hts["name"] == compound_name]
if df_plt.empty:
    # Without this guard the loop never binds `row`, and the title below would
    # either raise NameError or silently reuse a `row` left over from an earlier cell.
    raise ValueError(f"No rows found for compound {compound_name!r}")

fig = go.Figure()

# assumes hts_utils.COLORS is an integer-indexable sequence - TODO confirm
n_colors = len(hts_utils.COLORS)
for ii in range(df_plt.shape[0]):
    row = df_plt.iloc[ii]
    # Wrap around the palette so having more rows than colors cannot raise IndexError.
    color = hts_utils.COLORS[ii % n_colors]
    fig = hts_utils.fig_add_compound(
        fig,
        row,
        color=color,
        add_measured=False,
        add_params=False,
        add_annotations=False,
        showlegend=True,
        legend_name=row["Cell line"],
    )
    print(ii, row[hts_utils.SHOW_COLS].to_dict())

# `row` is the last record drawn; "name" is identical on every row because of
# the filter above (target / MoA are taken from that same record).
margin = dict(r=100, t=150)
title = "Compound: {}<br>Target: {}<br>MoA: {}".format(
    row["name"],
    row["target"],
    row["MoA"],
)
fig = hts_utils.fig_update_layout(
    fig,
    margin,
    title=title,
    axes_color="white",
    ymin=-10,
    width=None,
    height=800,
    global_font_size=16,
    title_font_size=16,
)
fig.show()

In [None]:
# Pairwise scatter/histogram grid for a subsample of the fit parameters.
# Add "INF", "ZERO" and "MAXR" to plt_cols to include the raw asymptotes.
plt_cols = ["R2", "AC50", "EFF"]
# Fixed seed -> the same subsample (and figure) on every Restart & Run All.
# Capping n at the frame length avoids the ValueError DataFrame.sample raises
# when asked for more rows than exist (sampling is without replacement).
n_sample = min(100, len(df_hts))
sns.pairplot(df_hts[plt_cols].sample(n=n_sample, random_state=0))

In [None]:
# Histogram of the "PHILL" column.
sns.histplot(data=df_hts, x="PHILL")

In [None]:
# Interactive histogram of the "LAC50" column.
px.histogram(df_hts, x="LAC50")

In [None]:
# NOTE(review): displays whatever `df_plt` was last assigned by an earlier cell;
# in top-to-bottom order that is the per-compound selection plotted above.
df_plt

In [None]:
# Number of rows per cell line.
df_hts["Cell line"].value_counts()

In [None]:
# Count rows with a missing R2 value.
df_hts["R2"].isna().sum()

In [None]:
# One histogram per column, shown in order; `fig` ends up bound to the last one.
for col in ("log(EFF/AC50)", "LAC50"):
    fig = px.histogram(df_hts, col)
    fig.show()

In [None]:
# EFF against LAC50, with each point colored by its R2 value.
px.scatter(
    data_frame=df_hts,
    x="LAC50",
    y="EFF",
    color="R2",
    height=800,
)

In [None]:
# Display only the columns listed in hts_utils.SHOW_COLS.
df_hts[hts_utils.SHOW_COLS]

In [None]:
# Sorted list of the distinct cell-line labels (sorted() already returns a list).
cell_lines = sorted(df_hts["Cell line"].unique())
cell_lines

In [None]:
# NOTE(review): the two lines are picked by position in the sorted label list,
# so the choice changes if the set of cell lines in the data changes.
ref_line, tumor_line = cell_lines[1], cell_lines[-1]
df_ratios = hts_utils.hts_compare(df_hts, ref_line, tumor_line)

In [None]:
# Display the result of hts_utils.hts_compare for the chosen pair of cell lines.
df_ratios

In [None]:
# Keep the comparison rows for one target, ordered by the combined score column.
target = "PIK3CA"
is_target = df_ratios["target"] == target
df_plt = df_ratios[is_target].sort_values(by="log(EFF/AC50_r/t)")

In [None]:
# Bar chart of the combined score per compound for the selected target.
bar_title = f"{ref_line} over {tumor_line} (target={target})"
px.bar(
    data_frame=df_plt,
    x="name",
    y="log(EFF/AC50_r/t)",
    title=bar_title,
    height=600,
)

In [None]:
# Comparison rows whose target is TOP2A.
df_ratios.loc[df_ratios["target"] == "TOP2A"]