In [None]:
%load_ext autoreload
%matplotlib inline

In [None]:
import os
import sys

import numpy as np
import pandas as pd
import plotly as pl

In [None]:
# Put the parent directory first on the import path
# (presumably so project-local modules resolve; confirm against the repo layout).
sys.path.insert(0, "..")

# Seed NumPy's global random number generator for reproducibility.
# NOTE: np.random.random(n) would draw n samples and throw them away; it does
# not seed anything, so np.random.seed is the call that was intended here.
np.random.seed(20121020)

# Initialise plotly's offline notebook mode.
pl.offline.init_notebook_mode(connected=True)

# Make gene x cellline

In [None]:
# Load CCLE mutation-amplification-deletion x cellline
mutation_amplification_deletion_x_cellline = gct.read_gct(
    "../data/ccle/mutation__gene_x_ccle_cellline.gct"
)

print(
    "mutation_amplification_deletion_x_cellline.shape: {}".format(
        mutation_amplification_deletion_x_cellline.shape
    )
)

# Keep only mutations (*_MUT). Match the full suffix, including the
# underscore, so a label that merely ends in "MUT" is not picked up.
MUTATION_SUFFIX = "_MUT"
is_mutation = mutation_amplification_deletion_x_cellline.index.str.endswith(
    MUTATION_SUFFIX
)
mutation_x_cellline = mutation_amplification_deletion_x_cellline.loc[is_mutation]

print("mutation_x_cellline.shape: {}".format(mutation_x_cellline.shape))

# Remove the '_MUT' suffix by slicing it off the end of each label. Splitting
# on "_" and keeping the first token would also truncate any label that
# contains another underscore.
mutation_x_cellline.index = mutation_x_cellline.index.str[: -len(MUTATION_SUFFIX)]

# Select genes

In [None]:
# Read Foundation One genes: take the "FoundationOne" row of the gene-set
# table, drop missing entries, and convert to a plain list.
foundation_one_genes = (
    gmt.read_gmts("../data/gene_sets/cancer_gene_sets/cancer_gene_sets.gmt")
    .loc["FoundationOne"]
    .dropna()
    .tolist()
)

print("N Foundation One genes: {}".format(len(foundation_one_genes)))

# Keep only mutations in the Foundation One genes.
# Index.isin builds the boolean mask in one vectorised call instead of doing
# a Python-level list membership test for every row.
is_foundation_one_gene = mutation_x_cellline.index.isin(foundation_one_genes)
selected_mutation_x_cellline = mutation_x_cellline.loc[is_foundation_one_gene]

print("selected_mutation_x_cellline: {}".format(selected_mutation_x_cellline.shape))

# Make compound-sensitivity x cellline

In [None]:
# Read CCLE compound-sensitivity x cellline
compound_sensitivity_x_cellline = gct.read_gct(
    "../data/ccle/ctd2__compound_x_ccle_cellline.gct"
)

# Flip the sign of the values: the lower the viability score, the better the
# response, so after negation larger values mean higher sensitivity.
compound_sensitivity_x_cellline *= -1

# Normalize the values with the "0-1" method and rebuild the frame with the
# original row and column labels.
compound_sensitivity_x_cellline = pd.DataFrame(
    a2d.normalize(compound_sensitivity_x_cellline.values, "0-1"),
    index=compound_sensitivity_x_cellline.index,
    columns=compound_sensitivity_x_cellline.columns,
)

plot.plot_heatmap(
    compound_sensitivity_x_cellline,
    title="compound_sensitivity_x_cellline {}".format(
        compound_sensitivity_x_cellline.shape
    ),
    xlabel="Cellline",
    ylabel="Compound",
)

# Get cell lines common in selected-mutation x cellline and compound-sensitivity x cellline.
# Use Index.intersection explicitly: `&` between two Index objects is no longer
# a set intersection in current pandas.
common_celllines = selected_mutation_x_cellline.columns.intersection(
    compound_sensitivity_x_cellline.columns
).sort_values()

print("common_celllines.size: {}".format(common_celllines.size))

# Match selected mutations to compound sensitivity

In [None]:
compound = "PLX-4720"
print("Matching mutations to {} ...".format(compound))

# Sensitivity of the chosen compound across the shared cell lines;
# cell lines without a value are dropped.
compound_profile = compound_sensitivity_x_cellline.loc[compound, common_celllines]
compound_sensitivity = compound_profile.dropna()
print("\tcompound_sensitivity.size: {}".format(compound_sensitivity.size))

print()
# Restrict the mutation matrix to the cell lines that have a sensitivity value,
# then match mutations against the compound's sensitivity.
# NOTE(review): the meaning of the keyword options is defined by
# make_match_panel, which is not visible here -- confirm there.
mutations_for_compound = selected_mutation_x_cellline.loc[
    :, compound_sensitivity.index
]
match_result = make_match_panel(
    compound_sensitivity,
    mutations_for_compound,
    n_jobs=4,
    n_features=10,
    n_samplings=0,
    n_permutations=0,
    features_type="binary",
)

# Infer compound response from mutations

## Set up variables

In [None]:
mutation_1 = "BRAF"
mutation_2 = "KRAS"

print("Mutation 1: {}".format(mutation_1))
print("Mutation 2: {}".format(mutation_2))

# Mutation rows and compound sensitivity as arrays, all indexed by the same
# cell lines (those with a sensitivity value).
m1 = np.array(selected_mutation_x_cellline.loc[mutation_1, compound_sensitivity.index])
m2 = np.array(selected_mutation_x_cellline.loc[mutation_2, compound_sensitivity.index])
r = np.array(compound_sensitivity)

# Binarize the response: 1 where the sensitivity is above the cutoff, else 0.
RESPONSE_THRESHOLD = 0.6
rb = (RESPONSE_THRESHOLD < r).astype(int)

plot.plot_distribution(m1, title="{} Distribution".format(mutation_1))
plot.plot_distribution(m2, title="{} Distribution".format(mutation_2))
plot.plot_distribution(r, title="{} Distribution".format(compound))
plot.plot_distribution(rb, title="{} (binarized) Distribution".format(compound))

# Grid size passed to the inference calls that follow.
grid_size = 8

## Compute P(R | Mi)

In [None]:
# Options shared by the two single-mutation inferences.
single_mutation_options = {"grid_size": grid_size, "target": 1}

# Infer the binarized response from mutation 1 alone.
p_rb__m1, p_rb1__m1 = infer(
    [m1, rb],
    **single_mutation_options,
    variable_names=[mutation_1, compound],
)

# Infer the binarized response from mutation 2 alone.
p_rb__m2, p_rb1__m2 = infer(
    [m2, rb],
    **single_mutation_options,
    variable_names=[mutation_2, compound],
)

## Compute P(R | M1, M2)

In [None]:
# Infer the binarized response from both mutations, assuming independence.
# The result gets its own names so that the joint inference below does not
# silently overwrite it.
p_rb__m1m2_independent, p_rb1__m1m2_independent = infer_assuming_independence(
    [m1, m2, rb],
    grid_size=grid_size,
    target=1,
    variable_names=[mutation_1, mutation_2, compound],
)

# Infer the binarized response from both mutations with the general `infer`.
p_rb__m1m2, p_rb1__m1m2 = infer(
    [m1, m2, rb],
    grid_size=grid_size,
    target=1,
    variable_names=[mutation_1, mutation_2, compound],
)

## Make Bayesian nomograms

In [None]:
# Fraction of entries in the binarized response that equal 1
# (the mean of a 0/1 array).
p_rb1 = rb.mean()

print(p_rb1)

In [None]:
# Values passed to both nomograms: p_rb1 and its complement.
p_response = p_rb1
p_no_response = 1 - p_rb1
marks_option = {"n_lors_marks": 2}

# Nomogram without a sample.
plot_bayesian_nomogram(
    [mutation_1, mutation_2],
    [m1, m2],
    [p_rb__m1, p_rb__m2],
    p_response,
    p_no_response,
    **marks_option,
    file_path="../results/nomogram.png",
)

# Same nomogram, additionally passing sample=[1, 1].
plot_bayesian_nomogram(
    [mutation_1, mutation_2],
    [m1, m2],
    [p_rb__m1, p_rb__m2],
    p_response,
    p_no_response,
    sample=[1, 1],
    **marks_option,
    file_path="../results/nomogram_with_sample.png",
)