In [1]:
import numpy as np
import plotly.graph_objs as go
from ipywidgets import widgets
from static_allelic_freq import get_static_plot
from filenames_helper.collect_filenames import collect_filenames




In [2]:
# MAF column names, keyed by the short aliases used throughout this notebook
column_names = {
    'sample_barcode': 'Tumor_Sample_Barcode',
    'hugo': 'Hugo_Symbol',
    'tumor_freq': 'tumor_f',
    'gnomad_freq': 'gnomADg_AF',
    'variant_class': 'Variant_Classification',
}
# Column holding the purity value (passed to get_static_plot)
purity_column = 'wxs_purity'

# Plot parameters
jitter = 0.3            # forwarded to get_static_plot
line_length = 0.8       # forwarded to get_static_plot
filter_disc_num = 20    # number of discrete steps on the gnomAD frequency slider

# Terra workspace, given as '<namespace>/<workspace name>'
workspace = 'broad-firecloud-ibmwatson/Getz_Ebert_IBM_13-583_Exomes_Analysis'

In [3]:
## Get list of maf filenames

In [8]:
### From Terra
# Look up the validated-MAF column for every pair in the chosen pair set
maf_list = collect_filenames(
    workspace=workspace,
    set_type='pair',
    set_name='group_102',
    column_name='mutation_validator_validated_maf',
)

In [9]:
### Or From File
# Pull filenames from a text file.
# Useful for specifying a mix of local (downloaded) MAFs and URLs hosted in a bucket.
# Put each filename on its own line, or separate filenames with commas or tabs.

# maf_list = collect_filenames(filepath='/Users/me/my_documents/my_directory/my_file.txt')

In [10]:
# Build the base (non-interactive) figure; the call also hands back the combined
# samples table and the gnomAD colour-scale bounds that the widgets below reuse.
fig, samples, gnomad_min, gnomad_max = get_static_plot(
    maf_list,
    workspace,
    column_names,
    purity_column,
    jitter=jitter,
    line_length=line_length,
)


Columns (14,32,35,36,37,40,41,45,46,48,49,51,53,55,58,62,63,64,66,67,68,69,70,71,72,75,79,82,85,87,88,91,92,93,96,98,99,100,102,103,104,105,106,113,114,115,116,118,120,122,124,125,130,133,134,138,140,141,144,147,149,150,151,153,159,160,161,164,165,168,171,173,175,176,178,182,184,186,192,194,198,199,201,202,205,206,209,210,211,212,214,215,218,224,226,227,229,230,232,233,234,235,239,245,248,249,250,252,256,259,260,266,267,268,270,272,276,278,282,285,288,289,290,293,297,300,302,303,307,309,310,314,317,321,322,326,329,330,334,336,337,340,341,342,343,345,349,350,351,355,357,358,359,360,361,362,366,369,371,373,374,399,403,411,412,413,415,429,441,451,455,456,458,460,461,463,464,468,470,471,472,473,474,475,476,480,481,482,484,487,489,490,492,493,494,496,498,499,500,501,502,504,506,509,542,543) have mixed types.Specify dtype option on import or set low_memory=False.



# Add Widgets

In [11]:
# Largest gnomAD frequency present in the data. Series.max() skips NaN, whereas
# the built-in max() returns NaN whenever the first value of the column is NaN,
# which would leave the slider with a NaN upper bound and step.
gnomad_freq_max = samples[column_names.get('gnomad_freq')].max()

# Slider for the minimum gnomAD frequency a point must have to stay on the plot;
# its range [0, max] is divided into filter_disc_num equal steps.
gnomAD_slider = widgets.FloatSlider(
    value=0,
    min=0,
    max=gnomad_freq_max,
    step=gnomad_freq_max / filter_disc_num,
    disabled=False,
    description='GnomAD Frequency Filter: ',
    readout_format='.5f'
)

# Checkbox deciding whether points without a gnomAD frequency (NaN) stay visible;
# ticked by default.
gnomAD_nan = widgets.Checkbox(
    description='Allow NaN gnomAD points',
    value=True,
    indent=False,
    disabled=False,
)

# Every distinct variant classification in the data, in order of first appearance
variant_class_column = samples[column_names.get('variant_class')]
all_mutations = variant_class_column.unique().tolist()

# Multi-select list of variant classifications; everything is selected initially
mutation_select = widgets.SelectMultiple(
    description='Mutation Types:  ',
    options=all_mutations,
    value=all_mutations,
    disabled=False,
)

# Wrap the static figure in a widget so its traces can be updated in place
ipython_fig = go.FigureWidget(fig)

# Working copy of the samples table; the widget callback writes a boolean
# 'filter' column into it, leaving `samples` itself untouched
df_w_filter = samples.copy(deep=True)

def response(change):
    """Re-filter the plotted mutations whenever one of the filter widgets changes.

    A row of ``samples`` is kept when its variant classification is currently
    selected in ``mutation_select`` and its gnomAD frequency is at least the
    ``gnomAD_slider`` value. Rows whose gnomAD frequency is NaN are kept only
    while the ``gnomAD_nan`` checkbox is ticked. The first trace of
    ``ipython_fig`` is redrawn from the kept rows, and the second trace's ``z``
    values and the annotation texts are replaced by the number of kept rows in
    each level-0 index group.

    Parameters
    ----------
    change
        Change notification handed over by the widget's ``observe`` mechanism.
        It is not inspected: the current state of all three widgets is read
        directly so that every callback applies the full combined filter.
    """
    variant_class = samples[column_names.get('variant_class')]
    gnomad_freq = samples[column_names.get('gnomad_freq')]

    # NaN >= x evaluates to False, so rows with a missing frequency can only
    # pass through the explicit isna() branch controlled by the checkbox.
    passes_freq = (gnomad_freq >= gnomAD_slider.value).values
    if gnomAD_nan.value:
        passes_freq = passes_freq | gnomad_freq.isna().values
    # Plain positional NumPy mask, so repeated index labels cannot affect it.
    keep = variant_class.isin(list(mutation_select.value)).values & passes_freq

    df_w_filter['filter'] = keep
    # Pick the 'filter' column *before* summing: aggregating the whole frame
    # would also try to sum every other (including non-numeric) column on each
    # widget event, which is slow and can fail on mixed-type columns.
    updated_sum_data = df_w_filter.groupby(level=0, sort=False)['filter'].sum().values

    temp_df = samples[keep]
    y = temp_df[column_names.get('tumor_freq')]
    x = temp_df['x_jitter']
    # Total read count per kept row: reference plus alternate allele counts.
    t_count = temp_df['t_ref_count'].values + temp_df['t_alt_count'].values

    # Send all figure changes to the front end as a single update.
    with ipython_fig.batch_update():
        ipython_fig.data[0].x = x
        ipython_fig.data[0].y = y
        ipython_fig.data[0].marker['color'] = temp_df[column_names.get('gnomad_freq')]
        # Pin the colour range so colours keep their meaning as points disappear.
        ipython_fig.data[0].marker['cmin'] = gnomad_min
        ipython_fig.data[0].marker['cmax'] = gnomad_max
        ipython_fig.data[0].customdata = np.stack((temp_df[column_names.get('hugo')].tolist(),
                                                   temp_df['Protein_Change'].tolist(),
                                                   t_count), axis=-1)
        ipython_fig.data[1].z = updated_sum_data
        # Annotation i displays the kept-row count of group i.
        for x_val, this_sum in enumerate(updated_sum_data):
            ipython_fig['layout']['annotations'][x_val].update(text=str(this_sum))

# Re-run the filter whenever the value of any control changes
for control in (mutation_select, gnomAD_slider, gnomAD_nan):
    control.observe(response, names="value")

# Stack the controls around the figure; as the cell's last expression this is
# what gets displayed
widgets.VBox([mutation_select, ipython_fig, gnomAD_slider, gnomAD_nan])

VBox(children=(SelectMultiple(description='Mutation Types:  ', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12…