# Plot yeast RBD DMS data

## Import modules and read data
Import Python modules:

In [1]:
import altair as alt

import pandas as pd

Disable Altair's maximum-rows limit so that the full data set can be plotted:

In [2]:
# Turn off Altair's max-rows check for chart data. The return value is bound to
# `_`, presumably just to keep it from being echoed as the cell output.
_ = alt.data_transformers.disable_max_rows()

Read the deep mutational scanning data and reduce it to site-level data by calculating the total and maximum escape at each site, filling sites with no measurements as 0:

In [3]:
# Mutation-level data; the code below uses its `condition`, `condition_type`,
# `condition_subtype`, `study`, `site` and `mut_escape` columns.
dms_mut_data = pd.read_csv('../results/merged_data/yeast_RBD_DMS_data.csv')

# calculate site metrics and fill missing sites as 0
# every integer site from the smallest to the largest one in the data
sites = list(range(dms_mut_data['site'].min(), dms_mut_data['site'].max() + 1))

# collapse mutations to one row per condition / study / site
# (dropna=False keeps groups whose annotation columns are missing)
dms_data = (
    dms_mut_data
    .groupby(['condition', 'condition_type', 'condition_subtype', 'study', 'site'],
             as_index=False, dropna=False)
    .aggregate(total_escape=pd.NamedAgg('mut_escape', 'sum'),
               max_escape=pd.NamedAgg('mut_escape', 'max'),
               )
    )

# add a row for every site to each (condition, study) group; the added rows are
# NaN in every column except `condition`, `study` and `site`
dms_data = pd.merge_ordered(dms_data,
                            pd.DataFrame({'site': sites}),
                            on='site',
                            left_by=['condition', 'study'],
                            )

# Copy each group's annotations onto its added rows *before* filling with 0.
# Otherwise the blanket `fillna(0)` below would label those rows with
# condition_type / condition_subtype of 0 instead of the real values.
annotation_cols = ['condition_type', 'condition_subtype']
dms_data[annotation_cols] = (
    dms_data
    .groupby(['condition', 'study'])[annotation_cols]
    .transform('first')  # first non-null value within each group
    )

# remaining NaNs are the escape metrics of the added sites (set to 0) and any
# annotations that were already missing in the input (also set to 0, as before)
dms_data = dms_data.fillna(0)

# make a "unique condition" column: the bare condition name when it occurs in a
# single study, otherwise the condition name followed by the study in parentheses
condition_labels = (
    dms_data[['condition', 'study']]
    .drop_duplicates()
    # n = number of distinct studies that report this condition
    .assign(n=lambda x: x.groupby('condition')['study'].transform('nunique'),
            # vectorized replacement for a row-wise `apply`: keep the condition
            # name where n == 1, otherwise use the study-qualified label
            condition_unique=lambda x: x['condition'].where(
                                        x['n'] == 1,
                                        x['condition'] + '(' + x['study'] + ')'),
            )
    )
dms_data = dms_data.merge(condition_labels, on=['condition', 'study'])

# each (site, condition_unique) pair must identify exactly one row
assert len(dms_data) == len(dms_data.groupby(['site', 'condition_unique'])), (
    'site / condition_unique pairs are not unique')

dms_data

Unnamed: 0,condition,condition_type,condition_subtype,study,site,total_escape,max_escape,n,condition_unique
0,AZD1061,antibody,clinical,Dong 2021,331,0.032706,0.005524,1,AZD1061
1,AZD1061,antibody,clinical,Dong 2021,332,0.034237,0.005990,1,AZD1061
2,AZD1061,antibody,clinical,Dong 2021,333,0.025487,0.002917,1,AZD1061
3,AZD1061,antibody,clinical,Dong 2021,334,0.030717,0.009528,1,AZD1061
4,AZD1061,antibody,clinical,Dong 2021,335,0.021808,0.003185,1,AZD1061
...,...,...,...,...,...,...,...,...,...
8035,subject K (day 29),serum,convalescent,Greaney 2021b,527,0.005310,0.002243,1,subject K (day 29)
8036,subject K (day 29),serum,convalescent,Greaney 2021b,528,0.009810,0.002504,1,subject K (day 29)
8037,subject K (day 29),serum,convalescent,Greaney 2021b,529,0.031831,0.009543,1,subject K (day 29)
8038,subject K (day 29),serum,convalescent,Greaney 2021b,530,0.024201,0.007600,1,subject K (day 29)


## Make interactive plots
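First make a zoom bar for the sites, together with a line plot of the site-level total escape that is filtered by the zoom-bar selection: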

In [15]:
width = 800

# Interval selection along the x-axis only, drawn with a black outline;
# it is attached to the zoom bar and used to filter the line plot.
zoom_brush = alt.selection_interval(
    encodings=['x'],
    mark=alt.BrushConfig(stroke='black', strokeWidth=2),
)

# Thin gray strip over all sites that carries the brush selection.
zoom_bar = (
    alt.Chart(dms_data)
    .mark_rect(color='lightgray')
    .encode(x=alt.X('site:O'))
    .add_selection(zoom_brush)
    .properties(title='zoom bar', width=width, height=15)
)

# Step-interpolated lines (with point markers) of total escape per site,
# colored by unique condition and restricted to the brushed sites.
lineplot = (
    alt.Chart(dms_data)
    .mark_line(clip=True, interpolate='step', point=True)
    .encode(
        x=alt.X('site:O'),
        y=alt.Y('total_escape:Q'),
        color=alt.Color('condition_unique'),
    )
    # https://github.com/altair-viz/altair/issues/1512#issuecomment-691720690
    .interactive(bind_y=False)
    .transform_filter(zoom_brush)
    .properties(width=width, height=200)
)

# Stack the zoom bar above the line plot and display the result.
chart = alt.vconcat(zoom_bar, lineplot)

chart

NameError: name 'width' is not defined

Combine plots: