## Interactive plot of results

In [2]:
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

import bokeh.io
import bokeh.layouts
import bokeh.models
import bokeh.palettes
import bokeh.plotting
from bokeh.io import export_svg
from bokeh.models import *
from bokeh.themes import Theme
from bokeh.transform import linear_cmap

bokeh.io.output_notebook()

## Plotting style

Credit to Griffin Chure.

In [3]:
def color_palette():
    """
    Build the PBoC color palette.

    Returns
    -------
    dict
        Maps color names (e.g. ``'green'``, ``'pale_blue'``) to hex color
        strings. The insertion order is kept stable because `bokeh_theme`
        derives an ordered list of colors from it.
    """
    return dict(
        # greens
        green='#7AA974',
        light_green='#BFD598',
        pale_green='#DCECCB',
        # yellows
        yellow='#EAC264',
        light_yellow='#F3DAA9',
        pale_yellow='#FFEDCE',
        # blues
        blue='#738FC1',
        light_blue='#A9BFE3',
        pale_blue='#C9D7EE',
        # reds
        red='#D56C55',
        light_red='#E8B19D',
        pale_red='#F1D4C9',
        # purples
        purple='#AB85AC',
        light_purple='#D4C2D9',
        # dark accents
        dark_green='#7E9D90',
        dark_brown='#905426',
    )

def bokeh_theme():
    """Install a custom bokeh theme matching the PBoC 2e colors.

    The theme is set on the current bokeh document (``bokeh.io.curdoc()``).

    Returns
    -------
    list
        ``[colors, palette]`` where ``colors`` is the dictionary returned by
        `color_palette` and ``palette`` is the list of its hex values whose
        name does not contain ``'pale'``.
    """
    figure_attrs = {
        'background_fill_color': '#E3E7E9',
        'outline_line_color': '#FFFFFF',
    }
    axis_attrs = {
        'major_tick_in': 4,
        'major_tick_line_width': 1,
        'axis_label_text_font': 'Lato',
        'axis_label_text_font_style': 'normal',
    }
    grid_attrs = {'grid_line_color': "white"}
    legend_attrs = {
        'background_fill_color': '#E3E7E9',
        'border_line_color': '#FFFFFF',
        'border_line_width': 1.5,
        'background_fill_alpha': 0.5,
    }
    text_attrs = {
        'text_font_style': 'normal',
        'text_font': 'Lato',
    }
    title_attrs = {
        'background_fill_color': '#FFFBCE',
        'text_font_style': 'normal',
        'align': 'center',
        'text_font': 'Lato',
        'offset': 2,
    }

    theme_json = {
        'attrs': {
            'figure': figure_attrs,
            'Axis': axis_attrs,
            'Grid': grid_attrs,
            'Legend': legend_attrs,
            'Text': text_attrs,
            'Title': title_attrs,
        }
    }
    bokeh.io.curdoc().theme = Theme(json=theme_json)

    # Collect every non-pale color, in palette order
    colors = color_palette()
    palette = []
    for name, hex_code in colors.items():
        if 'pale' not in name:
            palette.append(hex_code)
    return [colors, palette]

bokeh_theme();

## Data Import

This part is still under construction and is not currently required to run the interactive plot.

In [4]:
# Load the processed mapping table from the working directory.
# NOTE(review): presumably this is the `df_map` argument of `import_data`,
# which merges it on "barcode" and reads its "promoter" and "wt_seq"
# columns — confirm against the CSV.
df_map = pd.read_csv(
    "./20220514_mapping_processed.csv",
)

In [5]:
def seq_to_int(seq):
    """Encode a DNA sequence as an array of integers.

    Parameters
    ----------
    seq : str
        Sequence made of the upper-case letters A, C, G and T.

    Returns
    -------
    numpy.ndarray
        One entry per base with A=0, C=1, G=2, T=3.

    Raises
    ------
    KeyError
        If the sequence contains any other character.
    """
    base_index = dict(zip('ACGT', range(4)))
    encoded = []
    for base in seq:
        encoded.append(base_index[base])
    return np.array(encoded)

In [6]:
def get_paths(patterns=None):
    """Collect barcode-count files and index them by sample.

    Parameters
    ----------
    patterns : iterable of str, optional
        Glob patterns to search. Defaults to the three barcode-count
        directories used by this project.

    Returns
    -------
    dict
        Maps ``(gc, rep, xna)`` to the file path, where the three fields are
        parsed from a file name of the form ``<gc>-<rep>_<xna>_...``. When
        several files share a key, the one found last wins.

    Raises
    ------
    IndexError
        If a matched file name does not follow the expected pattern.
    """
    if patterns is None:
        patterns = (
            "../../../data/barcode_counts/20230907_barcode/*",
            "../../../data/barcode_counts/20231207_barcode/*",
            "../../../data/barcode_counts/20240514_barcode_test/*",
        )

    path_map = {}
    for pattern in patterns:
        for path in glob.glob(pattern):
            # Parse the file name once: "<gc>-<rep>_<xna>_..."
            fields = path.split('/')[-1].split('_')
            sample = fields[0].split('-')
            gc = sample[0]
            rep = sample[1]
            xna = fields[1]
            path_map[(gc, rep, xna)] = path

    return path_map
    

#Map = get_paths()

def import_data(gc, rep, map, df_map):
    """Load DNA and RNA barcode counts for one sample and annotate them.

    Parameters
    ----------
    gc : str
        Growth-condition identifier used as first element of the `map` key.
    rep : str
        Replicate identifier used as second element of the `map` key.
    map : dict
        Maps ``(gc, rep, 'RNA' | 'DNA')`` to a count file path, as returned
        by `get_paths`. (The name shadows the builtin but is kept for
        backward compatibility.)
    df_map : pandas.DataFrame
        Mapping table with at least the columns ``barcode``, ``promoter``
        and ``wt_seq``.

    Returns
    -------
    pandas.DataFrame
        Inner join of the counts with `df_map`, with the added columns
        ``ct_0`` (DNA counts), ``ct_1`` (RNA counts), ``ct`` (their sum),
        ``relative_counts`` (``(ct_1 + 1) / (ct_0 + 1)``), ``int_promoter``
        and ``int_wt`` (integer-encoded sequences).

    Raises
    ------
    KeyError
        If no RNA or DNA file is registered for ``(gc, rep)``.
    """
    RNA_file = map[(gc, rep, 'RNA')]
    DNA_file = map[(gc, rep, 'DNA')]

    # Raw strings: "\s" in a regular string literal is an invalid escape
    # sequence and triggers a warning.
    df_DNA = pd.read_csv(DNA_file, names=["ct_0", "barcode"], sep=r"\s+")
    df_RNA = pd.read_csv(RNA_file, names=["ct_1", "barcode"], sep=r"\s+")

    # Keep barcodes seen in only one of the two libraries (count 0 in the other)
    df_outer = df_DNA.merge(df_RNA, on="barcode", how="outer").fillna(0)
    # Keep only barcodes that appear in the mapping table
    df = df_outer.merge(df_map, on="barcode", how="inner")
    df['ct'] = df['ct_1'] + df['ct_0']
    # Pseudocount of 1 avoids division by zero
    df['relative_counts'] = (df.ct_1 + 1) / (df.ct_0 + 1)
    df['int_promoter'] = df['promoter'].apply(seq_to_int)
    df['int_wt'] = df['wt_seq'].apply(seq_to_int)
    return df

  df_DNA = pd.read_csv(DNA_file, names=["ct_0", "barcode"], sep="\s+")
  df_RNA = pd.read_csv(RNA_file, names=["ct_1", "barcode"], sep="\s+")


In [7]:
def expression_shift(df_input, prom):
    """Accumulate relative expression deviations per base and position.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Needs the columns ``name``, ``relative_counts`` and ``int_promoter``
        (integer-encoded sequences with at least 160 entries each).
    prom : str
        Value of ``name`` selecting the rows to use.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(4, 160)``. Entry ``[b, j]`` is the sum, over all
        selected sequences carrying base ``b`` at position ``j``, of
        ``(relative_counts - mean) / mean``, with ``mean`` the mean relative
        count of the selected rows.
    """
    df = df_input.loc[df_input.name == prom, :]
    mean_exp = np.mean(df.relative_counts)

    sequences = df.int_promoter.values
    rel_counts = df.relative_counts.values

    exp_shift = np.zeros([4, 160])
    for seq, rel in zip(sequences, rel_counts):
        # Relative deviation of this sequence from the mean expression
        deviation = (rel - mean_exp)/mean_exp
        for pos in range(160):
            exp_shift[int(seq[pos]), pos] += deviation

    return exp_shift

In [8]:
def mutual_information(df_input, prom, drop_out):
    """Tabulate DNA and RNA counts per base and position for one promoter.

    This is the count table a mutual-information computation would start
    from; the mutual information itself is not computed here.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Needs the columns ``name``, ``int_promoter`` (integer-encoded
        sequences with at least 160 entries each), ``ct_0`` and ``ct_1``.
    prom : str
        Value of ``name`` selecting the rows to use.
    drop_out
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2, 4, 160)``. ``[0, b, j]`` is the summed ``ct_0``
        and ``[1, b, j]`` the summed ``ct_1`` of all selected sequences
        carrying base ``b`` at position ``j``. All zeros when no row matches.
    """
    df = df_input.loc[df_input["name"] == prom, :]
    sequences = df.int_promoter.values
    dna_counts = df.ct_0.values
    rna_counts = df.ct_1.values

    freq_mat = np.zeros([2, 4, 160])
    for i, seq in enumerate(sequences):
        for j in range(160):
            base = int(seq[j])
            # Counts belong to the sequence (row i), not to the position j
            freq_mat[0, base, j] += dna_counts[i]
            freq_mat[1, base, j] += rna_counts[i]
    return freq_mat

## Import footprints and Metadata
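Download the four files read in the next cell (`footprints.csv`, `exshifts.csv`, `20231207_footprints_meta.csv` and `regulonDB_meta.csv`) from Box and place them in the notebook's working directory.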

In [9]:
# Import mutual information (information footprints)
df = pd.read_csv('./footprints.csv')

# Import expression shift
df_exshift = pd.read_csv('./exshifts.csv')

# Import metadata for promoters
df_meta = pd.read_csv('./20231207_footprints_meta.csv')
df_regulonDB = pd.read_csv('./regulonDB_meta.csv')

# Cast the columns that back the selectors to strings, so they compare equal
# to the string values used for the Select widgets (e.g. rep_ini = '1')
df['replicate'] = df['replicate'].astype(str)
df_exshift['replicate'] = df_exshift['replicate'].astype(str)
df['d'] = df['d'].astype(str)

# Save data in CDS
data = ColumnDataSource(df)
exshift = ColumnDataSource(df_exshift)
meta = ColumnDataSource(df_meta)
regulonDB = ColumnDataSource(df_regulonDB)
# Unique promoter names, used as options of the promoter selector
promoters = list(df['promoter'].unique())

# Quick peek at rspAp to look for the correct binding sites
df_regulonDB.loc[df_regulonDB['PROMOTER_NAME'] == 'rspAp', :]

Unnamed: 0,PROMOTER_NAME,SIGMA_FACTOR,RI_FUNCTION,CENTER_POSITION,RI_SEQUENCE,RI_ORIENTATION,FINAL_STATE,INTERACTION_TYPE,TRANSCRIPTION_FACTOR_NAME,TRANSCRIPTION_FACTOR_FAMILY,CONSENSUS_SEQUENCE,DNA_BINDING_EFFECT
1554,rspAp,,activator,-60.5,atacgcctttTTTTGTGATCACTCCGGCTTTTttcgatcttt,,CRP-cyclic-AMP,Allosteric,CRP,CRP,AAATGTGAtctagaTCACATTT,
2254,rspAp,,repressor,-35.0,tttcgatcttTATACTTGTATggtagtagct,,RspR,,RspR,GntR,missing,


In [10]:
df

Unnamed: 0,promoter,source,mut_info,growth_condition,growth_condition_info,replicate,pos,d
0,TSS_1414_storz_regulondb,genome,0.000158,Glucose,M9 Minimal Media with 0.5% Glucose,1,-115,0
1,TSS_1414_storz_regulondb,genome,0.000246,Glucose,M9 Minimal Media with 0.5% Glucose,1,-114,0
2,TSS_1414_storz_regulondb,genome,0.000230,Glucose,M9 Minimal Media with 0.5% Glucose,1,-113,0
3,TSS_1414_storz_regulondb,genome,0.000047,Glucose,M9 Minimal Media with 0.5% Glucose,1,-112,0
4,TSS_1414_storz_regulondb,genome,0.000042,Glucose,M9 Minimal Media with 0.5% Glucose,1,-111,0
...,...,...,...,...,...,...,...,...
1943869,znuCp,genome,0.001195,Cold Shock (1h at 10C),M9 Minimal Media with 0.5% Glucose with 1h at ...,1,38,2
1943870,znuCp,genome,0.000588,Cold Shock (1h at 10C),M9 Minimal Media with 0.5% Glucose with 1h at ...,1,39,2
1943871,znuCp,genome,0.000586,Cold Shock (1h at 10C),M9 Minimal Media with 0.5% Glucose with 1h at ...,1,40,2
1943872,znuCp,genome,0.000505,Cold Shock (1h at 10C),M9 Minimal Media with 0.5% Glucose with 1h at ...,1,41,2


In [9]:
# Set initial settings for the plot.
# Replicate and window width are strings because the corresponding columns
# were cast to str when the data was loaded.
prom_ini = 'rspAp'
gc_ini = 'Glucose'
source_ini = 'genome'
rep_ini = '1'
d_ini = '1'

A lot of the following code was inspired by BeBi103a.

https://bebi103a.github.io/recitations/06/serving_dashboard.html

In [10]:
# data preparation function
def extract_sub_df(df, promoter, growth_condition, replicate, source, d=None):
    """Extract the rows of a data frame matching the given parameters.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``promoter``, ``growth_condition``, ``replicate``
        and ``source``; also ``d`` when `d` is given.
    promoter, growth_condition, replicate, source
        Values the respective columns must equal.
    d : optional
        Value the ``d`` column must equal. ``None`` (default) disables this
        filter, for data frames without a ``d`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows, all columns.
    """
    inds = (
        (df["promoter"] == promoter)
        & (df["growth_condition"] == growth_condition)
        & (df["replicate"] == replicate)
        & (df["source"] == source)
    )
    # Test against None rather than truthiness, so that falsy but valid
    # values such as 0 still filter instead of being silently ignored.
    if d is not None:
        inds = inds & (df["d"] == d)

    return df.loc[inds, :]


def build_cds(df, promoter, growth_condition, replicate, source, d=None):
    """Wrap the rows selected by `extract_sub_df` in a ColumnDataSource.

    All arguments are forwarded unchanged to `extract_sub_df`.
    """
    subset = extract_sub_df(df, promoter, growth_condition, replicate, source, d)
    return bokeh.models.ColumnDataSource(subset)


# plotting functions
def plot_info(cds):
    """Draw the information footprint as a bar chart.

    Parameters
    ----------
    cds : bokeh.models.ColumnDataSource
        Source providing the columns ``pos`` (x) and ``mut_info`` (bar height).

    Returns
    -------
    The bokeh figure, 1000 x 200 px, spanning x = -115.5 to 44.5.
    """
    figure_options = dict(
        width=1000,
        height=200,
        x_axis_label='position',
        y_axis_label='mutual information [bits]',
        title="Mutual Information from Data",
        x_range=[-0.5-115, 45 - 0.5],
    )
    footprint = bokeh.plotting.figure(**figure_options)
    footprint.vbar(x='pos', top='mut_info', source=cds)

    # One major tick every 10 positions, from -110 to 40
    footprint.xaxis.ticker = np.arange(-11, 5) * 10

    return footprint

def plot_exshift(cds_ex):
    """Draw the expression-shift heat map (base vs. position).

    Parameters
    ----------
    cds_ex : bokeh.models.ColumnDataSource
        Source providing the columns ``pos`` (x), ``base`` (y),
        ``expression_shift`` (color) and ``wt_base`` (hover text and bottom
        axis labels).
        NOTE(review): taking every 4th ``wt_base`` entry assumes four
        consecutive rows per position, starting at position -115 — confirm
        against the exshift table.

    Returns
    -------
    The bokeh figure, 1000 x 200 px, with the wild-type sequence as bottom
    axis labels, a numeric position axis above, and a color bar on the right.
    """
    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
    p_exshift = bokeh.plotting.figure(width=1000,
                                      height=200,
                                      x_axis_label='sequence',
                                      title="Expression Shift upon mutation",
                                      x_range=[-0.5-115, 45 - 0.5],
                                      y_range=[0.5, 5 - 0.5],
                                      tooltips=[('wild type base', '@wt_base')],
                                      tools=TOOLS)

    # One tick per position / per base
    p_exshift.xaxis.ticker = np.arange(-115, 45)
    p_exshift.yaxis.ticker = np.arange(1,5)

    # Replace numeric tick labels by the wild-type base (x) and base name (y)
    wt_seq = cds_ex.data['wt_base'][0::4]
    p_exshift.xaxis.major_label_overrides = {(tick-115): x_ for tick, x_ in enumerate(wt_seq)}
    p_exshift.yaxis.major_label_overrides = {(tick+1): x_ for tick, x_ in enumerate(['A', 'C', 'G', 'T'])}
    p_exshift.xaxis.major_label_text_font_size = "6pt"

    # One unit square per (position, base); colors interpolate between
    # "#D14241" at -1, white at 0 and "#738FC1" at +1
    r = p_exshift.rect(x='pos',
                       y='base',
                       width=1,
                       height=1,
                       fill_color=linear_cmap('expression_shift',
                                              bokeh.palettes.interp_palette(["#D14241", "#FFFFFF", "#738FC1"], 100),
                                              low=-1,
                                              high=1),
                       line_color=None,
                       source=cds_ex
    )

    # Second x axis above the plot with numeric positions every 10 bp
    p_exshift.extra_x_ranges.update({'x_above':  p_exshift.x_range})
    p_exshift.add_layout(LinearAxis(x_range_name='x_above', ticker=np.arange(-11, 5) * 10), 'above')

    color_bar = r.construct_color_bar(padding=5)
    p_exshift.add_layout(color_bar, "right")

    return p_exshift



# creating widgets
# Define the selections. Every selector except the promoter one only offers
# the values that exist in `df` for the initially selected promoter.
prom_selector = Select(options=list(np.sort(promoters)), value=prom_ini)
gc_selector = Select(options=list(df.loc[df['promoter']==prom_selector.value, 'growth_condition'].unique()), value=gc_ini)
source_selector = Select(options=list(df.loc[df['promoter']==prom_selector.value, 'source'].unique()), value=source_ini)
rep_selector = Select(options=list(df.loc[df['promoter']==prom_selector.value, 'replicate'].unique()), value=rep_ini)
d_selector = Select(options=list(df.loc[df['promoter']==prom_selector.value, 'd'].unique()), value=d_ini)


# titles for selectors
prom_title = Div(text="<b>Promoter</b>")
gc_title = Div(text="<b>Growth Condition</b>")
source_title = Div(text="<b>Plasmid or Genome</b>")
rep_title = Div(text="<b>Replicate</b>")
d_title = Div(text="<b>Window Width</b>")

# metadata for default choice
meta_ini = df_meta.loc[df_meta['promoter'] == prom_ini, :]

# boxes for description: genes, strand and 5'/3' coordinates of the initial
# promoter (same HTML layout as produced by update_meta), and an initially
# empty box for the RegulonDB annotation
prom_desc = Div(text='<div style="width:300px; overflow-wrap: break-word;"><b> Genes controlled by promoter</b>: <br/>' + meta_ini['genes'].values[0] + '<br/><b>Strand: </b><br/>' + meta_ini['direction'].values[0] + '<br/><b>5\':</b><br/>' + str(meta_ini['five_prime'].values[0]) + '<br/><b>3\':</b><br/>' + str(meta_ini['three_prime'].values[0]) + '</div>')
regulonDB_desc = Div(text="")

# callback functions
def update_gc_selector(attr, old, new):
    """Given a promoter and source, update selectors to reflect existing data sets.

    Refreshes the options of the growth-condition and replicate selectors
    from the rows of `df` that match the selected promoter and source. The
    current growth condition is kept when it is still available; the
    replicate is reset to the first available one. When no row matches the
    promoter/source combination, the selectors are left untouched.

    The ``attr``, ``old`` and ``new`` arguments follow the bokeh ``on_change``
    callback signature and are not used.
    """
    inds = (df["promoter"] == prom_selector.value) & (df["source"] == source_selector.value)
    gc_options = [str(i) for i in sorted(df.loc[inds, "growth_condition"].unique())]
    rep_options = [str(i) for i in sorted(df.loc[inds, "replicate"].unique())]

    # Nothing to offer for this combination: indexing [0] below would raise
    # IndexError inside the callback.
    if not gc_options or not rep_options:
        return

    current_gc = gc_selector.value
    gc_selector.options = gc_options
    if current_gc not in gc_options:
        gc_selector.value = gc_options[0]

    rep_selector.options = rep_options
    rep_selector.value = rep_options[0]


def update_meta(attr, old, new):
    """Refresh the promoter description box for the selected promoter.

    Reads genes, strand and 5'/3' coordinates of the selected promoter from
    `df_meta` and writes them as HTML into `prom_desc`. The callback
    arguments are not used.
    """
    selected = df_meta.loc[df_meta['promoter'] == prom_selector.value, :]
    html_parts = [
        '<div style="width:300px; overflow-wrap: break-word;"><b> Genes controlled by promoter</b>: <br/>',
        selected['genes'].values[0],
        '<br/><b>Strand: </b><br/>',
        selected['direction'].values[0],
        "<br/><b>5':</b><br/>",
        str(selected['five_prime'].values[0]),
        "<br/><b>3':</b><br/>",
        str(selected['three_prime'].values[0]),
        '</div>',
    ]
    prom_desc.update(text=''.join(html_parts))

def update_wt_seq(attr, old, new):
    """Relabel an x axis of the expression-shift plot with the current wild-type bases.

    Takes every 4th ``wt_base`` entry of `cds_ex` and maps tick ``-115 + k``
    to the k-th of them. The callback arguments are not used.
    """
    # NOTE(review): xaxis[1] is presumably the bottom axis that carries the
    # wild-type letters — confirm the axis order returned by bokeh.
    bases = cds_ex.data['wt_base'][0::4]
    overrides = {}
    for offset, base in enumerate(bases):
        overrides[offset - 115] = base
    p_exshift.xaxis[1].update(major_label_overrides=overrides)


def update_sites(attr, old, new):
    """Refresh the RegulonDB annotation box for the selected promoter.

    Lists function, transcription factor and center position of every
    RegulonDB binding site of the selected promoter whose center lies
    strictly between -115 and 45. Shows "No Binding Sites Found" when there
    is no such site, including the case where sites exist but all lie
    outside that window. The callback arguments are not used.
    """
    promoter = prom_selector.value
    regulons = df_regulonDB.loc[df_regulonDB['PROMOTER_NAME'] == promoter, :]

    entries = []
    for _, site in regulons.iterrows():
        if -115 < site['CENTER_POSITION'] < 45:
            # str() keeps missing (NaN) annotation fields from raising a
            # TypeError during concatenation.
            entries.append(
                '<div style="overflow-wrap: break-word;"><br/><b>' + str(site['RI_FUNCTION'])
                + '</b><br/>Transcription Factor: ' + str(site['TRANSCRIPTION_FACTOR_NAME'])
                + '<br/>Binding Site Position Relative to TSS: ' + str(site['CENTER_POSITION'])
                + '</div>'
            )

    html = '<div style="width:700px;"><b> Annotation in RegulonDB</b><br/>'
    if entries:
        html += ''.join(entries)
    else:
        html += '<br/>No Binding Sites Found'
    html += '</div>'
    regulonDB_desc.update(text=html)

update_sites("", "", "")
def callback(attr, old, new):
    """Reload both plot data sources from the current widget selections.

    The information-footprint source is filtered by promoter, growth
    condition, replicate, source and window width; the expression-shift
    source by the first four only. The callback arguments are not used.
    """
    # Shared filters, in the positional order expected by build_cds
    selection = (
        prom_selector.value,
        gc_selector.value,
        rep_selector.value,
        source_selector.value,
    )

    footprint_cds = build_cds(df, *selection, d_selector.value)
    exshift_cds = build_cds(df_exshift, *selection)

    # Update the existing sources in place so the plots bound to them refresh
    cds.data.update(footprint_cds.data)
    cds_ex.data.update(exshift_cds.data)


Build the initial data sources for the plots.

In [11]:
# Read the initial selections from the widgets
promoter = prom_selector.value
gc = gc_selector.value
source = source_selector.value
d = d_selector.value
rep = rep_selector.value

# Build the data sources the plots are bound to; the callbacks later update
# `cds` and `cds_ex` in place.
cds = build_cds(df, promoter, gc, rep, source, d)
cds_ex = build_cds(df_exshift, promoter, gc, rep, source)
# NOTE(review): built from df_exshift exactly like cds_ex and not referenced
# in the cells visible here — confirm whether it is still needed.
cds_mut_info = build_cds(df_exshift, promoter, gc, rep, source)

In [12]:
# Register the callbacks on the selectors.
# A new promoter refreshes the metadata box, the plotted data, the dependent
# selectors, the wild-type axis labels and the RegulonDB annotation.
prom_selector.on_change("value", update_meta, callback, update_gc_selector, update_wt_seq, update_sites)
# A new source changes which growth conditions and replicates are available
source_selector.on_change("value", update_gc_selector, callback)
# The remaining selectors only change the plotted data
gc_selector.on_change("value", callback)
rep_selector.on_change("value", callback)
d_selector.on_change("value", callback)

In [13]:
def save_plots():
    """Save the currently shown plots as a PDF using matplotlib.

    Draws the information footprint (top) and the expression-shift heat map
    (bottom) from the current contents of `cds` and `cds_ex` and writes them
    to ``../../../figures/{promoter}_{gc}_{d}_{rep}.pdf``.

    NOTE(review): the plasmid/genome selection is not part of the file name,
    so both sources write to the same file — confirm this is intended.
    """
    promoter = prom_selector.value
    gc = gc_selector.value
    d = d_selector.value
    rep = rep_selector.value
    filename = f"../../../figures/{promoter}_{gc}_{d}_{rep}.pdf"
    fig, ax = plt.subplots(2, 1, figsize=(10, 4), sharex=True)
    x = cds.data['pos']
    y = cds.data['mut_info']

    ax[0].bar(x, y)
    ax[0].set_ylabel("MI [bits]")
    ax[0].set_title("{}, {}, Rep {}".format(promoter, gc, rep))

    # NOTE(review): assumes 160 positions with four consecutive rows each,
    # ordered A, C, G, T within a position — confirm against the exshift table.
    z = np.reshape(cds_ex.data['expression_shift'], (160, 4))

    # origin='lower' puts the first row (A) at the bottom, where its tick
    # label is; with the default origin='upper' the rows were drawn flipped
    # relative to the A/C/G/T labels set below.
    im = ax[1].imshow(np.transpose(z), extent=(-115.5, 44.5, 0, 4), aspect='auto', origin='lower', cmap=ListedColormap(bokeh.palettes.interp_palette(["#D14241", "#FFFFFF", "#738FC1"], 100)), vmin=-1, vmax=1)
    ax[1].set_yticks([0.5, 1.5, 2.5, 3.5])
    ax[1].set_yticklabels(['A', 'C', 'G', 'T'])

    # Numeric position axis above the heat map
    ax2 = ax[1].twiny()
    ax2.set_xlim(ax[1].get_xlim())
    ax2.set_xticks(np.linspace(-110, 40, 16))
    ax2.set_xticklabels(np.round(np.linspace(-110, 40, 16), 2), fontsize=8)

    # Bottom axis: one tick per position, labeled with the wild-type base
    # (every 4th row of the source, i.e. one entry per position)
    ax[1].set_xticks(np.arange(-115, 45))
    ax[1].set_xticklabels(cds_ex.data['wt_base'][0::4], fontsize=5)

    ax[0].tick_params(axis='y', labelsize=8)
    ax[1].tick_params(axis='y', labelsize=8)

    fig.subplots_adjust(hspace=0.4)

    # Specify ax[1] to make the colorbar appear only below the lower row
    cbar = fig.colorbar(im, ax=ax[1], orientation='horizontal', pad=0.2)
    cbar.ax.tick_params(labelsize=8)

    fig.savefig(filename)
    # Close this figure explicitly rather than whichever one is current
    plt.close(fig)



Create dashboard.

In [14]:
# Button that writes the currently shown plots to a PDF via save_plots
button = Button(label="save", button_type="success")
button.on_click(save_plots)
def save_callback(attr, new, old):
    """Export the whole dashboard as ``{promoter}_{gc}_{d}_{rep}.svg``.

    The file is written relative to the current working directory. The three
    arguments are not used. This function is not registered on any widget in
    the cells visible here.
    """
    promoter = prom_selector.value
    gc = gc_selector.value
    d = d_selector.value
    rep = rep_selector.value
    filename = f"{promoter}_{gc}_{d}_{rep}.svg"
    # `filename` is keyword-only in bokeh's export_svg
    export_svg(dashboard, filename=filename)
    

# Control panel, laid out left to right:
# selectors | window width and replicate | promoter metadata | RegulonDB box
widgets = bokeh.layouts.row(
    # Promoter, source and growth-condition selectors plus the save button
    bokeh.layouts.column(
        prom_title,
        prom_selector,
        bokeh.models.Spacer(height=20),
        source_title,
        source_selector,
        bokeh.models.Spacer(height=20),
        gc_title,
        gc_selector,
        button
    ),
    # Window width and replicate selectors, side by side
    bokeh.layouts.column(
        bokeh.models.Spacer(height=20),
        bokeh.layouts.row(
            bokeh.layouts.column(
                d_title,
                d_selector),
            bokeh.models.Spacer(width=1),
            bokeh.layouts.column(
                rep_title,
                rep_selector),
            )
        ),
    bokeh.models.Spacer(width=10),
    prom_desc,
    bokeh.models.Spacer(width=10),
    regulonDB_desc
)




# Build the two figures once and keep references to them: update_wt_seq
# relabels an axis of `p_exshift` after a promoter change.
p_info = plot_info(cds)

p_exshift = plot_exshift(cds_ex)

# Full dashboard: controls on top, footprint in the middle, heat map below
dashboard = bokeh.layouts.column(
    widgets,
    bokeh.models.Spacer(height=20),
    p_info,
    bokeh.models.Spacer(height=20),
    p_exshift
)


def app(doc):
    """Bokeh application entry point: attach the dashboard layout to `doc`."""
    # NOTE(review): `dashboard` is a single module-level layout; bokeh models
    # presumably can only belong to one document, so showing the app a second
    # time may require re-running the cells that build it — confirm.
    doc.add_root(dashboard)


In [15]:
# Address passed to bokeh as `notebook_url`.
# NOTE(review): presumably this must match the host and port the notebook
# server is running on — adjust if it is not localhost:8889.
notebook_url = "localhost:8889"
bokeh.io.show(app, notebook_url=notebook_url)
