In [None]:
import re

import numpy as np
import pandas as pd
import altair as alt

# Alternative data transformer, currently disabled (see the Altair data-transformer docs):
#alt.data_transformers.enable('csv')
# Turn off Altair's maximum-rows check so the full table can be handed to a chart.
alt.data_transformers.disable_max_rows()
# NOTE: To avoid making too large notebooks, only one heatmap is generated at a time

# ---------------------------------------------------------------------------
# Input
# ---------------------------------------------------------------------------
# Generated by rib, 'Bin#.csv'
input_file = '../.data/fBin1.tsv'

# ---------------------------------------------------------------------------
# Look-up table layout: column names applied to the table when it is read,
# and the field separator.
# ---------------------------------------------------------------------------
names = [
    'tag',
    'barcode',
    'barcode_quality',
    'gen_mutation',
    'gen_mutation_quality',
    'gen_indel',
    'gen_indel_quality',
    'codon',
    'aa_mutation',
    'n_aa_substitutions',
    'nnk',
    'fraction',
    'barcode_quality_score',
    'gen_quality_score',
    'read_quality_score',
    'variant_call_support',
]
delim = '\t'

# ---------------------------------------------------------------------------
# RIB format: which columns hold the barcode tag, the amino-acid mutation
# string and the number of amino-acid substitutions.
# ---------------------------------------------------------------------------
tag_column = 'tag'
aa_column = 'aa_mutation'
n_aa_column = 'n_aa_substitutions'
# Interpolated unquoted into a DataFrame.query expression, so the tag column
# is compared against the number 1 rather than the string '1'.
valid_barcode_tag = '1'

# ---------------------------------------------------------------------------
# Clarity: display filters applied before plotting.
# ---------------------------------------------------------------------------
# When True, rows whose mutated residue is '*' are dropped from the plot table.
remove_stop_codon = True
# DataFrame.query expression evaluated against the derived `mut_freq` column.
frequency_filter = 'mut_freq > 0'

In [None]:
# Load the look-up table. It is read without a header row, so the column names
# come from `names`; the separator is the configured `delim` instead of a
# second hard-coded '\t'.
source = pd.read_csv(input_file, header=None, delimiter=delim, names=names)

# Keep rows with a valid barcode tag that carry exactly one amino-acid substitution.
single_muts = source.query(f"{tag_column} == {valid_barcode_tag}").query(f"{n_aa_column} == 1")

# mut_freq = number of retained rows sharing the same amino-acid mutation string.
# The count is taken before fully identical rows are dropped.
single_muts = single_muts.assign(mut_freq=lambda x: x[aa_column].map(x[aa_column].value_counts())).drop_duplicates()

single_muts = single_muts.query(frequency_filter)

# Split the mutation string into wild-type residue, position and mutated residue.
# Assumes strings shaped like 'A123G' ('*' allowed as a residue) -- TODO confirm
# against the rib output.
# - `expand=False` makes each extract return a Series rather than a one-column frame.
# - `.assign` builds a new frame instead of writing into the result of `.query`,
#   which avoids SettingWithCopyWarning.
# - The mutated-residue pattern captures only the final letter and tolerates
#   trailing whitespace. The previous pattern '([A-Z*] $)' required a trailing
#   space and captured it, so the value was NaN or 'G ' and never equal to '*'.
aa_change = single_muts[aa_column]
single_muts = single_muts.assign(
    wt_aa=aa_change.str.extract(r'(^[A-Z*])', expand=False),
    position=aa_change.str.extract(r'([0-9]+)', expand=False).astype(int),
    mutated_aa=aa_change.str.extract(r'([A-Z*])\s*$', expand=False),
)

# Plot table. Every selected column is a function of the mutation string, so
# dropping duplicates leaves one row per mutation and keeps the data embedded
# in the chart small.
smp = (
    single_muts[['position', 'wt_aa', 'mutated_aa', 'mut_freq', aa_column]]
    .drop_duplicates()
    .reset_index(drop=True)
)

if remove_stop_codon:
    # Drop substitutions to a stop codon.
    smp = smp.query('mutated_aa != "*"')
    
# Interval selection bound to the x channel only. It is attached to the overview
# strip below and used as the filter for the detail charts.
brush = alt.selection_interval(encodings=['x'])

# Overview strip: thin grey bars over the ordinal position axis, 2 px per step.
bar = (
    alt.Chart(smp)
    .mark_bar(size=2, color='grey')
    .encode(x='position:O')
    .properties(width=alt.Step(2))
    .add_selection(brush)
)

# Heatmap of mutated residue (y) by position (x), coloured by mut_freq and
# restricted to the positions selected with `brush`.
# NOTE(review): `add_selection` / `selection_single` are deprecated on Altair >= 5
# (`add_params` / `selection_point`); left as-is because the Altair version
# targeted by this notebook is not visible here.
muts = (
    alt.Chart(smp)
    .mark_rect()
    .encode(
        x='position:O',
        y='mutated_aa:O',
        color='mut_freq:Q',
        tooltip=['mut_freq', aa_column],
    )
    .transform_filter(brush)
    .add_selection(alt.selection_single())
)

# Text layer with the wild-type residue at each position.
# NOTE(review): `wt` is built but not included in `hm` below -- confirm whether
# it should be shown.
wt = (
    alt.Chart(smp)
    .mark_text()
    .encode(
        x='position:O',
        y='wt_aa:O',
        text='wt_aa:O',
        tooltip=['position'],
    )
    .transform_filter(brush)
    .add_selection(alt.selection_single())
)

# Stack the overview strip on top of the heatmap and render it.
hm = alt.vconcat(bar, muts)
display(hm)