# Load Settings and Configs

In [None]:
# %load load_manuscript_data.py
from datetime import date
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path
import seaborn as sns
import sys
import plotly.express as px
import plotly.io as pio
import yaml

# Notebook-wide display defaults: seaborn context, pandas display limits and
# float formatting, and matplotlib figure/font sizing.
sns.set_context("notebook", font_scale=1.4)

pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)
# Render floats with thousands separators and four decimal places.
pd.set_option('display.float_format', '{:,.4f}'.format)

plt.rcParams.update({
    "figure.figsize": (16, 12),
    'savefig.dpi': 200,
    'figure.autolayout': False,
    'axes.labelsize': 18,
    'axes.titlesize': 20,
    'font.size': 16,
    'lines.linewidth': 2.0,
    'lines.markersize': 8,
    'legend.fontsize': 14,
})


# Read the manuscript configuration; each path entry is keyed by the
# environment the notebook runs on.
config_file = "manuscript_config.yaml"
with open(config_file) as file:
    # FullLoader turns the YAML document into plain Python objects
    # (here: nested dictionaries).
    configs = yaml.load(file, Loader=yaml.FullLoader)

# Run on server:
run_on = "server"
root, scratchDir, figuresDir = (
    Path(configs[section][run_on])
    for section in ('root', 'scratchDir', 'figuresDir')
)

# Named colours shared by the figures in this notebook.
alphabetClrs = px.colors.qualitative.Alphabet
clrs = ["#f7ba65", "#bf4713", "#9c002f", "#d73d00", "#008080", "#004c4c"]
colors = {
    'grey': alphabetClrs[8],
    # Remaining names map onto `clrs` in order.
    **dict(zip(
        ('light_yellow', 'darko', 'maroon', 'brighto', 'teal', 'darkteal'),
        clrs,
    )),
}

sushi_colors = dict(
    red='#C0504D',
    orange='#F79646',
    medSea='#4BACC6',
    black='#000000',
    dgreen='#00B04E',
    lgreen='#92D050',
    dblue='#366092',
    lblue='#95B3D7',
)

# Date stamp (dd-mm-yy) used as a prefix in saved figure names.
today = f"{date.today():%d-%m-%y}"

# Nguyen et al 2020

## Load the data

In [None]:
# Resolve the Nguyen et al. input files relative to `root`.
nguyenConfig = configs['nguyen']
countsFile = root.joinpath(nguyenConfig['countsFile'])
resultsFile = root.joinpath(nguyenConfig['resultsFile'])
#resultsFileName = root/nguyenConfig['resultsFileName']
sampleDataFile = root.joinpath(nguyenConfig['sampleDataFile'])
publishedResultsFile = root.joinpath(nguyenConfig['publishedResultsFile'])
publishedPhenotypesFile = root.joinpath(nguyenConfig['publishedPhenotypesFile'])
# Only the 'filtered' variant of the map file is used.
mapFile = root.joinpath(nguyenConfig['mapFile']['filtered'])

In [None]:
# The first line of the published table is skipped; the header is on the second.
publishedResults = pd.read_csv(publishedResultsFile, skiprows=1)
results = pd.read_csv(resultsFile)

# What is a hit?
# FDR below 0.05 in either selection direction AND |LFC| above 0.6.
results['mbarq_hit'] = (
    results[['neg_selection_fdr', 'pos_selection_fdr']].lt(0.05).any(axis=1)
    & results['LFC'].abs().gt(0.6)
)
#results['mbarq_hit'] = ((results['neg_selection_fdr'] < 0.05) | (results['pos_selection_fdr'] < 0.05))


# Unique (Name, locus_tag) pairs from the map file; locus_tag becomes the
# join key 'Name' and the original Name is kept as 'mapName'.
annotations = pd.read_csv(mapFile)
annotations = annotations.loc[:, ['Name', 'locus_tag']].drop_duplicates()
annotations = annotations.rename(columns={'Name': 'mapName', 'locus_tag': 'Name'})

# Long-format table of the published competitive index (CI): one row per
# locus/gene and source column.
publishedCI = (publishedResults.melt(id_vars=['locus', 'gene'],
                                    value_vars=[c for c in publishedResults.columns if 'median_CI' in c],
                                    value_name='median_CI',
                                    # Scalar, not a list: melt only accepts a list of
                                    # names when the columns are a MultiIndex.
                                    var_name='contrast')
                               .rename({'locus': 'Name'}, axis=1))

# The contrast is the part of the source column name before the first underscore.
publishedCI['contrast'] = publishedCI.contrast.str.split("_", expand=True)[0]
# Work with log10(CI) from here on.
publishedCI['median_CI'] = np.log10(publishedCI.median_CI)

# Long-format table of the published adjusted p-values: one row per locus and
# source column.
publishedHits = (publishedResults.melt(id_vars=['locus'],
                                       # Scalar, not a list: melt only accepts a list of
                                       # names when the columns are a MultiIndex.
                                       var_name='contrast',
                                       value_vars=[c for c in publishedResults.columns if 'adj_p_value_CI' in c],
                                       value_name='adj_pvalue')
                                 .rename({'locus': 'Name'}, axis=1))

# Derive the contrast from this table's own column labels (text before the
# first underscore). Copying publishedCI.contrast by row index instead is only
# correct while both melted tables share length and row order.
# NOTE(review): assumes the p-value columns carry the same "<contrast>_" prefix
# as the median_CI columns -- confirm against the published table.
publishedHits['contrast'] = publishedHits.contrast.str.split("_", expand=True)[0]
publishedDf = publishedCI.merge(publishedHits, on=['Name', 'contrast'])

# What is a hit?
#publishedDf['published_hit'] = ((publishedDf.adj_pvalue < 0.05)).astype(int)*2
# Published hit: adjusted p < 0.05 AND |log10 CI| > 0.6, encoded as 0 or 2 so
# that adding the boolean mbarq_hit yields a code 0..3.
publishedDf['published_hit'] = ((publishedDf.adj_pvalue < 0.05) & (abs(publishedDf.median_CI) > 0.6)).astype(int)*2

compCntrl = results.merge(publishedDf, on=['Name', 'contrast'], how='inner')
compCntrl['hit'] = (compCntrl['mbarq_hit'] + compCntrl['published_hit']).astype(str)
# Assign the result back instead of calling replace(..., inplace=True) on the
# column accessor: the in-place form acts on an intermediate object and is not
# guaranteed to update compCntrl (it does not under pandas Copy-on-Write).
compCntrl['hit'] = compCntrl['hit'].replace(
    {'0': 'No defect', '1': 'New', '2': 'Original', '3': 'Confirmed'})

In [None]:
## Look at results by Name instead of locus tag
#resultsName = pd.read_csv(resultsFileName)
#resultsName['mbarq_hit'] = ((resultsName['neg_selection_fdr'] < 0.05) | (resultsName['pos_selection_fdr'] < 0.05) & (abs(resultsName.LFC) > 0.6))


## Compare CIs

In [None]:
def compare_CIs(df, contrast):
    """Scatter the mBARq LFC against the published CI for one contrast.

    Args:
        df: Table with `contrast`, `LFC`, `median_CI`, `hit`, `Name` and
            `gene` columns.
        contrast: Value of the `contrast` column selecting the rows to plot.

    Returns:
        The plotly figure, with points coloured by hit category.
    """
    hit_label = 'Significant change in CI'
    subset = df.loc[df['contrast'] == contrast].rename(columns={'hit': hit_label})

    palette = {
        'No defect': colors['grey'],
        'Confirmed': sushi_colors['dgreen'],
        'New': sushi_colors['dblue'],
        'Original': sushi_colors['orange'],
    }
    fig = px.scatter(
        subset,
        x='LFC',
        y='median_CI',
        color=hit_label,
        height=800,
        width=1000,
        template='plotly_white',
        labels={'median_CI': 'CI (original analysis)', 'LFC': 'LFC (mBARq analysis)'},
        color_discrete_map=palette,
        hover_data=['Name', 'gene'],
        category_orders={hit_label: ['No defect', 'Original', 'New', 'Confirmed']},
    )

    # Large, slightly transparent markers with a thin dark outline.
    marker_style = dict(size=20, opacity=0.9, line=dict(width=1, color='DarkSlateGrey'))
    fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

    fig.update_layout(font={'size': 22})
    return fig

In [None]:
# Figure 2C: mBARq LFC vs. published CI for contrast 'd1', saved as a
# date-stamped PNG in figuresDir.
fig = compare_CIs(compCntrl, 'd1')
fig.write_image(figuresDir/f"{today}_Figure2C.png", format ='png', scale=2)
# Bare expression so the notebook renders the figure.
fig

In [None]:
# can specify width, height, and scale to improve resolution


In [None]:
#compare_CIs(compCntrl, 'd2')

In [None]:
#compare_CIs(compCntrl, 'd3')

### Calculate CI correlations

In [None]:
# Correlation between mBARq LFC and published CI within each contrast.
# groupby().corr() yields one 2x2 matrix per contrast; keeping every second
# row (the 'LFC' row) and the last column ('median_CI') leaves exactly the
# LFC-vs-median_CI coefficient.
corr_df = compCntrl.groupby('contrast')[['LFC', 'median_CI']].corr().iloc[0::2,-1].reset_index()
# After reset_index the middle column holds the matrix row label and the last
# column holds the coefficient.
corr_df.columns = ['contrast', 'LFC', 'R']

In [None]:
corr_df

In [None]:
#corr_df = corr_df[corr_df.contrast != 'd4']
# Figure 2D: one bar per contrast showing R, all drawn in black, saved as a
# date-stamped PNG.
fig = px.bar(corr_df, x="contrast", y='R', color='contrast',
      color_discrete_sequence = ['black']*3,
      height=400, width=350, text_auto='.2f', template='plotly_white')
fig.update_layout(showlegend=False)
fig.write_image(figuresDir/f"{today}_Figure2D.png", format ='png', scale=2)
fig

### Calculate recall, precision, balanced accuracy 

In [None]:
from sklearn.metrics import recall_score, precision_score, balanced_accuracy_score

# Published phenotypes, renamed to the shared (Name, contrast) keys and
# left-joined with the comparison table.
phenotypes = (pd.read_csv(root/nguyenConfig["publishedPhenotypesFile"])
              .rename(columns={'locus': 'Name', 'day': 'contrast'})
              .merge(compCntrl, how='left', on=['Name', 'contrast']))
#phenotypes['pheno_hit'] = (phenotypes['adjusted p value (C.I.)'] < 0.05).astype(int)
# Same definition of hit as for mbarq
# NOTE(review): this uses the natural log of 'median', whereas the published
# CI is log10-transformed elsewhere in this notebook -- confirm the intended base.
phenotypes['pheno_hit'] = (
    phenotypes['adjusted p value (C.I.)'].lt(0.05)
    & np.log(phenotypes['median']).abs().gt(0.6)
).astype(int)
# Rows without a match in compCntrl have missing hit columns and are dropped.
phenotypes = phenotypes[['Name', 'gene_x', 'contrast', 'pheno_hit', 'mbarq_hit', 'published_hit']].dropna()
phenotypes = phenotypes.assign(
    mbarq_hit=phenotypes.mbarq_hit.astype(int),
    # published_hit is stored as 0/2; bring it back to 0/1.
    published_hit=(phenotypes.published_hit / 2).astype(int),
)
#phenotypes = phenotypes[phenotypes.contrast != 'd4']


# (precision, recall, balanced accuracy) of each analysis, scored against pheno_hit.
metrics = {
    method: tuple(
        score(phenotypes.pheno_hit, phenotypes[column])
        for score in (precision_score, recall_score, balanced_accuracy_score)
    )
    for method, column in (('mBARq Analysis', 'mbarq_hit'),
                           ('Original Analysis', 'published_hit'))
}

# Long format: one row per (Method, Metric) with its Score.
metricDf = (pd.DataFrame(metrics, index=['Precision', 'Recall', 'Balanced Accuracy'])
            .T
            .reset_index()
            .rename(columns={'index': 'Method'})
            .melt(id_vars=['Method'], var_name='Metric', value_name='Score'))

In [None]:
# Class counts of the published-analysis calls.
phenotypes.published_hit.value_counts()

In [None]:
# Class counts of the phenotype-based reference calls.
phenotypes.pheno_hit.value_counts()

In [None]:
# Class counts of the mBARq calls.
phenotypes.mbarq_hit.value_counts()

In [None]:
# Among rows that are a hit in at least one analysis: share of each hit
# category within every contrast.
compCntrl[compCntrl.hit != 'No defect'].groupby('contrast').hit.value_counts(normalize=True)

In [None]:
# Figure 2E: grouped bars comparing the two analyses on each metric, saved as
# a date-stamped PNG.
fig = px.bar(metricDf, x='Metric', y='Score', 
       color='Method', barmode='group', text_auto='.2f', 
       height=400, width=400, 
      template='plotly_white', 
      color_discrete_map = {'mBARq Analysis':'black' , 'Original Analysis': colors['grey']})
fig.write_image(figuresDir/f"{today}_Figure2E.png", format ='png', scale=2 )
fig

In [None]:
# Outer merge keeps rows that appear in only one of the two result tables.
allResults = results.merge(publishedDf, on=['Name', 'contrast'], how='outer')
# Contrast 'd1' only, with the map-file annotation joined on Name.
allD1 = allResults[allResults.contrast == 'd1'].merge(annotations, how='left', on='Name')
#allD1[(allD1.mbarq_hit == True) & (allD1.LFC < 0) & (allD1.Name.str.len() < 15)]

# Wetmore et al 2015

## Load the data

In [None]:
def get_bigger_tstat(x):
    """Return the element of *x* with the largest absolute value, sign kept.

    Missing values (NaN) are ignored, so the result does not depend on where
    a NaN happens to sit in the group. Ties go to the first occurrence.

    Args:
        x: pandas Series of numeric values (used as a groupby aggregator).

    Returns:
        The original, signed value with the largest magnitude, or NaN when
        *x* is empty or holds only missing values.
    """
    values = x.values
    magnitudes = np.abs(np.asarray(values, dtype=float))
    if magnitudes.size == 0 or np.isnan(magnitudes).all():
        return np.nan
    # nanargmax skips NaN and reports the first position of the maximum.
    return values[np.nanargmax(magnitudes)]

In [None]:
# Resolve the Wetmore et al. input files relative to `root`.
wetmoreConfig = configs['wetmore']
wcountsFile = root.joinpath(wetmoreConfig['countsFile'])
wresultsFile = root.joinpath(wetmoreConfig['resultsFile'])
wsampleDataFile = root.joinpath(wetmoreConfig['sampleDataFile'])
wpublishedResultsFile = root.joinpath(wetmoreConfig['publishedResultsFile'])
wpublishedCountsFile = root.joinpath(wetmoreConfig['publishedCountsFile'])
# Only the 'filtered' variant of the map file is used.
wmapFile = root.joinpath(wetmoreConfig['mapFile']['filtered'])
wpublishedStatsFile = root.joinpath(wetmoreConfig['publishedStatsFile'])

In [None]:
# Maps the condition labels found in the input tables onto one short display
# name per carbon source. Each key appears exactly once.
contrast_map = {'D-Maltose_monohydrate': 'D-Maltose',
                'a-Ketoglutaric_acid_disodium_salt_hydrate': 'a-Ketoglutaric acid',
                'a-Ketoglutaric': 'a-Ketoglutaric acid',
                'Potassium_acetate': 'Acetate',
                'acetate': 'Acetate',
                'CAS_amino_acids': 'CAS amino acids',
                'CAS': 'CAS amino acids',
                'Tween_20': 'Tween',
                'Sodium_L-Lactate': 'L-Lactate',
                'Sodium_D,L-Lactate': 'D,L-Lactate',
                'Sodium_pyruvate': 'Pyruvate',
                'pyruvate': 'Pyruvate',
                'Putrescine_Dihydrochloride': 'Putrescine',
                'N-Acetyl-D-Glucosamine': 'NAG',
                'L-Glutamic_acid_monopotassium_salt_monohydrate': 'L-Glutamic acid',
                'L-Glutamic': 'L-Glutamic acid',
                'Sodium_Fumarate_dibasic': 'Fumarate',
                'L-Malic_acid_disodium_salt_monohydrate': 'L-Malic acid',
                'Sodium_succinate_dibasic_hexahydrate': 'Succinate',
               }

## Clean published results

In [None]:
# Sample sheet for the Wetmore data set.
wsampleData = pd.read_csv(wsampleDataFile)

In [None]:
#wsampleData

In [None]:
# Keep only conditions that occur in more than one row and write them next to
# the sample sheet, with its suffix replaced by '.replicates.csv'.
wsampleData.groupby('condition').filter(lambda x: len(x) > 1).to_csv(wsampleDataFile.with_suffix('.replicates.csv'),
                                                                    index=False)

In [None]:
# Published barcode counts, reshaped to one row per barcode and sample.
pcounts = pd.read_table(wpublishedCountsFile).drop(['scaffold', 'strand', 'pos', 'locusId', 'f'], axis=1)
pcounts = pcounts.melt(id_vars=['barcode', 'rcbarcode'], value_name='cnt', var_name='sampleID')
# Keep the samples whose ID contains 'set1'. The explicit copy makes the
# column assignment below write to an independent frame rather than to a
# filtered selection (avoids pandas' SettingWithCopyWarning).
pcounts = pcounts[pcounts.sampleID.str.contains('set1')].copy()
# The sample ID proper is the second dot-separated field of the column label.
pcounts['sampleID'] = pcounts['sampleID'].str.split('.', expand=True)[1]

In [None]:
# Published fitness values, reshaped to one row per gene and source column.
wpublishedResults = (pd.read_table(wpublishedResultsFile)
                     .drop(columns=['locusId', 'desc', 'comb'])
                     .melt(id_vars=['sysName'], var_name='contrast', value_name='LFC'))
# Column labels are split on whitespace: first token -> set, second -> contrast.
wpublishedResults['set'] = wpublishedResults['contrast'].str.split(expand=True)[0]
wpublishedResults['contrast'] = wpublishedResults['contrast'].str.split(expand=True)[1]

# Published t statistics, reshaped and labelled the same way.
wpublishedStats = (pd.read_table(wpublishedStatsFile)
                   .drop(columns=['locusId', 'desc'])
                   .melt(id_vars=['sysName'], var_name='contrast', value_name='tstat'))
wpublishedStats['set'] = wpublishedStats['contrast'].str.split(expand=True)[0]
wpublishedStats['contrast'] = wpublishedStats['contrast'].str.split(expand=True)[1]

# Combine both tables, keep rows whose set label contains 'set1', and use
# 'Name' as the gene key.
wpublishedDf = wpublishedResults.merge(wpublishedStats, on=['sysName', 'contrast', 'set'])
wpublishedDf = (wpublishedDf.loc[wpublishedDf['set'].str.contains('set1')]
                .rename(columns={'sysName': 'Name'}))
# One row per (contrast, gene): median LFC and the t statistic selected by
# get_bigger_tstat.
wpublishedDf = (wpublishedDf
                .groupby(['contrast', 'Name'])
                .agg({'LFC': ['median'], 'tstat': [get_bigger_tstat]})
                .reset_index())
# Flatten the two-level column index produced by agg.
wpublishedDf.columns = ['contrast', 'Name', 'published_LFC', 'tstat']
wpublishedDf['contrast'] = wpublishedDf['contrast'].replace(contrast_map)

In [None]:
# Contrast labels present after applying contrast_map.
wpublishedDf.contrast.unique()

In [None]:
# Gene sets of interest (nag_genes is defined here but not used in this cell).
nag_genes = ['Sama_0944', 'Sama_0945', 'Sama_0946', 'Sama_0948']
mannose_genes = ['Sama_0561', 'Sama_0562', 'Sama_0563', 'Sama_0564']
# NOTE(review): the list is called mannose_genes but the rows are filtered for
# the 'D-Mannitol' contrast -- confirm this is the intended condition.
wpublishedDf[(wpublishedDf.Name.isin(mannose_genes)) & (wpublishedDf.contrast == 'D-Mannitol')]

## Look at the count data

In [None]:
#wcnts = pd.read_csv(wcountsFile)
#wcnts_annotated = wcnts[~wcnts.old_locus_tag.isna()]
#wcnts_annotated.to_csv(wcountsFile.with_suffix(".annotated.csv"), index=False)

In [None]:
# Count table stored next to wcountsFile with the suffix '.annotated.csv'.
wcnts_annotated = pd.read_csv(wcountsFile.with_suffix(".annotated.csv"))

In [None]:
# Rename 'barcode' to 'rcbarcode' so it can serve as the join key against the
# published counts.
# NOTE(review): this implies the new barcodes correspond to the published
# 'rcbarcode' column rather than 'barcode' -- confirm.
ncounts = wcnts_annotated.rename({'barcode':'rcbarcode'}, axis=1)
# Long format: one row per barcode/locus tag and sample.
ncounts = ncounts.melt(id_vars=['rcbarcode', 'old_locus_tag'], var_name='sampleID', value_name='new_count')

In [None]:
# Only barcode/sample pairs present in both count tables are kept.
cnts = ncounts.merge(pcounts, on=['rcbarcode', 'sampleID'], how='inner')

In [None]:
# log2(count + 1) of the published ('cnt') and the new ('new_count') counts.
cnts['logCnt'] = np.log2(cnts['cnt'] +1)
cnts['logNewCnt'] = np.log2(cnts['new_count'] +1)

In [None]:
# Per-sample correlation between new and published log counts. Every second
# row of the stacked 2x2 matrices (the 'logNewCnt' row) and the last column
# ('logCnt') give the logNewCnt-vs-logCnt coefficient.
wcntCor = cnts.groupby('sampleID')[['logNewCnt', 'logCnt']].corr().iloc[0::2,-1].reset_index()
# The sample ID column is labelled 'contrast' and the matrix row label
# 'comparison', matching the column names used for wcorr_df.
wcntCor.columns = ['contrast', 'comparison', 'R']
wcntCor['R2'] = round(wcntCor['R']**2, 3)

In [None]:
wcntCor

## Look at the results

In [None]:
# Overrides the wresultsFile taken from the config with a fixed path under root.
# NOTE(review): confirm the hard-coded path is intended instead of wetmoreConfig['resultsFile'].
wresultsFile = root/'wetmore_2015/results_1/Set1_rra_results.csv'

In [None]:
wresults = pd.read_csv(wresultsFile)
# Drop rows whose Name contains ':'. The explicit copy makes the column
# assignment below write to an independent frame rather than to a filtered
# selection (avoids pandas' SettingWithCopyWarning).
wresults = wresults[~wresults.Name.str.contains(":")].copy()
# Harmonise condition labels with the published table.
wresults['contrast'] = wresults['contrast'].replace(contrast_map)

In [None]:
# Contrast labels (after contrast_map) to keep in the exported result table.
carbon_sources =['D-Glucose', 'D-Maltose', 'a-Ketoglutaric acid', 'Acetate',
       'D-Cellobiose', 'L-Lactate', 'D,L-Lactate', 'Pyruvate',
       'D-Mannitol', 'Tween', 'L-Glutamic acid', 'L-Glutamine', 'Gly-Glu',
       'Gelatin', 'CAS amino acids', 'Putrescine', 'NAG', 'Adenosine',
       'Uridine', 'Thymidine', 'Inosine', 'Cytidine', 'D-Mannose',
       'Sucrose', 'L-Serine']

In [None]:
# Rows of wresults whose contrast is one of the listed carbon sources.
final_results = wresults[wresults.contrast.isin(carbon_sources)]

In [None]:
# Export the filtered table (without the index) under a fixed path below root.
final_results.to_csv(root/'wetmore_2015/results_1/Set1_rra_results_contrasts_edited.csv', index=False)

In [None]:
# Genes/contrasts present in both the mBARq results and the published table.
wcomp = wresults.merge(wpublishedDf, on=['Name', 'contrast'], how='inner')
# mBARq hit: |LFC| > 0.6 and FDR < 0.05 in either selection direction.
wcomp['New'] = ((abs(wcomp.LFC) > 0.6) & ((wcomp.neg_selection_fdr < 0.05)| (wcomp.pos_selection_fdr < 0.05)))
# Published hit: |t| > 4, encoded as 0 or 2 so the sum below yields a code 0..3.
wcomp['Original'] = (abs(wcomp.tstat) > 4).astype(int)*2
wcomp['Hits'] = wcomp['New'].astype(int) + wcomp['Original']
# Assign the result back instead of calling replace(..., inplace=True) on the
# column accessor: the in-place form acts on an intermediate object and is not
# guaranteed to update wcomp (it does not under pandas Copy-on-Write).
wcomp['Hits'] = wcomp['Hits'].replace({0: 'No change', 1: 'New', 2: 'Original', 3: 'Confirmed'})

In [None]:
# Five random rows as a quick look at the merged table.
wcomp.sample(5)

In [None]:
def compare_CIs_db(df, contrast):
    """Scatter the mBARq LFC against the published LFC for one contrast.

    Args:
        df: Table with `contrast`, `LFC`, `published_LFC`, `Hits` and `Name`
            columns.
        contrast: Value of the `contrast` column selecting the rows to plot.

    Returns:
        The plotly figure, with points coloured by hit category.
    """
    hit_label = 'Significant change in CI'
    subset = df.loc[df['contrast'] == contrast].rename(columns={'Hits': hit_label})

    palette = {
        'No change': colors['grey'],
        'Confirmed': sushi_colors['dgreen'],
        'New': sushi_colors['dblue'],
        'Original': sushi_colors['orange'],
    }
    fig = px.scatter(
        subset,
        x='LFC',
        y='published_LFC',
        color=hit_label,
        height=800,
        width=1000,
        template='plotly_white',
        labels={'published_LFC': 'LFC (original analysis)', 'LFC': 'LFC (mBARq analysis)'},
        color_discrete_map=palette,
        hover_data=['Name'],
        category_orders={hit_label: ['No change', 'Original', 'New', 'Confirmed']},
    )

    # Large, slightly transparent markers with a thin dark outline.
    marker_style = dict(size=20, opacity=0.8, line=dict(width=1, color='DarkSlateGrey'))
    fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

    fig.update_layout(font={'size': 22})
    return fig


In [None]:
# Build the NAG comparison before saving it; without this, `fig` still refers
# to whatever figure an earlier cell left behind and that would be written
# under the NAG file name.
fig = compare_CIs_db(wcomp, 'NAG')
pio.write_image(fig, figuresDir/'26-09-22-nag_ci.png', width=1000, height=600, scale=2)

In [None]:
# LFC comparison for the 'Tween' contrast.
fig = compare_CIs_db(wcomp, 'Tween')
fig

In [None]:
# Save the Tween figure; width/height given here apply to the exported image.
pio.write_image(fig, figuresDir/'tween20_ci.png', width=1000, height=600, scale=2)

In [None]:
# All compared genes for the 'D-Glucose' contrast.
wcomp[wcomp.contrast == 'D-Glucose']

In [None]:
# Per-contrast correlation between mBARq and published LFC. Every second row
# of the stacked 2x2 matrices (the 'LFC' row) and the last column
# ('published_LFC') give the LFC-vs-published_LFC coefficient.
wcorr_df = wcomp.groupby('contrast')[['LFC', 'published_LFC']].corr().iloc[0::2,-1].reset_index()
# 'comparison' holds the matrix row label ('LFC').
wcorr_df.columns = ['contrast', 'comparison', 'R']


In [None]:
wcorr_df

In [None]:
# Stack the LFC correlations on top of the count correlations.
cor_df = pd.concat([wcorr_df, wcntCor])
# Rows coming from wcntCor carry 'logNewCnt' in 'comparison'; show them as 'Counts'.
cor_df.replace({'logNewCnt': 'Counts'}, inplace=True)

In [None]:
# Box plot of R for the two comparison types (counts and LFC), both in black.
fig = px.box(cor_df, x='comparison', y='R', width=400, height=400, color='comparison',
      color_discrete_map = {'LFC': 'black' ,'Counts': 'black'
                           },
             labels={'comparison': ''},
        category_orders = {'comparison': ['Counts', 'LFC']},
      template='plotly_white', hover_data=['contrast'])
# Fixed y range so the axis starts at 0 and leaves headroom above 1.
fig.update_yaxes(range=[0, 1.1])
fig.update_layout(showlegend=False, font=dict(size=20))

In [None]:
# Save the box plot under a fixed, dated file name.
pio.write_image(fig, figuresDir/'26-09-22_R-box.png', width=400, height=400, scale=2)

In [None]:
# Genes of interest, each listed once (the list is only used for membership
# tests via isin, so repeated entries add nothing).
gois = ["Sama_2129", "Sama_2132", "Sama_2134", "Sama_1943", "Sama_1942", "Sama_1944", "Sama_1946",
        "Sama_2131", "Sama_2133", "Sama_1941", "Sama_1948", "Sama_1947"]
#gois = [""]

In [None]:
# Genes of interest in the 'D-Glucose' contrast.
wcomp[(wcomp.Name.isin(gois)) & ((wcomp.contrast == 'D-Glucose')) ]

In [None]:
# One bar per contrast showing the LFC correlation, all drawn in teal.
fig = px.bar(wcorr_df, x="contrast", y='R', color='contrast',
      color_discrete_sequence = [colors['teal']]*3,
      height=400, width=800, text_auto='.2f', template='plotly_white')
fig.update_layout(showlegend=False)

In [None]:
# Save the bar chart under a fixed, dated file name.
pio.write_image(fig, figuresDir/'26-09-22-wetmore_ci_cors.png', width=1000, height=600, scale=2)

In [None]:
# Among rows that are a hit in at least one analysis: share of each hit
# category within every contrast.
x = wcomp[wcomp.Hits != 'No change'].groupby('contrast').Hits.value_counts(normalize=True)
# Name the values explicitly so reset_index yields a 'hit_props' column.
x.name = 'hit_props'
x = x.reset_index()

In [None]:
# Distribution of those proportions across contrasts, one box per hit category.
fig = px.box(x, x='Hits', y='hit_props', color='Hits', width=500, height=500, 
    labels = {'hit_props': 'Proportion of Hits', 'Hits':''},
      color_discrete_map = {'Confirmed': sushi_colors['dgreen'] ,'Original': sushi_colors['orange'], 
                            'New': sushi_colors['dblue']}, hover_data=['contrast'], 
      template='plotly_white')
fig.update_layout(showlegend=False, font=dict(size=20))

In [None]:
# Save the box plot under a fixed, dated file name.
pio.write_image(fig, figuresDir/'26-09-22_hit_props-box.png', width=400, height=400, scale=2)

# Testing

In [None]:
# Gene-level summary for the D-Glucose vs Time0 test run.
df = pd.read_table(root/"wetmore_2015/glucose_results/TestNew_D-Glucose_vs_Time0.gene_summary.txt")

In [None]:
# Keep the identifier, one LFC column and both FDR columns; copy so the
# column added next goes into an independent frame.
df = df[['id', 'neg|lfc', 'neg|fdr', 'pos|fdr']].copy()
df['contrast'] = 'D-Glucose'

In [None]:
# Rename to the column names used by the other result tables in this notebook.
df = df.rename({'id': 'Name', 'neg|lfc': 'LFC', 'neg|fdr': 'neg_selection_fdr', 
                'pos|fdr': 'pos_selection_fdr'}, axis=1)

In [None]:
# Drop rows whose Name contains ':'.
df = df[~df.Name.str.contains(":")]

In [None]:
df.head()

In [None]:
# Histogram of the LFC values.
df.LFC.hist(bins=100)

In [None]:
# Genes/contrasts present in both the test results and the published table.
wcomp2 = df.merge(wpublishedDf, on=['Name', 'contrast'], how='inner')
# Test-run hit: |LFC| > 0.6 and FDR < 0.05 in either selection direction.
wcomp2['New'] = ((abs(wcomp2.LFC) > 0.6) & ((wcomp2.neg_selection_fdr < 0.05)| (wcomp2.pos_selection_fdr < 0.05)))
# Published hit: |t| > 4, encoded as 0 or 2 so the sum below yields a code 0..3.
wcomp2['Original'] = (abs(wcomp2.tstat) > 4).astype(int)*2
wcomp2['Hits'] = wcomp2['New'].astype(int) + wcomp2['Original']
# Assign the result back instead of calling replace(..., inplace=True) on the
# column accessor: the in-place form acts on an intermediate object and is not
# guaranteed to update wcomp2 (it does not under pandas Copy-on-Write).
# NOTE(review): the label 'No defect' differs from the 'No change' label that
# compare_CIs_db assigns a colour and plotting order to -- confirm which is intended.
wcomp2['Hits'] = wcomp2['Hits'].replace({0: 'No defect', 1: 'New', 2: 'Original', 3: 'Confirmed'})

In [None]:
# Correlation between test-run and published LFC per contrast: the 'LFC' row
# and last column ('published_LFC') of each stacked 2x2 matrix.
wcorr_df2 = wcomp2.groupby('contrast')[['LFC', 'published_LFC']].corr().iloc[0::2,-1].reset_index()
# The middle column holds the matrix row label; the last one the coefficient.
wcorr_df2.columns = ['contrast', 'LFC', 'R']

In [None]:
wcorr_df2

In [None]:
# Scatter of test-run vs published LFC for D-Glucose.
compare_CIs_db(wcomp2, 'D-Glucose')

In [None]:
# Among rows that are a hit in at least one analysis: share of each hit
# category per contrast.
x = wcomp2[wcomp2.Hits != 'No defect'].groupby('contrast').Hits.value_counts(normalize=True)
# Name the values explicitly so reset_index yields a 'hit_props' column.
x.name = 'hit_props'
x = x.reset_index()

In [None]:
x

# Jasinska 2020

## Load data 

In [None]:
jasinskaConfig = configs['jasinska']
# NOTE(review): unlike the other config paths this one is not joined to `root`,
# and the sample table further down is read from a hard-coded path -- confirm
# whether this variable is still needed.
jsampleDataFile = jasinskaConfig['sampleDataFile']


In [None]:
# Run table, read from a fixed path below root.
jsampleData = pd.read_csv(root/'jasinska_2020/SraRunTable_Jasinska.txt')
to_keep = ["Run", "Drug_condition_and_replicate",  "Sample Name"]
jsampleData = jsampleData[to_keep]
# Split "<drug condition> r<replicate>" at " r" into its two parts.
drugs = jsampleData.Drug_condition_and_replicate.str.split(" r", expand=True)
drugs.columns = ['drug_condition', 'replicate']
# Sample names are split on '_' into four fields.
names = jsampleData['Sample Name'].str.split('_', expand=True)
names.columns = ['exp', 'well', 'passage', 'subsample']
jsampleData = pd.concat([jsampleData, drugs, names], axis=1)
# The two composite source columns are no longer needed once split.
jsampleData = jsampleData.drop(['Drug_condition_and_replicate', 'Sample Name'], axis=1)
# Passage number is the integer after the '-' in the passage field.
jsampleData['passage'] = jsampleData.passage.str.split("-", expand=True)[1].astype(int)
# presumably 6 generations per passage -- TODO confirm against the study design
jsampleData['generation'] = jsampleData['passage']*6

In [None]:
# no_drug_samples = jsampleData[jsampleData.drug_condition == 'No drug'].Run.unique()
# with open(root/"jasinska_2020/no_drug_samples.tsv", 'w') as fo:
#     for s in no_drug_samples:
#         fo.write(f"{s}_mbarq_counts.csv\n")

# tmp_samples = jsampleData[jsampleData.drug_condition == 'Low TMP'].Run.unique()
# with open(root/"jasinska_2020/low_tmp_samples.tsv", 'w') as fo:
#     for s in tmp_samples:
#         fo.write(f"{s}_mbarq_counts.csv\n")
# Running merge on each of these separately

In [None]:
# Merged count tables for the two drug conditions, resolved relative to root.
noDrugsFile = root/jasinskaConfig['noDrugsFile']
lowTMPFile = root/jasinskaConfig['lowTMPFile']

In [None]:
def process_frequency_df(fileName, sd):
    """Load a barcode count table and return per-run percentages with metadata.

    Args:
        fileName: CSV (path or file-like) with a `barcode` column and one
            count column per run.
        sd: Sample table with a `Run` column identifying each run.

    Returns:
        `sd` inner-joined on `Run` with the long-format table of `barcode`
        and `RelAb`, where `RelAb` is each barcode's percentage of its run's
        column total.
    """
    counts = pd.read_csv(fileName).set_index('barcode')
    # Normalise every run (column) to percentages of its own total.
    percentages = counts.div(counts.sum(), axis='columns').mul(100)
    long_form = percentages.reset_index().melt(
        id_vars='barcode', var_name='Run', value_name='RelAb')
    return sd.merge(long_form, on='Run', how='inner')

## Graph No Drug Condition

In [None]:
# Per-run barcode percentages for the no-drug samples, joined with the sample table.
no_drug = process_frequency_df(noDrugsFile, jsampleData)

In [None]:
# One colour sequence per replicate: six named colours in a different order,
# then the Alphabet palette, then 997 grey entries for any remaining barcodes.
color_seq1 = [colors['maroon'], colors['teal'], colors['light_yellow'], 
             colors['darko'], colors['darkteal'], colors['brighto']] + alphabetClrs + ['grey']*997
color_seq2 = [colors['light_yellow'], colors['darko'], colors['grey'], 
             colors['brighto'], colors['darkteal'], colors['teal']] + alphabetClrs + ['grey']*997
color_seq3 = [colors['brighto'], colors['darkteal'], colors['darko'], 
             colors['maroon'], colors['teal'], colors['light_yellow']] + alphabetClrs + ['grey']*997

In [None]:
def graph_replicate(df, color_seq, replicate='1', f=0.1):
    """Stacked area chart of barcode frequencies over generations for one replicate.

    Args:
        df: Table with `replicate`, `barcode`, `generation` and `RelAb` columns.
        color_seq: Colours assigned to barcodes in plotting order.
        replicate: Value of the `replicate` column to plot.
        f: Barcodes whose `RelAb` summed over all generations is not above
            this value are left out.

    Returns:
        The plotly figure (legend hidden).
    """
    selected = df.loc[df['replicate'] == replicate]
    # Average RelAb per barcode and generation.
    per_generation = (selected
                      .groupby(['barcode', 'generation'])['RelAb']
                      .mean()
                      .reset_index())
    abundant = per_generation.groupby('barcode').filter(
        lambda group: group['RelAb'].sum() > f)
    # Plot barcodes in order of decreasing total frequency.
    barcode_order = (abundant
                     .groupby('barcode')['RelAb']
                     .sum()
                     .sort_values(ascending=False)
                     .index
                     .tolist())
    fig = px.area(
        abundant,
        x='generation',
        y='RelAb',
        color='barcode',
        log_y=False,
        labels={'RelAb': 'Frequency', 'generation': 'Generation'},
        color_discrete_sequence=color_seq,
        template='plotly_white',
        height=400,
        width=500,
        category_orders={'barcode': barcode_order},
    )
    fig.update_layout(showlegend=False)
    return fig

In [None]:
# No-drug condition, replicate 1.
fig = graph_replicate(no_drug, color_seq1, '1')

In [None]:
# Set the axis titles explicitly on the figure.
fig.update_yaxes(title='Frequency')
fig.update_xaxes(title='Generation')


In [None]:
# Save replicate 1 under a fixed, dated file name.
pio.write_image(fig, figuresDir/'26-09-22-no_drug_1.png', width=1000, height=600, scale=2)

In [None]:
# No-drug condition, replicate 2.
fig = graph_replicate(no_drug, color_seq2, '2')

In [None]:
fig

In [None]:
# Save replicate 2.
pio.write_image(fig, figuresDir/'26-09-22-no_drug_2.png', width=1000, height=600, scale=2)

In [None]:
# No-drug condition, replicate 3.
fig = graph_replicate(no_drug, color_seq3, '3')

In [None]:
fig

In [None]:
# Save replicate 3.
pio.write_image(fig, figuresDir/'26-09-22-no_drug_3.png', width=1000, height=600, scale=2)

## Graph Low TMP Condition

In [None]:
# Per-run barcode percentages for the low-TMP samples, joined with the sample table.
low_tmp = process_frequency_df(lowTMPFile, jsampleData)

In [None]:
# Colour sequence for the low-TMP plots: five picks from plotly's Light24 and
# Dark24 palettes, then the Alphabet palette, then 997 grey entries.
color_seq4 = [px.colors.qualitative.Light24[0], px.colors.qualitative.Light24[5], 
              px.colors.qualitative.Dark24[19], px.colors.qualitative.Light24[11], 
              px.colors.qualitative.Light24[9]] +  alphabetClrs + ['grey']*997

In [None]:
# Replicate 1 uses a lower inclusion threshold (f=0.01) than the default.
fig = graph_replicate(low_tmp, color_seq4, '1', f=0.01)

In [None]:
fig

In [None]:
# Replicate 2, default threshold.
fig = graph_replicate(low_tmp, color_seq4, '2')

In [None]:
fig

In [None]:
# Replicate 3, default threshold.
fig = graph_replicate(low_tmp, color_seq4, '3')

In [None]:
fig