# Merge filter files
Each DBTL cycle's proteomics analysis notebook (DBTL[1-6]_A_analyze_proteomics.ipynb) generates a file containing the filtering criteria and results for each strain from that cycle. In this notebook I merge those files (together with the DBTL0 results file) into a single table so that plots covering all cycles can be generated.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Per-cycle filter tables, listed in cycle order: the position of a path in
# this list is the cycle number stamped onto its rows below.
file_names = [
    './DBTL0_results/dbtl0_df_target.csv',
    'dbtl1_figures_and_analysis/dbtl1_df_target.csv',
    'dbtl2_figures_and_analysis/dbtl2_df_target.csv',
    'dbtl3_figures_and_analysis/dbtl3_df_target.csv',
    'dbtl4_figures_and_analysis/dbtl4_df_target.csv',
    'dbtl5_figures_and_analysis/dbtl5_df_target.csv',
    'dbtl6_figures_and_analysis/dbtl6_df_target.csv',
]

# Read every table (the first CSV column becomes the index) and append a
# 'cycle' column holding the table's position in `file_names`.
dfs = [
    pd.read_csv(csv_path, index_col=0).assign(cycle=cycle_idx)
    for cycle_idx, csv_path in enumerate(file_names)
]

# Column index of each loaded table, kept for the consistency check in the
# next cell.
c = [cycle_df.columns for cycle_df in dfs]

In [4]:
# Every per-cycle table must expose exactly the same columns, in the same
# order, as the first one; otherwise the row-wise concatenation below would
# misalign data.
# Index.equals is used instead of an element-wise `==` because `==` raises a
# ValueError when the two indexes differ in length, which would hide the
# assertion message in precisely the case it is meant to report.
for ci in c:
    assert ci.equals(c[0]), 'ERROR: one column set is not the same'

In [5]:
# Display the reference column set (taken from the first file in `file_names`)
# that all other files were compared against.
c[0]

Index(['Line Name', 'rep', 'is_control', 'line_name_norep', 'number_of_grna',
       'isoprenol', 'dCas9', 'dCas9_normtocontrol', 'target_1', 'target_2',
       'target_3', 'target_4', 'target_1_value', 'target_2_value',
       'target_3_value', 'target_4_value', 'target_1_control',
       'target_2_control', 'target_3_control', 'target_4_control',
       'target_1_norm', 'target_2_norm', 'target_3_norm', 'target_4_norm',
       'has_missing_control', 'above_dcas9_thresh', 'target_1_belowthresh',
       'target_2_belowthresh', 'target_3_belowthresh', 'target_4_belowthresh',
       'all_targets_belowthresh', 'all_reps_belowthresh',
       'all_reps_above_dcas9_thresh', 'to_keep', 'to_keep_improved', 'cycle'],
      dtype='object')

In [6]:
# Stack the per-cycle tables row-wise into one frame; the per-file indexes are
# discarded in favour of a fresh 0..n-1 index.
df = pd.concat(objs=dfs, ignore_index=True)
print(f'Concat df shape: {df.shape}')

Concat df shape: (1506, 36)


Reorder the columns so that 'cycle' comes second (right after 'Line Name') and drop the unneeded 'rep' column.

In [7]:
# Move 'cycle' directly behind 'Line Name' and drop the 'rep' column.
# Columns are picked by name rather than by position so that re-running this
# cell is a no-op; with positional slicing every re-run would silently drop
# one more column from `df`.
old_cols = list(df.columns)
front_cols = ['Line Name', 'cycle']
dropped_cols = ['rep']
new_cols = front_cols + [
    col for col in old_cols if col not in front_cols + dropped_cols
]
df = df[new_cols]

# Quick visual check of the new column order.
df.head(3)

Unnamed: 0,Line Name,cycle,is_control,line_name_norep,number_of_grna,isoprenol,dCas9,dCas9_normtocontrol,target_1,target_2,...,above_dcas9_thresh,target_1_belowthresh,target_2_belowthresh,target_3_belowthresh,target_4_belowthresh,all_targets_belowthresh,all_reps_belowthresh,all_reps_above_dcas9_thresh,to_keep,to_keep_improved
0,Control-R1,0,True,Control,0,177.337,23396190.6,1.007405,,,...,True,False,False,False,False,True,True,True,,True
1,Control-R10,0,True,Control,0,176.13507,28115139.4,1.275427,,,...,True,False,False,False,False,True,True,True,,True
2,Control-R11,0,True,Control,0,192.63713,24151481.4,1.095618,,,...,True,False,False,False,False,True,True,True,,True


In [8]:
# Save the merged, reordered table next to this notebook. The row index is
# written as the first (unnamed) CSV column.
merged_csv_path = 'df_target_all.csv'
df.to_csv(merged_csv_path)