## 1 Functions and modules

### 1.1 Modules

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import numpy as np
import seaborn as sns
import scipy.stats as stats
%matplotlib inline 
import scipy
import itertools
import importlib

In [2]:
# Notebook-wide display and plotting configuration.
# Show every column when a DataFrame is rendered (the result tables here are very wide).
pd.set_option('display.max_columns', None)
# NOTE(review): this import would normally live in the import cell above with the others.
import matplotlib as mpl
mpl.rcParams['font.family'] = 'Arial'
# fonttype 42 makes matplotlib embed TrueType fonts in PDF output instead of the default Type 3.
mpl.rcParams['pdf.fonttype'] = 42

### 1.2 Functions

In [9]:
def Cas12a_array_combination(input_df, n):
    """
    Add joined gRNA-level and gene-level combination labels to an array table.

    For each row, the values in ``gRNA1..gRNA<n>`` and in ``Gene1..Gene<n>`` are
    joined with ``'_'`` twice: once in column order (position-aware label) and
    once after sorting the values (position-agnostic label).

    Parameters:
    input_df (DataFrame): Table holding the ``gRNA<i>`` and ``Gene<i>`` columns
        for ``i = 1..n``. It is not modified.
    n (int): Number of guide/gene positions per array.

    Returns:
    DataFrame: A copy of ``input_df`` with four extra columns appended, in this
        order: ``gRNA_combination``, ``gRNA_combination_unordered``,
        ``gene_combination``, ``gene_combination_unordered``.

    Raises:
    KeyError: If any of the expected ``gRNA<i>`` / ``Gene<i>`` columns is absent.
    TypeError: If a joined value is not a string (``str.join`` only accepts
        strings, so e.g. missing values are not tolerated).
    """
    def join_sorted(row):
        # Sorting first makes the label independent of position within the array.
        return '_'.join(sorted(row))

    # Work on a copy so the caller's frame is left untouched.
    temp_df = input_df.copy()

    # (prefix of the source columns, prefix of the generated label columns)
    for source_prefix, label_prefix in (('gRNA', 'gRNA'), ('Gene', 'gene')):
        source_columns = [f'{source_prefix}{i}' for i in range(1, n + 1)]
        temp_df[f'{label_prefix}_combination'] = temp_df[source_columns].agg('_'.join, axis=1)
        temp_df[f'{label_prefix}_combination_unordered'] = temp_df[source_columns].agg(join_sorted, axis=1)

    return temp_df


In [4]:
# Load the project's epistasis helpers as `EP`.
# The package path contains `03_bootstrapping`, which starts with a digit and is therefore not a
# valid identifier for a plain `import` statement, so the module is loaded by its dotted string name.
EP = importlib.import_module("UltraSeq_Projects.Cas12a_3guide_example.03_bootstrapping.main_code.epistasis_calculation")
# Re-execute the module so edits made on disk are picked up without restarting the kernel.
# As the last expression of the cell, the reloaded module object is also echoed as the cell output.
importlib.reload(EP)

<module 'UltraSeq_Projects.Cas12a_3guide_example.03_bootstrapping.main_code.epistasis_calculation' from '/oak/stanford/scg/lab_mwinslow/Haiqing/UltraSeq_Projects/Cas12a_3guide_example/03_bootstrapping/main_code/epistasis_calculation.py'>

## 2 Input and output addresses

In [28]:
# Input locations. Every input lives directly under working_dir, whose trailing
# slash is relied on when the file names are appended.
working_dir = "/labs/mwinslow/Haiqing/UltraSeq_Projects/Cas12a_3guide_example/03_bootstrapping/data/"

# Guide-array reference table.
ref_address = f"{working_dir}guide_reference-cas12a_triple_KO.csv"

# Intermediate bootstrap outputs (file names carry no extension).
intermediate_file_addres = f"{working_dir}TripleKnockout_BT_NormalMethod_CE_KTHCas12a_KTCas12a_N100_R10_gene_combination_unordered_intermediate"
KT_intermediate_file_addres = f"{working_dir}TripleKnockout_BT_NormalMethod_CE_KT_N100_R10_gene_combination_unordered_intermediate"

# Summary tables at the unordered gene-combination level.
gene_combination_unordered_address = f"{working_dir}TripleKnockout_BT_NormalMethod_CE_KTHCas12a_KTCas12a_N100_R10_gene_combination_unordered_level_summary.csv"
KT_gene_combination_unordered_address = f"{working_dir}TripleKnockout_BT_NormalMethod_CE_KT_N100_R10_gene_combination_unordered_level_summary.csv"

In [22]:
# Output locations: the three result tables are written next to the inputs in working_dir.
df4_output_address = f"{working_dir}TripleKnockoutTable_for_gene_level_tumor_metrics_ignore_position.csv"
two_way_epistasis_output = f"{working_dir}TripleKnockoutTable_for_two_way_epistasis.csv"
three_way_epistasis_output = f"{working_dir}TripleKnockoutTable_for_three_way_epistasis.csv"

## 3 Data Input and simple QC

### 3.0 Read array reference info

In [13]:
# Load the guide-array reference, expose the three guide-sequence columns under the
# gRNA1..gRNA3 names that Cas12a_array_combination expects, then append the ordered and
# unordered gRNA/gene combination labels.
ref_df = Cas12a_array_combination(
    pd.read_csv(ref_address).rename(
        columns={f'Guide{i}_sequence': f'gRNA{i}' for i in (1, 2, 3)}
    ),
    3,
)

In [16]:
# Reference columns to keep: the ordered gRNA label, the per-position guide sequence /
# guide name / gene columns, then the remaining combination labels and the array category.
ref_list = (
    ['gRNA_combination']
    + [f'gRNA{i}' for i in range(1, 4)]
    + [f'Guide{i}' for i in range(1, 4)]
    + [f'Gene{i}' for i in range(1, 4)]
    + ['gRNA_combination_unordered', 'gene_combination',
       'gene_combination_unordered', 'Array_category']
)

In [17]:
# Restrict the reference to the columns in ref_list and drop fully duplicated rows.
# ref_df is overwritten in place; re-running this cell is harmless because the kept columns remain.
ref_df = ref_df[ref_list].drop_duplicates()

### 3.1 Unordered gene-level metrics

In [18]:
# Summary table at the unordered gene-combination level.
df4 = pd.read_csv(gene_combination_unordered_address)
# Annotate each combination with its Array_category from the reference.
# validate='many_to_one' makes pandas raise MergeError if a combination maps to more than one
# category in the reference, instead of silently duplicating summary rows in the left merge.
df4 = df4.merge(
    ref_df[['gene_combination_unordered', 'Array_category']].drop_duplicates(),
    on='gene_combination_unordered',
    how='left',
    validate='many_to_one',
)

In [24]:
# Summary table at the unordered gene-combination level, read from the "KT" summary file.
df4_KT = pd.read_csv(KT_gene_combination_unordered_address)
# Annotate each combination with its Array_category from the reference.
# validate='many_to_one' makes pandas raise MergeError if a combination maps to more than one
# category in the reference, instead of silently duplicating summary rows in the left merge.
df4_KT = df4_KT.merge(
    ref_df[['gene_combination_unordered', 'Array_category']].drop_duplicates(),
    on='gene_combination_unordered',
    how='left',
    validate='many_to_one',
)

### 3.2 Intermediate data

In [None]:
# Intermediate bootstrap table used as input for the epistasis analyses below.
# NOTE(review): the path has no file extension; confirm it points to a CSV file.
IT_df = pd.read_csv(intermediate_file_addres)
# Annotate each combination with its Array_category from the reference.
# validate='many_to_one' makes pandas raise MergeError if a combination maps to more than one
# category in the reference, instead of silently duplicating rows in the left merge.
IT_df = IT_df.merge(
    ref_df[['gene_combination_unordered', 'Array_category']].drop_duplicates(),
    on='gene_combination_unordered',
    how='left',
    validate='many_to_one',
)

In [29]:
# Intermediate bootstrap table read from the "KT" intermediate file.
# NOTE(review): the path has no file extension; confirm it points to a CSV file.
KT_IT_df = pd.read_csv(KT_intermediate_file_addres)
# Annotate each combination with its Array_category from the reference.
# validate='many_to_one' makes pandas raise MergeError if a combination maps to more than one
# category in the reference, instead of silently duplicating rows in the left merge.
KT_IT_df = KT_IT_df.merge(
    ref_df[['gene_combination_unordered', 'Array_category']].drop_duplicates(),
    on='gene_combination_unordered',
    how='left',
    validate='many_to_one',
)

## 4 Data processing

### 4.1 Two-way epistasis

In [30]:
# Inputs for the two-way analysis.
trait_of_interest = 'LN_mean_relative'
query_category_list = ['Double_TSG']
# Keep single, double and triple TSG arrays, although only Double_TSG is queried.
# NOTE(review): presumably the other categories feed the expected-effect terms inside EP; confirm.
query_df = IT_df.loc[
    IT_df['Array_category'].isin(['Triple_TSG', 'Double_TSG', 'Single_TSG'])
].copy()

In [31]:
# Run the two-way epistasis analysis from the external EP module on the current query inputs.
# NOTE(review): query_df / trait_of_interest / query_category_list are re-assigned by later
# sections, so this cell must be run right after its own setup cell to get the intended result.
aggregated_two_way_results = EP.perform_twoway_epistasis_analysis(query_df,trait_of_interest,query_category_list)

In [33]:
# Preview the first rows of the two-way results (all columns are shown because of display.max_columns=None).
aggregated_two_way_results.head()

Unnamed: 0,Bootstrap_id,gene_combination_unordered,gene_a,gene_b,ko_a_in_wt,ko_b_in_wt,ko_a_in_b_ko,ko_b_in_a_ko,observed_double_ko,expected_double_ko,epistasis,ko_a_in_wt_95P,ko_a_in_wt_5P,ko_a_in_wt_fraction_greater_than_one,ko_a_in_wt_bootstrap_median,ko_a_in_wt_bootstrap_mean,ko_a_in_wt_97.5P,ko_a_in_wt_2.5P,ko_b_in_wt_95P,ko_b_in_wt_5P,ko_b_in_wt_fraction_greater_than_one,ko_b_in_wt_bootstrap_median,ko_b_in_wt_bootstrap_mean,ko_b_in_wt_97.5P,ko_b_in_wt_2.5P,ko_a_in_b_ko_95P,ko_a_in_b_ko_5P,ko_a_in_b_ko_fraction_greater_than_one,ko_a_in_b_ko_bootstrap_median,ko_a_in_b_ko_bootstrap_mean,ko_a_in_b_ko_97.5P,ko_a_in_b_ko_2.5P,ko_b_in_a_ko_95P,ko_b_in_a_ko_5P,ko_b_in_a_ko_fraction_greater_than_one,ko_b_in_a_ko_bootstrap_median,ko_b_in_a_ko_bootstrap_mean,ko_b_in_a_ko_97.5P,ko_b_in_a_ko_2.5P,observed_double_ko_95P,observed_double_ko_5P,observed_double_ko_fraction_greater_than_one,observed_double_ko_bootstrap_median,observed_double_ko_bootstrap_mean,observed_double_ko_97.5P,observed_double_ko_2.5P,expected_double_ko_95P,expected_double_ko_5P,expected_double_ko_fraction_greater_than_one,expected_double_ko_bootstrap_median,expected_double_ko_bootstrap_mean,expected_double_ko_97.5P,expected_double_ko_2.5P,epistasis_95P,epistasis_5P,epistasis_fraction_greater_than_one,epistasis_bootstrap_median,epistasis_bootstrap_mean,epistasis_97.5P,epistasis_2.5P,ko_a_in_wt_pvalue,ko_a_in_wt_pvalue_twoside,ko_b_in_wt_pvalue,ko_b_in_wt_pvalue_twoside,ko_a_in_b_ko_pvalue,ko_a_in_b_ko_pvalue_twoside,ko_b_in_a_ko_pvalue,ko_b_in_a_ko_pvalue_twoside,observed_double_ko_pvalue,observed_double_ko_pvalue_twoside,expected_double_ko_pvalue,expected_double_ko_pvalue_twoside,epistasis_pvalue,epistasis_pvalue_twoside
0,Real,Arid1a_Keap1_Safe,Arid1a,Keap1,0.311802,-0.149143,0.464486,0.003541,0.315343,0.162659,0.152684,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.046646,-0.362522,0.2,-0.153314,-0.154076,0.056323,-0.385161,0.623815,0.22794,1.0,0.467101,0.442567,0.645589,0.218303,0.276366,-0.33277,0.3,-0.116456,-0.065377,0.363242,-0.353205,0.497599,0.094888,1.0,0.268949,0.288491,0.534564,0.077769,0.527341,-0.046477,0.8,0.19889,0.199792,0.534601,-0.063345,0.35859,-0.2012,0.7,0.116805,0.0887,0.371355,-0.213412,0.0,0.0,0.2,0.4,0.0,0.0,0.3,0.6,0.0,0.0,0.2,0.4,0.3,0.6
1,Real,Arid1a_Lkb1_Safe,Arid1a,Lkb1,0.311802,0.82098,0.089595,0.598772,0.910574,1.132781,-0.222207,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,1.062114,0.657753,1.0,0.826344,0.839119,1.10464,0.628029,0.55267,-0.273532,0.7,0.136007,0.11035,0.631535,-0.296742,0.889112,0.267042,1.0,0.592556,0.595601,0.89763,0.245508,1.210423,0.747036,1.0,0.937721,0.949469,1.259564,0.744201,1.492757,0.974221,1.0,1.112413,1.192987,1.504451,0.963323,0.231359,-0.657524,0.3,-0.263448,-0.243518,0.269601,-0.673228,0.0,0.0,0.0,0.0,0.3,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.6
2,Real,Arid1a_Mga_Safe,Arid1a,Mga,0.311802,0.31645,-0.016547,-0.011899,0.299903,0.628252,-0.328349,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.461729,0.164522,1.0,0.303325,0.298799,0.473569,0.158445,0.153646,-0.077502,0.6,0.024554,0.022506,0.162638,-0.077832,0.146335,-0.229912,0.4,-0.007865,-0.032564,0.164414,-0.249806,0.408828,0.234263,1.0,0.323846,0.321304,0.418025,0.222803,0.879771,0.449117,1.0,0.586228,0.652666,0.890155,0.42748,-0.162811,-0.493568,0.0,-0.325783,-0.331362,-0.148371,-0.511623,0.0,0.0,0.0,0.0,0.4,0.8,0.4,0.8,0.0,0.0,0.0,0.0,0.0,0.0
3,Real,Arid1a_Rb1_Safe,Arid1a,Rb1,0.311802,0.505743,0.586912,0.780853,1.092655,0.817545,0.27511,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.706305,0.17149,1.0,0.404507,0.419655,0.725485,0.157495,0.819489,0.561661,1.0,0.652651,0.671069,0.84521,0.560477,1.026898,0.467434,1.0,0.698315,0.736856,1.063991,0.434535,1.359325,0.912913,1.0,1.065904,1.090724,1.414695,0.896493,1.1675,0.409612,1.0,0.728975,0.773523,1.189531,0.33726,0.554126,0.002578,0.9,0.35524,0.317201,0.584645,-0.001445,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.2
4,Real,Arid1a_Rbm10_Safe,Arid1a,Rbm10,0.311802,0.275509,0.133162,0.09687,0.408672,0.587311,-0.17864,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.457215,0.099644,1.0,0.267279,0.278446,0.469084,0.086501,0.269484,-0.119678,0.8,0.171129,0.122969,0.279507,-0.149917,0.246854,-0.148251,0.7,0.04738,0.047547,0.254003,-0.158975,0.517056,0.281069,1.0,0.412041,0.401415,0.524434,0.272998,0.962165,0.322526,1.0,0.621103,0.632314,0.971253,0.308948,-0.015951,-0.586049,0.0,-0.156961,-0.230899,-0.010438,-0.591974,0.0,0.0,0.0,0.0,0.2,0.4,0.3,0.6,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
# NOTE(review): repeats the preview of the same frame shown in the previous cell; candidate for removal.
aggregated_two_way_results.head()

Unnamed: 0,Bootstrap_id,gene_combination_unordered,gene_a,gene_b,ko_a_in_wt,ko_b_in_wt,ko_a_in_b_ko,ko_b_in_a_ko,observed_double_ko,expected_double_ko,epistasis,ko_a_in_wt_95P,ko_a_in_wt_5P,ko_a_in_wt_fraction_greater_than_one,ko_a_in_wt_bootstrap_median,ko_a_in_wt_bootstrap_mean,ko_a_in_wt_97.5P,ko_a_in_wt_2.5P,ko_b_in_wt_95P,ko_b_in_wt_5P,ko_b_in_wt_fraction_greater_than_one,ko_b_in_wt_bootstrap_median,ko_b_in_wt_bootstrap_mean,ko_b_in_wt_97.5P,ko_b_in_wt_2.5P,ko_a_in_b_ko_95P,ko_a_in_b_ko_5P,ko_a_in_b_ko_fraction_greater_than_one,ko_a_in_b_ko_bootstrap_median,ko_a_in_b_ko_bootstrap_mean,ko_a_in_b_ko_97.5P,ko_a_in_b_ko_2.5P,ko_b_in_a_ko_95P,ko_b_in_a_ko_5P,ko_b_in_a_ko_fraction_greater_than_one,ko_b_in_a_ko_bootstrap_median,ko_b_in_a_ko_bootstrap_mean,ko_b_in_a_ko_97.5P,ko_b_in_a_ko_2.5P,observed_double_ko_95P,observed_double_ko_5P,observed_double_ko_fraction_greater_than_one,observed_double_ko_bootstrap_median,observed_double_ko_bootstrap_mean,observed_double_ko_97.5P,observed_double_ko_2.5P,expected_double_ko_95P,expected_double_ko_5P,expected_double_ko_fraction_greater_than_one,expected_double_ko_bootstrap_median,expected_double_ko_bootstrap_mean,expected_double_ko_97.5P,expected_double_ko_2.5P,epistasis_95P,epistasis_5P,epistasis_fraction_greater_than_one,epistasis_bootstrap_median,epistasis_bootstrap_mean,epistasis_97.5P,epistasis_2.5P,ko_a_in_wt_pvalue,ko_a_in_wt_pvalue_twoside,ko_b_in_wt_pvalue,ko_b_in_wt_pvalue_twoside,ko_a_in_b_ko_pvalue,ko_a_in_b_ko_pvalue_twoside,ko_b_in_a_ko_pvalue,ko_b_in_a_ko_pvalue_twoside,observed_double_ko_pvalue,observed_double_ko_pvalue_twoside,expected_double_ko_pvalue,expected_double_ko_pvalue_twoside,epistasis_pvalue,epistasis_pvalue_twoside
0,Real,Arid1a_Keap1_Safe,Arid1a,Keap1,0.311802,-0.149143,0.464486,0.003541,0.315343,0.162659,0.152684,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.046646,-0.362522,0.2,-0.153314,-0.154076,0.056323,-0.385161,0.623815,0.22794,1.0,0.467101,0.442567,0.645589,0.218303,0.276366,-0.33277,0.3,-0.116456,-0.065377,0.363242,-0.353205,0.497599,0.094888,1.0,0.268949,0.288491,0.534564,0.077769,0.527341,-0.046477,0.8,0.19889,0.199792,0.534601,-0.063345,0.35859,-0.2012,0.7,0.116805,0.0887,0.371355,-0.213412,0.0,0.0,0.2,0.4,0.0,0.0,0.3,0.6,0.0,0.0,0.2,0.4,0.3,0.6
1,Real,Arid1a_Lkb1_Safe,Arid1a,Lkb1,0.311802,0.82098,0.089595,0.598772,0.910574,1.132781,-0.222207,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,1.062114,0.657753,1.0,0.826344,0.839119,1.10464,0.628029,0.55267,-0.273532,0.7,0.136007,0.11035,0.631535,-0.296742,0.889112,0.267042,1.0,0.592556,0.595601,0.89763,0.245508,1.210423,0.747036,1.0,0.937721,0.949469,1.259564,0.744201,1.492757,0.974221,1.0,1.112413,1.192987,1.504451,0.963323,0.231359,-0.657524,0.3,-0.263448,-0.243518,0.269601,-0.673228,0.0,0.0,0.0,0.0,0.3,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.6
2,Real,Arid1a_Mga_Safe,Arid1a,Mga,0.311802,0.31645,-0.016547,-0.011899,0.299903,0.628252,-0.328349,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.461729,0.164522,1.0,0.303325,0.298799,0.473569,0.158445,0.153646,-0.077502,0.6,0.024554,0.022506,0.162638,-0.077832,0.146335,-0.229912,0.4,-0.007865,-0.032564,0.164414,-0.249806,0.408828,0.234263,1.0,0.323846,0.321304,0.418025,0.222803,0.879771,0.449117,1.0,0.586228,0.652666,0.890155,0.42748,-0.162811,-0.493568,0.0,-0.325783,-0.331362,-0.148371,-0.511623,0.0,0.0,0.0,0.0,0.4,0.8,0.4,0.8,0.0,0.0,0.0,0.0,0.0,0.0
3,Real,Arid1a_Rb1_Safe,Arid1a,Rb1,0.311802,0.505743,0.586912,0.780853,1.092655,0.817545,0.27511,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.706305,0.17149,1.0,0.404507,0.419655,0.725485,0.157495,0.819489,0.561661,1.0,0.652651,0.671069,0.84521,0.560477,1.026898,0.467434,1.0,0.698315,0.736856,1.063991,0.434535,1.359325,0.912913,1.0,1.065904,1.090724,1.414695,0.896493,1.1675,0.409612,1.0,0.728975,0.773523,1.189531,0.33726,0.554126,0.002578,0.9,0.35524,0.317201,0.584645,-0.001445,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.2
4,Real,Arid1a_Rbm10_Safe,Arid1a,Rbm10,0.311802,0.275509,0.133162,0.09687,0.408672,0.587311,-0.17864,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.457215,0.099644,1.0,0.267279,0.278446,0.469084,0.086501,0.269484,-0.119678,0.8,0.171129,0.122969,0.279507,-0.149917,0.246854,-0.148251,0.7,0.04738,0.047547,0.254003,-0.158975,0.517056,0.281069,1.0,0.412041,0.401415,0.524434,0.272998,0.962165,0.322526,1.0,0.621103,0.632314,0.971253,0.308948,-0.015951,-0.586049,0.0,-0.156961,-0.230899,-0.010438,-0.591974,0.0,0.0,0.0,0.0,0.2,0.4,0.3,0.6,0.0,0.0,0.0,0.0,0.0,0.0


### 4.2 Three-way epistasis

In [35]:
# Inputs for the three-way analysis.
trait_of_interest = 'LN_mean_relative'
query_category_list = ['Triple_TSG']
# Same row subset as for the two-way analysis; only the queried category differs.
query_df = IT_df.loc[
    IT_df['Array_category'].isin(['Triple_TSG', 'Double_TSG', 'Single_TSG'])
].copy()

In [36]:
# Run the three-way epistasis analysis from the external EP module on the current query inputs.
# NOTE(review): depends on query_df / query_category_list as set by the cell directly above;
# those names are re-assigned in other sections.
aggregated_three_way_results = EP.perform_threeway_epistasis_analysis(query_df,trait_of_interest,query_category_list)

In [37]:
# Preview the first rows of the three-way results.
aggregated_three_way_results.head()

Unnamed: 0,Bootstrap_id,gene_combination_unordered,gene_a,gene_b,gene_c,ko_a_in_wt,ko_b_in_wt,ko_c_in_wt,ko_ab_in_wt,ko_ac_in_wt,ko_bc_in_wt,observed_triple_ko,expected_triple_ko_linear,expected_triple_ko,epistasis,ko_a_in_wt_95P,ko_a_in_wt_5P,ko_a_in_wt_fraction_greater_than_one,ko_a_in_wt_bootstrap_median,ko_a_in_wt_bootstrap_mean,ko_a_in_wt_97.5P,ko_a_in_wt_2.5P,ko_b_in_wt_95P,ko_b_in_wt_5P,ko_b_in_wt_fraction_greater_than_one,ko_b_in_wt_bootstrap_median,ko_b_in_wt_bootstrap_mean,ko_b_in_wt_97.5P,ko_b_in_wt_2.5P,ko_c_in_wt_95P,ko_c_in_wt_5P,ko_c_in_wt_fraction_greater_than_one,ko_c_in_wt_bootstrap_median,ko_c_in_wt_bootstrap_mean,ko_c_in_wt_97.5P,ko_c_in_wt_2.5P,ko_ab_in_wt_95P,ko_ab_in_wt_5P,ko_ab_in_wt_fraction_greater_than_one,ko_ab_in_wt_bootstrap_median,ko_ab_in_wt_bootstrap_mean,ko_ab_in_wt_97.5P,ko_ab_in_wt_2.5P,ko_ac_in_wt_95P,ko_ac_in_wt_5P,ko_ac_in_wt_fraction_greater_than_one,ko_ac_in_wt_bootstrap_median,ko_ac_in_wt_bootstrap_mean,ko_ac_in_wt_97.5P,ko_ac_in_wt_2.5P,ko_bc_in_wt_95P,ko_bc_in_wt_5P,ko_bc_in_wt_fraction_greater_than_one,ko_bc_in_wt_bootstrap_median,ko_bc_in_wt_bootstrap_mean,ko_bc_in_wt_97.5P,ko_bc_in_wt_2.5P,observed_triple_ko_95P,observed_triple_ko_5P,observed_triple_ko_fraction_greater_than_one,observed_triple_ko_bootstrap_median,observed_triple_ko_bootstrap_mean,observed_triple_ko_97.5P,observed_triple_ko_2.5P,expected_triple_ko_linear_95P,expected_triple_ko_linear_5P,expected_triple_ko_linear_fraction_greater_than_one,expected_triple_ko_linear_bootstrap_median,expected_triple_ko_linear_bootstrap_mean,expected_triple_ko_linear_97.5P,expected_triple_ko_linear_2.5P,expected_triple_ko_95P,expected_triple_ko_5P,expected_triple_ko_fraction_greater_than_one,expected_triple_ko_bootstrap_median,expected_triple_ko_bootstrap_mean,expected_triple_ko_97.5P,expected_triple_ko_2.5P,epistasis_95P,epistasis_5P,epistasis_fraction_greater_than_one,epistasis_bootstrap_median,epistasis_bootstrap_mean,epistasis_97.5P,epistasis_2.5P,ko_a_in_wt_pvalue,ko_a_i
n_wt_pvalue_twoside,ko_b_in_wt_pvalue,ko_b_in_wt_pvalue_twoside,ko_c_in_wt_pvalue,ko_c_in_wt_pvalue_twoside,ko_ab_in_wt_pvalue,ko_ab_in_wt_pvalue_twoside,ko_ac_in_wt_pvalue,ko_ac_in_wt_pvalue_twoside,ko_bc_in_wt_pvalue,ko_bc_in_wt_pvalue_twoside,observed_triple_ko_pvalue,observed_triple_ko_pvalue_twoside,expected_triple_ko_linear_pvalue,expected_triple_ko_linear_pvalue_twoside,expected_triple_ko_pvalue,expected_triple_ko_pvalue_twoside,epistasis_pvalue,epistasis_pvalue_twoside
0,Real,Arid1a_Keap1_Lkb1,Arid1a,Keap1,Lkb1,0.311802,-0.149143,0.82098,0.315343,0.910574,0.568939,0.650045,0.983639,0.811218,-0.161173,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.046646,-0.362522,0.2,-0.153314,-0.154076,0.056323,-0.385161,1.062114,0.657753,1.0,0.826344,0.839119,1.10464,0.628029,0.497599,0.094888,1.0,0.268949,0.288491,0.534564,0.077769,1.210423,0.747036,1.0,0.937721,0.949469,1.259564,0.744201,0.740723,0.425719,1.0,0.576458,0.596438,0.740904,0.405637,0.896693,0.424655,1.0,0.646401,0.659377,0.919545,0.373242,1.355359,0.683506,1.0,1.060494,1.038911,1.356447,0.638284,1.272206,0.320489,1.0,0.828053,0.795488,1.400465,0.257496,0.417482,-0.763839,0.3,-0.093531,-0.136111,0.442703,-0.840962,0.0,0.0,0.2,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.6
1,Real,Arid1a_Keap1_Mga,Arid1a,Keap1,Mga,0.311802,-0.149143,0.31645,0.315343,0.299903,-0.122556,0.244819,0.479109,0.013581,0.231238,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.046646,-0.362522,0.2,-0.153314,-0.154076,0.056323,-0.385161,0.461729,0.164522,1.0,0.303325,0.298799,0.473569,0.158445,0.497599,0.094888,1.0,0.268949,0.288491,0.534564,0.077769,0.408828,0.234263,1.0,0.323846,0.321304,0.418025,0.222803,-0.072683,-0.238692,0.0,-0.128074,-0.138347,-0.067519,-0.250289,0.402536,0.191682,1.0,0.249662,0.27468,0.420594,0.188241,0.802742,0.144555,1.0,0.525762,0.49859,0.846501,0.143505,0.228113,-0.304534,0.4,-0.031632,-0.027142,0.237774,-0.307548,0.590784,0.022331,1.0,0.259245,0.301822,0.61672,0.022002,0.0,0.0,0.2,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.8,0.0,0.0
2,Real,Arid1a_Keap1_Rb1,Arid1a,Keap1,Rb1,0.311802,-0.149143,0.505743,0.315343,1.092655,0.361036,0.879816,0.668402,1.100632,-0.220816,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.046646,-0.362522,0.2,-0.153314,-0.154076,0.056323,-0.385161,0.706305,0.17149,1.0,0.404507,0.419655,0.725485,0.157495,0.497599,0.094888,1.0,0.268949,0.288491,0.534564,0.077769,1.359325,0.912913,1.0,1.065904,1.090724,1.414695,0.896493,0.494178,0.265655,1.0,0.409018,0.390757,0.494423,0.250874,1.049695,0.643706,1.0,0.86966,0.839823,1.069102,0.627264,1.090472,0.304848,1.0,0.552192,0.619446,1.145878,0.297388,1.465188,0.770703,1.0,1.213483,1.150526,1.469016,0.726215,0.108572,-0.793781,0.2,-0.218336,-0.310703,0.152386,-0.802719,0.0,0.0,0.2,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.4
3,Real,Arid1a_Keap1_Rbm10,Arid1a,Keap1,Rbm10,0.311802,-0.149143,0.275509,0.315343,0.408672,-0.034328,0.418046,0.438168,0.251518,0.166528,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.046646,-0.362522,0.2,-0.153314,-0.154076,0.056323,-0.385161,0.457215,0.099644,1.0,0.267279,0.278446,0.469084,0.086501,0.497599,0.094888,1.0,0.268949,0.288491,0.534564,0.077769,0.517056,0.281069,1.0,0.412041,0.401415,0.524434,0.272998,0.08208,-0.189745,0.3,-0.071371,-0.057648,0.101932,-0.198774,0.603775,0.291494,1.0,0.483206,0.464774,0.626993,0.258733,0.906351,0.211889,1.0,0.437182,0.478237,0.938207,0.181149,0.496342,-0.129762,0.7,0.115762,0.15402,0.559365,-0.138294,0.599533,0.046405,1.0,0.277136,0.310754,0.610286,0.037115,0.0,0.0,0.2,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.6,0.0,0.0,0.0,0.0,0.3,0.6,0.0,0.0
4,Real,Arid1a_Keap1_Tet2,Arid1a,Keap1,Tet2,0.311802,-0.149143,0.096692,0.315343,0.089916,-0.162257,0.086217,0.259351,-0.016348,0.102565,0.590047,0.149204,1.0,0.380194,0.353868,0.605554,0.135307,0.046646,-0.362522,0.2,-0.153314,-0.154076,0.056323,-0.385161,0.210614,-0.150409,0.8,0.103386,0.075738,0.211036,-0.157417,0.497599,0.094888,1.0,0.268949,0.288491,0.534564,0.077769,0.154457,-0.048183,0.8,0.085954,0.07476,0.162423,-0.062168,-0.040649,-0.225932,0.0,-0.129115,-0.140722,-0.037806,-0.231906,0.178583,-0.016927,0.8,0.094584,0.091686,0.18411,-0.023825,0.696312,-0.080121,0.9,0.224837,0.27553,0.723879,-0.112954,0.197894,-0.34582,0.4,-0.054907,-0.053,0.235664,-0.385063,0.471896,-0.107657,0.7,0.092806,0.144686,0.485136,-0.108485,0.0,0.0,0.2,0.4,0.2,0.4,0.0,0.0,0.2,0.4,0.0,0.0,0.2,0.4,0.1,0.2,0.4,0.8,0.3,0.6


### 4.3 Two-way epistasis for KT

In [38]:
# Inputs for the two-way analysis on the KT intermediate table.
trait_of_interest = 'LN_mean_relative'
query_category_list = ['Double_TSG']
# Keep single, double and triple TSG arrays; only Double_TSG is queried.
query_df = KT_IT_df.loc[
    KT_IT_df['Array_category'].isin(['Triple_TSG', 'Double_TSG', 'Single_TSG'])
].copy()

In [39]:
# Two-way epistasis analysis on the KT query inputs prepared in the cell directly above.
# NOTE(review): relies on the shared query_df name; run only after the KT setup cell.
KT_aggregated_two_way_results = EP.perform_twoway_epistasis_analysis(query_df,trait_of_interest,query_category_list)

### 4.4 Three-way epistasis for KT

In [40]:
# Inputs for the three-way analysis on the KT intermediate table.
trait_of_interest = 'LN_mean_relative'
query_category_list = ['Triple_TSG']
# Same row subset as for the KT two-way analysis; only the queried category differs.
query_df = KT_IT_df.loc[
    KT_IT_df['Array_category'].isin(['Triple_TSG', 'Double_TSG', 'Single_TSG'])
].copy()

In [41]:
# Three-way epistasis analysis on the KT query inputs prepared in the cell directly above.
# NOTE(review): relies on the shared query_df name; run only after the KT setup cell.
KT_aggregated_three_way_results = EP.perform_threeway_epistasis_analysis(query_df,trait_of_interest,query_category_list)

## 5 Generate output table for gene_combination_unordered

In [46]:
# Gene-level fitness table built from df4 and labelled as the KTCas12a genotype.
# Filter first, then take an explicit .copy(): assigning new columns to a boolean-filtered
# frame without it triggers pandas' SettingWithCopyWarning.
query_df = df4[
    df4.Array_category.isin(['Triple_TSG', 'Double_TSG', 'Single_TSG', 'Triple_Inert'])
].copy()

# Fitness is the log2 of LN_mean_relative; apply the same transform to the point estimate,
# the bootstrap median/mean and the 97.5 / 2.5 percentile columns.
# NOTE(review): log2 yields NaN/-inf for non-positive inputs; assumes all values are > 0.
query_df['Fitness'] = np.log2(query_df['LN_mean_relative'])
for suffix in ('bootstrap_median', 'bootstrap_mean', '97.5P', '2.5P'):
    query_df[f'Fitness_{suffix}'] = np.log2(query_df[f'LN_mean_relative_{suffix}'])

# The two-sided p-value is carried over unchanged from the untransformed metric.
query_df['Fitness_pvalue_twoside'] = query_df['LN_mean_relative_pvalue_twoside']
# EP.fdr is defined in the external module; presumably a false-discovery-rate correction
# applied across the rows kept above (confirm the method there).
query_df['Fitness_pvalue_twoside_FDR'] = EP.fdr(query_df['Fitness_pvalue_twoside'])
query_df['Mouse_genotype'] = 'KTCas12a'

# subset1 refers to the same object as query_df; later cells rebind query_df rather than mutate it.
subset1 = query_df

In [47]:
# Gene-level fitness table built from df4_KT and labelled as the KT genotype.
# Filter first, then take an explicit .copy(): assigning new columns to a boolean-filtered
# frame without it triggers pandas' SettingWithCopyWarning.
query_df = df4_KT[
    df4_KT.Array_category.isin(['Triple_TSG', 'Double_TSG', 'Single_TSG', 'Triple_Inert'])
].copy()

# Fitness is the log2 of LN_mean_relative; apply the same transform to the point estimate,
# the bootstrap median/mean and the 97.5 / 2.5 percentile columns.
# NOTE(review): log2 yields NaN/-inf for non-positive inputs; assumes all values are > 0.
query_df['Fitness'] = np.log2(query_df['LN_mean_relative'])
for suffix in ('bootstrap_median', 'bootstrap_mean', '97.5P', '2.5P'):
    query_df[f'Fitness_{suffix}'] = np.log2(query_df[f'LN_mean_relative_{suffix}'])

# The two-sided p-value is carried over unchanged from the untransformed metric.
query_df['Fitness_pvalue_twoside'] = query_df['LN_mean_relative_pvalue_twoside']
# EP.fdr is defined in the external module; presumably a false-discovery-rate correction
# applied across the rows kept above (confirm the method there).
query_df['Fitness_pvalue_twoside_FDR'] = EP.fdr(query_df['Fitness_pvalue_twoside'])
query_df['Mouse_genotype'] = 'KT'

# subset2 refers to the same object as query_df.
subset2 = query_df

In [48]:
# Identifier columns followed by the Fitness statistics, in output order.
temp_list = ['gene_combination_unordered', 'Array_category', 'Mouse_genotype']
query_list = ['Fitness']
trait_list = [
    f'{metric}{suffix}'
    for metric in query_list
    for suffix in ('', '_bootstrap_median', '_bootstrap_mean', '_97.5P', '_2.5P',
                   '_pvalue_twoside', '_pvalue_twoside_FDR')
]
# Stack the two genotype tables and keep only the selected columns.
df4_output = pd.concat([subset1, subset2], ignore_index=True).loc[:, temp_list + trait_list]

In [49]:
# Write the combined gene-level fitness table to disk without the row index.
df4_output.to_csv(df4_output_address,index=False)

## 6 Output two-way epistasis

In [50]:
# Column selection for the two-way output: identifiers, then for every metric the point
# estimate followed by its bootstrap median/mean, 97.5 / 2.5 percentiles and two-sided p-value.
temp_list = ['gene_combination_unordered', 'gene_a', 'gene_b']
query_list = ['ko_a_in_wt', 'ko_b_in_wt', 'ko_a_in_b_ko', 'ko_b_in_a_ko',
              'observed_double_ko', 'expected_double_ko', 'epistasis']
trait_list = [
    f'{metric}{suffix}'
    for metric in query_list
    for suffix in ('', '_bootstrap_median', '_bootstrap_mean', '_97.5P', '_2.5P', '_pvalue_twoside')
]

In [51]:
# Select the two-way output columns; .copy() so the columns added next do not touch the results frame.
# NOTE(review): subset1 is also used for the fitness and three-way tables, so cell order matters.
subset1 =  aggregated_two_way_results[temp_list+trait_list].copy()

In [52]:
# Add an adjusted version of the two-sided epistasis p-value via the external EP.fdr helper
# (presumably a false-discovery-rate correction across the rows of this table; confirm in EP).
subset1['epistasis_pvalue_twoside_FDR'] = EP.fdr(subset1['epistasis_pvalue_twoside'])

In [53]:
# Tag every row with its genotype label so the two tables can be told apart after concatenation.
subset1['Mouse_genotype'] ='KTCas12a'

In [54]:
# Column selection for the KT two-way table (same layout as the KTCas12a selection):
# identifiers, then per metric the point estimate and its bootstrap statistics.
temp_list = ['gene_combination_unordered', 'gene_a', 'gene_b']
query_list = ['ko_a_in_wt', 'ko_b_in_wt', 'ko_a_in_b_ko', 'ko_b_in_a_ko',
              'observed_double_ko', 'expected_double_ko', 'epistasis']
trait_list = [
    f'{metric}{suffix}'
    for metric in query_list
    for suffix in ('', '_bootstrap_median', '_bootstrap_mean', '_97.5P', '_2.5P', '_pvalue_twoside')
]

In [55]:
# Select the same columns from the KT two-way results; .copy() keeps the results frame untouched.
subset2 =  KT_aggregated_two_way_results[temp_list+trait_list].copy()

In [56]:
# Adjusted epistasis p-value for the KT table, computed separately from the KTCas12a table
# (EP.fdr is external; presumably an FDR correction, confirm in EP).
subset2['epistasis_pvalue_twoside_FDR'] = EP.fdr(subset2['epistasis_pvalue_twoside'])

In [57]:
# Tag every row with its genotype label.
subset2['Mouse_genotype'] ='KT'

In [58]:
# Stack the KTCas12a and KT two-way tables into one frame with a fresh 0..n-1 index.
two_way_output = pd.concat([subset1,subset2],ignore_index=True)

In [59]:
# Write the combined two-way epistasis table to disk without the row index.
two_way_output.to_csv(two_way_epistasis_output,index=False)

## 7 Output three-way epistasis

In [60]:
# Column selection for the three-way output: identifiers, then for every metric the point
# estimate followed by its bootstrap median/mean, 97.5 / 2.5 percentiles and two-sided p-value.
temp_list = ['gene_combination_unordered', 'gene_a', 'gene_b', 'gene_c']
query_list = ['ko_a_in_wt', 'ko_b_in_wt', 'ko_c_in_wt',
              'ko_ab_in_wt', 'ko_ac_in_wt', 'ko_bc_in_wt',
              'observed_triple_ko', 'expected_triple_ko_linear', 'expected_triple_ko', 'epistasis']
trait_list = [
    f'{metric}{suffix}'
    for metric in query_list
    for suffix in ('', '_bootstrap_median', '_bootstrap_mean', '_97.5P', '_2.5P', '_pvalue_twoside')
]

In [61]:
# Select the three-way output columns; .copy() so the columns added next do not touch the results frame.
# NOTE(review): subset1 is re-used from the earlier sections, so cell order matters.
subset1 =  aggregated_three_way_results[temp_list+trait_list].copy()

In [62]:
# Add an adjusted version of the two-sided epistasis p-value via the external EP.fdr helper
# (presumably a false-discovery-rate correction across the rows of this table; confirm in EP).
subset1['epistasis_pvalue_twoside_FDR'] = EP.fdr(subset1['epistasis_pvalue_twoside'])
# Tag every row with its genotype label so the two tables can be told apart after concatenation.
subset1['Mouse_genotype'] ='KTCas12a'

In [63]:
# Column selection for the KT three-way table (same layout as the KTCas12a selection):
# identifiers, then per metric the point estimate and its bootstrap statistics.
temp_list = ['gene_combination_unordered', 'gene_a', 'gene_b', 'gene_c']
query_list = ['ko_a_in_wt', 'ko_b_in_wt', 'ko_c_in_wt',
              'ko_ab_in_wt', 'ko_ac_in_wt', 'ko_bc_in_wt',
              'observed_triple_ko', 'expected_triple_ko_linear', 'expected_triple_ko', 'epistasis']
trait_list = [
    f'{metric}{suffix}'
    for metric in query_list
    for suffix in ('', '_bootstrap_median', '_bootstrap_mean', '_97.5P', '_2.5P', '_pvalue_twoside')
]

In [64]:
# Select the same columns from the KT three-way results; .copy() keeps the results frame untouched.
subset2 =  KT_aggregated_three_way_results[temp_list+trait_list].copy()

In [65]:
# Adjusted epistasis p-value for the KT table, computed separately from the KTCas12a table
# (EP.fdr is external; presumably an FDR correction, confirm in EP).
subset2['epistasis_pvalue_twoside_FDR'] = EP.fdr(subset2['epistasis_pvalue_twoside'])
# Tag every row with its genotype label.
subset2['Mouse_genotype'] ='KT'

In [66]:
# Stack the KTCas12a and KT three-way tables into one frame with a fresh 0..n-1 index.
three_way_output = pd.concat([subset1,subset2],ignore_index=True)

In [67]:
# Write the combined three-way epistasis table to disk without the row index.
three_way_output.to_csv(three_way_epistasis_output,index=False)