In [2]:
import pandas as pd
pd.options.display.max_rows = 1000

knock_out_data = pd.read_excel('KO_data.xlsx')

In [3]:
knock_out_data.drop(['Unnamed: 0', 'growth_control', 'r-Prot_control', 'normalized_r-Prot_control'], axis=1, inplace=True)

In [10]:
strains_to_drop = ['287', '268', '269', '273', '274', '275', '288', '290', '295', '301', '302', '277', '278', 'medium', '2900', '283', '284']
knock_out_data = knock_out_data[~knock_out_data['strain'].isin(strains_to_drop)]

knock_out_data.to_excel('KO_data_filtered.xlsx')

In [5]:
knock_out_means = knock_out_data.groupby('cassette')[['FC_r-Prot', 'FC_normalized_r-Prot']].agg('mean')
knock_out_means['TF number'] = ['TF124', 'TF128', 'TF054', 'TF037', 'TF006', 'TF120', 'TF068', 'TF126', 'TF042', 'control', 'TF080', 'TF033']


In [6]:
knock_out_means.to_excel('final_KO.xlsx')

In [7]:
knock_out_data.rename(columns={'r-Prot': 'rProt', 'normalized_r-Prot': 'normalized_rProt'}, inplace=True)
print(knock_out_data.head())


  strain cassette  growth  rProt  normalized_rProt  FC_growth  FC_r-Prot  \
0    259     GZF1  21.190  61640       2908.919302   1.075635   1.955584   
1    259     GZF1  21.190  61640       2908.919302   1.333711   2.756708   
2    260     HSF1  20.180  59220       2934.588702   1.024365   1.878807   
3    260     HSF1  20.180  59220       2934.588702   1.270141   2.648479   
4    261     SKN7  20.054  47540       2370.599382   1.017970   1.508249   

   FC_normalized_r-Prot  
0              1.818075  
1              2.066946  
2              1.834118  
3              2.085185  
4              1.481625  


ANOVA

In [8]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Perform ANOVA on FL
model_FL = ols('rProt ~ C(cassette)', data=knock_out_data).fit()
anova_table_FL = sm.stats.anova_lm(model_FL, typ=2)
print("ANOVA Table:")
print(anova_table_FL)

# Perform post-hoc analysis for FL
tukey_FL = pairwise_tukeyhsd(endog=knock_out_data['rProt'], groups=knock_out_data['cassette'], alpha=0.05)
print("Tukey's HSD results:")
print(tukey_FL)


ANOVA Table:
                   sum_sq     df          F        PR(>F)
C(cassette)  2.053512e+10   11.0  55.421169  2.800442e-39
Residual     3.637916e+09  108.0        NaN           NaN
Tukey's HSD results:
        Multiple Comparison of Means - Tukey HSD, FWER=0.05        
 group1   group2    meandiff  p-adj     lower       upper    reject
-------------------------------------------------------------------
    AZF1     DEP1  36859.1667    0.0  29454.4281  44263.9053   True
    AZF1     EUF1     15030.0    0.0   7114.0015  22945.9985   True
    AZF1     GZF1  34586.6667    0.0  23391.7542  45781.5792   True
    AZF1     HAC1   4556.6667 0.9685  -6638.2458  15751.5792  False
    AZF1     HAP1  13324.1667    0.0   5919.4281  20728.9053   True
    AZF1     HSF1  21816.6667    0.0  10621.7542  33011.5792   True
    AZF1     KLF1   9136.6667 0.2279  -2058.2458  20331.5792  False
    AZF1     MIG1     -2525.0 0.9853  -9380.4558   4330.4558  False
    AZF1 RedStar+  -3313.3333 0.9977 -14508.

In [9]:
# Perform ANOVA on FL
model_sFL = ols('normalized_rProt ~ C(cassette)', data=knock_out_data).fit()
anova_table_sFL = sm.stats.anova_lm(model_FL, typ=2)
print("ANOVA Table:")
print(anova_table_sFL)

# Perform post-hoc analysis for FL
tukey_sFL = pairwise_tukeyhsd(endog=knock_out_data['normalized_rProt'], groups=knock_out_data['cassette'], alpha=0.05)
print("Tukey's HSD results:")
print(tukey_sFL)

ANOVA Table:
                   sum_sq     df          F        PR(>F)
C(cassette)  2.053512e+10   11.0  55.421169  2.800442e-39
Residual     3.637916e+09  108.0        NaN           NaN
Tukey's HSD results:
      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
 group1   group2   meandiff  p-adj    lower      upper    reject
----------------------------------------------------------------
    AZF1     DEP1  1934.7904    0.0  1591.4918  2278.0891   True
    AZF1     EUF1   675.7737    0.0   308.7721  1042.7754   True
    AZF1     GZF1  1458.8585    0.0   939.8398  1977.8773   True
    AZF1     HAC1   217.1022 0.9617  -301.9166    736.121  False
    AZF1     HAP1   611.7429    0.0   268.4442   955.0415   True
    AZF1     HSF1  1033.5756    0.0   514.5568  1552.5943   True
    AZF1     KLF1   434.3883 0.1966   -84.6305   953.4071  False
    AZF1     MIG1    70.5855 0.9998  -247.2473   388.4183  False
    AZF1 RedStar+    59.4287    1.0    -459.59   578.4475  False
    AZF1    