In [53]:
import pandas as pd
import numpy as np
from scipy.stats import normaltest
import scipy.stats as stats

In [7]:
# Load the RNA-copy measurement table; the file is semicolon-delimited.
df = pd.read_csv(
    "../../resources/measurements/20220922-RNAcopies_12hpi_alldata.csv",
    sep=";",
)

In [8]:
# Display the table as loaded from the CSV.
df

Unnamed: 0,Genotype,Technical replicate,"RNA copies,"
0,P0,1,1815425123
1,P0,2,6809966917
2,P0,3,1451593061
3,KOa1,1,1019975811
4,KOa1,2,223996319
...,...,...,...
88,OEd10,2,2476029923
89,OEd10,3,1865177534
90,OEe10,1,2009546199
91,OEe10,2,1757186753


In [43]:
def f_passage(row):
    """Return the passage number encoded at the end of a genotype label.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a string under the key ``"Genotype"``.

    Returns
    -------
    int or None
        ``0`` for the label ``"P0"``; otherwise the integer formed by the
        label's trailing digits (e.g. ``"KOa1"`` -> 1, ``"OEd10"`` -> 10).
        ``None`` when the label does not end in a digit.
    """
    label = row["Genotype"]
    if label == "P0":
        return 0
    # Read the whole trailing number instead of only the last one or two
    # characters, so a label ending in "11" or "21" is not mistaken for
    # passage 1 and passages other than 1 and 10 are not silently dropped.
    prefix = label.rstrip("0123456789")
    digits = label[len(prefix):]
    return int(digits) if digits else None

def f_genotype(row):
    """Classify a genotype label as "P0", "OE", "KO" or "WT".

    The exact label "P0" maps to "P0". Any other label is classified by its
    first character ("O", "K" or "W"); labels starting with anything else
    yield None.
    """
    label = row["Genotype"]
    if label == "P0":
        return "P0"
    by_initial = {"O": "OE", "K": "KO", "W": "WT"}
    return by_initial.get(label[0])
    
def f_rna_copies(x):
    """Convert a decimal-comma text value to a float.

    Parameters
    ----------
    x : str, number or None
        Raw cell value. Strings use "," as the decimal separator.

    Returns
    -------
    float
        The parsed number. ``np.nan`` is returned for a missing value:
        ``None``, NaN, an empty/whitespace-only string, or a lone ",".

    Raises
    ------
    ValueError
        If a non-empty string cannot be parsed as a number.
    """
    if x is None:
        return np.nan
    if not isinstance(x, str):
        # Empty CSV cells arrive as float NaN and already-numeric cells as
        # numbers; neither has a .replace method, so pass them through.
        return float(x)
    text = x.strip()
    if text in ("", ","):
        return np.nan
    return float(text.replace(",", "."))
    
def f_replicate(row):
    """Return the replicate letter of a genotype label.

    "P0" is assigned replicate "a"; every other label yields its third
    character (index 2), e.g. "KOa1" -> "a".
    """
    label = row["Genotype"]
    return "a" if label == "P0" else label[2]
    
# Derive the annotation columns from the "Genotype" label (row-wise) and the
# numeric RNA copy count from the raw "RNA copies," text column.
derived_columns = {
    "passage": df.apply(f_passage, axis=1),
    "genotype": df.apply(f_genotype, axis=1),
    "RNA_copies": df["RNA copies,"].apply(f_rna_copies),
    "biological_replicate": df.apply(f_replicate, axis=1),
}
for column_name, column_values in derived_columns.items():
    df[column_name] = column_values

In [44]:
# Display the table to inspect its current columns.
df

Unnamed: 0,Genotype,Technical replicate,"RNA copies,",passage,genotype,RNA_copies,biological_replicate
0,P0,1,1815425123,0,P0,1.815425e+05,a
1,P0,2,6809966917,0,P0,6.809967e+05,a
2,P0,3,1451593061,0,P0,1.451593e+05,a
3,KOa1,1,1019975811,1,KO,1.019976e+05,a
4,KOa1,2,223996319,1,KO,2.239963e+04,a
...,...,...,...,...,...,...,...
88,OEd10,2,2476029923,10,OE,2.476030e+07,d
89,OEd10,3,1865177534,10,OE,1.865178e+07,d
90,OEe10,1,2009546199,10,OE,2.009546e+07,e
91,OEe10,2,1757186753,10,OE,1.757187e+07,e


In [46]:
# Mean RNA copies for each (passage, genotype, technical replicate) group.
# "biological_replicate" is not part of the index, so rows that differ only
# in biological replicate are averaged together.
group_keys = ["passage", "genotype", "Technical replicate"]
df_pivot = df.pivot_table(
    values="RNA_copies",
    index=group_keys,
    aggfunc="mean",
).reset_index()

df_pivot.head(5)

Unnamed: 0,passage,genotype,Technical replicate,RNA_copies
0,0,P0,1,181542.5
1,0,P0,2,680996.7
2,0,P0,3,145159.3
3,1,KO,1,7814090.0
4,1,KO,2,5531892.0


## Compare parental stock (P0) to passage 10

In [54]:
# Keep only the group means for passage 0 and passage 10.
df_t = df_pivot.loc[df_pivot["passage"].isin([0, 10])]
df_t

Unnamed: 0,passage,genotype,Technical replicate,RNA_copies
0,0,P0,1,181542.5
1,0,P0,2,680996.7
2,0,P0,3,145159.3
12,10,KO,1,99824680.0
13,10,KO,2,80697000.0
14,10,KO,3,66132330.0
15,10,OE,1,23217180.0
16,10,OE,2,25738080.0
17,10,OE,3,22778740.0
18,10,WT,1,12729390.0


In [51]:
# Normality test -> H0: the sample comes from a normal distribution.
# NOTE(review): every row of df_t (all genotype/passage groups) is pooled
# into a single sample here — confirm that pooling is intended.
normality_result = normaltest(df_t["RNA_copies"])
print(normality_result.pvalue)

0.1401126293926721


In [57]:
# Paired one-sided t-test: P0 vs. WT rows of df_t.
# alternative="less" tests whether the mean of (P0 - WT) is below zero.
# NOTE(review): ttest_rel pairs values by position; confirm that matching P0
# and WT rows this way (rather than an independent-samples test) is intended.
p0_copies = df_t.loc[df_t["genotype"] == "P0", "RNA_copies"]
wt_copies = df_t.loc[df_t["genotype"] == "WT", "RNA_copies"]
stats.ttest_rel(p0_copies, wt_copies, alternative="less").pvalue

0.008552188948606711

In [58]:
# Paired one-sided t-test: P0 vs. OE rows of df_t.
# alternative="less" tests whether the mean of (P0 - OE) is below zero.
# NOTE(review): ttest_rel pairs values by position; confirm that matching P0
# and OE rows this way (rather than an independent-samples test) is intended.
p0_copies = df_t.loc[df_t["genotype"] == "P0", "RNA_copies"]
oe_copies = df_t.loc[df_t["genotype"] == "OE", "RNA_copies"]
stats.ttest_rel(p0_copies, oe_copies, alternative="less").pvalue

0.0005050718896247445

In [59]:
# Paired one-sided t-test: P0 vs. KO rows of df_t.
# alternative="less" tests whether the mean of (P0 - KO) is below zero.
# NOTE(review): ttest_rel pairs values by position; confirm that matching P0
# and KO rows this way (rather than an independent-samples test) is intended.
p0_copies = df_t.loc[df_t["genotype"] == "P0", "RNA_copies"]
ko_copies = df_t.loc[df_t["genotype"] == "KO", "RNA_copies"]
stats.ttest_rel(p0_copies, ko_copies, alternative="less").pvalue

0.0069563964818532015

## Compare host genotypes at passage 10

In [66]:
# Rebind df_t to the passage-10 rows of df (the unpivoted table) and drop
# every row that contains a missing value in any column.
df_t = df.loc[df["passage"] == 10].dropna()

In [73]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Tukey HSD on RNA copies across all pairs of genotype groups in df_t,
# with alpha = 0.05.
rna_copies = df_t["RNA_copies"]
genotype_labels = df_t["genotype"]
tukey = pairwise_tukeyhsd(endog=rna_copies, groups=genotype_labels, alpha=0.05)
   

In [74]:
# Print the Tukey HSD pairwise-comparison results.
print(tukey)

           Multiple Comparison of Means - Tukey HSD, FWER=0.05           
group1 group2    meandiff    p-adj       lower          upper      reject
-------------------------------------------------------------------------
    KO     OE -60781392.6416  0.001  -94894032.4369 -26668752.8463   True
    KO     WT -74433858.3552  0.001 -108546498.1505 -40321218.5599   True
    OE     WT -13652465.7136 0.5706  -46524227.9328  19219296.5055  False
-------------------------------------------------------------------------
