In [1]:
import pandas as pd
import pingouin as pg 
from scipy import stats
import numpy as np
from statsmodels.stats.multicomp import pairwise_tukeyhsd

base_path = "Processed_data\Results"

A549_51 = pd.read_csv(base_path + "\A549_51.csv")
A549_61 = pd.read_csv(base_path + "\A549_61.csv")

Hct116_31 = pd.read_csv(base_path + "\Hct116_31.csv")
Hct116_34 = pd.read_csv(base_path + "\Hct116_34.csv")
Hct116_43 = pd.read_csv(base_path + "\Hct116_43.csv")

HepG2_52 = pd.read_csv(base_path + "\HepG2_52.csv") 
HepG2_61 = pd.read_csv(base_path + "\HepG2_61.csv")

MCF7_31 = pd.read_csv(base_path + "\MCF7_31.csv")
MCF7_41 = pd.read_csv(base_path + "\MCF7_41.csv")

K562_41 = pd.read_csv(base_path + "\K562_41.csv")
K562_51 = pd.read_csv(base_path + "\K562_51.csv")
K562_61 = pd.read_csv(base_path + "\K562_61.csv")


In [2]:
A549 = pd.concat([A549_51,A549_61],axis=0)["0"]
HepG2 = pd.concat([HepG2_52,HepG2_61],axis=0)["0"]
MCF7 = pd.concat([MCF7_31,MCF7_41],axis=0)["0"]
HCT116 = pd.concat([Hct116_31,Hct116_43],axis=0)["label"]
K562 = pd.concat([K562_41,K562_51,K562_61],axis=0)["0"]

In [32]:
log_tran_A549 = [np.log(float(x)+0.01) for x in list(A549)]
log_tran_HepG2 = [np.log(float(x)+0.01) for x in list(HepG2)]
log_tran_K562 = [np.log(float(x)+0.01) for x in list(K562)]
log_tran_MCF7 = [np.log(float(x)+0.01) for x in list(MCF7)] 
log_tran_Hct116 = [np.log(float(x)+0.01) for x in list(HCT116)]

In [106]:
output_KW = []
output_Flk = []
output_welch = []
output_tukey = []
for i in range(0,100):
    num_samples = 1000
    A549_BS = np.random.choice(log_tran_A549, size=num_samples, replace=True)
    HepG2_BS = np.random.choice(log_tran_HepG2, size=num_samples, replace=True)
    K562_BS = np.random.choice(log_tran_K562, size=num_samples, replace=True)
    MCF7_BS = np.random.choice(log_tran_MCF7, size=num_samples, replace=True)
    Hct116_BS = np.random.choice(log_tran_Hct116, size=num_samples, replace=True)

    H_KW, p_value_KW = stats.kruskal(A549_BS, HepG2_BS, K562_BS, MCF7_BS,Hct116_BS)
    H_Flk, p_value_Flk = stats.fligner(A549_BS, HepG2_BS, K562_BS, MCF7_BS,Hct116_BS)
    
    raw_data = pd.DataFrame({
    'Cell_Line': ['A549'] * 1000 + ['HepG2'] * 1000 + ['MCF7'] * 1000 + ["K562"] * 1000 + ["HCT116"] * 1000 ,
    'Processed_Values': list(A549_BS) + list(HepG2_BS) + list(MCF7_BS) + list(K562_BS) + list(Hct116_BS)
        })
    welch_anova_raw_result = pg.welch_anova(data=raw_data, dv='Processed_Values', between='Cell_Line')
    tukey_results = pairwise_tukeyhsd(raw_data['Processed_Values'], raw_data['Cell_Line'])
    
    output_KW.append(p_value_KW)
    output_Flk.append(p_value_Flk)
    output_welch.append(welch_anova_raw_result["p-unc"][0])
    output_tukey.append(tukey_results.pvalues.tolist())

In [110]:
np.mean(output_welch)
np.mean(output_Flk)
np.mean(output_KW)

0.00021355901651011315

In [113]:
processed_tukey = []
for i in range(0,10):
    p_value = 0
    for j in range(0,len(output_tukey)):
        p_value += output_tukey[j][i]
    processed_tukey.append(p_value/1000)

In [116]:
holder = pd.DataFrame({"group1": ["A549","A549","A549","A549","Hct116","Hct116","Hct116","HepG2","HepG2","K562"],
                     "group2": ["Hct116","HepG2","K562","MCF7","HepG2","K562","MCF7","K562","MCF7","MCF7"],
                     "p_values":processed_tukey})
holder

Unnamed: 0,group1,group2,p_values
0,A549,Hct116,0.039415
1,A549,HepG2,0.016348
2,A549,K562,0.036735
3,A549,MCF7,0.045596
4,Hct116,HepG2,0.074459
5,Hct116,K562,0.001709
6,Hct116,MCF7,0.003983
7,HepG2,K562,0.00019
8,HepG2,MCF7,0.001385
9,K562,MCF7,0.082482
