In [1]:
# Notebook: Cell fraction stats
# Description: Statistical comparison of cell-type fractions between healthy controls and COVID-19 (Spain) samples

In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import pandas as pd
import scipy.stats as sts

In [3]:
# Read the sample sheet (first CSV column becomes the index), keep only the
# "Sample_Group" column and collapse its values into two labels: any group
# whose name contains "Spain" becomes "COVID-19 SPAIN", the rest "Healthy".
spain_labels = pd.read_csv(
    "../data/raw/CONCATED_Spain_HB/SampleSheetSpain.csv", index_col=0
)["Sample_Group"].map(
    lambda group: "COVID-19 SPAIN" if "Spain" in group else "Healthy"
)

spain_labels

Sample_Name
GSM5163386       COVID-19 SPAIN
GSM5163387       COVID-19 SPAIN
GSM5163388       COVID-19 SPAIN
GSM5163389       COVID-19 SPAIN
GSM5163390       COVID-19 SPAIN
                      ...      
HB_Control_78           Healthy
HB_Control_79           Healthy
HB_Control_80           Healthy
HB_Control_81           Healthy
HB_Control_82           Healthy
Name: Sample_Group, Length: 490, dtype: object

In [4]:
# Load the per-sample cell fractions (first CSV column becomes the index) and
# append the group label of each sample as an extra column; pd.concat aligns
# the two objects on their index.
spain_cf = pd.concat(
    [
        pd.read_csv("../data/processed/CF/Spain_before_CFC.csv", index_col=0),
        spain_labels,
    ],
    axis="columns",
)
spain_cf

Unnamed: 0,B,NK,CD4T,CD8T,Mono,Neutro,Eosino,Sample_Group
GSM5163593,0.002257,0.023829,0.195079,0.049923,0.065042,0.663872,0.0,COVID-19 SPAIN
GSM5163624,0.000000,0.141800,0.125534,0.022799,0.052468,0.657399,0.0,COVID-19 SPAIN
HB_Control_26,0.000000,0.152049,0.158047,0.002009,0.096114,0.591781,0.0,Healthy
GSM5163732,0.025770,0.091979,0.228205,0.044410,0.106320,0.503316,0.0,COVID-19 SPAIN
HB_Control_49,0.009619,0.068933,0.074765,0.000000,0.092905,0.753778,0.0,Healthy
...,...,...,...,...,...,...,...,...
GSM5163596,0.019290,0.000000,0.200085,0.000000,0.083151,0.697474,0.0,COVID-19 SPAIN
GSM5163503,0.000000,0.000000,0.234437,0.006541,0.100711,0.658311,0.0,COVID-19 SPAIN
GSM5163456,0.000000,0.171187,0.000000,0.129440,0.083872,0.615501,0.0,COVID-19 SPAIN
GSM5163709,0.009710,0.025270,0.107237,0.057689,0.046407,0.753687,0.0,COVID-19 SPAIN


In [5]:
# Split the samples into the healthy control group and everything else.
df_control = spain_cf[spain_cf["Sample_Group"].eq("Healthy")]
df_target = spain_cf[spain_cf["Sample_Group"].ne("Healthy")]

# Every column except the group label is treated as a cell-type fraction.
cell_types = list(spain_cf.columns)
cell_types.remove("Sample_Group")

In [7]:
# Compare the control and target groups one cell type at a time.
# Rows are collected in `records` so that `output` only ever refers to the
# final summary DataFrame (it is not reused for two different kinds of object).
records = []
for cell_type in cell_types:
    control_values = df_control[cell_type]
    target_values = df_target[cell_type]

    # Kruskal-Wallis H-test on the two groups (rank-based comparison).
    # `.pvalue` is read from the result object instead of unpacking into `_`,
    # which would overwrite IPython's "last output" variable.
    kw_pval = sts.kruskal(control_values, target_values).pvalue

    # Levene test for equality of variances between the two groups.
    # NOTE: this is scipy's `levene`, not the Bartlett test.
    levene_pval = sts.levene(control_values, target_values).pvalue

    records.append(
        {
            "Cell type": cell_type,
            "K-W test p-value": kw_pval,
            "Levene test p-value": levene_pval,
        }
    )

# Rounded to 4 decimals for display; p-values below 0.00005 therefore show
# as 0.0 in the table even though they are not exactly zero.
output = pd.DataFrame(records).round(4)
output

Unnamed: 0,Cell type,K-W test p-value,Levene test p-value
0,B,0.0,0.2072
1,NK,0.0,0.0783
2,CD4T,0.0,0.035
3,CD8T,0.0,0.0001
4,Mono,0.0034,0.1992
5,Neutro,0.1841,0.0001
6,Eosino,0.9911,0.836
