In [None]:
# Notebook: WBC cell fraction
# Description: Compares WBC cell fraction distributions between sample Status groups
#              (Kruskal-Wallis and Levene tests per cell type).

In [None]:
import warnings

import numpy as np
import pandas as pd
import scipy.stats as sts

In [None]:
# Load the "Status" column of the sample sheet (first CSV column is the index)
# and keep only the rows whose status is not one of the excluded labels.
samplesheet = pd.read_csv("../data/raw/SampleSheetALL.csv", index_col=0)[
    "Status"
].loc[
    lambda status: ~status.isin(
        ["Unknown", "Other-infection USA 2", "Other-infection USA 1"]
    )
]

In [None]:
# Show how many rows fall into each Status label that survived the filter.
samplesheet.value_counts()

In [None]:
# Read the raw cell-fraction table; the first CSV column becomes the index.
cf = pd.read_csv("../data/processed/CF/All_1175_raw_CF.csv", index_col=0)
# Capture the cell-type columns now, before "Status" is appended below.
cell_types = cf.columns

# Attach Status to each row by index alignment, then discard every row that has
# any missing value (this also removes rows without a Status entry).
cf = pd.concat([cf, samplesheet], axis="columns").dropna(how="any")
cf

In [None]:
# Number of distinct Status groups left in the merged table.
cf["Status"].nunique()

In [None]:
def _pvalue_or_one(test, groups, test_name, cell_type):
    """Run ``test(*groups)`` and return its p-value, falling back to 1.

    Args:
        test: Callable that takes the groups as positional arguments and
            returns a ``(statistic, p-value)`` pair.
        groups: One list of values per group.
        test_name: Label of the test; used only in the warning text.
        cell_type: Name of the column being tested; used only in the warning text.

    Returns:
        The p-value reported by ``test``, or 1 when ``test`` raises ``ValueError``.
    """
    try:
        _, pvalue = test(*groups)
    except ValueError as err:
        # Keep the best-effort fallback (p-value of 1) for input the test
        # rejects, but report it. Any other exception (e.g. a NameError or a
        # keyboard interrupt) now propagates instead of silently becoming 1.
        warnings.warn(
            "{} test failed for {!r} ({}); reporting p-value 1".format(
                test_name, cell_type, err
            )
        )
        return 1
    return pvalue


# For every cell type, compare its values across the Status groups with a
# Kruskal-Wallis test and a Levene test, collecting one record per cell type.
# sort=False keeps the groups in order of first appearance.
status_groups = cf.groupby("Status", sort=False)

records = []
for cell_type in cell_types:
    # One list of values per Status group.
    datasets = [values.tolist() for _, values in status_groups[cell_type]]

    records.append(
        {
            "Cell type": cell_type,
            "K-W test p-value": _pvalue_or_one(
                sts.kruskal, datasets, "Kruskal-Wallis", cell_type
            ),
            "Levene test p-value": _pvalue_or_one(
                sts.levene, datasets, "Levene", cell_type
            ),
        }
    )

# NOTE(review): rounding to 2 decimals shows any p-value below 0.005 as 0.0.
output = pd.DataFrame(records).round(2)
output

In [None]:
# Write the per-cell-type p-value table to disk.
output.to_csv("../Files/CF_stats.csv")