## Microarray thresholding

In [1]:
# Cut-offs passed to applyMAthresh2all in the mRNA and miRNA cells below.
logFC = 1.2   # lower bound on |logFC|
p_val = 0.05  # upper bound on the p-value

An entry is retained only if it satisfies both cut-offs defined above:

$$p < 0.05 \text{ and } |\log \mathrm{FC}| > 1.2$$

In [2]:
import os

# Remember the directory the kernel started in the first time this cell runs.
# A bare os.chdir("../") is relative to the *current* directory, so re-running
# the cell would climb one more level on every execution.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()

# Work from the parent of the start directory; the relative "data/..." paths
# used further down are resolved from there.
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [3]:
from pathlib import Path
from utils.mathresh_utils import applyMAthresh2all
from utils.generic_utils import saveNameList

In [4]:
# Locations are relative strings, so they depend on the current working directory.
de_fn = "1.Microarray.thresholding.txt"  # file name reused for both saved name lists
data_dir = "data/GEOquery/"

# Directory that the cells below glob for *_preprocessed.csv tables.
p = Path(data_dir) / "MArrayLM"

### mRNA

In [5]:
# Match every mRNA table whose name carries a five-digit GSE accession.
mRNA_files = p.glob(f"mRNA_GSE{'[0-9]'*5}_preprocessed.csv")

# Two candidate column names are supplied for the gene symbol; how the helper
# chooses between them is not visible here -- TODO confirm in utils.mathresh_utils.
mRNAs = applyMAthresh2all(
    gen=mRNA_files,
    colnames=["Gene.symbol", "Symbol"],
    p_val=p_val,
    logFC=logFC,
)

# Write the resulting names to the DEGs folder under data_dir.
DEGs_path = os.path.join(data_dir, "DEGs", de_fn)
saveNameList(
    lst=mRNAs,
    path=DEGs_path,
    remove_nan=True,
    add_num=True,
)
print(f"Save DEGs to {DEGs_path}")

Apply MicroArray thresholding to all data.
|logFC| > 1.2
p_val < 0.05
mRNA_GSE60993_preprocessed.csv : 48803 --[drop NaN]-> 48803 --[threshold]-> 32 (  0.1%)
mRNA_GSE61144_preprocessed.csv : 48687 --[drop NaN]-> 48687 --[threshold]-> 17 (  0.0%)
mRNA_GSE34198_preprocessed.csv : 48701 --[drop NaN]-> 45076 --[threshold]-> 707 (  1.5%)
mRNA_GSE62646_preprocessed.csv : 33297 --[drop NaN]-> 33297 --[threshold]-> 6 (  0.0%)
unique data : 762 -> 310 ( 40.7%)
Save DEGs to data/GEOquery/DEGs/1.Microarray.thresholding.txt


### miRNA

In [6]:
# Match every miRNA table whose name carries a five-digit GSE accession.
miRNA_files = p.glob(f"miRNA_GSE{'[0-9]'*5}_preprocessed.csv")

# The miRNA tables are keyed by a single identifier column.
miRNAs = applyMAthresh2all(
    gen=miRNA_files,
    colnames="miRNA_ID",
    p_val=p_val,
    logFC=logFC,
)

# Write the resulting names to the DEMis folder under data_dir.
DEMis_path = os.path.join(data_dir, "DEMis", de_fn)
saveNameList(
    lst=miRNAs,
    path=DEMis_path,
    remove_nan=True,
    add_num=True,
)
print(f"Save DEMis to {DEMis_path}")

Apply MicroArray thresholding to all data.
|logFC| > 1.2
p_val < 0.05
miRNA_GSE53211_preprocessed.csv : 318 --[drop NaN]-> 246 --[threshold]-> 33 ( 10.4%)
miRNA_GSE24548_preprocessed.csv : 292 --[drop NaN]-> 292 --[threshold]-> 11 (  3.8%)
miRNA_GSE61741_preprocessed.csv : 848 --[drop NaN]-> 848 --[threshold]-> 84 (  9.9%)
unique data : 128 -> 123 ( 96.1%)
Save DEMis to data/GEOquery/DEMis/1.Microarray.thresholding.txt
