# Check data preprocessing output

The preprocessing is run with the command `make data`; this notebook inspects the files it produces.

### Setup

In [10]:
import os

import anndata as ad
import numpy as np
import pandas as pd
from dotenv import load_dotenv

# Load key/value pairs from a .env file into the process environment.
# Presumably this is where INFLAMM_DEBATE_FM_DATA_ROOT (read in the next cell) is defined — confirm against the project's .env.
load_dotenv()

True

In [12]:
# Root of the project data tree, read from the environment (populated by load_dotenv() above).
DATA_DIR = os.getenv("INFLAMM_DEBATE_FM_DATA_ROOT")
if not DATA_DIR:
    # os.getenv returns None when the variable is missing; fail here with an
    # actionable message instead of the opaque TypeError from os.path.join(None, ...).
    raise RuntimeError(
        "INFLAMM_DEBATE_FM_DATA_ROOT is not set; define it in the environment or in a .env file."
    )
ANN_DATA_DIR = os.path.join(DATA_DIR, "processed/anndata_cleaned/")

# Load every .h5ad file found in ANN_DATA_DIR, keyed by its file name without the extension.
adatas = {}
for filename in sorted(os.listdir(ANN_DATA_DIR)):
    # splitext only strips the trailing extension, unlike str.replace which
    # would also remove ".h5ad" occurring in the middle of a file name.
    name, ext = os.path.splitext(filename)
    if ext != ".h5ad":
        continue
    path = os.path.join(ANN_DATA_DIR, filename)
    adatas[name] = ad.read_h5ad(path)
    print(f"Loaded {name}: {adatas[name].shape}")
adatas

Loaded human_burn: (590, 19914)
Loaded human_sepsis: (30, 19914)
Loaded human_trauma: (857, 19914)
Loaded mouse_burn: (32, 13832)
Loaded mouse_infection: (72, 10248)
Loaded mouse_sepsis: (50, 13832)
Loaded mouse_trauma: (96, 13832)


{'human_burn': AnnData object with n_obs × n_vars = 590 × 19914
     obs: 'group', 'patient_id', 'tissue', 'sex', 'age', 'time_point_hours', 'takao_inflamed', 'takao_control', 'takao_status', 'infl_acute', 'infl_subacute', 'infl_chronic'
     var: 'ensembl', 'symbol',
 'human_sepsis': AnnData object with n_obs × n_vars = 30 × 19914
     obs: 'tissue', 'group', 'takao_inflamed', 'takao_control', 'takao_status'
     var: 'ensembl', 'symbol',
 'human_trauma': AnnData object with n_obs × n_vars = 857 × 19914
     obs: 'group', 'patient_id', 'tissue', 'sex', 'age', 'time_point_hours', 'takao_inflamed', 'takao_control', 'takao_status', 'infl_acute', 'infl_subacute', 'infl_chronic'
     var: 'ensembl', 'symbol',
 'mouse_burn': AnnData object with n_obs × n_vars = 32 × 13832
     obs: 'title', 'cell_type', 'sex', 'strain', 'time_point_hours', 'group', 'patient_id', 'takao_inflamed', 'takao_control', 'takao_status', 'infl_acute', 'infl_subacute', 'infl_chronic'
     var: 'ensembl', 'symbol',
 '

### Quick check of the `obs` dataframes

In [15]:
# Per-sample annotation table (obs) of the human_burn dataset.
adatas["human_burn"].obs

Unnamed: 0,group,patient_id,tissue,sex,age,time_point_hours,takao_inflamed,takao_control,takao_status,infl_acute,infl_subacute,infl_chronic
GSM909644,control,19297865,White Blood Cells,F,30,,False,True,takao_control,False,False,False
GSM909645,control,35028656,White Blood Cells,M,35,,False,True,takao_control,False,False,False
GSM909646,control,16952213,White Blood Cells,F,30,,False,True,takao_control,False,False,False
GSM909647,control,20591195,White Blood Cells,M,19,,False,True,takao_control,False,False,False
GSM909648,control,19107727,White Blood Cells,F,18,,False,True,takao_control,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
GSM910229,inflammation,34264195,White blood cells,M,8,42.0,True,False,takao_inflamed,False,True,False
GSM910230,inflammation,34330981,White blood cells,F,2,67.3,True,False,takao_inflamed,False,True,False
GSM910231,inflammation,34350054,White blood cells,M,0,73.1,True,False,takao_inflamed,False,True,False
GSM910232,inflammation,34350054,White blood cells,M,0,1204.5,True,False,takao_inflamed,False,False,True


In [17]:
# Per-sample annotation table (obs) of the human_trauma dataset.
adatas["human_trauma"].obs

Unnamed: 0,group,patient_id,tissue,sex,age,time_point_hours,takao_inflamed,takao_control,takao_status,infl_acute,infl_subacute,infl_chronic
GSM1639900,inflammation,6221614,White Blood Cells,M,27,35.4,False,False,,False,True,False
GSM1639901,inflammation,9007070,White Blood Cells,M,29,179.4,False,False,,False,False,True
GSM1639902,inflammation,9289610,White Blood Cells,F,36,33.0,False,False,,False,True,False
GSM1639903,inflammation,9289610,White Blood Cells,F,36,105.0,False,False,,False,True,False
GSM1639904,inflammation,9289610,White Blood Cells,F,36,201.2,False,False,,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
GSM902165,inflammation,9754329,White Blood Cells,F,48,83.6,False,False,,False,True,False
GSM902166,inflammation,9973279,White Blood Cells,F,44,162.6,False,False,,False,True,False
GSM902167,inflammation,9973279,White Blood Cells,F,44,22.7,False,False,,True,False,False
GSM902168,inflammation,9973279,White Blood Cells,F,44,5.4,False,False,,True,False,False


In [None]:
# Per-sample annotation table (obs) of the human_sepsis dataset.
adatas["human_sepsis"].obs

Unnamed: 0,tissue,group,takao_inflamed,takao_control,takao_status
GSM712478,whole blood,inflammation,True,False,takao_inflamed
GSM712479,whole blood,inflammation,True,False,takao_inflamed
GSM712480,whole blood,inflammation,True,False,takao_inflamed
GSM712481,whole blood,inflammation,True,False,takao_inflamed
GSM712482,whole blood,inflammation,True,False,takao_inflamed
GSM712483,whole blood,inflammation,True,False,takao_inflamed
GSM712484,whole blood,inflammation,True,False,takao_inflamed
GSM712485,whole blood,inflammation,True,False,takao_inflamed
GSM712486,whole blood,inflammation,True,False,takao_inflamed
GSM712487,whole blood,inflammation,True,False,takao_inflamed


In [19]:
# Per-sample annotation table (obs) of the mouse_burn dataset.
adatas["mouse_burn"].obs

Unnamed: 0,title,cell_type,sex,strain,time_point_hours,group,patient_id,takao_inflamed,takao_control,takao_status,infl_acute,infl_subacute,infl_chronic
GSM178608,Burn Blood 2 hr rep 1,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,1,False,False,,True,False,False
GSM178609,Burn Blood 2 hr rep 2,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,2,False,False,,True,False,False
GSM178610,Burn Blood 2 hr rep 3,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,3,False,False,,True,False,False
GSM178611,Burn Blood 2 hr rep 4,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,4,False,False,,True,False,False
GSM178612,Burn Blood 1 day rep 1,mouse leukocytes,Male,C57BL/6J,24.0,inflammation,1,False,False,,False,True,False
GSM178613,Burn Blood 1 day rep 2,mouse leukocytes,Male,C57BL/6J,24.0,inflammation,2,False,False,,False,True,False
GSM178614,Burn Blood 1 day rep 3,mouse leukocytes,Male,C57BL/6J,24.0,inflammation,3,False,False,,False,True,False
GSM178615,Burn Blood 1 day rep 4,mouse leukocytes,Male,C57BL/6J,24.0,inflammation,4,False,False,,False,True,False
GSM178616,Burn Blood 3 day rep 1,mouse leukocytes,Male,C57BL/6J,72.0,inflammation,1,False,False,,False,False,True
GSM178617,Burn Blood 3 day rep 2,mouse leukocytes,Male,C57BL/6J,72.0,inflammation,2,False,False,,False,False,True


In [20]:
# Per-sample annotation table (obs) of the mouse_trauma dataset.
adatas["mouse_trauma"].obs

Unnamed: 0,title,cell_type,sex,strain,time_point_hours,group,patient_id,takao_inflamed,takao_control,takao_status,infl_acute,infl_subacute,infl_chronic
GSM178640,Trauma Hemorrhage Burn 2 hr rep 1,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,1,False,False,,True,False,False
GSM178641,Trauma Hemorrhage Burn 2 hr rep 2,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,2,False,False,,True,False,False
GSM178642,Trauma Hemorrhage Burn 2 hr rep 3,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,3,False,False,,True,False,False
GSM178643,Trauma Hemorrhage Burn 2 hr rep 4,mouse leukocytes,Male,C57BL/6J,2.0,inflammation,4,False,False,,True,False,False
GSM178644,Trauma Hemorrhage Burn 1 day rep 1,mouse leukocytes,Male,C57BL/6J,24.0,inflammation,1,False,False,,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
GSM178731,Trauma Hemorrhage Sham Spleen 3 day rep 4,mouse splenocytes,Male,C57BL/6J,,control,4,False,True,takao_control,False,False,False
GSM178732,Trauma Hemorrhage Sham Spleen 7 day rep 1,mouse splenocytes,Male,C57BL/6J,,control,1,False,True,takao_control,False,False,False
GSM178733,Trauma Hemorrhage Sham Spleen 7 day rep 2,mouse splenocytes,Male,C57BL/6J,,control,2,False,True,takao_control,False,False,False
GSM178734,Trauma Hemorrhage Sham Spleen 7 day rep 3,mouse splenocytes,Male,C57BL/6J,,control,3,False,True,takao_control,False,False,False


In [21]:
# Per-sample annotation table (obs) of the mouse_sepsis dataset.
adatas["mouse_sepsis"].obs

Unnamed: 0,group,strain,time_point_hours,patient_id,takao_inflamed,takao_control,takao_status,infl_acute,infl_subacute,infl_chronic
GSM491049,control,A/J,0.0,5,False,False,,False,False,False
GSM491050,inflammation,A/J,2.0,5,False,False,,True,False,False
GSM491051,inflammation,A/J,4.0,5,False,False,,True,False,False
GSM491052,inflammation,A/J,6.0,5,False,False,,True,False,False
GSM491053,inflammation,A/J,12.0,5,False,False,,False,True,False
GSM491054,control,C57BL/6J,0.0,5,False,True,takao_control,False,False,False
GSM491055,inflammation,C57BL/6J,2.0,5,False,False,,True,False,False
GSM491056,inflammation,C57BL/6J,4.0,5,True,False,takao_inflamed,True,False,False
GSM491057,inflammation,C57BL/6J,6.0,5,False,False,,True,False,False
GSM491058,inflammation,C57BL/6J,12.0,5,False,False,,False,True,False


In [22]:
# Per-sample annotation table (obs) of the mouse_infection dataset.
adatas["mouse_infection"].obs

Unnamed: 0,age,strain,infection_status_detail,tissue,group,time_point_hours,takao_inflamed,takao_control,takao_status,infl_acute,infl_subacute,infl_chronic
GSM515522,8 wk old,BALB/c,healthy,Whole Blood,control,,False,True,takao_control,False,False,False
GSM515523,8 wk old,BALB/c,healthy,Whole Blood,control,,False,True,takao_control,False,False,False
GSM515524,8 wk old,BALB/c,healthy,Whole Blood,control,,False,True,takao_control,False,False,False
GSM515525,8 wk old,BALB/c,healthy,Whole Blood,control,,False,True,takao_control,False,False,False
GSM515526,8 wk old,BALB/c,healthy,Whole Blood,control,,False,True,takao_control,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
GSM515589,8 wk old,BALB/c,staph,Whole Blood,inflammation,72.0,False,False,,False,False,True
GSM515590,8 wk old,BALB/c,staph,Whole Blood,inflammation,72.0,False,False,,False,False,True
GSM515591,8 wk old,BALB/c,healthy,Whole Blood,control,,False,True,takao_control,False,False,False
GSM515592,8 wk old,BALB/c,healthy,Whole Blood,control,,False,True,takao_control,False,False,False
