In [1]:
import os

import pandas as pd
import scanpy as sc

In [None]:
# Directory that holds the input `<dataset>.h5ad` files.
data_path = "/path/to/data/"
# Directory that receives the per-group metadata table written by this notebook.
car_embedding_path = "/path/to/car/embedding/"

In [2]:
# Base name of the .h5ad file to summarise.
# Values listed by the original author: CD4, CD8, 20240229_from_rocio_for_manuscript.
dataset = "20240229_from_rocio_for_manuscript"

# Column of `adata.obs` whose values define the groups being summarised.
group = "CAR_Variant"

# Numeric `adata.obs` columns summarised per group as mean and standard deviation.
# The spellings are column names in the data and must stay exactly as written.
cont_scores = [
    "Cytotoxicity_1",
    "Proinflamatory_2",
    "Memory_3",
    "CD4_Th1_4",
    "CD4_Th2_5",
    "S.Score",
    "G2M.Score",
]

# `adata.obs` columns summarised per group as the fraction of rows in each level.
fraction_scores = [
    "Donor",
    "Time",
    "Phase",
    "ident",
    "subset",
]

In [3]:
# Load the AnnData object for the selected dataset.
# os.path.join builds the same path as plain concatenation when `data_path`
# ends with a separator, and still yields a valid path when it does not.
adata = sc.read_h5ad(os.path.join(data_path, f"{dataset}.h5ad"))

In [4]:
# Per-group mean and standard deviation of every continuous score.
# The groupby is built once and reused for both statistics.
# `observed=False` is passed explicitly, as the category-count loop already does,
# so a categorical `group` column keeps all of its declared levels no matter
# what the installed pandas uses as its default.
scores_by_group = adata.obs[[group] + cont_scores].groupby(group, observed=False)
means = scores_by_group.mean()
stds = scores_by_group.std()

# One row per group; columns are "<score>_mean" followed by "<score>_std".
cont_features = means.merge(stds, left_index=True, right_index=True, suffixes=("_mean", "_std"))

In [5]:
# Number of `adata.obs` rows in each group; used as the denominator when the
# per-category counts are turned into fractions.
# `observed=False` matches the grouping used for those counts, so both results
# share the same group index.
group_size = adata.obs.groupby(group, observed=False).size()

In [6]:
# For each categorical annotation, build a (group x level) table of row counts.
# `size().unstack()` yields the wide table directly, with no round trip through
# `reset_index` and a pivot on the auto-generated column named 0.
# `fill_value=0` records a group/level combination that never occurs as a
# count of 0 instead of NaN.
all_cat_counts = []
for cat in fraction_scores:
    counts = (
        adata.obs.groupby([group, cat], observed=False)
        .size()
        .unstack(level=-1, fill_value=0)
    )
    all_cat_counts.append(counts)

# Put all annotations side by side, then divide each row by its group's total
# to turn counts into fractions.
# NOTE(review): levels from different annotations share one column namespace
# after the concat; identical level names in two annotations would produce
# duplicate column labels. Verify against the data.
cat_features = pd.concat(all_cat_counts, axis=1)
cat_features = cat_features.div(group_size, axis=0)

In [7]:
# Join the continuous-score statistics with the category fractions on the group
# index, then transpose so each column is one group and each row one feature.
metadata_embed = cont_features.merge(cat_features, left_index=True, right_index=True).T

# Write the table as CSV. The file name is unchanged (no extension);
# os.path.join makes the path independent of whether `car_embedding_path`
# ends with a separator.
metadata_embed.to_csv(os.path.join(car_embedding_path, f"{dataset}_metadata"))
metadata_embed

CAR_Variant,41BB-41BB-z,41BB-CD28-z,41BB-CD40-z,41BB-CTLA4-z,41BB-IL15RA-z,41BB-NA-z,CD28-41BB-z,CD28-CD28-z,CD28-CD40-z,CD28-CTLA4-z,...,CTLA4-IL15RA-z,CTLA4-NA-z,IL15RA-41BB-z,IL15RA-CD28-z,IL15RA-CD40-z,IL15RA-CTLA4-z,IL15RA-IL15RA-z,IL15RA-NA-z,NA-NA-NA,NA-NA-z
Cytotoxicity_1_mean,0.351413,0.275135,0.205165,0.353017,0.458549,0.406408,0.444376,0.381099,0.374806,0.535467,...,0.437564,0.429667,0.456037,0.365119,0.507899,0.491641,0.299433,0.336889,0.007743,0.351071
Proinflamatory_2_mean,-0.335432,-0.32229,-0.398368,-0.283526,-0.228468,-0.242455,-0.262466,-0.18606,-0.273821,-0.179826,...,-0.179043,-0.250166,-0.14836,-0.213234,-0.238949,-0.251975,-0.29692,-0.249488,-0.476395,-0.181316
Memory_3_mean,0.005263,-0.046538,-0.023605,0.067639,0.004542,0.015175,0.015634,-0.040628,-0.036659,0.001849,...,0.080883,0.011041,-0.013567,-0.022884,-0.081313,-0.019735,-0.017605,0.020297,0.363311,-0.006771
CD4_Th1_4_mean,-0.134154,-0.144201,-0.150225,-0.08003,-0.085548,-0.089921,-0.070845,-0.069957,-0.064405,-0.057777,...,-0.071947,-0.100334,-0.006351,-0.074593,-0.066721,-0.098509,-0.048795,-0.081591,-0.224516,0.006719
CD4_Th2_5_mean,0.042534,0.036099,0.164,-0.013779,0.119549,0.003511,0.012625,-0.032466,-0.018406,0.014226,...,0.039647,-0.040895,-0.002594,-0.018332,0.000982,-0.021968,0.263085,-0.002303,-0.117958,0.083393
S.Score_mean,-0.005307,-0.009867,-0.010475,-0.009161,-0.010967,-0.01961,-0.008755,-0.007573,0.007642,-0.006006,...,-0.029864,0.00313,-0.001586,-0.017963,0.031809,0.039131,-0.003802,0.004062,-0.005507,-0.002719
G2M.Score_mean,-0.062512,-0.06761,-0.06513,-0.064628,-0.06895,-0.075282,-0.088149,-0.070189,-0.055691,-0.063349,...,-0.098082,-0.051713,-0.054815,-0.083066,-0.011091,0.011953,-0.069789,-0.062552,-0.0538,-0.072651
Cytotoxicity_1_std,0.798612,0.782451,0.784722,0.822233,0.840531,0.816526,0.753324,0.810056,0.746085,0.827378,...,0.867356,0.780066,0.800626,0.811279,0.73107,0.752495,0.761785,0.837298,0.6249,0.844631
Proinflamatory_2_std,0.641819,0.696703,0.628607,0.654068,0.719539,0.752739,0.682485,0.839964,0.675576,0.750593,...,0.742425,0.697773,0.789845,0.808149,0.650651,0.82177,0.698346,0.762647,0.344704,0.858275
Memory_3_std,0.332409,0.309155,0.314673,0.32955,0.325136,0.34439,0.32549,0.296926,0.294457,0.303477,...,0.340258,0.318396,0.311836,0.315998,0.274757,0.26982,0.300266,0.335192,0.447833,0.30514
