In [32]:
import os
import pandas as pd
import numpy as np
import anndata
from pyInfinityFlow.InfinityFlow_Utilities import read_fcs_into_anndata
from pyInfinityFlow.fcs_io import FCSFileObject

# Run from the project's analysis directory so that the relative "output/..."
# paths used when reading and writing files resolve against it.
os.chdir(
    "/media/kyle_storage/kyle_ferchen/grimes_lab_main/analysis/"
    "2022_12_07_new_multilin_panel/")

In [3]:
# Load the InfinityFlow re-prediction FCS file into an AnnData object and
# display its summary.
tmp_fcs = read_fcs_into_anndata(
    "output/infinity_flow/infinity_flow_reprediction_on_new_panal_logicle_redo.fcs"
)
tmp_fcs

AnnData object with n_obs × n_vars = 1935037 × 149
    obs: 'cell_number'
    var: 'name', 'USE_LOGICLE', 'LOGICLE_T', 'LOGICLE_W', 'LOGICLE_M', 'LOGICLE_A', 'LOGICLE_APPLIED', 'IMPUTED'

In [4]:
# Print the complete per-channel metadata table (`var`) rendered as plain text,
# to inspect channel names and the LOGICLE / IMPUTED settings of every channel.
print(tmp_fcs.var.to_string())

                                            name  USE_LOGICLE  LOGICLE_T  LOGICLE_W  LOGICLE_M  LOGICLE_A  LOGICLE_APPLIED  IMPUTED
APC-A                                      CD371         True  3000000.0        0.0        3.0        1.0            False    False
APC-Cy7-A                                   CD48         True  3000000.0        0.0        3.0        1.0            False    False
AlexaFluor647-A                             CD55         True  3000000.0        0.0        3.0        1.0            False    False
AlexaFluor700-A                             Ly6G         True  3000000.0        0.0        3.0        1.0            False    False
BUV395-A                                 CD16_32         True  3000000.0        0.0        3.0        1.0            False    False
BUV615-A                                   ITGB7         True  3000000.0        0.0        3.0        1.0            False    False
BUV737-A                                    CD27         True  3000000.0    

In [9]:
# Per-cell R7 cluster annotations; this table still carries the old UMAP
# coordinates in its umap_x / umap_y columns.
cell_anno = pd.read_csv(
    "output/automated_py3_cellharmony_r7/"
    "r7_py3_cellharmony_assigned_infinity_flow_groups_redo.csv")

# New 2D UMAP coordinates.
even_umap_coors = pd.read_csv(
    "output/automated_py3_cellharmony_r7/"
    "evenly_sampled_r7/umap_2d_coordinates_redo.csv")

# Overwrite the old coordinates with the new ones. The assignment goes through
# a raw array, so it is positional (row i -> row i) and ignores the index:
# both tables must list the cells in the same order.
cell_anno.loc[:, ["umap_x", "umap_y"]] = (
    even_umap_coors[["umap-x", "umap-y"]].to_numpy())

cell_anno

Unnamed: 0,cell_id,cell_number,umap_x,umap_y,r7,r7_rho
0,:0,0,10.952296,-6.878245,preNeu-3-ADT,0.836751
1,:1,1,10.202045,7.717799,pre-MultiLin-1,0.903928
2,:2,2,12.240065,-3.202506,preNeu-1,0.793765
3,:3,3,6.748181,21.352978,pre-B-Il2ra,0.874901
4,:4,4,9.314702,5.906703,ETP-CC-4,0.925663
...,...,...,...,...,...,...
1935032,:1935032,1935032,-4.400203,-1.029844,ERP4-Bcl2l15,0.556924
1935033,:1935033,1935033,12.978972,-5.007983,preNeu-1,0.410792
1935034,:1935034,1935034,10.876128,5.335987,MultiLin-1,0.464509
1935035,:1935035,1935035,5.310091,-2.056845,Baso,0.742290


In [12]:
# Compute the centroid of every R7 cluster (mean umap_x / umap_y over its
# cells) and write the table to CSV, one row per cluster.
# The aggregation is passed as the string "mean" instead of np.mean: recent
# pandas versions emit a FutureWarning for the numpy callable and will stop
# translating it to the built-in groupby mean; the string keeps the current
# result without the warning.
pd.pivot_table(
    cell_anno,
    index="r7",
    values=["umap_x", "umap_y"],
    aggfunc="mean",
).to_csv(
    "output/automated_py3_cellharmony_r7/evenly_sampled_r7/plots_for_pub/"
    "r7_centroid_positions_over_evenly_sampled_umap.csv",
    header=True, index=True)

In [18]:
# Load the R7-label -> cluster-number table that was used for the earlier FCS
# export. The file has no header row: column 0 holds the R7 label and
# column 1 holds its number.
r7_num_anno = pd.read_csv(
    "output/automated_py3_cellharmony_r7/fcs/"
    "r7_to_number_cluster_mapping_redo.csv",
    header=None)

# Lookup Series: index = R7 label, value = cluster number.
r7_to_num = pd.Series(
    data=r7_num_anno.iloc[:, 1].values,
    index=r7_num_anno.iloc[:, 0].values)

r7_to_num

B-cell progenitor        1
BMCP                     2
Baso                     3
Bcl11b+_preETP_Cd3d      4
Bcl11b+_preETP_Tdrd5     5
                        ..
preNeu-3-Mac_C1qa       84
preNeu-Ebf1             85
proNeu-1                86
proNeu-1-ADT            87
proNeu-2                88
Length: 88, dtype: int64

In [33]:
# Channels to carry over from the old FCS object: everything except the old
# UMAP coordinates, which are replaced by the new embedding below.
# Note: np.setdiff1d returns the channel names sorted, so the channel order
# of inflow_X follows that sorted order.
var_to_drop = ["umap-x", "umap-y"]
inflow_X = tmp_fcs[:, np.setdiff1d(tmp_fcs.var.index.values, var_to_drop)]

# Translate R7 labels to their numeric codes. `.map` turns any label missing
# from the lookup into NaN, which is checked explicitly; the previous
# `.replace` left unmapped labels as strings and would have silently turned
# the whole expression matrix into an object array.
r7_codes = cell_anno["r7"].map(r7_to_num.to_dict())
unmapped_r7 = cell_anno.loc[r7_codes.isna(), "r7"].unique()
if len(unmapped_r7) > 0:
    raise ValueError(
        "R7 labels without a numeric code in r7_to_num: "
        f"{list(unmapped_r7)}")

# Channels to add to the FCS: new UMAP coordinates, numeric R7 cluster and a
# running cell number.
# NOTE(review): this cell_number channel is 1-based, whereas the cell_number
# column displayed for cell_anno starts at 0 - confirm this is intended.
to_add_X_df = pd.DataFrame({
    "umap_x": cell_anno["umap_x"].values,
    "umap_y": cell_anno["umap_y"].values,
    "R7": r7_codes.values,
    "cell_number": np.arange(1, 1 + cell_anno.shape[0])})

# Channel metadata of the retained channels. Work on an explicit copy so the
# edit below does not go through an AnnData view.
old_var = inflow_X.var.copy()
if "leiden" not in old_var.index:
    # Assigning through .loc with a missing label would silently append a
    # new row and leave `var` out of sync with the columns of X.
    raise KeyError("Expected a 'leiden' channel in tmp_fcs.var")
old_var.loc["leiden", "USE_LOGICLE"] = False

# The added channels reuse the metadata row of 'leiden' (with USE_LOGICLE
# switched off above) as a template, one copy per new channel.
to_add_var = pd.DataFrame(
    old_var.loc[["leiden"] * to_add_X_df.shape[1]].values,
    columns=old_var.columns.values,
    index=to_add_X_df.columns.values)

# Assemble the new object: retained channels followed by the added channels,
# with the cell-level annotations (obs) of the original object.
new_flow = anndata.AnnData(
    X=np.concatenate(
        [inflow_X.X.toarray(), to_add_X_df.values],
        axis=1),
    obs=tmp_fcs.obs,
    var=pd.concat([old_var, to_add_var]))

new_flow


  new_flow = anndata.AnnData(\


AnnData object with n_obs × n_vars = 1935037 × 151
    obs: 'cell_number'
    var: 'name', 'USE_LOGICLE', 'LOGICLE_T', 'LOGICLE_W', 'LOGICLE_M', 'LOGICLE_A', 'LOGICLE_APPLIED', 'IMPUTED'

In [34]:
from pyInfinityFlow.InfinityFlow_Utilities import write_anndata_to_fcs

# Export the assembled AnnData object to an FCS file for publication.
# add_umap is disabled; presumably because the UMAP coordinates are already
# stored as regular channels (umap_x / umap_y) - confirm against the
# pyInfinityFlow documentation.
write_anndata_to_fcs(
    new_flow,
    "output/automated_py3_cellharmony_r7/evenly_sampled_r7/fcs_for_pub/"
    "optimized_infinityflow_with_even_sampled_fcs_and_r7_labels.fcs",
    add_umap=False)

Omitting spillover matrix...
