In [2]:
import scanpy as sc
import numpy as np
import pandas as pd
import synapseclient
import matplotlib.pyplot as plt
import scipy.sparse as sps
from scipy.io import mmread
import anndata as ad

# Bi 2021 dataset
Source: https://www.cell.com/cancer-cell/fulltext/S1535-6108(21)00117-3

Stored at: https://singlecell.broadinstitute.org/single_cell/study/SCP1288/tumor-and-immune-reprogramming-during-immunotherapy-in-advanced-renal-cell-carcinoma#study-summary

In [3]:
syn = synapseclient.Synapse()
syn.login()

Welcome, heimann!




UPGRADE AVAILABLE

A more recent version of the Synapse Client (4.3.0) is available. Your version (2.7.2) can be upgraded by typing:
    pip install --upgrade synapseclient

Python Synapse Client version 4.3.0 release notes

https://python-docs.synapse.org/news/



## Loading data

### Clinical Data

In [4]:
#Load metadata
entity = syn.get('syn59975076')
metadata = pd.read_table(entity.path)

metadata

  exec(code_obj, self.user_global_ns, self.user_ns)



Unnamed: 0,NAME,biosample_id,donor_id,species,species__ontology_label,disease,disease__ontology_label,organ,organ__ontology_label,library_preparation_protocol,library_preparation_protocol__ontology_label,ICB_Exposed,ICB_Response,TKI_Exposed,Initial_Louvain_Cluster,Lineage,InferCNV,FinalCellType,sex
0,TYPE,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group
1,AAACCTGAGAATAGGG.p55,P55_scRNA,P55,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000916,abdomen,EFO:0009899,10X 3' v2 sequencing,ICB,ICB_PR,TKI,9,Lymphoid,NonMalignant,41BB-Hi CD8+ T cell,male
2,AAACCTGAGGCTAGGT.p55,P55_scRNA,P55,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000916,abdomen,EFO:0009899,10X 3' v2 sequencing,ICB,ICB_PR,TKI,9,Lymphoid,NonMalignant,41BB-Hi CD8+ T cell,male
3,AAACCTGCACTGTGTA.p55,P55_scRNA,P55,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000916,abdomen,EFO:0009899,10X 3' v2 sequencing,ICB,ICB_PR,TKI,9,Lymphoid,NonMalignant,41BB-Hi CD8+ T cell,male
4,AAACCTGCAGTCCTTC.p55,P55_scRNA,P55,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000916,abdomen,EFO:0009899,10X 3' v2 sequencing,ICB,ICB_PR,TKI,2,Lymphoid,NonMalignant,MitoHigh T-Helper,male
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34322,TTGGAACGTGAGGGAG.p916,P916_scRNA,P916,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000029,lymph node,EFO:0009899,10X 3' v2 sequencing,NoICB,NoICB,NoTKI,7,Lymphoid,NonMalignant,Effector T-Helper,male
34323,TTGTAGGGTATGAAAC.p916,P916_scRNA,P916,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000029,lymph node,EFO:0009899,10X 3' v2 sequencing,NoICB,NoICB,NoTKI,0,Lymphoid,NonMalignant,41BB-Lo CD8+ T cell,male
34324,TTTACTGCACACATGT.p916,P916_scRNA,P916,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000029,lymph node,EFO:0009899,10X 3' v2 sequencing,NoICB,NoICB,NoTKI,9,Lymphoid,NonMalignant,41BB-Hi CD8+ T cell,male
34325,TTTGTCAAGAGCAATT.p916,P916_scRNA,P916,NCBITaxon_9606,Homo sapiens,MONDO_0005005,clear cell renal carcinoma,UBERON_0000029,lymph node,EFO:0009899,10X 3' v2 sequencing,NoICB,NoICB,NoTKI,7,Lymphoid,NonMalignant,Effector T-Helper,male


In [13]:
metadata.columns

Index(['NAME', 'biosample_id', 'donor_id', 'species',
       'species__ontology_label', 'disease', 'disease__ontology_label',
       'organ', 'organ__ontology_label', 'library_preparation_protocol',
       'library_preparation_protocol__ontology_label', 'ICB_Exposed',
       'ICB_Response', 'TKI_Exposed', 'Initial_Louvain_Cluster', 'Lineage',
       'InferCNV', 'FinalCellType', 'sex'],
      dtype='object')

In [14]:
# Getting a closer look at the columns that seem to be of annotation we'll need
clin_annot = pd.DataFrame({
        'biosample_id': metadata["biosample_id"],
        'donor_id': metadata["donor_id"],
        'ICB_Exposed': metadata["ICB_Exposed"],
        'ICB_Response': metadata["ICB_Response"],
        'TKI_Exposed': metadata["TKI_Exposed"],
        'sex': metadata["sex"],
        'Lineage': metadata["Lineage"]
    }
)
clin_annot

Unnamed: 0,biosample_id,donor_id,ICB_Exposed,ICB_Response,TKI_Exposed,sex,Lineage
0,group,group,group,group,group,group,group
1,P55_scRNA,P55,ICB,ICB_PR,TKI,male,Lymphoid
2,P55_scRNA,P55,ICB,ICB_PR,TKI,male,Lymphoid
3,P55_scRNA,P55,ICB,ICB_PR,TKI,male,Lymphoid
4,P55_scRNA,P55,ICB,ICB_PR,TKI,male,Lymphoid
...,...,...,...,...,...,...,...
34322,P916_scRNA,P916,NoICB,NoICB,NoTKI,male,Lymphoid
34323,P916_scRNA,P916,NoICB,NoICB,NoTKI,male,Lymphoid
34324,P916_scRNA,P916,NoICB,NoICB,NoTKI,male,Lymphoid
34325,P916_scRNA,P916,NoICB,NoICB,NoTKI,male,Lymphoid


In [15]:
#Inspect more closely unique values for cell type
metadata["FinalCellType"].value_counts()

FinalCellType
41BB-Lo CD8+ T cell      5420
TP2                      4599
TP1                      3324
FOLR2-Hi TAM             1528
MitoHigh CD8+ T cell     1482
MitoHigh Myeloid         1407
Effector T-Helper        1389
GPNMB-Hi TAM             1382
41BB-Hi CD8+ T cell      1321
MitoHigh T-Helper        1316
FGFBP2- NK               1306
VSIR-Hi TAM              1070
B cell                    962
CD16- Monocyte            844
NKT                       811
T-Reg                     750
Cycling CD8+ T cell       701
LowLibSize Macrophage     672
Memory T-Helper           579
FGFBP2+ NK                493
Plasma cell               463
MitoHigh NK               446
CD16+ Monocyte            313
CD1C+ DC                  308
Misc/Undetermined         278
Endothelial               271
CXCL10-Hi TAM             226
Cycling TAM               175
MX1-Hi CD8+ T cell        132
Cycling Tumor             117
CLEC9A+ DC                111
Fibroblast                 91
Mast cell                 

In [16]:
clin_annot.describe(include='object')

Unnamed: 0,biosample_id,donor_id,ICB_Exposed,ICB_Response,TKI_Exposed,sex,Lineage
count,34327,34327,34327,34327,34327,34327,34327
unique,9,9,3,6,3,3,5
top,P90_scRNA,P90,ICB,NoICB,NoTKI,male,Lymphoid
freq,8426,8426,17672,16654,23195,17988,17298


## Expression data

The authors provide a table with normalized values after QC

In [5]:
entity = syn.get('syn59976437')
norm_df = pd.read_csv(entity.path, delimiter = "\t")

In [5]:
norm_df

Unnamed: 0,GENE,AAACCTGAGAATAGGG.p55,AAACCTGAGGCTAGGT.p55,AAACCTGCACTGTGTA.p55,AAACCTGCAGTCCTTC.p55,AAACCTGGTAAATGTG.p55,AAACCTGGTACCGAGA.p55,AAACCTGGTGTGAAAT.p55,AAACCTGTCAGATAAG.p55,AAACCTGTCCTGCTTG.p55,...,TTCTACACAATCGGTT.p916,TTCTTAGGTCCATCCT.p916,TTGACTTAGAGCTATA.p916,TTGCCGTCACAAGCCC.p916,TTGCGTCCAAGTTAAG.p916,TTGGAACGTGAGGGAG.p916,TTGTAGGGTATGAAAC.p916,TTTACTGCACACATGT.p916,TTTGTCAAGAGCAATT.p916,TTTGTCAAGCGTTTAC.p916
0,WASH7P,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,AL627309.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,AL627309.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CICP27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,AL627309.1.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32713,CU638689.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32714,CU634019.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32715,CU634019.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32716,CU638689.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


This normalization, according to the original paper was made using Seurat, with log2. Other single cell datasets in iAtlas were normalized using the scanpy.pp.log1p function, so ideally we will process the un-normalized data.

The authors provide a matrix with raw counts.

In [6]:
entity = syn.get('syn60521660')
a = mmread(entity.path)
a

<60627x39391 sparse matrix of type '<class 'numpy.int64'>'
	with 46152142 stored elements in COOrdinate format>

In [13]:
a.todense()

matrix([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]])

In [7]:
# creating Anndata file
adata = ad.AnnData(sps.coo_matrix.transpose(a).todense())
adata

  



AnnData object with n_obs × n_vars = 39391 × 60627

In [8]:
entity = syn.get('syn60521658')
obs_names = pd.read_table(entity.path, header = None, names = ["NAME"])
obs_names.shape

(39391, 1)

In [9]:
entity = syn.get('syn60521659')
var_names = pd.read_table(entity.path, header = None, index_col = 0, names = ["gene", "symbol"])
var_names.shape
var_names

Unnamed: 0_level_0,symbol
gene,Unnamed: 1_level_1
DDX11L1,DDX11L1
WASH7P,WASH7P
MIR1302-2HG,MIR1302-2HG
FAM138A,FAM138A
OR4G4P,OR4G4P
...,...
CU638689.1,CU638689.1
FP236383.3,FP236383.3
CU633904.2,CU633904.2
FP671120.5,FP671120.5


In [10]:
adata.obs = obs_names

In [11]:
adata.obs_names = obs_names.iloc[:,0]
adata.obs_names

Index(['AAACCTGAGAATAGGG.p55', 'AAACCTGAGGCTAGGT.p55', 'AAACCTGCACTGTGTA.p55',
       'AAACCTGCAGTCCTTC.p55', 'AAACCTGGTAAATGTG.p55', 'AAACCTGGTACCGAGA.p55',
       'AAACCTGGTGTGAAAT.p55', 'AAACCTGTCAGATAAG.p55', 'AAACCTGTCCTGCTTG.p55',
       'AAACCTGTCGCAAGCC.p55',
       ...
       'TTCTTAGGTCCATCCT.p916', 'TTGACTTAGAGCTATA.p916',
       'TTGCCGTCACAAGCCC.p916', 'TTGCGTCCAAGTTAAG.p916',
       'TTGGAACGTGAGGGAG.p916', 'TTGTAGGGTATGAAAC.p916',
       'TTTACTGCACACATGT.p916', 'TTTATGCGTGAAAGAG.p916',
       'TTTGTCAAGAGCAATT.p916', 'TTTGTCAAGCGTTTAC.p916'],
      dtype='object', name='NAME', length=39391)

In [12]:
adata.var = var_names
adata.var_names = var_names.iloc[:,0]
adata.var_names

Index(['DDX11L1', 'WASH7P', 'MIR1302-2HG', 'FAM138A', 'OR4G4P', 'OR4G11P',
       'OR4F5', 'AL627309.1', 'AL627309.3', 'CICP27',
       ...
       'CU633904.3', 'CU634019.2', 'CU638689.3', 'CU634019.6', 'CU634019.4',
       'CU638689.1', 'FP236383.3', 'CU633904.2', 'FP671120.5', 'AC233701.1'],
      dtype='object', name='symbol', length=60627)

In [13]:
adata

AnnData object with n_obs × n_vars = 39391 × 60627
    obs: 'NAME'
    var: 'symbol'

The number of cells in the counts matrix is higher than the one in the metadata and normalized files. The assumption is that the matadata file has the cells after QC. In the next section we will explore the available clinical data and then add this to the anndata object.

# QC

The authors didn't share QC metrics in the data. In the manuscript, they share the following filtering steps:

*To exclude data from droplets containing more than one cell, doublet detection and removal were performed on gene-barcode matrices using Scrublet (Wolock et al., 2019). An expected doublet rate parameter of 0.06 was used, and doublet score thresholds were chosen manually to divide putative singlet and neotypic doublet modes in the score distribution. Predicted doublets were then removed from gene-barcode matrices. Prior to across-sample integration, cells with fewer than 200 genes detected or more than 25% of counts attributed to mitochondrially-encoded transcripts were removed. Genes detected in fewer than three cells across all samples were also excluded.
The resulting merged dataset included 34,326 cells and 32,718 detected genes across the eight samples of the cohort.*

We can easily compute the mitochondrial fraction and number of counts per cell. However, the manual steps are not esily reproducible.
The number of cells and genes in norm_df are the same as mentioned in the manuscript as being the ones selected after QC. We will use them to filter cells and genes in the anndata object and assume we have the correct output from QC.

In [30]:
qcdata = adata[:, np.isin(adata.var["symbol"], norm_df["GENE"])]
qcdata = qcdata[np.isin(adata.obs["NAME"], metadata["NAME"])]
qcdata

View of AnnData object with n_obs × n_vars = 34326 × 32636
    obs: 'NAME'
    var: 'symbol'

### Counts

In [31]:
#saving counts as a layer
qcdata.layers['counts'] = qcdata.X
qcdata.to_df(layer = "counts")

symbol,WASH7P,AL627309.1,AL627309.3,CICP27,AL627309.1.1,AL627309.5,AP006222.2,RP4-669L17.8,AL669831.3,MTND1P23,...,LINC01670,FP236241.1,CU633906.1,CU633967.1,CU634019.1,CU638689.5,CU634019.2,CU634019.6,CU638689.1,FP671120.5
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACCTGAGAATAGGG.p55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCTGAGGCTAGGT.p55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCTGCACTGTGTA.p55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCTGCAGTCCTTC.p55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAACCTGGTAAATGTG.p55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGGAACGTGAGGGAG.p916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TTGTAGGGTATGAAAC.p916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TTTACTGCACACATGT.p916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TTTGTCAAGAGCAATT.p916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Formatting to run pseudobulk

In [32]:
# Normalize and log-transform
sc.pp.normalize_total(qcdata, target_sum=1e4)
sc.pp.log1p(qcdata)
qcdata.layers['normalized'] = qcdata.X


In [33]:
qcdata.obs.reset_index(drop = True, inplace = True)

In [34]:
# merging the metadata
qcdata.obs = pd.merge(qcdata.obs, metadata.iloc[1:].drop('Initial_Louvain_Cluster', axis=1), on = "NAME")

AnnData expects .obs.index to contain strings, but got values like:
    [0, 1, 2, 3, 4]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)



In [35]:
qcdata

AnnData object with n_obs × n_vars = 34326 × 32636
    obs: 'NAME', 'biosample_id', 'donor_id', 'species', 'species__ontology_label', 'disease', 'disease__ontology_label', 'organ', 'organ__ontology_label', 'library_preparation_protocol', 'library_preparation_protocol__ontology_label', 'ICB_Exposed', 'ICB_Response', 'TKI_Exposed', 'Lineage', 'InferCNV', 'FinalCellType', 'sex'
    var: 'symbol'
    uns: 'log1p'
    layers: 'counts', 'normalized'

In [36]:
#This dataset has different names for cells than what we will use in iAtlas, so let's update them
def func(a):
    if a in ["41BB-Lo CD8+ T cell", "MX1-Hi CD8+ T cell", "Memory T-Helper", "Cycling CD8+ T cell", "T-Reg", "MitoHigh T-Helper", "41BB-Hi CD8+ T cell", "Effector T-Helper", "MitoHigh CD8+ T cell"]:
        return "T cell"
    elif a in ["LowLibSize Macrophage", "VSIR-Hi TAM", "Cycling TAM", "CXCL10-Hi TAM", "GPNMB-Hi TAM", "FOLR2-Hi TAM"]:
        return "macrophage"
    elif a in "MitoHigh Myeloid":
        return "myeloid cell"
    elif a in "B cell":
        return "B cell"
    elif a in "Mast cell":
        return "mast cell"
    elif a in "Endothelial":
        return "endothelium"
    elif a in "Fibroblast":
        return "fibroblast"
    elif a in ["TP1", "TP2", "Cycling Tumor"]:
        return "tumor"
    elif a in "Plasma cell":
        return "plasma cell"
    elif a in ["CD16- Monocyte", "CD16+ Monocyte"]:
        return "monocyte"
    elif a in ["MitoHigh NK", "FGFBP2+ NK", "NKT", "FGFBP2- NK"]:
        return "NK"
    elif a in ["CLEC9A+ DC", "CD1C+ DC"]:
        return "Dendritic cell"
    else:
        return a

qcdata.obs["cell_type_iatlas"] = qcdata.obs["FinalCellType"].apply(lambda x: func(x))

In [37]:
qcdata.obs["cell_type_iatlas"].value_counts()

T cell               13090
tumor                 8040
macrophage            5053
NK                    3056
myeloid cell          1407
monocyte              1157
B cell                 962
plasma cell            463
Dendritic cell         419
Misc/Undetermined      278
endothelium            271
fibroblast              91
mast cell               39
Name: cell_type_iatlas, dtype: int64

# File uploads

### h5ad file

In [39]:
qcdata.write('bi_iatlas.h5ad')
file_entity = synapseclient.File('bi_iatlas.h5ad', 'syn59966587')
file_entity = syn.store(file_entity)


##################################################
 Uploading file to Synapse storage 
##################################################



### UMAP coordinates

They are stored in a separate file

In [31]:
umap = pd.read_table("/Users/heimann/Documents/cri_iatlas/htan-dev/Bi_2021/SCP1288/cluster/Final_SCP_ClusterFile.txt")
umap_df = umap.drop(index=0) #first row has metadata about column, exclude it
umap_df

Unnamed: 0,NAME,X,Y,FinalCellType,Initial_Louvain_Cluster,Lineage
1,AAACCTGAGAATAGGG.p55,-6.02505302,-7.725045658,41BB-Hi CD8+ T cell,9,Lymphoid
2,AAACCTGAGGCTAGGT.p55,-6.357606407,-7.995055652,41BB-Hi CD8+ T cell,9,Lymphoid
3,AAACCTGCACTGTGTA.p55,-6.564451214,-8.440553165,41BB-Hi CD8+ T cell,9,Lymphoid
4,AAACCTGCAGTCCTTC.p55,-0.524987217,-3.175035215,MitoHigh T-Helper,2,Lymphoid
5,AAACCTGGTAAATGTG.p55,-5.791819092,-5.585831619,41BB-Lo CD8+ T cell,0,Lymphoid
...,...,...,...,...,...,...
34322,TTGGAACGTGAGGGAG.p916,-5.671237465,-2.7540519,Effector T-Helper,7,Lymphoid
34323,TTGTAGGGTATGAAAC.p916,-6.444504257,-7.248266674,41BB-Lo CD8+ T cell,0,Lymphoid
34324,TTTACTGCACACATGT.p916,-6.708637711,-8.686034656,41BB-Hi CD8+ T cell,9,Lymphoid
34325,TTTGTCAAGAGCAATT.p916,-5.968340393,-2.565747714,Effector T-Helper,7,Lymphoid


In [33]:
#preparing UMAP file to be used in iAtlas
umap_df.index = umap_df["NAME"]
umap_df = umap_df.rename(columns = {"X" : "umap_1", "Y":"umap_2"})
umap_df[["umap_1", "umap_2"]]

Unnamed: 0_level_0,umap_1,umap_2
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1
AAACCTGAGAATAGGG.p55,-6.02505302,-7.725045658
AAACCTGAGGCTAGGT.p55,-6.357606407,-7.995055652
AAACCTGCACTGTGTA.p55,-6.564451214,-8.440553165
AAACCTGCAGTCCTTC.p55,-0.524987217,-3.175035215
AAACCTGGTAAATGTG.p55,-5.791819092,-5.585831619
...,...,...
TTGGAACGTGAGGGAG.p916,-5.671237465,-2.7540519
TTGTAGGGTATGAAAC.p916,-6.444504257,-7.248266674
TTTACTGCACACATGT.p916,-6.708637711,-8.686034656
TTTGTCAAGAGCAATT.p916,-5.968340393,-2.565747714


In [38]:
umap_df[["umap_1", "umap_2"]].to_csv('Bi_2021_umap.tsv', sep='\t', index=True)
file_entity = synapseclient.File('Bi_2021_umap.tsv', 'syn59966566')
file_entity = syn.store(file_entity)

Uploading to Synapse storage: 100%|█| 1.59M/1.59M [00:01<00:00, 1.35MB/s, Bi_202
