# load data

In [None]:
## Read the serialized object stored in NSCLC_Salcher2022_coreAtlas.rds, then
## pass it through UpdateSeuratObject() before any further processing.
obj.NSCLC <- readRDS(
  file = '/project/sex_cancer/data/NSCLC_Salcher2022/NSCLC_Salcher2022_coreAtlas.rds'
)
obj.NSCLC <- UpdateSeuratObject(object = obj.NSCLC)

# filter sample

In [None]:
## Restrict the object to the cells used downstream. The filters are applied
## one after another, each on the result of the previous one:
##   - Sex recorded as 'female' or 'male'
##   - tissue is 'lung'
##   - origin is not the string 'nan'
##   - platform is '10x'
##   - disease is not 'chronic obstructive pulmonary disease'
obj.NSCLC <- obj.NSCLC %>%
  subset(subset = Sex %in% c('female', 'male')) %>%
  subset(subset = tissue == 'lung') %>%
  subset(subset = origin != 'nan') %>%
  subset(subset = platform == '10x') %>%
  subset(subset = disease != 'chronic obstructive pulmonary disease')

## Quick look at what is left: number of distinct samples, and cells per study.
obj.NSCLC$sample %>% unique() %>% length()
table(obj.NSCLC$study)

In [None]:
## filter meta.data
## Columns carried forward; every other meta.data column is dropped here.
## 'assay' is kept because the metadata-harmonisation step further down renames
## it to 'Chemistry' and fails if the column has already been removed.
keep_meta <- c('sample', 'Sex', 'Age', 'oCT', 'mCT', 
               'donor_id', 'disease', 'origin', 'uicc_stage', 'tumor_stage', 'cell_type_major', 'cell_type', 'cell_type_tumor', 
               'dataset', 'study', 'assay', 
               'EGFR_mutation', 'TP53_mutation', 'ALK_mutation', 'BRAF_mutation', 'ERBB2_mutation', 'KRAS_mutation', 'ROS_mutation', 'ever_smoker')
## all_of() makes it explicit that keep_meta is an external character vector
## (not a column called "keep_meta") and errors if any listed column is missing.
## The cell barcodes (row names) are additionally copied into a regular column.
obj.NSCLC@meta.data <- obj.NSCLC@meta.data %>%
                              dplyr::select(dplyr::all_of(keep_meta)) %>%
                              transform(barcode = rownames(.))

# modify meta.data

In [None]:
## Harmonise the metadata table in one pass:
##   1. de-factor: run every non-numeric column through ext_list()
##   2. rename sample / assay / donor_id to SampleID / Chemistry / DonorID
##   3. recode Sex ('female' -> 'F', anything else -> 'M') and derive SampleType
##      from origin (unlisted origins become 'Others')
##   4. tag every row with the cohort name
## NOTE(review): ext_list() is not defined in this section -- presumably a
## helper that turns factor columns into plain vectors; confirm.
## NOTE(review): the rename step requires an 'assay' column to still be present
## in meta.data at this point.
obj.NSCLC@meta.data <- obj.NSCLC@meta.data %>%
  mutate_if(~ !is.numeric(.), ~ ext_list(.)) %>%
  dplyr::rename(
    'SampleID' = 'sample',
    'Chemistry' = 'assay',
    'DonorID' = 'donor_id'
  ) %>%
  mutate(
    Sex = ifelse(Sex == 'female', 'F', 'M'),
    SampleType = case_when(
      origin == 'normal' ~ 'normal',
      origin == 'normal_adjacent' ~ 'normal_adjacent',
      origin == 'tumor_primary' ~ 'tumor',
      TRUE ~ 'Others'
    )
  ) %>%
  transform(Cohort = 'NSCLC_Salcher2022')

# cell type annotation

## assign mCT

In [None]:
## Derive the mCT label from the atlas annotation using three lookup tables
## consulted in priority order:
##   1. cell_type_major  (all labels except 'other')
##   2. cell_type_tumor  (fibroblast / pericyte / mesothelial / smooth muscle)
##   3. cell_type_major == 'other'  -> 'Others'
## Cells matching none of the tables get NA. dCT is a copy of oCT.
obj.NSCLC@meta.data <- local({
  meta <- obj.NSCLC@meta.data

  major_to_mct <- c(
    'Tumor cells'           = 'Tumor',
    'Alveolar cell type 1'  = 'Epi',
    'Ciliated'              = 'Epi',
    'Club'                  = 'Epi',
    'transitional club/AT2' = 'Epi',
    'Alveolar cell type 2'  = 'Epi',
    'T cell CD8'            = 'CD8T',
    'T cell CD4'            = 'CD4T',
    'T cell regulatory'     = 'Treg',
    'NK cell'               = 'NK',
    'B cell'                = 'B',
    'Plasma cell'           = 'B',
    'Monocyte'              = 'Mono',
    'Macrophage alveolar'   = 'Mph',
    'Macrophage'            = 'Mph',
    'cDC2'                  = 'DC',
    'cDC1'                  = 'DC',
    'pDC'                   = 'DC',
    'DC mature'             = 'DC',
    'Neutrophils'           = 'Neu',
    'Mast cell'             = 'Mast',
    'Endothelial cell'      = 'Endo'
  )
  tumor_to_mct <- c(
    'Fibroblast adventitial'   = 'Fibro',
    'Fibroblast alveolar'      = 'Fibro',
    'Fibroblast peribronchial' = 'Fibro',
    'Pericyte'                 = 'Pericyte',
    'Mesothelial'              = 'Mesothelial',
    'Smooth muscle cell'       = 'SMC'
  )
  fallback_to_mct <- c('other' = 'Others')

  ## as.character() so the lookup is by label even if a column is a factor;
  ## labels absent from a table (and NA) yield NA and fall through to the next.
  major_key <- as.character(meta[['cell_type_major']])
  tumor_key <- as.character(meta[['cell_type_tumor']])

  meta[['mCT']] <- dplyr::coalesce(
    unname(major_to_mct[major_key]),
    unname(tumor_to_mct[tumor_key]),
    unname(fallback_to_mct[major_key])
  )

  transform(meta, dCT = oCT)
})
head(obj.NSCLC@meta.data, n = 2)

## assign gCT

In [None]:
## Collapse mCT into the coarse gCT compartment (Tumor / Immune / Stromal).
## Any mCT value not listed in the table, including NA, becomes 'Others'.
obj.NSCLC@meta.data <- local({
  mct_to_gct <- c(
    'Tumor'       = 'Tumor',
    'Epi'         = 'Tumor',
    'Neu'         = 'Immune',
    'Mast'        = 'Immune',
    'CD8T'        = 'Immune',
    'CD4T'        = 'Immune',
    'NK'          = 'Immune',
    'DC'          = 'Immune',
    'B'           = 'Immune',
    'Treg'        = 'Immune',
    'Mono'        = 'Immune',
    'Mph'         = 'Immune',
    'Pericyte'    = 'Stromal',
    'SMC'         = 'Stromal',
    'Mesothelial' = 'Stromal',
    'Fibro'       = 'Stromal',
    'Endo'        = 'Stromal'
  )

  obj.NSCLC@meta.data %>%
    mutate(gCT = dplyr::coalesce(unname(mct_to_gct[as.character(mCT)]), 'Others'))
})
head(obj.NSCLC@meta.data, n = 2)

# ENSG to symbol

In [None]:
## Replace the feature identifiers used as row names of the RNA assay with the
## names stored in meta.features$feature_name.
## NOTE(review): only counts, data and meta.features are renamed here; any other
## slot keyed by the old feature IDs is left untouched -- confirm none is in use.

## The same name vector is written onto both matrices, so their rows must be
## listed in exactly the same order. Show the comparison, then enforce it.
table(rownames(obj.NSCLC@assays$RNA@counts) == rownames(obj.NSCLC@assays$RNA@data))
stopifnot(identical(rownames(obj.NSCLC@assays$RNA@counts),
                    rownames(obj.NSCLC@assays$RNA@data)))

## Translation table, one row per feature, in the row order of the counts matrix.
trans <- obj.NSCLC@assays$RNA@meta.features %>% 
         dplyr::select(c('feature_type', 'feature_name')) %>% 
         .[rownames(obj.NSCLC@assays$RNA@counts),] %>% 
         rownames_to_column('feature')

## Row names have to be plain, non-missing, unique strings. Validate and clean
## them BEFORE touching the matrices so a failure cannot leave the object
## half-renamed: as.character() drops any factor coding, make.unique() appends
## '.1', '.2', ... to repeated names (column_to_rownames() rejects duplicates).
stopifnot(!anyNA(trans$feature_name))
trans$feature_name <- make.unique(as.character(trans$feature_name))
trans %>% head(n = 2)

## rename counts
rownames(obj.NSCLC@assays$RNA@counts) <- trans$feature_name
## rename data
rownames(obj.NSCLC@assays$RNA@data) <- trans$feature_name
## rename meta.feature (the old identifier is kept in the 'feature' column)
obj.NSCLC@assays$RNA@meta.features <- trans %>% column_to_rownames('feature_name')

# save

In [None]:
## Write the processed object to 'obj.NSCLC.use.rds'; the path is relative, so
## the file lands in the current working directory.
saveRDS(object = obj.NSCLC, file = 'obj.NSCLC.use.rds')