# Comparison of cell embeddings (UMAP, R) computed from all variable genes versus only the variable genes that have a recognized gene symbol

In [None]:
library('Seurat')
library('biomaRt')

In [None]:
# Load the serialized Seurat object from disk.
seurat.obj <- readRDS(file = "scaled_integrated.rds")

In [None]:
# Pull the "scale.data" matrix out of the Seurat object.
residuals <- GetAssayData(
  object = seurat.obj,
  slot = "scale.data"
)

In [None]:
# The row names of the scaled-data matrix are the genes this notebook treats
# as the variable genes.
variable_genes <- rownames(x = residuals)

In [None]:
# Connect to the Ensembl BioMart using the "mfascicularis_gene_ensembl" dataset.
ensembl <- useMart(
  biomart = "ensembl",
  dataset = "mfascicularis_gene_ensembl"
)

In [None]:
# Query BioMart for the variable genes, filtering on the HGNC symbol and
# retrieving both the Ensembl gene ID and the symbol for every match.
hgnc_symbol <- getBM(
  attributes = c("ensembl_gene_id", "hgnc_symbol"),
  filters = "hgnc_symbol",
  values = variable_genes,
  mart = ensembl
)

In [None]:
# Report how many variable genes were queried and how many of them have a
# recognized symbol. The lookup table also carries ensembl_gene_id, so a single
# symbol can occupy several rows (one per Ensembl gene ID). Count distinct
# symbols rather than rows so the number is directly comparable to
# length(variable_genes).
print(paste("N variable genes:", length(variable_genes)))
print(paste(
  "N variable genes with recognized symbol:",
  length(unique(hgnc_symbol$hgnc_symbol))
))

In [None]:
# Read the tab-separated table of genes that passed QC and keep only its
# `rownames` column.
all_genes <- read.table(
  file = "passedQC_genes.tsv",
  sep = "\t",
  header = TRUE
)$rownames

In [None]:
# Same BioMart lookup as for the variable genes, but for every gene that passed
# QC and returning only the symbol column.
hgnc_symbol_all <- getBM(
  attributes = c("hgnc_symbol"),
  filters = "hgnc_symbol",
  values = all_genes,
  mart = ensembl
)

In [None]:
# Report the number of QC-passing genes and the number of rows returned by the
# symbol lookup for them.
print(paste("N all genes that passed QC:", length(all_genes)))
print(paste(
  "N all genes that passed QC with recognized symbol:",
  nrow(hgnc_symbol_all)
))

# Comparison of UMAP plots: all variable genes vs. only genes with a recognized symbol (Ensembl ID)

In [None]:
# Set the notebook plot size (wide enough for two panels side by side).
options(
  repr.plot.width = 15,
  repr.plot.height = 7
)

In [None]:
# Plot the loaded object as-is, once grouped by region and once by cell type.
DimPlot(
  object = seurat.obj,
  group.by = c("region", "cell_type")
)

In [None]:
# Recompute the PCA using only the genes that came back from the symbol lookup,
# keeping 100 principal components.
seurat.obj_EID <- RunPCA(
  object = seurat.obj,
  verbose = FALSE,
  npcs = 100,
  features = hgnc_symbol$hgnc_symbol
)

# Build the UMAP from the first 16 principal components of that PCA.
seurat.obj_EID <- RunUMAP(
  object = seurat.obj_EID,
  dims = 1:16,
  verbose = FALSE,
  min.dist = 0.1
)

# Plot the restricted-feature embedding with the same groupings as above.
DimPlot(
  object = seurat.obj_EID,
  group.by = c("region", "cell_type")
)

Using a smaller number of features (genes) leads to reduced resolution in the UMAP plots.