### Data download

In [1]:
Download the GSE132465 dataset from the GEO database.

ERROR: Error in parse(text = x, srcfile = src): <text>:1:10: unexpected 'in'
1: Download in
             ^


### Quality control

In [2]:
library(Seurat)
library(dplyr)
library(SingleR)
library(celldex)
# Quality control for GSE132465: build the Seurat object, keep tumor cells,
# compute the mitochondrial fraction, inspect QC metrics, then filter.

# Read the raw UMI count matrix; the first column provides the row names.
RAW_DATA <- read.table(
  "../data_download/GSE132465/GSE132465_GEO_processed_CRC_10X_raw_UMI_count_matrix.txt.gz",
  header = TRUE,
  row.names = 1
)
OBJECT <- CreateSeuratObject(
  RAW_DATA,
  project = "GSE132465",
  min.cells = 3,
  min.features = 200
)
saveRDS(OBJECT, "GSE132465.RDS")

# Label each cell as Tumor/Normal from the 7th character of orig.ident
# (e.g. "SMC16.T" -> "T").
OBJECT@meta.data$sample <- ifelse(
  substr(OBJECT@meta.data$orig.ident, 7, 7) == "T",
  "Tumor",
  "Normal"
)
Tumor <- subset(OBJECT, sample == "Tumor")
saveRDS(Tumor, "GSE132465_Tumor.RDS")

# percent.mt must exist BEFORE it is plotted and BEFORE Tumor_filter is
# derived from Tumor; otherwise the plots and the final filter below fail
# because the metadata column is missing.
grep(pattern = "^MT-", rownames(Tumor), value = TRUE)
Tumor[["percent.mt"]] <- PercentageFeatureSet(object = Tumor, pattern = "^MT-")

# QC overview plots.
VlnPlot(
  Tumor,
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
  pt.size = .001
)
P1 <- FeatureScatter(Tumor, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
P2 <- FeatureScatter(Tumor, feature1 = "nCount_RNA", feature2 = "percent.mt")

# Drop four samples, then filter cells on gene count and mitochondrial
# fraction. Tumor_filter inherits percent.mt from Tumor.
Tumor_filter <- subset(Tumor, orig.ident != "SMC16.T" & orig.ident != "SMC18.T" & orig.ident != "SMC21.T" & orig.ident != "SMC22.T")
Tumor_filter <- subset(Tumor_filter, subset = nFeature_RNA > 500 & nFeature_RNA < 2000 & percent.mt < 10)

ERROR: Error in library(Seurat): there is no package called 'Seurat'


### Process data

In [None]:
library(Seurat)
library(dplyr)
library(SingleR)
library(celldex)
library(ggplot2)
# Normalisation, variable-feature selection, scaling, PCA, clustering and
# 2-D embeddings for the filtered tumor cells.
Tumor_filter <- NormalizeData(
  Tumor_filter,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
Tumor_filter <- FindVariableFeatures(
  Tumor_filter,
  selection.method = "vst",
  nfeatures = 2000
)

# Keep the variable-feature plot in V2: LabelPoints() below needs the plot
# object (previously V2 was never assigned, so the call failed).
V2 <- VariableFeaturePlot(Tumor_filter)
V2
TOP10 <- head(VariableFeatures(Tumor_filter), 10)
L2 <- LabelPoints(V2, points = TOP10, repel = TRUE)

# By default, scaling is performed on the variable features found above.
Tumor_filter <- ScaleData(Tumor_filter)
SCALE.DATA <- Tumor_filter[["RNA"]]@scale.data
dim(SCALE.DATA)

# Linear dimensionality reduction and choice of the number of PCs.
Tumor_filter <- RunPCA(Tumor_filter, features = VariableFeatures(Tumor_filter))
DimPlot(Tumor_filter, reduction = "pca")
Tumor_filter <- JackStraw(Tumor_filter, num.replicate = 100)
Tumor_filter <- ScoreJackStraw(Tumor_filter, dims = 1:20)
J1 <- JackStrawPlot(Tumor_filter, dims = 1:20)
E1 <- ElbowPlot(Tumor_filter)

# Graph-based clustering and embeddings on the first 20 PCs.
Tumor_filter <- FindNeighbors(Tumor_filter, dims = 1:20)
Tumor_filter <- FindClusters(Tumor_filter, resolution = 0.5)
Tumor_filter <- RunUMAP(Tumor_filter, dims = 1:20)
Tumor_filter <- RunTSNE(Tumor_filter, dims = 1:20)

# Replace the in-memory object with the one stored on disk.
# NOTE(review): presumably written by an earlier annotation run — confirm.
Tumor_filter <- readRDS("GSE132465_Tumor_filter_anno.RDS")

# Shared ggplot theme: black-and-white base, no grid lines, square panel.
theme_global <- theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(aspect.ratio = 1)

# Cluster-coloured UMAP and t-SNE embeddings.
D3 <- DimPlot(
  Tumor_filter,
  group.by = "seurat_clusters",
  reduction = "umap",
  label = TRUE,
  pt.size = .7
) + theme_global
D4 <- DimPlot(
  Tumor_filter,
  group.by = "seurat_clusters",
  reduction = "tsne",
  label = TRUE,
  pt.size = .7
) + theme_global

# Write both embeddings as 7 x 7 PDFs.
ggsave("umap_no_label.pdf", D3, width = 7, height = 7)
ggsave("tsne_no_label.pdf", D4, width = 7, height = 7)

# Marker detection, immune marker table, heatmap, SingleR annotation and
# immune-marker feature plots.

# Differential markers for every identity class (both directions).
Tumor_filter_marker <- FindAllMarkers(
  Tumor_filter,
  only.pos = FALSE,
  min.pct = 0,
  logfc.threshold = 0.25
)
Tumor_filter_marker$cluster <- as.character(Tumor_filter_marker$cluster)

# Up-regulated markers of the immune cell populations.
immu_gene <- subset(
  Tumor_filter_marker,
  cluster %in% c("T_cells", "B_cell", "Monocyte", "Macrophage", "NK_cell")
)
immu_gene <- immu_gene[immu_gene$avg_log2FC > 0, ]

# Use the `gene` column rather than the row names: row names are made unique
# (e.g. "CD3D.1") when a gene is a marker for more than one cluster, so they
# are not valid gene symbols.
immu_gene_table <- data.frame(
  Gene = immu_gene$gene,
  Type = rep("Immune-related marker genes", nrow(immu_gene))
)
write.csv(Tumor_filter_marker, "Tumor_filter_marker.csv")
write.csv(immu_gene_table, "immu_gene_table.csv")

# Top 10 markers per cluster by log fold change, shown in the heatmap.
# Previously TOP10 was computed but never passed to DoHeatmap(), so the
# heatmap fell back to its default feature set.
TOP10 <- Tumor_filter_marker %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_log2FC)
D5 <- DoHeatmap(Tumor_filter, features = TOP10$gene)
ggsave("DoHeatmap_10.pdf", D5, width = 12, height = 6)

# Cluster-level cell type annotation against the Human Primary Cell Atlas.
ref_use <- HumanPrimaryCellAtlasData()
anno <- SingleR(
  test = as.matrix(Tumor_filter@assays$RNA@data),
  ref = ref_use,
  labels = ref_use$label.main,
  clusters = Tumor_filter$seurat_clusters,
  method = "cluster"
)

# Map each cluster to its predicted label.
# NOTE(review): this assumes the active identities of Tumor_filter are the
# seurat_clusters levels passed to SingleR above — confirm, since the object
# is reloaded from "GSE132465_Tumor_filter_anno.RDS" earlier in this cell.
NEW.CLUSTER.IDS <- anno$labels
names(NEW.CLUSTER.IDS) <- levels(Tumor_filter)
Tumor_filter[["old.ident"]] <- Idents(Tumor_filter)
Tumor_filter <- RenameIdents(Tumor_filter, NEW.CLUSTER.IDS)
Tumor_filter[["cluster.label"]] <- Idents(Tumor_filter)

# Labelled embeddings.
D6 <- DimPlot(Tumor_filter, label = TRUE, repel = TRUE, reduction = "umap", pt.size = .7) + theme_global
D7 <- DimPlot(Tumor_filter, label = TRUE, repel = TRUE, reduction = "tsne", pt.size = .7) + theme_global
ggsave("umap_label_s.pdf", D6, width = 7, height = 7)
ggsave("tsne_label_s.pdf", D7, width = 7, height = 7)
saveRDS(Tumor_filter, "GSE132465_Tumor_filter.RDS")

# One t-SNE feature plot per immune marker gene, built without growing a
# list by hand.
id <- c("CD3D", "CD79A", "CD163", "LYZ", "GNLY")
plot <- lapply(id, function(gene) {
  FeaturePlot(
    Tumor_filter,
    features = gene,
    ncol = 3,
    cols = c("#eeeeee", "#8ec2b6"),
    reduction = "tsne"
  ) + theme_global
})

# Arrange the five panels in a single row and write them to one PDF.
library(gridExtra)
pdf("immucell_marker_tsne.pdf", width = 20)
grid.arrange(grobs = plot, ncol = 5)
dev.off()

### Monocle

In [3]:
library(dplyr)
library(Seurat)
library(patchwork)
library(monocle)
# Pseudotime trajectory analysis with monocle on the annotated tumor cells.
Tumor_filter_anno <- readRDS("../variable_gene_dim_annotation_immugene/GSE132465_Tumor_filter.RDS")

# Build the CellDataSet inputs from the Seurat object. The counts are
# converted to a sparse matrix directly; going through as.matrix() first
# would materialise a dense genes x cells matrix for no benefit.
expr_matrix <- as(Tumor_filter_anno@assays$RNA@counts, "sparseMatrix")
p_data <- Tumor_filter_anno@meta.data
f_data <- data.frame(
  gene_short_name = row.names(Tumor_filter_anno),
  row.names = row.names(Tumor_filter_anno)
)
pd <- new("AnnotatedDataFrame", data = p_data)
fd <- new("AnnotatedDataFrame", data = f_data)
cds <- newCellDataSet(
  expr_matrix,
  phenoData = pd,
  featureData = fd,
  lowerDetectionLimit = 0.5,
  expressionFamily = negbinomial.size()
)
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds)

# First pass: order on the Seurat variable features.
express_genes <- VariableFeatures(Tumor_filter_anno)
cds <- setOrderingFilter(cds, express_genes)
plot_ordering_genes(cds)

# Test those genes for differential expression across annotated cell types.
# The gene vector is `express_genes` (defined above); the original call used
# the undefined name `expressed_genes`.
diff <- differentialGeneTest(
  cds[express_genes, ],
  fullModelFormulaStr = "~cluster.label",
  cores = 1
)
deg <- subset(diff, qval < 0.01)
deg <- deg[order(deg$qval, decreasing = FALSE), ]
head(deg)

# Second pass: order on the significant genes, then learn the trajectory.
ordergene <- rownames(deg)
cds <- setOrderingFilter(cds, ordergene)
plot_ordering_genes(cds)
cds <- reduceDimension(cds, max_components = 2, method = "DDRTree")
cds <- orderCells(cds)

# Trajectory plots. The cell type annotation is stored in the
# `cluster.label` metadata column (the same column used in the model formula
# above); no `cell_type` column is created anywhere in this workflow.
P1 <- plot_cell_trajectory(cds, color_by = "Pseudotime", size = 1, show_backbone = TRUE)
P2 <- plot_cell_trajectory(cds, color_by = "cluster.label", size = 1, show_backbone = TRUE)
P3 <- plot_cell_trajectory(cds, color_by = "State", size = 1, show_backbone = TRUE)
P4 <- plot_cell_trajectory(cds, color_by = "seurat_clusters")

ERROR: Error in library(dplyr): there is no package called 'dplyr'
