KEMRI Workshop on single-cell genomics - September 2024

Project on PBMC and tumor cells with TCR analysis

Data from 10x Genomics:

Healthy PBMCs
https://www.10xgenomics.com/datasets/human-pbmc-from-a-healthy-donor-10-k-cells-multi-v-2-2-standard-5-0-0


NSCLC tumor
https://www.10xgenomics.com/datasets/nsclc-tumor-1-standard-5-0-0

Melanoma tumor
https://www.10xgenomics.com/datasets/melanoma-tumor-derived-cells-v-1-1-2-standard-4-0-0


Questions include:
- What types of cells do you observe in each tissue?
- Are T cell states similar in PBMCs and tumors? Do they differ between the two tumor types?
- What are the features of TCRs in PBMC compared to tumors?
- Are there any shared TCR clones across samples?
- What are the TCR features and gene expression states of expanded TCR clones?

In [None]:
# Helper section: shell access and package installation utilities.
#
# shell_call: run a shell command and echo its output into the notebook.
#   command  the command line handed to system()
#   ...      further arguments forwarded unchanged to system()
# The captured output lines are joined with newlines and written with cat().
shell_call <- function(command, ...) {
  output_lines <- system(command, intern = TRUE, ...)
  cat(paste(output_lines, collapse = "\n"))
}

# loadPackages: attach every package named in `pkgs` without start-up noise
# and report the ones that could not be loaded.
#   pkgs  character vector of package names
# Returns (invisibly) a logical vector, named by package, that is TRUE where
# the package was attached. Missing packages are reported in a single
# message(); no error is raised.
loadPackages <- function(pkgs) {
  # require() is used deliberately: its logical return value is the
  # availability check. All warnings/messages it emits are silenced.
  quiet_require <- function(pkg) {
    suppressWarnings(suppressMessages(suppressPackageStartupMessages(
      require(pkg, character.only = TRUE, quietly = TRUE)
    )))
  }
  # vapply() guarantees a logical vector, including for empty input
  ok <- vapply(pkgs, quiet_require, logical(1))
  if (!all(ok)) {
    message("There are missing packages: ", paste(pkgs[!ok], collapse = ", "))
  }
  invisible(ok)
}

## Setup R2U
# Fetch the r2u bootstrap script, make it executable and run it.
download.file(
  url = "https://github.com/eddelbuettel/r2u/raw/master/inst/scripts/add_cranapt_jammy.sh",
  destfile = "add_cranapt_jammy.sh"
)
Sys.chmod(paths = "add_cranapt_jammy.sh", mode = "0755")
shell_call("./add_cranapt_jammy.sh")

# Switch on bspm and turn off its version check.
bspm::enable()
options(bspm.version.check = FALSE)

# The bootstrap script is no longer needed once it has run.
shell_call("rm add_cranapt_jammy.sh")

In [None]:
## Install the R packages
# BiocManager (from CRAN) is the installer used for scRepertoire below.
cranPkgs2Install <- c("BiocManager")

# install.packages() has no `ask`, `update` or `quietly` arguments: the first
# two belong to BiocManager::install() and the last is spelled `quiet`.
install.packages(cranPkgs2Install, quiet = TRUE)

# `-y` answers apt's confirmation prompt, which cannot be answered from a
# notebook cell. system() returns the exit status, so a failure is reported
# here instead of surfacing later as an obscure build error.
# NOTE(review): libgsl-dev is presumably a system dependency of the packages
# installed next - confirm.
if (system("sudo apt install -y libgsl-dev") != 0L) {
  warning("apt could not install libgsl-dev; later package installs may fail",
          call. = FALSE)
}

# `ask` / `update` are passed where they are actually understood.
BiocManager::install("scRepertoire", ask = FALSE, update = TRUE)
install.packages("Seurat")

In [None]:
# Download the Seurat object holding the gene expression data and metadata
# for the PBMC and tumor cells.
# The URL is passed through shQuote(): unquoted, the shell reads the `&` as
# "run in background" and cuts the final query parameter off the URL.
# `dl=1` asks Dropbox for the file itself rather than its preview page.
shell_call(paste(
  "wget -q --output-document 10x_pbmc_tumor_cells.Rds",
  shQuote("https://www.dropbox.com/scl/fi/pgdeeupit65139qj4l382/10x_pbmc_tumor_cells.Rds?rlkey=fvnrntlz6082w4pmud8ayd5ve&dl=1")
))

In [None]:
# Fetch the filtered TCR contig annotation table of each sample. Every file
# is saved under the file name it has on the server, i.e. the last component
# of its URL.
invisible(lapply(
  c(
    "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/sc5p_v2_hs_PBMC_10k_multi_5gex_5fb_b_t/sc5p_v2_hs_PBMC_10k_multi_5gex_5fb_b_t_vdj_t_filtered_contig_annotations.csv",
    "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/vdj_v1_hs_nsclc_multi_5gex_t_b/vdj_v1_hs_nsclc_multi_5gex_t_b_vdj_t_filtered_contig_annotations.csv",
    "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v1p1_hs_melanoma_10k/sc5p_v1p1_hs_melanoma_10k_t_filtered_contig_annotations.csv"
  ),
  function(contig_url) {
    shell_call(sprintf(
      "wget -q --output-document %s %s",
      basename(contig_url), contig_url
    ))
  }
))
# List the working directory to confirm the downloads are present
shell_call("ls")

In [None]:
# Attach Seurat, then read the saved object from disk into `cells`
library(Seurat)
cells <- readRDS(file = "10x_pbmc_tumor_cells.Rds")

In [None]:
# Inspect the Seurat object: show the first few cell names, then print the
# object itself to check that it has the expected number of features and samples
head(colnames(cells))
cells

In [None]:
# Join the layers of the RNA assay (one layer was generated for each object),
# store the joined assay back under "RNA", and print the object to confirm
cells[["RNA"]] <- JoinLayers(cells[["RNA"]])
cells

In [None]:
# Load the three comma-separated TCR contig tables (first row = column names)
tcr1 <- read.table(
  file = "sc5p_v2_hs_PBMC_10k_multi_5gex_5fb_b_t_vdj_t_filtered_contig_annotations.csv",
  header = TRUE, sep = ","
)
tcr2 <- read.table(
  file = "vdj_v1_hs_nsclc_multi_5gex_t_b_vdj_t_filtered_contig_annotations.csv",
  header = TRUE, sep = ","
)
tcr3 <- read.table(
  file = "sc5p_v1p1_hs_melanoma_10k_t_filtered_contig_annotations.csv",
  header = TRUE, sep = ","
)

In [None]:
# Prefix every barcode in the TCR contig tables with its sample label
# ("S1_", "S2_", "S3_") so the VDJ barcodes follow the cell naming used in
# the Seurat object
tcr1$barcode <- paste0("S1_", tcr1$barcode)
tcr2$barcode <- paste0("S2_", tcr2$barcode)
tcr3$barcode <- paste0("S3_", tcr3$barcode)

In [None]:
# Collect the three contig tables into a single list (order: tcr1, tcr2, tcr3);
# this list is what gets passed to scRepertoire
contig.list <- list(tcr1, tcr2, tcr3)

In [None]:
# Load scRepertoire
library(scRepertoire)

In [None]:
# The cell barcodes must be named the same way in the VDJ tables and in the
# Seurat object before the two can be linked.
# Show the first and the last cell names stored in the Seurat object
head(colnames(cells))
tail(colnames(cells))

In [None]:
# Show the first barcodes of each table in the contig list, for comparison
# with the Seurat cell names
head(contig.list[[1]]$barcode)
head(contig.list[[2]]$barcode)
head(contig.list[[3]]$barcode)

In [None]:
# The cell names now match between the Seurat object and the TCR VDJ contigs.
# The full data set (about 25 thousand cells) is too large to analyze
# comfortably in Google Colab, so a random subset of at most 5 thousand cells
# is kept. The same IDs (`cell_sample_ids`) must also be used to filter the
# contig tables, so that both data sets describe the same cells.

# Fixed seed: every run (and every participant) draws the same cells
set.seed(42)
# min() keeps sample() from failing if the object holds fewer than 5000 cells
cell_sample_ids <- sample(
  colnames(cells),
  min(5000, length(colnames(cells))),
  replace = FALSE
)
# Keep the cell name as a metadata column as well
cells$CellName <- colnames(cells)
cells <- subset(cells, cells = cell_sample_ids)

In [None]:
# Restrict each contig table to the rows whose barcode belongs to one of the
# cells sampled for the Seurat object
contig.list <- lapply(contig.list, function(contigs) {
  keep_row <- contigs$barcode %in% cell_sample_ids
  contigs[keep_row, ]
})

In [None]:
# Run scRepertoire's combineTCR on the contig list, labelling the three
# tables "S1", "S2" and "S3", to make the cell annotations and call clones
combined.TCR <- combineTCR(contig.list, samples = c("S1","S2","S3"))

In [None]:
# combineTCR put a second sample label in front of the barcodes that were
# already prefixed (e.g. "S1_S1"); collapse the doubled label back to a
# single one in each sample's table
combined.TCR[c("S1", "S2", "S3")] <- lapply(
  c("S1", "S2", "S3"),
  function(sample_id) {
    sample_tab <- combined.TCR[[sample_id]]
    sample_tab$barcode <- sub(
      paste0(sample_id, "_", sample_id),
      sample_id,
      sample_tab$barcode
    )
    sample_tab
  }
)

In [None]:
# Integrate the gene expression in the Seurat object with the TCR calls:
# combineExpression is given the combined TCR list and the Seurat object,
# with cloneCall = "gene", and its result replaces `cells`
cells = combineExpression(combined.TCR,cells,cloneCall="gene")

In [None]:
# Set the object's identities from "tissue" and tabulate how many cells fall
# under each identity
# NOTE(review): "tissue" is presumably a metadata column of the object - confirm
Idents(object = cells) <- "tissue"
table(Idents(cells))

From this point please continue to process the data if necessary and perform analysis to answer the questions described at the top of the notebook. Good luck!