KEMRI workshop on single-cell genomics - September 2024

Set up project on B cells pre and post-vaccination

data from 10x

sub-sampling to 3k cells and generating Rds file


https://www.10xgenomics.com/datasets/human-b-cells-from-a-healthy-donor-pre-and-post-flu-vaccination-v-2-2-standard-5-0-0


Questions include:
- What types of B cells do you observe?
- Are B cell states similar pre- and post-vaccination?
- What are the features of BCRs pre- vs post-vaccination?
- Are there any shared BCR clones across the two time points?
- What are the BCR features and gene expression states of expanded BCR clones?

In [None]:
# Start with section to define shell call function and install packages
shell_call <- function(command, ...) {
  # Run `command` via system(), capturing its standard output as a character
  # vector; any extra arguments in `...` are forwarded to system().
  output_lines <- system(command, intern = TRUE, ...)
  # Echo the captured output as a single newline-joined string.
  cat(paste(output_lines, collapse = "\n"))
}

#' Attach a set of packages, reporting any that could not be loaded
#'
#' Each package is attached with `require()` while warnings, messages and
#' package start-up messages are suppressed. If one or more packages are
#' unavailable, a single message listing them is emitted.
#'
#' @param pkgs Character vector of package names.
#' @return Invisibly, a logical vector (named by package) that is `TRUE` for
#'   every package that was attached successfully.
loadPackages <- function(pkgs) {
  # Quiet wrapper around require(): keeps the notebook output free of the
  # start-up chatter and "there is no package called ..." warnings.
  quiet_require <- function(pkg) {
    suppressWarnings(suppressMessages(suppressPackageStartupMessages(
      require(pkg, character.only = TRUE, quietly = TRUE)
    )))
  }
  # vapply() guarantees a logical vector even when `pkgs` is empty.
  ok <- vapply(pkgs, quiet_require, logical(1))
  if (!all(ok)) {
    message("There are missing packages: ", paste(pkgs[!ok], collapse = ", "))
  }
  invisible(ok)
}

## Setup R2U
# Download the r2u bootstrap script, make it executable and run it, then
# switch on bspm and remove the script again.
# NOTE(review): the script presumably registers the r2u binary apt repository
# for Ubuntu "jammy" - confirm against the r2u README.
r2u_script <- "add_cranapt_jammy.sh"
r2u_url <- paste0(
  "https://github.com/eddelbuettel/r2u/raw/master/inst/scripts/",
  r2u_script
)
download.file(r2u_url, r2u_script)
Sys.chmod(r2u_script, "0755")
shell_call(paste0("./", r2u_script))
bspm::enable()
options(bspm.version.check = FALSE)
shell_call(paste("rm", r2u_script))

In [None]:
## Install the R packages
cranPkgs2Install <- c("BiocManager")
# NOTE(review): `ask`, `update` and `quietly` are not documented arguments of
# install.packages() (they resemble BiocManager::install()/require() options);
# they are kept unchanged here - confirm they are intended.
install.packages(cranPkgs2Install, ask = FALSE, update = TRUE, quietly = TRUE)

# Install the GSL development files before building the Bioconductor package.
# `apt-get ... -y` is used because system() is non-interactive: without `-y`
# the confirmation prompt cannot be answered and the install aborts.
apt_status <- system("sudo apt-get install -y libgsl-dev")
if (apt_status != 0) {
  warning(
    "Installing libgsl-dev failed (exit status ", apt_status,
    "); the scRepertoire installation may fail.",
    call. = FALSE
  )
}

BiocManager::install("scRepertoire")
install.packages("Seurat")

In [None]:
# Download a Seurat object containing the gene expression data and metadata for pre- and post-vaccination B cells
# The URL contains `&`, which the shell would otherwise interpret as "run in
# the background" and cut the URL short, so it is passed through shQuote().
# `dl=1` asks Dropbox for the file itself rather than the preview page.
bcells_rds_url <- "https://www.dropbox.com/scl/fi/tkh3lw2t231roodd74i6q/10x_vax_Bcells.Rds?rlkey=camy5cste95hr2uesgm6rssah&dl=1"
shell_call(paste(
  "wget -q --output-document 10x_vax_Bcells.Rds",
  shQuote(bcells_rds_url)
))

In [None]:
# Download the corresponding BCR contigs: one filtered contig annotation CSV
# for each time point (pre- and post-vaccination).
invisible(lapply(c("prevax", "postvax"), function(timepoint) {
  sample_name <- sprintf("sc5p_v2_hs_B_%s_10k_5gex_B", timepoint)
  csv_name <- paste0(sample_name, "_vdj_b_filtered_contig_annotations.csv")
  csv_url <- paste0(
    "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/",
    sample_name, "/", csv_name
  )
  shell_call(paste("wget -q --output-document", csv_name, csv_url))
}))
# List the working directory so the downloaded files can be checked.
shell_call("ls")

In [None]:
# Attach Seurat, then read the downloaded Seurat object from disk so we can
# start exploring it
library(Seurat)
bcells <- readRDS(file = "10x_vax_Bcells.Rds")

In [None]:
# Explore the Seurat object and make sure it has the expected number of features and samples
# Show the first few column names (cell barcodes) so their format can be
# compared with the barcodes in the BCR contig tables later on.
head(colnames(bcells))
# Auto-print the object itself.
bcells

In [None]:
# Join layers (one layer was generated for each object)
# JoinLayers() is applied to the "RNA" assay and the result is stored back
# into the same assay of `bcells`.
# NOTE(review): presumably the separate layers stem from merging the pre- and
# post-vaccination samples - confirm.
bcells[["RNA"]] <- JoinLayers(bcells[["RNA"]])
# Auto-print the object again to inspect the result.
bcells

In [None]:
# Read in the BCR contigs (bcr1 = pre-vaccination, bcr2 = post-vaccination).
# read.csv() is used instead of read.table(sep = ","): its defaults are meant
# for CSV files (only `"` is a quote character, `#` does not start a comment,
# short rows are filled), and the header is read without relying on `T`.
bcr1 <- read.csv("sc5p_v2_hs_B_prevax_10k_5gex_B_vdj_b_filtered_contig_annotations.csv")
bcr2 <- read.csv("sc5p_v2_hs_B_postvax_10k_5gex_B_vdj_b_filtered_contig_annotations.csv")

In [None]:
# Make the barcodes in the VDJ tables match the Seurat object cell names by
# prepending a per-sample prefix: "S1_" for the first (pre-vaccination) table
# and "S2_" for the second (post-vaccination) table.
bcr1$barcode <- paste0("S1_", bcr1$barcode)
bcr2$barcode <- paste0("S2_", bcr2$barcode)

In [None]:
# Collect the two contig tables in one list (element 1 = bcr1, element 2 = bcr2);
# this list is what we will use as input to scRepertoire
contig.list <- list(bcr1, bcr2)

In [None]:
# Load scRepertoire
library(scRepertoire)

In [None]:
# One more issue - we have to make sure the cell barcodes are the same in the VDJ table and the Seurat object
# Check the barcode names in the Seurat object (its column names)
# NOTE(review): these are expected to carry an "S1_"/"S2_" prefix like the one
# added to the contig tables - confirm from the output.
head(colnames(bcells))

In [None]:
# Check the barcode names in the contig list
# Only the first table (the "S1_"-prefixed one) is shown here.
head(contig.list[[1]]$barcode)

In [None]:
# Now we have matching names between the Seurat object and the BCR VDJ contigs
# One last problem is that we have too many cells to analyze in Google colab. 25 thousand is a lot, so we will sample down to (at most) 5 thousand to speed up our analyses.
# We have to be careful to sample the same cells from the Seurat object and the contigs file.
# Fix the random seed so that everyone in the workshop gets the same subset.
set.seed(2024)
n_target_cells <- 5000
# Never ask for more cells than the object holds: sample() without
# replacement errors when the requested size exceeds the population.
n_sampled_cells <- min(n_target_cells, ncol(bcells))
bcell_sample_ids <- sample(colnames(bcells), n_sampled_cells, replace = FALSE)
# Store the cell names as a metadata column so subset() can filter on them.
bcells@meta.data$CellName <- colnames(bcells)
bcells <- subset(bcells, subset = CellName %in% bcell_sample_ids)

In [None]:
# Restrict both contig tables to the cells we sampled for the Seurat object:
# keep only the rows whose barcode is one of the sampled cell names.
contig.list[1:2] <- lapply(contig.list[1:2], function(contigs) {
  is_sampled <- contigs$barcode %in% bcell_sample_ids
  contigs[is_sampled, ]
})

In [None]:
# Finally we can run combineBCR to make cell annotations and call clones
# The two contig tables are labelled "S1" and "S2"; the result is later
# accessed per sample as combined.BCR$S1 and combined.BCR$S2.
# NOTE(review): `threshold = 0.85` is presumably the sequence-similarity cutoff
# used when grouping related clones - confirm in the combineBCR documentation.
combined.BCR <- combineBCR(contig.list, samples = c("S1","S2"), threshold = 0.85)

In [None]:
# combineBCR() added an extra "S1_" or "S2_" in front of barcodes that already
# carried that prefix, so collapse the doubled prefix ("S1_S1" -> "S1",
# "S2_S2" -> "S2") in each sample's table.
for (sample_id in c("S1", "S2")) {
  doubled_prefix <- paste0(sample_id, "_", sample_id)
  combined.BCR[[sample_id]]$barcode <- sub(
    doubled_prefix, sample_id, combined.BCR[[sample_id]]$barcode
  )
}

In [None]:
# Now we can integrate the gene expression in the Seurat object with the BCR calls
# combineExpression() takes the combineBCR() output and the Seurat object; its
# result replaces `bcells`.
# NOTE(review): cloneCall = "gene" presumably defines clones by their V(D)J
# gene usage - confirm in the scRepertoire documentation.
bcells = combineExpression(combined.BCR,bcells,cloneCall="gene")

In [None]:
# Check the object identities
# Use "treatment" as the identity of each cell, then count cells per identity.
# NOTE(review): assumes the object's metadata has a "treatment" column
# (presumably pre- vs post-vaccination) - confirm.
Idents(object = bcells) <- "treatment"
table(Idents(bcells))

From this point please continue to process the data if necessary and perform analysis to answer the questions described at the top of the notebook. Good luck!