In [3]:
# final_process_seurat.R
library(Seurat)
library(dplyr)
library(readr)

Loading required package: SeuratObject

Loading required package: sp

‘SeuratObject’ was built under R 4.3.0 but the current version is
4.3.3; it is recomended that you reinstall ‘SeuratObject’ as the ABI
for R may have changed


Attaching package: ‘SeuratObject’


The following object is masked from ‘package:base’:

    intersect



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [3]:
# --- INPUT FILES ---
seurat_path <- "/data/files/merged_seuratobj.rds"
assignments_path <- "/data/files/demuxlet_assignments.txt"
output_path <- "/data/files/matched_seurat_demuxlet.rds"


In [4]:

seurat_obj <- readRDS(seurat_path)

In [5]:
assignments <- read.table(assignments_path, header = TRUE, sep = "", stringsAsFactors = FALSE, fill = TRUE, quote = "\"", check.names = FALSE)

In [6]:
colnames(assignments) <- gsub("\"", "", colnames(assignments))

In [7]:
head(colnames(seurat_obj))
head(assignments)

Unnamed: 0_level_0,Barcode,Individual_Assignment,Demuxlet_droplet_type
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,S10A_AAACCCAAGCTGCGAA-1,N955/20,SNG
2,S10A_AAACCCAAGGCTCTAT-1,N955/20,SNG
3,S10A_AAACCCAAGTCAAGCG-1,N955/20,SNG
4,S10A_AAACCCAAGTTTGCTG-1,N955/20,DBL
5,S10A_AAACCCACAAGAGAGA-1,N955/20,SNG
6,S10A_AAACCCACAATCGTCA-1,N969/17,SNG


In [8]:
# Clean Seurat barcodes: remove prefix before last '_'
seurat_obj$clean_barcode <- sub(".*_", "", colnames(seurat_obj))

In [9]:
# Clean assignment barcodes: remove prefix before '_'
assignments$Clean_Barcode <- sub("^[^_]+_", "", assignments$Barcode)

In [10]:
# Match barcodes
intersecting_barcodes <- intersect(seurat_obj$clean_barcode, assignments$Clean_Barcode)
cat("Matched", length(intersecting_barcodes), "barcodes\n")

Matched 693196 barcodes


In [11]:
# Filter Seurat object to matched cells only
cat("Filtering Seurat object to common cells...\n")
cells_to_keep <- seurat_obj$clean_barcode %in% intersecting_barcodes
seurat_obj <- seurat_obj[, cells_to_keep]

Filtering Seurat object to common cells...


In [12]:
# Create named vector for metadata
cat("Preparing Individual_ID vector...\n")
barcode_to_individual <- setNames(assignments$Individual_Assignment, assignments$Clean_Barcode)
individual_ids <- barcode_to_individual[seurat_obj$clean_barcode]

Preparing Individual_ID vector...


In [13]:

# Name it with actual Seurat barcodes
names(individual_ids) <- colnames(seurat_obj)

In [14]:
# Add Individual_ID metadata (fast and safe)
cat("Adding metadata...\n")
seurat_obj <- AddMetaData(seurat_obj, metadata = individual_ids, col.name = "Individual_ID")


Adding metadata...


In [15]:
# Sanity check: make sure there are no NAs
na_count <- sum(is.na(seurat_obj$Individual_ID))
cat("NA entries in Individual_ID:", na_count, "\n")
if (na_count > 0) stop("Some cells are missing individual assignments.")

NA entries in Individual_ID: 0 


In [16]:
# Drop the helper column
seurat_obj$clean_barcode <- NULL

In [17]:
cat("🔍 Verifying final Seurat object before saving...\n")

# 1. Preview metadata
cat("First few rows of metadata:\n")
print(head(seurat_obj@meta.data[, c("Individual_ID"), drop = FALSE]))

# 2. Check that number of Individual_IDs matches number of cells
stopifnot(length(seurat_obj$Individual_ID) == ncol(seurat_obj))

# 3. Check for any NA values (should be 0)
na_count <- sum(is.na(seurat_obj$Individual_ID))
cat("Number of NA values in Individual_ID:", na_count, "\n")
if (na_count > 0) stop("❌ Error: NA values found in Individual_ID. Something went wrong.")

# 4. Show number of unique individuals
cat("Number of unique individuals:\n")
print(length(unique(seurat_obj$Individual_ID)))

# 5. Show a frequency table of first few individuals
cat("Counts per individual (top 10):\n")
print(head(table(seurat_obj$Individual_ID), 10))

🔍 Verifying final Seurat object before saving...
First few rows of metadata:
                       Individual_ID
S6A_AAACCCAAGAATCTAG-1       N366/21
S6A_AAACCCAAGACATCAA-1      N1462/18
S6A_AAACCCAAGATGGTCG-1      N1462/18
S6A_AAACCCAAGATGTAGT-1       N366/21
S6A_AAACCCAAGCACTAGG-1      N1462/18
S6A_AAACCCAAGCCTATTG-1       N581/20
Number of NA values in Individual_ID: 0 
Number of unique individuals:
[1] 62
Counts per individual (top 10):

N1001/08 N1014/16 N1024/14 N1066/18 N1084/22 N1146/20 N1174/19 N1216/06 
     762    11204     5599    20077    13036      174    17680     6757 
N1220/20 N1229/20 
    3688    19497 


In [None]:


colnames(seurat_obj@meta.data)

In [19]:
saveRDS(seurat_obj, output_path)
cat("✅ Final Seurat object saved successfully and is ready for eQTL analysis.\n")

✅ Final Seurat object saved successfully and is ready for eQTL analysis.


In [20]:
Assays(seurat_obj)          
DefaultAssay(seurat_obj)

In [21]:
Layers(seurat_obj[["originalexp"]])