### Read unfiltered count data from all samples, perform initial filtering, add metadata, and save the Seurat objects


In [2]:
library(Seurat)
library(SingleCellExperiment)

## Read HGG data

In [9]:
getwd()
# Build `data_HGG`: a named list with one Seurat object per HGG sample whose
# metadata diagnosis is "Glioblastoma". `sample_pick` holds the ids of the
# samples that were kept, in the order they were loaded.

# List the directories containing the sample data (one sub-directory per sample)
sample_dirs <- list.dirs("./in/HGG/count/", recursive = FALSE)

# Read metadata file
metadata <- read.delim("./in/HGG/Metadata/metadata.tsv", sep = "\t", header = TRUE)

data_HGG <- list()

# Iterate over the directories themselves: an empty listing gives zero
# iterations, whereas 1:length(x) would run with indices 1 and 0.
for (sample_dir in sample_dirs) {
    # Get sample ID from directory name
    sample_id <- basename(sample_dir)

    # Get metadata for this sample; %in% never returns NA, so rows with a
    # missing id cannot leak into the selection.
    sample_metadata <- metadata[metadata$scpca_sample_id %in% sample_id, ]
    if (nrow(sample_metadata) == 0) {
        stop("No metadata row found for sample ", sample_id, call. = FALSE)
    }

    # Decide from the metadata alone whether the sample is wanted, so the
    # count matrices of other diagnoses are never read from disk.
    diagnosis <- unique(sample_metadata$diagnosis)
    if (length(diagnosis) != 1) {
        stop("Ambiguous diagnosis for sample ", sample_id, ": ",
             paste(diagnosis, collapse = ", "), call. = FALSE)
    }
    if (!isTRUE(diagnosis == "Glioblastoma")) {
        # Also skips samples whose diagnosis is NA
        next
    }

    # Construct path to RDS file
    # NOTE(review): assumes the library id is the sample id with "SCPCS"
    # replaced by "SCPCL" - confirm against the directory contents.
    rds_file <- file.path(sample_dir, paste0(gsub("SCPCS", "SCPCL", sample_id), "_unfiltered.rds"))

    # Read the RDS file and get counts
    sce_obj <- readRDS(rds_file)
    counts_matrix <- counts(sce_obj)

    # Create Seurat object from counts; min.cells / min.features apply the
    # initial filtering.
    seurat_obj <- CreateSeuratObject(counts = counts_matrix,
                                    project = sample_id,
                                    min.cells = 3,
                                    min.features = 200)

    # Add metadata columns to Seurat object
    seurat_obj$sample_name <- sample_id
    seurat_obj$diagnosis <- sample_metadata$diagnosis
    seurat_obj$age <- sample_metadata$age
    seurat_obj$sex <- sample_metadata$sex
    seurat_obj$tissue_location <- sample_metadata$tissue_location

    # Add to list, keyed by sample id
    print(sample_id)
    data_HGG[[sample_id]] <- seurat_obj
}

sample_pick <- names(data_HGG)
print(sample_pick)

print("=================sample picks ================================")
print(names(data_HGG))
# Print final list of Seurat objects
print("Final list of Seurat objects:")
for (name in names(data_HGG)) {
    print(paste0("Sample ", name, ": ", unique(data_HGG[[name]]@meta.data$diagnosis)))
}

[1] "SCPCS000002"
[1] "SCPCS000003"
[1] "SCPCS000006"
[1] "SCPCS000007"
[1] "SCPCS000008"
[1] "SCPCS000009"
[1] "SCPCS000010"
[1] "SCPCS000013"
[1] "SCPCS000014"
[1] "SCPCS000015"
[1] "SCPCS000017"
[1] "SCPCS000018"
[1] "SCPCS000019"
[1] "SCPCS000020"
[1] "SCPCS000022"
[1] "SCPCS000023"
 [1] "SCPCS000002" "SCPCS000003" "SCPCS000006" "SCPCS000007" "SCPCS000008"
 [6] "SCPCS000009" "SCPCS000010" "SCPCS000013" "SCPCS000014" "SCPCS000015"
[11] "SCPCS000017" "SCPCS000018" "SCPCS000019" "SCPCS000020" "SCPCS000022"
[16] "SCPCS000023"
 [1] "SCPCS000002" "SCPCS000003" "SCPCS000006" "SCPCS000007" "SCPCS000008"
 [6] "SCPCS000009" "SCPCS000010" "SCPCS000013" "SCPCS000014" "SCPCS000015"
[11] "SCPCS000017" "SCPCS000018" "SCPCS000019" "SCPCS000020" "SCPCS000022"
[16] "SCPCS000023"
[1] "Final list of Seurat objects:"
[1] "Sample SCPCS000002Glioblastoma"
[1] "Sample SCPCS000003Glioblastoma"
[1] "Sample SCPCS000006Glioblastoma"
[1] "Sample SCPCS000007Glioblastoma"
[1] "Sample SCPCS000008Glioblastoma"
[1]

## Read LGG data

In [14]:
getwd()
# Build `data_LGG`: a list with one Seurat object per LGG sample directory,
# annotated with the sample-level metadata and named after the directory.

# List the directories containing the sample data (one sub-directory per sample)
sample_dirs <- list.dirs("./in/LGG/count/", recursive = FALSE)

# Read metadata file
metadata <- read.delim("./in/LGG/Metadata/metadata.tsv", sep = "\t", header = TRUE)

# One slot per sample directory, allocated up front
data_LGG <- vector("list", length(sample_dirs))

# seq_along() yields no iterations for an empty listing, unlike 1:length(x)
for (i in seq_along(sample_dirs)) {
    # Get sample ID from directory name
    sample_id <- basename(sample_dirs[i])

    # Construct path to RDS file
    # NOTE(review): assumes the library id is the sample id with "SCPCS"
    # replaced by "SCPCL" - confirm against the directory contents.
    rds_file <- file.path(sample_dirs[i], paste0(gsub("SCPCS", "SCPCL", sample_id), "_unfiltered.rds"))

    # Read the RDS file and get counts
    sce_obj <- readRDS(rds_file)
    counts_matrix <- counts(sce_obj)

    # Create Seurat object from counts; min.cells / min.features apply the
    # initial filtering.
    seurat_obj <- CreateSeuratObject(counts = counts_matrix,
                                    project = sample_id,
                                    min.cells = 3,
                                    min.features = 200)

    # Print object info
    print(seurat_obj)
    print(paste0("Class of object: ", class(seurat_obj)))

    # Get metadata for this sample; %in% never returns NA, so rows with a
    # missing id cannot leak into the selection.
    sample_metadata <- metadata[metadata$scpca_sample_id %in% sample_id, ]
    if (nrow(sample_metadata) == 0) {
        stop("No metadata row found for sample ", sample_id, call. = FALSE)
    }

    # Add metadata columns to Seurat object
    seurat_obj$sample_name <- sample_id
    seurat_obj$diagnosis <- sample_metadata$diagnosis
    seurat_obj$age <- sample_metadata$age
    seurat_obj$sex <- sample_metadata$sex
    seurat_obj$tissue_location <- sample_metadata$tissue_location

    # Add to list
    data_LGG[[i]] <- seurat_obj
}

# Derive list names from the directory names: drop a "Sample_" prefix and a
# trailing "_<digits>" suffix when present.
sample_names <- basename(sample_dirs)
sample_names <- gsub("Sample_", "", x = sample_names)
sample_names <- gsub("_\\d+$", "", sample_names, perl = TRUE)

names(data_LGG) <- sample_names

# Print final list of Seurat objects
print("Final list of Seurat objects:")
for (i in seq_along(data_LGG)) {
    print(paste0("Sample ", names(data_LGG)[i], ":"))
    print(data_LGG[[i]])
}

An object of class Seurat 
28211 features across 3741 samples within 1 assay 
Active assay: RNA (28211 features, 0 variable features)
 1 layer present: counts
[1] "Class of object: Seurat"
An object of class Seurat 
26159 features across 2437 samples within 1 assay 
Active assay: RNA (26159 features, 0 variable features)
 1 layer present: counts
[1] "Class of object: Seurat"
An object of class Seurat 
23183 features across 5582 samples within 1 assay 
Active assay: RNA (23183 features, 0 variable features)
 1 layer present: counts
[1] "Class of object: Seurat"
An object of class Seurat 
28027 features across 4230 samples within 1 assay 
Active assay: RNA (28027 features, 0 variable features)
 1 layer present: counts
[1] "Class of object: Seurat"
An object of class Seurat 
25396 features across 4902 samples within 1 assay 
Active assay: RNA (25396 features, 0 variable features)
 1 layer present: counts
[1] "Class of object: Seurat"
An object of class Seurat 
21576 features across 1117 s

In [10]:
# Choose which Seurat objects go into `data`. Only the HGG list is used;
# the combined HGG + LGG variant is kept commented out for reference.
#data <- c(data_HGG, data_LGG)
#names(data)
data <- data_HGG

In [11]:
# Create output directory if it doesn't exist
dir.create("out", showWarnings = FALSE)

# Save each Seurat object as RDS file
for(name in names(data)) {
    sample_name <- name
    output_file <- file.path("out", paste0(sample_name, ".rds"))
    saveRDS(data[[name]], file = output_file)
}


In [1]:
library(Seurat)

Loading required package: SeuratObject

Loading required package: sp

'SeuratObject' was built with package 'Matrix' 1.7.0 but the current
version is 1.7.1; it is recomended that you reinstall 'SeuratObject' as
the ABI for 'Matrix' may have changed


Attaching package: 'SeuratObject'


The following objects are masked from 'package:base':

    intersect, t




In [12]:
# Define the base directory containing the data
base_dir <- "./in/GSE271379_RAW"

# List of sample prefixes
samples <- c("GSM8375832_GBM1_L", 
             "GSM8375833_GBM3_Y", 
             "GSM8375834_GBM4_W", 
             "GSM8375835_GBM5_L", 
             "GSM8375836_GBM6_X")

# Initialize an empty list to store Seurat objects
seurat_list <- list()

# Loop through each sample and read data
for (sample in samples) {
  # Construct the full path for the sample
  sample_path <- file.path(base_dir, sample)
  print(sample_path)
  
  # Read the 10X data
  data <- Read10X(data.dir = sample_path)
  
  # Create a Seurat object
  seurat_obj <- CreateSeuratObject(counts = data, project = sample)

  seurat_obj$sample_name <- sample
  seurat_obj$diagnosis <- "Glioblastoma"
  seurat_obj$age <- "NA"
  seurat_obj$sex <- "NA"
  seurat_obj$tissue_location <- "NA"
  
  print(unique(seurat_obj$diagnosis))
  # Store the Seurat object in the list
  seurat_list[[sample]] <- seurat_obj
}

names(seurat_list) <- samples

# Print summary of the list
print(seurat_list)


[1] "./in/GSE271379_RAW/GSM8375832_GBM1_L"


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


[1] "Glioblastoma"
[1] "./in/GSE271379_RAW/GSM8375833_GBM3_Y"


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


[1] "Glioblastoma"
[1] "./in/GSE271379_RAW/GSM8375834_GBM4_W"


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


[1] "Glioblastoma"
[1] "./in/GSE271379_RAW/GSM8375835_GBM5_L"


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


[1] "Glioblastoma"
[1] "./in/GSE271379_RAW/GSM8375836_GBM6_X"


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


[1] "Glioblastoma"
$GSM8375832_GBM1_L
An object of class Seurat 
33694 features across 5148 samples within 1 assay 
Active assay: RNA (33694 features, 0 variable features)
 1 layer present: counts

$GSM8375833_GBM3_Y
An object of class Seurat 
33694 features across 11473 samples within 1 assay 
Active assay: RNA (33694 features, 0 variable features)
 1 layer present: counts

$GSM8375834_GBM4_W
An object of class Seurat 
33694 features across 14451 samples within 1 assay 
Active assay: RNA (33694 features, 0 variable features)
 1 layer present: counts

$GSM8375835_GBM5_L
An object of class Seurat 
33694 features across 5919 samples within 1 assay 
Active assay: RNA (33694 features, 0 variable features)
 1 layer present: counts

$GSM8375836_GBM6_X
An object of class Seurat 
33694 features across 4337 samples within 1 assay 
Active assay: RNA (33694 features, 0 variable features)
 1 layer present: counts



In [13]:
# Save each Seurat object as RDS file
for(i in 1:length(seurat_list)) {
    sample_name <- names(seurat_list)[i]
    output_file <- file.path("out", paste0(sample_name, ".rds"))
    print(unique(seurat_list[[i]]@meta.data$diagnosis))
    saveRDS(seurat_list[[i]], file = output_file)
    cat("---------------------------------\n")
    cat(sample_name, nrow(seurat_list[[i]]), ncol(seurat_list[[i]]))
}

[1] "Glioblastoma"
---------------------------------
GSM8375832_GBM1_L 33694 5148[1] "Glioblastoma"
---------------------------------
GSM8375833_GBM3_Y 33694 11473[1] "Glioblastoma"
---------------------------------
GSM8375834_GBM4_W 33694 14451[1] "Glioblastoma"
---------------------------------
GSM8375835_GBM5_L 33694 5919[1] "Glioblastoma"
---------------------------------
GSM8375836_GBM6_X 33694 4337

In [20]:
# Show the element names of `seurat_list`
names(seurat_list)