In [None]:
%%bash
# Run an R script (fed to R on stdin through a quoted here-document, so the
# shell performs no expansion inside it) that exports a Seurat object to
# plain files: metadata CSV, MatrixMarket counts, and gene / barcode name lists.
R --vanilla << 'EOF'
library(qs)
library(Seurat)
library(Matrix)

# Common prefix shared by the input object and every exported file.
prefix <- "/home/gdallagl/myworkdir/XDP/data/_old/XDP_striatum_RNA_final_051925"

# Read the Seurat object
seurat <- qread(paste0(prefix, ".qs"))

# Extract and save just the per-cell metadata; row names are kept so the
# first CSV column can be used as the index when reading it back.
metadata <- seurat@meta.data
write.csv(metadata, paste0(prefix, "_metadata.csv"), row.names = TRUE)

# Extract the "counts" layer of the RNA assay
counts <- GetAssayData(seurat, assay = "RNA", layer = "counts")

# Save the matrix in sparse MatrixMarket format, plus its row names
# (genes file) and column names (barcodes file), one name per line.
writeMM(counts, paste0(prefix, "_counts.mtx"))
write.table(rownames(counts),
            paste0(prefix, "_genes.tsv"),
            quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(colnames(counts),
            paste0(prefix, "_barcodes.tsv"),
            quote = FALSE, row.names = FALSE, col.names = FALSE)
EOF

In [None]:
import scanpy as sc
import pandas as pd
from scipy.io import mmread

# Paths: every file exported by the R cell shares one prefix.
prefix = "/home/gdallagl/myworkdir/XDP/data/_old/XDP_striatum_RNA_final_051925"
counts_mtx = f"{prefix}_counts.mtx"
genes_tsv = f"{prefix}_genes.tsv"
barcodes_tsv = f"{prefix}_barcodes.tsv"
metadata_csv = f"{prefix}_metadata.csv"

# Load counts and transpose so rows line up with the barcode list and
# columns with the gene list (cells x genes).
X = mmread(counts_mtx).T.tocsr()

# Load gene and cell names as raw strings, one name per line.
# keep_default_na=False stops pandas from turning names such as "NA",
# "nan" or "NULL" into missing values; sep="\t" stops a comma inside a
# name from being treated as a column separator.
name_reader_kwargs = dict(header=None, sep="\t", dtype=str, keep_default_na=False)
genes = pd.read_csv(genes_tsv, **name_reader_kwargs)[0].values
cells = pd.read_csv(barcodes_tsv, **name_reader_kwargs)[0].values

# Fail early if the matrix and the name lists disagree; otherwise the
# labels would be attached to the wrong axis or silently misaligned.
if X.shape != (len(cells), len(genes)):
    raise ValueError(
        f"Count matrix has shape {X.shape}, expected "
        f"({len(cells)} cells, {len(genes)} genes)"
    )

# Create AnnData
adata = sc.AnnData(X=X)
adata.var_names = genes
adata.obs_names = cells

# Load metadata (first CSV column holds the cell barcodes written by R).
metadata = pd.read_csv(metadata_csv, index_col=0)

# Every barcode must have a metadata row before reordering to matrix order.
missing_cells = adata.obs_names.difference(metadata.index)
if len(missing_cells) > 0:
    raise KeyError(
        f"{len(missing_cells)} barcodes have no metadata row, "
        f"e.g. {list(missing_cells[:5])}"
    )

# Attach metadata, reordered to match the row order of the count matrix.
adata.obs = metadata.loc[adata.obs_names]

# Save
adata.write_h5ad(f"{prefix}_raw.h5ad")


In [None]:
# Quick look at the distinct values of the donor-level metadata columns.
# A notebook cell only displays its final expression, so each intermediate
# result is printed explicitly instead of being left as a bare expression.
columns_to_inspect = [
    "donor_id",
    "region",
    "repeat_length",
    "age_of_onset",
    "age_of_death",
    "disease_duration",
    "immediate.cause.of.death",
    "infection_related_death",
    "sex",
    "condition",
]
for column in columns_to_inspect:
    print(f"{column}: {adata.obs[column].unique().tolist()}")

# All available metadata columns.
print(adata.obs.columns.tolist())

# Number of distinct donors for each combination of
# infection_related_death (rows) and condition (columns);
# combinations that never occur are shown as 0.
table = (
    adata.obs
    .groupby(["infection_related_death", "condition"])["donor_id"]
    .nunique()
    .unstack(fill_value=0)
)

table