# Step 0 - prepare your data
Prepare CellphoneDB inputs starting from a Seurat object.

In [None]:

library(Seurat)
library(SeuratObject)
library(Matrix)

## 1. Load seurat object
The Seurat object contains counts that have been normalized (per cell) and log-transformed. If your data are raw counts, please normalize them accordingly.

# E10.5

In [None]:
# Read the E10.5 atlas; the path is resolved against the current working directory.
Atlas <- readRDS(file = "Atlashumanized_E10.5.Rds")

In [None]:
# Run Seurat's normalisation with its default settings, without progress output.
so <- NormalizeData(object = Atlas, verbose = FALSE)

In [None]:
# Print the object summary to check what was loaded.
so

## 2. Write gene expression in mtx format

In [None]:
# Export the normalised (NOT scaled) values from the RNA assay's `data` slot
# in MatrixMarket format.
norm_expr <- so@assays$RNA@data
writeMM(
  obj = norm_expr,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E10.5/matrix.mtx"
)
# Row names (genes) and column names (cells) of that matrix, one per line.
write(
  x = rownames(norm_expr),
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E10.5/features.tsv"
)
write(
  x = colnames(norm_expr),
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E10.5/barcodes.tsv"
)

## 3. Generate your meta
In this example, our input is a Seurat object containing the cluster/cell-type information in `so@meta.data$CellType`.

The object may also carry `lineage` metadata, which would be used for the hierarchical (per-lineage) DEGs approach described below.

In [None]:
# Number of cells per cell-type label (sanity check before exporting).
table(so@meta.data$CellType)

In [None]:
# Build the two-column meta table: cell barcode first, cell type second.
so@meta.data$Cell <- rownames(so@meta.data)
df <- so@meta.data[, c("Cell", "CellType")]
# Write next to the other E10.5 outputs (matrix.mtx, features.tsv, barcodes.tsv,
# E10.5_DEGs.tsv) instead of the working directory, mirroring how the
# E11.5-E13.5 sections store their meta files.
write.table(
  df,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E10.5/E10.5_meta.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

## 4. Compute DEGs (optional)

Use Seurat `FindAllMarkers` to compute differentially expressed genes and extract the corresponding data frame `DEGs`.
There are two options you may be interested in:
1. Identify DEGs for each cell type (compare cell type vs rest, most likely option) 
2. Identify DEGs for each cell type using a per-lineage hierarchical approach (compare cell type vs rest in the lineage, as in the endometrium paper, Garcia-Alonso et al. 2021)

In the endometrium paper (Garcia-Alonso et al. 2021) we were interested in the differences within the stromal and epithelial lineages, rather than the commonalities (for example, what is specific to epithelial cells in the glands compared to epithelial cells in the lumen). The reason is that epithelial and stromal subtypes vary in space and type, so we want to extract the subtle differences within the lineage to better understand their differential location and biological role.


In [None]:
# Use the annotated cell type as the active identity for the marker search.
Idents(so) <- so$CellType

In [None]:
# OPTION 1 - markers for every cell type.
# Test 'LR', positive markers only, fixed seed.
DEGs <- FindAllMarkers(
  object = so,
  test.use = "LR",
  only.pos = TRUE,
  min.pct = 0.1,
  logfc.threshold = 0.2,
  return.thresh = 0.05,
  random.seed = 1,
  verbose = FALSE
)

In [None]:
# Is BMP7 among the genes of the RNA assay's count matrix?
"BMP7" %in% rownames(so@assays$RNA@counts)

In [None]:
# Show the full marker table.
DEGs

In [None]:
# Keep markers with adjusted p-value < 0.05 and avg_log2FC > 0.1, and put the
# columns in export order: 1st column = cluster; 2nd column = gene.
fDEGs <- subset(
  DEGs,
  subset = p_val_adj < 0.05 & avg_log2FC > 0.1,
  select = c(cluster, gene, p_val_adj, p_val, avg_log2FC, pct.1, pct.2)
)
write.table(
  fDEGs,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E10.5/E10.5_DEGs.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

In [None]:
# Preview the first rows of the exported table.
head(fDEGs)

In [None]:
# Look BMP7 up in the `gene` column rather than in the row names.
# FindAllMarkers makes the row names unique, so a gene reported for several
# clusters gets a suffix on later rows. After filtering, the un-suffixed row
# may be gone and a row-name lookup would wrongly return FALSE.
"BMP7" %in% fDEGs$gene

# E11.5

In [None]:
# Read the E11.5 atlas; the path is resolved against the current working directory.
Atlas <- readRDS(file = "Atlashumanized_E11.5.Rds")

In [None]:
# Use the DevTP metadata column as identity so cells can be selected by it.
Idents(Atlas) <- Atlas@meta.data$DevTP

In [None]:
# Keep only the cells whose DevTP identity is E11.5.
so <- subset(Atlas, idents = c("E11.5"))

In [None]:
# Normalise the E11.5 subset. Normalising `Atlas` here would overwrite `so`
# with the full object and silently discard the subsetting step above.
so <- NormalizeData(object = so)

In [None]:
# Print the object summary to check what will be exported.
so

## 2. Write gene expression in mtx format

In [None]:
# Save normalised counts - NOT scaled!
writeMM(so@assays$RNA@data, file = '/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E11.5/matrix.mtx')
# save gene and cell names
write(x = rownames(so@assays$RNA@data), file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E11.5/features.tsv")
write(x = colnames(so@assays$RNA@data), file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E11.5/barcodes.tsv")

## 3. Generate your meta
In this example, our input is a Seurat object containing the cluster/cell-type information in `so@meta.data$CellType`.

The object may also carry `lineage` metadata, which would be used for the hierarchical (per-lineage) DEGs approach described below.

In [None]:
# Number of cells per cell-type label (sanity check before exporting).
table(so@meta.data$CellType)

In [None]:
# Copy the metadata row names into an explicit `Cell` column.
so@meta.data$Cell <- rownames(so@meta.data)
# Two-column meta table: Cell first, CellType second.
df <- so@meta.data[, c("Cell", "CellType")]
write.table(
  x = df,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E11.5/E11.5_meta.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

## 4. Compute DEGs (optional)

Use Seurat `FindAllMarkers` to compute differentially expressed genes and extract the corresponding data frame `DEGs`.
There are two options you may be interested in:
1. Identify DEGs for each cell type (compare cell type vs rest, most likely option) 
2. Identify DEGs for each cell type using a per-lineage hierarchical approach (compare cell type vs rest in the lineage, as in the endometrium paper, Garcia-Alonso et al. 2021)

In the endometrium paper (Garcia-Alonso et al. 2021) we were interested in the differences within the stromal and epithelial lineages, rather than the commonalities (for example, what is specific to epithelial cells in the glands compared to epithelial cells in the lumen). The reason is that epithelial and stromal subtypes vary in space and type, so we want to extract the subtle differences within the lineage to better understand their differential location and biological role.


In [None]:
# Use the annotated cell type as the active identity for the marker search.
Idents(so) <- so$CellType

In [None]:
# OPTION 1 - markers for every cell type.
# Test 'LR', positive markers only, fixed seed.
DEGs <- FindAllMarkers(
  object = so,
  test.use = "LR",
  only.pos = TRUE,
  min.pct = 0.1,
  logfc.threshold = 0.2,
  return.thresh = 0.05,
  random.seed = 1,
  verbose = FALSE
)

In [None]:
# Is BMP7 among the genes of the RNA assay's count matrix?
"BMP7" %in% rownames(so@assays$RNA@counts)

In [None]:
# Keep markers with adjusted p-value < 0.05 and avg_log2FC > 0.1, and put the
# columns in export order: 1st column = cluster; 2nd column = gene.
fDEGs <- subset(
  DEGs,
  subset = p_val_adj < 0.05 & avg_log2FC > 0.1,
  select = c(cluster, gene, p_val_adj, p_val, avg_log2FC, pct.1, pct.2)
)
write.table(
  x = fDEGs,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E11.5/E11.5_DEGs.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

In [None]:
# Preview the first rows of the exported table.
head(fDEGs)

# E12.5

In [None]:
# Read the E12.5 atlas; the path is resolved against the current working directory.
Atlas <- readRDS(file = "Atlashumanized_E12.5.Rds")

In [None]:
# Use the DevTP metadata column as identity so cells can be selected by it.
Idents(Atlas) <- Atlas@meta.data$DevTP

In [None]:
# Keep only the cells whose DevTP identity is E12.5.
so <- subset(Atlas, idents = c("E12.5"))

In [None]:
# Normalise the E12.5 subset. Normalising `Atlas` here would overwrite `so`
# with the full object and silently discard the subsetting step above.
so <- NormalizeData(object = so)

In [None]:
# Print the object summary to check what will be exported.
so

## 2. Write gene expression in mtx format

In [None]:
# Export the normalised (NOT scaled) values from the RNA assay's `data` slot
# in MatrixMarket format.
norm_expr <- so@assays$RNA@data
writeMM(
  obj = norm_expr,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E12.5/matrix.mtx"
)
# Row names (genes) and column names (cells) of that matrix, one per line.
write(
  x = rownames(norm_expr),
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E12.5/features.tsv"
)
write(
  x = colnames(norm_expr),
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E12.5/barcodes.tsv"
)

## 3. Generate your meta
In this example, our input is a Seurat object containing the cluster/cell-type information in `so@meta.data$CellType`.

The object may also carry `lineage` metadata, which would be used for the hierarchical (per-lineage) DEGs approach described below.

In [None]:
# Number of cells per cell-type label (sanity check before exporting).
table(so@meta.data$CellType)

In [None]:
# Copy the metadata row names into an explicit `Cell` column.
so@meta.data$Cell <- rownames(so@meta.data)
# Two-column meta table: Cell first, CellType second.
df <- so@meta.data[, c("Cell", "CellType")]
write.table(
  x = df,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E12.5/E12.5_meta.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

## 4. Compute DEGs (optional)

Use Seurat `FindAllMarkers` to compute differentially expressed genes and extract the corresponding data frame `DEGs`.
There are two options you may be interested in:
1. Identify DEGs for each cell type (compare cell type vs rest, most likely option) 
2. Identify DEGs for each cell type using a per-lineage hierarchical approach (compare cell type vs rest in the lineage, as in the endometrium paper, Garcia-Alonso et al. 2021)

In the endometrium paper (Garcia-Alonso et al. 2021) we were interested in the differences within the stromal and epithelial lineages, rather than the commonalities (for example, what is specific to epithelial cells in the glands compared to epithelial cells in the lumen). The reason is that epithelial and stromal subtypes vary in space and type, so we want to extract the subtle differences within the lineage to better understand their differential location and biological role.


In [None]:
# Use the annotated cell type as the active identity for the marker search.
Idents(so) <- so$CellType

In [None]:
# OPTION 1 - markers for every cell type.
# Test 'LR', positive markers only, fixed seed.
DEGs <- FindAllMarkers(
  object = so,
  test.use = "LR",
  only.pos = TRUE,
  min.pct = 0.1,
  logfc.threshold = 0.2,
  return.thresh = 0.05,
  random.seed = 1,
  verbose = FALSE
)

In [None]:
# Is BMP7 among the genes of the RNA assay's count matrix?
"BMP7" %in% rownames(so@assays$RNA@counts)

In [None]:
# Keep markers with adjusted p-value < 0.05 and avg_log2FC > 0.1, and put the
# columns in export order: 1st column = cluster; 2nd column = gene.
fDEGs <- subset(
  DEGs,
  subset = p_val_adj < 0.05 & avg_log2FC > 0.1,
  select = c(cluster, gene, p_val_adj, p_val, avg_log2FC, pct.1, pct.2)
)
write.table(
  x = fDEGs,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E12.5/E12.5_DEGs.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

In [None]:
# Preview the first rows of the exported table.
head(fDEGs)

# E13.5

In [None]:
# Read the E13.5 atlas from its absolute location.
Atlas <- readRDS(file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/Atlashumanized_E13.5.Rds")

In [None]:
# Use the DevTP metadata column as identity so cells can be selected by it.
Idents(Atlas) <- Atlas@meta.data$DevTP

In [None]:
# Keep only the E13.5 cells. This section previously selected 'E12.5', a
# leftover from the preceding section.
# NOTE(review): assumes DevTP labels this stage 'E13.5', following the naming
# used for the earlier stages - confirm against table(Atlas@meta.data$DevTP).
so <- subset(Atlas, idents = c("E13.5"))

In [None]:
# Normalise the E13.5 subset. Normalising `Atlas` here would overwrite `so`
# with the full object and silently discard the subsetting step above.
so <- NormalizeData(object = so)

In [None]:
# Print the object summary to check what will be exported.
so

## 2. Write gene expression in mtx format

In [None]:
# Export the normalised (NOT scaled) values from the RNA assay's `data` slot
# in MatrixMarket format.
norm_expr <- so@assays$RNA@data
writeMM(
  obj = norm_expr,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E13.5/matrix.mtx"
)
# Row names (genes) and column names (cells) of that matrix, one per line.
write(
  x = rownames(norm_expr),
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E13.5/features.tsv"
)
write(
  x = colnames(norm_expr),
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E13.5/barcodes.tsv"
)

## 3. Generate your meta
In this example, our input is a Seurat object containing the cluster/cell-type information in `so@meta.data$CellType`.

The object may also carry `lineage` metadata, which would be used for the hierarchical (per-lineage) DEGs approach described below.

In [None]:
# Number of cells per cell-type label (sanity check before exporting).
table(so@meta.data$CellType)

In [None]:
# Copy the metadata row names into an explicit `Cell` column.
so@meta.data$Cell <- rownames(so@meta.data)
# Two-column meta table: Cell first, CellType second.
df <- so@meta.data[, c("Cell", "CellType")]
write.table(
  x = df,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E13.5/E13.5_meta.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

## 4. Compute DEGs (optional)

Use Seurat `FindAllMarkers` to compute differentially expressed genes and extract the corresponding data frame `DEGs`.
There are two options you may be interested in:
1. Identify DEGs for each cell type (compare cell type vs rest, most likely option) 
2. Identify DEGs for each cell type using a per-lineage hierarchical approach (compare cell type vs rest in the lineage, as in the endometrium paper, Garcia-Alonso et al. 2021)

In the endometrium paper (Garcia-Alonso et al. 2021) we were interested in the differences within the stromal and epithelial lineages, rather than the commonalities (for example, what is specific to epithelial cells in the glands compared to epithelial cells in the lumen). The reason is that epithelial and stromal subtypes vary in space and type, so we want to extract the subtle differences within the lineage to better understand their differential location and biological role.


In [None]:
# Use the annotated cell type as the active identity for the marker search.
Idents(so) <- so$CellType

In [None]:
# OPTION 1 - markers for every cell type.
# Test 'LR', positive markers only, fixed seed.
DEGs <- FindAllMarkers(
  object = so,
  test.use = "LR",
  only.pos = TRUE,
  min.pct = 0.1,
  logfc.threshold = 0.2,
  return.thresh = 0.05,
  random.seed = 1,
  verbose = FALSE
)

In [None]:
# Is BMP7 among the genes of the RNA assay's count matrix?
"BMP7" %in% rownames(so@assays$RNA@counts)

In [None]:
# Keep markers with adjusted p-value < 0.05 and avg_log2FC > 0.1, and put the
# columns in export order: 1st column = cluster; 2nd column = gene.
fDEGs <- subset(
  DEGs,
  subset = p_val_adj < 0.05 & avg_log2FC > 0.1,
  select = c(cluster, gene, p_val_adj, p_val, avg_log2FC, pct.1, pct.2)
)
write.table(
  x = fDEGs,
  file = "/home/jovyan/researcher_home/Documents/Tom/Atlas/data/Cell-cell_communication/E13.5/E13.5_DEGs.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

In [None]:
# Preview the first rows of the exported table.
head(fDEGs)