# 06 Signatures RPE

This notebook details the procedure for generating the Runx3 signatures.

## Initialization

In [1]:
library(tidyr)
library(dplyr)

# Set working directory
# NOTE(review): absolute, machine-specific path. The relative paths used by
# read.csv() and write.csv() in this notebook are resolved against it, so it
# must be adjusted when running elsewhere.
setwd("/home/dalbao/2023-012-Runx3mutD8scRNA/AlbaoRunx3Manuscript/single_cell/06_signatures")

# Import gVr differential expression results
# The notebook relies on the columns group, names, logfoldchanges and pvals_adj.
degs <- read.csv("../../csv/04_25-12-07-22-56_RPE_gVr-diffExp.csv")


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




## Differential Expression Statistics

In [2]:
# Preview the first six rows of the differential expression table
head(degs, n = 6L)

Unnamed: 0_level_0,group,names,scores,logfoldchanges,pvals,pvals_adj
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
1,Base,Ly6c2,34.71459,0.8801856,4.745545999999999e-264,8.781633e-260
2,Base,Cd8b1,32.65403,0.4494108,7.0239e-234,6.498863999999999e-230
3,Base,Itgb1,30.9582,1.1651016,1.970419e-210,9.115651e-207
4,Base,Ptpn18,30.363,0.6253648,1.692332e-202,6.263321e-199
5,Base,Zeb2,28.66868,1.620922,9.378099000000001e-181,2.4791670000000003e-177
6,Base,Sp100,27.27088,0.544814,9.397796999999999e-164,1.932291e-160


In [3]:
# Count significant (adjusted p-value < 0.05) upregulated genes per group:
# first with any positive log fold change, then with log fold change > 1
with(degs, table(group[pvals_adj < 0.05 & logfoldchanges > 0]))
with(degs, table(group[pvals_adj < 0.05 & logfoldchanges > 1]))


 Base    d5    d8 Naive  Null    WT 
  609  5618  1151  1843  1038  1620 


 Base    d5    d8 Naive  Null    WT 
    6  1328    31   841    71   228 

In [4]:
# Count significant (adjusted p-value < 0.05) downregulated genes per group:
# first with any negative log fold change, then with log fold change < -1
with(degs, table(group[pvals_adj < 0.05 & logfoldchanges < 0]))
with(degs, table(group[pvals_adj < 0.05 & logfoldchanges < -1]))


 Base    d5    d8 Naive  Null    WT 
 1900   586   294  1736  2522  2073 


 Base    d5    d8 Naive  Null    WT 
   91   105     8   716   256   225 

## Define Signatures

In [5]:
# Significant genes: adjusted p-value < 0.05 and absolute log fold change > 1
sig_degs <- filter(degs, pvals_adj < 0.05, abs(logfoldchanges) > 1)

# Split the significant genes by direction of change (all groups kept together)
upregulated_genes <- filter(sig_degs, logfoldchanges > 0)
downregulated_genes <- filter(sig_degs, logfoldchanges < 0)

In [6]:
# Keep only the rows whose gene is not listed for any other group.
#   df: data frame with a `group` column and a `names` (gene) column.
# Returns the retained rows with the grouping removed.
get_unique_genes <- function(df) {
    by_group <- group_by(df, group)

    # Within each group, drop every gene that also occurs in a row of `df`
    # belonging to a different group
    exclusive <- filter(
        by_group,
        !(names %in% df$names[df$group != first(group)])
    )

    ungroup(exclusive)
}

# Build the up- and down-regulated signatures from group-exclusive genes
up_sig <- upregulated_genes %>% get_unique_genes()
dn_sig <- downregulated_genes %>% get_unique_genes()

In [7]:
# Number of group-exclusive genes per group, upregulated then downregulated
table(up_sig[["group"]])
table(dn_sig[["group"]])


 Base    d5    d8 Naive  Null    WT 
    3  1200     7   713    61   168 


 Base    d5 Naive  Null    WT 
    8    60   595   139   151 

In [8]:
# Upregulated signatures: keep the WT and Null groups, rename them to
# Albao_Runx3OE and Albao_Runx3KD, and keep two columns:
# gs_name (signature) and gene_symbol (gene)
up_sig <- up_sig %>%
    filter(group %in% c("WT", "Null")) %>%
    mutate(group = recode(group, "WT" = "Albao_Runx3OE", "Null" = "Albao_Runx3KD")) %>%
    select(gs_name = group, gene_symbol = names)

# Downregulated signatures: same steps, with the "_Down" signature names
dn_sig <- dn_sig %>%
    filter(group %in% c("WT", "Null")) %>%
    mutate(group = recode(group, "WT" = "Albao_Runx3OE_Down", "Null" = "Albao_Runx3KD_Down")) %>%
    select(gs_name = group, gene_symbol = names)

# Stack both directions into a single long table
all_sig <- bind_rows(up_sig, dn_sig)

# Number of genes in each signature
table(all_sig[["gs_name"]])

# Save as CSV without row names and without quoting
write.csv(
    all_sig,
    file = "../../signatures/RPE_signatures.csv",
    row.names = FALSE,
    quote = FALSE
)


     Albao_Runx3KD Albao_Runx3KD_Down      Albao_Runx3OE Albao_Runx3OE_Down 
                61                139                168                151 