# Annotating GRN

The goal here is to create a fully annotated network that covers the full breadth of information that we are integrating with the data. This will simplify future analysis and allow us to more easily interact with the datasets.

In [1]:
library(pacman)
p_load(tidyverse, igraph, annotables)

In [2]:
# Output directory for the annotated network files. The trailing slash is
# kept because file paths are built later with paste0(out, ...).
out <- "./MLL-AF4/"
# Create the directory if needed; stay silent when it already exists.
dir.create(out, showWarnings = FALSE)

## Importing network

In [3]:
# Network
# Read every edge column as text: identifiers that are type-guessed as
# numbers can be rendered in scientific notation (e.g. 1e+05) once they are
# converted to vertex names, and would then no longer match the node table.
edges <- suppressMessages(read_tsv(
    "../GRN_Creation/Networks/SEM_MLL-AF4-Network_edges.txt",
    col_types = cols(.default = col_character())
))
# Node table: both columns are parsed as character; keep only the first row
# for each Entrez ID.
nodes <- suppressMessages(read_tsv(
    "../GRN_Creation/Networks/SEM_MLL-AF4-Network_nodes.txt",
    col_types = "cc"
)) %>%
    distinct(entrez, .keep_all = TRUE)


### Setting up functions

In [4]:
#' Degree centrality of every vertex in a graph
#'
#' @param g An igraph graph. Vertex names are used as identifiers, so the
#'   graph is expected to carry a `name` vertex attribute.
#' @return A data.frame with columns `entrez` (vertex name) and `degree`
#'   (result of `igraph::degree()` with its default arguments), sorted by
#'   decreasing degree.
gene_degree <- function(g) {
    # Namespace-qualified calls replace require(): require() merely returns
    # FALSE when a package is missing and attaches packages as a side effect.
    deg <- igraph::degree(g)
    res <- data.frame(
        entrez = names(deg),
        degree = deg,
        stringsAsFactors = FALSE
    )
    dplyr::arrange(res, dplyr::desc(degree))
}

In [5]:
#' Stress centrality of every vertex in a graph
#'
#' @param g An igraph graph.
#' @return A data.frame with columns `entrez` (column names of the dense
#'   adjacency matrix of `g`) and `stress` (values returned by
#'   `sna::stresscent()` for that matrix), one row per vertex.
gene_stress <- function(g) {
    # sna is called through its namespace instead of require(sna): attaching
    # sna (and its dependency network) masks several igraph functions for
    # the rest of the session.
    adj <- igraph::as_adjacency_matrix(g, names = TRUE, sparse = FALSE)
    data.frame(
        entrez = colnames(adj),
        stress = sna::stresscent(adj),
        stringsAsFactors = FALSE
    )
}

## Annotate centralities

In [6]:
# Build the directed network once and reuse it for both centrality measures.
grn <- graph_from_data_frame(edges, directed = TRUE, vertices = nodes)

# Degree centrality
centralities <- gene_degree(grn)

nodes_ann <- nodes %>%
    left_join(centralities, by = "entrez")

# Stress centrality
centralities <- gene_stress(grn)

# Convert to a plain data.frame after the last centrality join.
nodes_ann <- nodes_ann %>%
    left_join(centralities, by = "entrez") %>%
    data.frame()

Loading required package: sna

Loading required package: statnet.common


Attaching package: ‘statnet.common’


The following object is masked from ‘package:base’:

    order


Loading required package: network

network: Classes for Relational Data
Version 1.16.0 created on 2019-11-30.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
                    Mark S. Handcock, University of California -- Los Angeles
                    David R. Hunter, Penn State University
                    Martina Morris, University of Washington
                    Skye Bender-deMoll, University of Washington
 For citation information, type citation("network").
 Type help("network-package") to get started.



Attaching package: ‘network’


The following objects are masked from ‘package:igraph’:

    %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
    get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
    is.directed, list.edge.attributes, list.vertex

In [7]:
# Preview the node table after the degree and stress columns were joined on.
head(nodes_ann)

Unnamed: 0_level_0,entrez,symbol,degree,stress
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>
1,1958,EGR1,464,29049
2,2355,FOSL2,217,7404
3,405,ARNT,559,7646
4,4609,MYC,1154,89851
5,6239,RREB1,319,15594
6,6772,STAT1,404,21662


## Annotate knockdown RNAseq datasets

In [8]:
#' Load one differential-expression contrast table for joining onto nodes
#'
#' @param path Path to a tab-separated contrast table that contains the
#'   columns `Geneid`, `logFC` and `FDR`.
#' @param prefix Label used to name the output value columns.
#' @return A data.frame with columns `symbol` (from `Geneid`),
#'   `<prefix>.logFC` and `<prefix>.FDR`.
read_kd_contrast <- function(path, prefix) {
    tbl <- data.frame(suppressMessages(read_tsv(path))) %>%
        select(Geneid, logFC, FDR)
    # select() fixes the column order, so positional renaming is safe here.
    names(tbl) <- c("symbol", paste0(prefix, c(".logFC", ".FDR")))
    tbl
}

# One table per RNA-seq contrast; list names identify the experiment and the
# prefix determines the column names added to the node table.
rna_dir <- "../data/RNAseq_tables/"
rna.list <- list(
    mllaf4 = read_kd_contrast(
        paste0(rna_dir, "MLLAF4_KD/contrast_SIMM_vs_SIMA6_counts.tsv"),
        "MLL.AF4"
    ),
    runx1 = read_kd_contrast(
        paste0(rna_dir, "RUNX1KD_and_UNC1999/contrast_NT_NT_vs_RUNX1KD_NT_counts.tsv"),
        "RUNX1"
    ),
    unc1999 = read_kd_contrast(
        paste0(rna_dir, "RUNX1KD_and_UNC1999/contrast_NT_NT_vs_NT_UNC_counts.tsv"),
        "UNC1999"
    ),
    epz = read_kd_contrast(
        paste0(rna_dir, "EPZ/contrast_0umEPZ_vs_2umEPZ_counts.tsv"),
        "EPZ"
    ),
    ibet = read_kd_contrast(
        paste0(rna_dir, "IBET/contrast_DMSO_vs_IBET-1HR_counts.tsv"),
        "IBET"
    )
)

In [9]:
# Left-join every RNA-seq table onto the node table by gene symbol.
# Reduce() also handles an empty rna.list (it returns nodes_ann unchanged),
# whereas 1:length(rna.list) would iterate over c(1, 0) and fail.
nodes_ann <- Reduce(
    function(acc, tbl) left_join(acc, tbl, by = "symbol"),
    rna.list,
    nodes_ann
) %>%
    # Remove exact duplicate rows.
    distinct()

In [10]:
# Preview the node table after the RNA-seq logFC/FDR columns were joined on.
head(nodes_ann)

Unnamed: 0_level_0,entrez,symbol,degree,stress,MLL.AF4.logFC,MLL.AF4.FDR,RUNX1.logFC,RUNX1.FDR,UNC1999.logFC,UNC1999.FDR,EPZ.logFC,EPZ.FDR,IBET.logFC,IBET.FDR
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1958,EGR1,464,29049,0.9345442,6.73529e-16,-1.0546494,4.56384e-09,0.45937833,0.02877195,-0.7559783,1.372324e-05,1.7365483,1.391195e-46
2,2355,FOSL2,217,7404,-0.5205269,4.7314140000000005e-25,-0.4224753,6.22177e-07,0.09406331,0.4265643,-0.2676087,0.1203066,-0.2068572,0.0008249632
3,405,ARNT,559,7646,0.2374961,0.0004145152,0.1046306,0.2771233,-0.08307452,0.476032,0.2652957,0.1605366,-0.3973892,3.360668e-06
4,4609,MYC,1154,89851,-0.2487874,4.125718e-05,-0.1012395,0.4123907,-0.87270226,1.582076e-18,-0.8124231,6.623918e-08,-1.5108634,2.265482e-127
5,6239,RREB1,319,15594,0.25788,6.073601e-06,0.1526835,0.03948804,0.21473278,0.004552598,-0.187635,0.2789407,-0.2671828,1.356396e-05
6,6772,STAT1,404,21662,0.2396495,0.0006305926,-0.1175383,0.1256426,0.1924111,0.01124953,-0.2185313,0.2135825,0.6186133,2.332276e-25


## Annotate CRISPR screen (Tzelepis et al. 2016, supplementary data)

In [11]:
# Unique, non-missing values of one column of a table. Missing values are
# dropped so that an NA is never treated as a gene in later %in% look-ups.
unique_genes <- function(tbl, col) {
    genes <- unique(tbl[[col]])
    genes[!is.na(genes)]
}

# Parse each CSV once instead of re-reading the summary file per cell line.
dropout_dir <- "../data/CRISPR_dropout_Tzelepis/CRISPR_Dropout_2016_10_18/"
dropout_universe_tbl <- suppressMessages(read_csv(paste0(dropout_dir, "GeneUniverse.csv")))
dropout_fdr10_tbl <- suppressMessages(read_csv(paste0(dropout_dir, "Summary_FDR_10.csv")))

# All genes listed in the first column of the universe file.
genes.dropout.universe <- unique_genes(dropout_universe_tbl, 1)

# Per-cell-line gene lists: columns 2-8 of the summary file, in this order.
genes.dropout.MOLM13 <- unique_genes(dropout_fdr10_tbl, 2)
genes.dropout.MV411 <- unique_genes(dropout_fdr10_tbl, 3)
genes.dropout.HL60 <- unique_genes(dropout_fdr10_tbl, 4)
genes.dropout.OCIAML2 <- unique_genes(dropout_fdr10_tbl, 5)
genes.dropout.OCIAML3 <- unique_genes(dropout_fdr10_tbl, 6)
genes.dropout.HT29 <- unique_genes(dropout_fdr10_tbl, 7)
genes.dropout.HT1080 <- unique_genes(dropout_fdr10_tbl, 8)

In [12]:
# Restrict to network genes covered by the CRISPR screen, keeping only the
# identifier columns (entrez, symbol).
in_universe <- nodes_ann$symbol %in% genes.dropout.universe
nodes.dropout <- nodes_ann[in_universe, c("entrez", "symbol")]

# Add one logical column per cell line: TRUE when the gene symbol appears in
# that cell line's dropout list.
dropout_sets <- list(
    MOLM13 = genes.dropout.MOLM13,
    MV411 = genes.dropout.MV411,
    HL60 = genes.dropout.HL60,
    OCIAML2 = genes.dropout.OCIAML2,
    OCIAML3 = genes.dropout.OCIAML3,
    HT29 = genes.dropout.HT29,
    HT1080 = genes.dropout.HT1080
)
for (cell_line in names(dropout_sets)) {
    nodes.dropout[[cell_line]] <- nodes.dropout$symbol %in% dropout_sets[[cell_line]]
}

In [13]:
# Named masks for the cell-line flags used to define the gene groups.
hit_molm13 <- nodes.dropout$MOLM13
hit_mv411 <- nodes.dropout$MV411
# TRUE when the gene drops out in neither HT29 nor HT1080.
no_ht_hit <- !(nodes.dropout$HT29 | nodes.dropout$HT1080)

# Dropout in both MOLM13 and MV411, but in neither HT line.
nodes.dropout.coreGenes <- nodes.dropout[hit_molm13 & hit_mv411 & no_ht_hit, ]
# Dropout in MV411 only.
nodes.dropout.AF4 <- nodes.dropout[!hit_molm13 & hit_mv411 & no_ht_hit, ]
# Dropout in MOLM13 only.
nodes.dropout.AF9 <- nodes.dropout[hit_molm13 & !hit_mv411 & no_ht_hit, ]
# Dropout in MOLM13 or MV411, regardless of the HT lines.
nodes.dropout.loose <- nodes.dropout[hit_molm13 | hit_mv411, ]

In [14]:
# Label every screened gene with a dropout class. Assignments run from the
# broadest group to the most specific ones, so later labels overwrite
# earlier ones.
dropout_levels <- c(
    "non-dropout",
    "non-leukemia-specific dropout",
    "AF9 specific",
    "AF4 specific",
    "AF4 & AF9"
)
screened_symbols <- nodes.dropout$symbol
dropout_label <- rep("non-dropout", nrow(nodes.dropout))
dropout_label[screened_symbols %in% nodes.dropout.loose$symbol] <- "non-leukemia-specific dropout"
dropout_label[screened_symbols %in% nodes.dropout.coreGenes$symbol] <- "AF4 & AF9"
dropout_label[screened_symbols %in% nodes.dropout.AF4$symbol] <- "AF4 specific"
dropout_label[screened_symbols %in% nodes.dropout.AF9$symbol] <- "AF9 specific"
nodes.dropout$category <- factor(dropout_label, levels = dropout_levels)

# Keep only the join key and the class, exposed as Dropout_class.
nodes.dropout <- nodes.dropout %>%
    select(symbol, Dropout_class = category)

In [15]:
# Attach the dropout class to the node table by gene symbol (nodes without a
# match get NA), then remove exact duplicate rows.
nodes_ann <- distinct(left_join(nodes_ann, nodes.dropout, by = "symbol"))

head(nodes_ann)

Unnamed: 0_level_0,entrez,symbol,degree,stress,MLL.AF4.logFC,MLL.AF4.FDR,RUNX1.logFC,RUNX1.FDR,UNC1999.logFC,UNC1999.FDR,EPZ.logFC,EPZ.FDR,IBET.logFC,IBET.FDR,Dropout_class
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
1,1958,EGR1,464,29049,0.9345442,6.73529e-16,-1.0546494,4.56384e-09,0.45937833,0.02877195,-0.7559783,1.372324e-05,1.7365483,1.391195e-46,non-dropout
2,2355,FOSL2,217,7404,-0.5205269,4.7314140000000005e-25,-0.4224753,6.22177e-07,0.09406331,0.4265643,-0.2676087,0.1203066,-0.2068572,0.0008249632,non-dropout
3,405,ARNT,559,7646,0.2374961,0.0004145152,0.1046306,0.2771233,-0.08307452,0.476032,0.2652957,0.1605366,-0.3973892,3.360668e-06,non-leukemia-specific dropout
4,4609,MYC,1154,89851,-0.2487874,4.125718e-05,-0.1012395,0.4123907,-0.87270226,1.582076e-18,-0.8124231,6.623918e-08,-1.5108634,2.265482e-127,non-leukemia-specific dropout
5,6239,RREB1,319,15594,0.25788,6.073601e-06,0.1526835,0.03948804,0.21473278,0.004552598,-0.187635,0.2789407,-0.2671828,1.356396e-05,non-dropout
6,6772,STAT1,404,21662,0.2396495,0.0006305926,-0.1175383,0.1256426,0.1924111,0.01124953,-0.2185313,0.2135825,0.6186133,2.332276e-25,non-dropout


# Graph creation and data export

In [16]:
# Directed graph of the network; nodes_ann supplies the vertex table, so its
# annotation columns are stored as vertex attributes.
g <- graph_from_data_frame(edges, directed = TRUE, vertices = nodes_ann)

In [17]:
# Save the annotated graph in GML format inside the output directory.
write_graph(
    g,
    file = paste0(out, "AggregatedGraph_MLL-AF4.gml"),
    format = "gml"
)

In [18]:
# Export the graph's edge list as a TSV file.
edge_tbl <- igraph::as_data_frame(g, what = "edges")
write_tsv(edge_tbl, paste0(out, "AggregatedGraph_MLL-AF4_edges.tsv"))

# Export the vertex table (with all annotation columns) as a TSV file.
vertex_tbl <- igraph::as_data_frame(g, what = "vertices")
write_tsv(vertex_tbl, paste0(out, "AggregatedGraph_MLL-AF4_nodes.tsv"))

In [19]:
# Print the igraph summary (vertex/edge counts and attribute list).
g

IGRAPH 38fc09e DN-- 3850 16893 -- 
+ attr: name (v/c), symbol (v/c), degree (v/n), stress (v/n),
| MLL.AF4.logFC (v/n), MLL.AF4.FDR (v/n), RUNX1.logFC (v/n), RUNX1.FDR
| (v/n), UNC1999.logFC (v/n), UNC1999.FDR (v/n), EPZ.logFC (v/n),
| EPZ.FDR (v/n), IBET.logFC (v/n), IBET.FDR (v/n), Dropout_class (v/c)
+ edges from 38fc09e (vertex names):
 [1] 1958->100       2355->10007     405 ->10007     4609->10007    
 [5] 6239->10007     6772->10007     1385->100093630 1958->100113407
 [9] 2355->100113407 2908->100113407 2969->100113407 3205->100113407
[13] 4005->100113407 4150->100113407 4211->100113407 4800->100113407
[17] 6239->100113407 1875->100128191 2309->100128191 4205->100128191
+ ... omitted several edges