In [1]:
library(tidyverse)
library(hash)
library(gprofiler2)
library(viridis)
library(VennDiagram)

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.3.5     v purrr   0.3.4
v tibble  3.1.6     v dplyr   1.0.7
v tidyr   1.1.4     v stringr 1.4.0
v readr   1.3.1     v forcats 0.4.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
"package 'hash' was built under R version 3.6.3"hash-3.0.1 provided by Decision Patterns

Loading required package: viridisLite

Attaching package: 'viridis'

The following object is masked from 'package:viridisLite':

    viridis.map

Loading required package: grid
Loading required package: futile.logger


In [2]:
# Analysis scope: only the 2018 season is processed here.
# To cover more seasons, append them to `years`; adjust the
# phenological time points in `phenos` as necessary/desired.

# Directory holding the per-comparison CSV result tables.
prefix <- "/data/projects/julia.pratt/CS1_genomeSelection/all_rootstock_comparisons/"

# Regulation directions to extract from each comparison.
dir <- c("Up", "Down")

# The first entry is the reference; the rest are compared against it.
rootstocks <- c("Ungrafted", "1103P", "3309C", "SO4")

phenos <- c("Anthesis", "Veraison", "Harvest")
years <- "2018"
tissues <- c("Leaf", "Reproductive")

In [3]:
# Filter a differential-expression table by significance and direction.
#
# Args:
#   data:              data frame with `padj` and `log2FoldChange` columns.
#   alpha:             adjusted p-value cutoff; rows are kept when padj < alpha.
#   log2FoldChangeVal: fold-change threshold. 'Up' keeps rows with
#                      log2FoldChange > threshold; 'Down' keeps rows with
#                      log2FoldChange < -threshold.
#   d:                 direction, either 'Up' or 'Down'.
#
# Returns:
#   The rows of `data` that pass both filters.
#
# Raises:
#   An error when `d` is not exactly one of 'Up' / 'Down'.
log2foldchange <- function(data, alpha, log2FoldChangeVal, d) {

    # Validate the direction before doing any work. Previously an invalid
    # value only printed a message and then failed later with an unrelated
    # "object 'data.adj' not found" error.
    if (length(d) != 1 || is.na(d) || !(d %in% c('Up', 'Down'))) {
        stop(paste(toString(d), "not a valid direction"), call. = FALSE)
    }

    # only keeps significant values
    data.sig <- data %>% filter(padj < alpha)

    # separates upregulated/downregulated
    if (d == 'Up') {
        data.adj <- data.sig %>% filter(log2FoldChange > log2FoldChangeVal)
    } else {
        data.adj <- data.sig %>% filter(log2FoldChange < -log2FoldChangeVal)
    }

    data.adj
}

In [20]:
# Build a three-set Venn diagram of the genes found for each rootstock.
#
# Args:
#   rs1103p, rs3309c, rsso4: vectors of identifiers for the 1103P, 3309C and
#                            SO4 comparisons; venn.diagram() counts the
#                            shared/private members itself.
#   meta:                    underscore-separated label for the data combo
#                            (currently unused, kept for existing callers).
#   metaformat:              human-readable title printed above the diagram.
#   filename:                NULL (default) returns the grid objects so the
#                            caller can draw them on any device, e.g. inside
#                            pdf()/dev.off(). A path makes venn.diagram()
#                            write the image file itself.
#
# Returns:
#   Whatever venn.diagram() returns: the plotting object when `filename` is
#   NULL, otherwise its file-writing status.
plotvenn <- function(rs1103p, rs3309c, rsso4, meta, metaformat, filename = NULL) {

    # most of this is just styling
    venn.diagram(
        x = list(rs1103p, rs3309c, rsso4),

        cex = 0.5,
        fontfamily = "sans",

        category.names = c("1103P", "3309C", "SO4"),
        cat.cex = 0.5,
        cat.default.pos = "outer",
        cat.dist = c(0.075, 0.075, 0.075),
        cat.fontfamily = "sans",

        main = metaformat,
        main.cex = 0.6,
        main.fontfamily = "sans",

        col = c("#1b9e77", "#7570b3", "#e6ab02"),
        fill = c(alpha("#1b9e77", 0.3), alpha("#7570b3", 0.3), alpha("#e6ab02", 0.3)),

        # Previously this read a global `f`, which forced venn.diagram() into
        # file-writing mode and made the result undrawable by the caller.
        filename = filename,
        output = TRUE,
        # The settings below only matter when `filename` is not NULL.
        resolution = 300,
        # "p" is not an accepted image type; "png" is the presumed intent.
        imagetype = "png",
        margin = 0.075,
        rotation = 1,
        height = 700,
        width = 700
    )
}

In [25]:
# Build one Venn diagram PDF per tissue/year/phenology/direction combination.
#
# For every combination, the three Ungrafted-vs-rootstock result tables are
# read, filtered with log2foldchange() (padj < 0.05, any fold change in the
# requested direction), and the surviving gene IDs (column X) are compared.
#
# Side effects:
#   stats   - filtered tables keyed by comparison name; overwritten on every
#             combination, so afterwards it holds only the last one processed.
#   saveall - filtered tables for ALL combinations, keyed by
#             "<year>_<pheno>_<tissue>_<direction>_<comparison>".
prefix <- '/data/projects/julia.pratt/CS1_genomeSelection/all_rootstock_comparisons/'
fileprefix <- "/data/projects/julia.pratt/CS1_genomeSelection/figs/go/venn/venn_genes_"
stats <- list()
saveall <- list()

# all data combos
for (t in tissues) {
    for (y in years) {
        for (p in phenos) {
            for (direction in dir) {

                meta <- paste(y, p, t, direction, sep = '_')
                metaformat <- paste(y, p, t, direction, sep = ' ')

                # Skip the first entry (the Ungrafted reference). Iterating
                # over rootstocks[-1] avoids the backwards 2:1 sequence that
                # 2:length(rootstocks) produces for a one-element vector.
                for (stock in rootstocks[-1]) {

                    # read in the data
                    rs <- paste0("Ungrafted-", stock)
                    data <- read.csv(paste0(prefix, paste(t, y, p, rs, sep = '_'), '.csv'))
                    fname <- paste(meta, rs, sep = '_')

                    # filter the data
                    data.adj <- log2foldchange(data, 0.05, 0, direction)
                    stats[[rs]] <- data.adj
                    saveall[[fname]] <- data.adj
                }

                f <- paste0(fileprefix, meta, '.pdf')
                tmp <- plotvenn(stats[['Ungrafted-1103P']][['X']],
                                stats[['Ungrafted-3309C']][['X']],
                                stats[['Ungrafted-SO4']][['X']],
                                meta, metaformat)

                # Always close the PDF device, even if drawing fails, so a
                # failed combination does not leave a dangling device open.
                pdf(f)
                tryCatch(grid.draw(tmp), finally = dev.off())
            }
        }
    }
}

In [6]:
# Number of entries in column X (the identifier column) for each
# Ungrafted-vs-rootstock table currently held in `stats`.
# `stats` is overwritten on every pass of the driver loop, so these counts
# describe only the most recently processed combination.
length(stats[['Ungrafted-1103P']][['X']])
length(stats[['Ungrafted-3309C']][['X']])
length(stats[['Ungrafted-SO4']][['X']])

In [7]:
# Same check as the previous cell: counts of column X per comparison for
# whatever combination `stats` currently holds.
length(stats[['Ungrafted-1103P']][['X']])
length(stats[['Ungrafted-3309C']][['X']])
length(stats[['Ungrafted-SO4']][['X']])

In [8]:
# Counts of column X for one specific combination (2018, Anthesis, Leaf, Up),
# looked up in `saveall`, which keeps every combination rather than only the
# last one processed.
length(saveall[['2018_Anthesis_Leaf_Up_Ungrafted-1103P']][["X"]])
length(saveall[['2018_Anthesis_Leaf_Up_Ungrafted-3309C']][["X"]])
length(saveall[['2018_Anthesis_Leaf_Up_Ungrafted-SO4']][["X"]])

In [9]:
# Same check as the previous cell: column X counts for the
# 2018 / Anthesis / Leaf / Up tables stored in `saveall`.
length(saveall[['2018_Anthesis_Leaf_Up_Ungrafted-1103P']][["X"]])
length(saveall[['2018_Anthesis_Leaf_Up_Ungrafted-3309C']][["X"]])
length(saveall[['2018_Anthesis_Leaf_Up_Ungrafted-SO4']][["X"]])

In [10]:
# Display the full filtered table for the 2018 / Anthesis / Leaf / Up
# Ungrafted-vs-1103P comparison.
saveall[['2018_Anthesis_Leaf_Up_Ungrafted-1103P']]

X,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
Vitvi01g00355,104.677577,0.7474088,0.18651238,4.007288,6.141994e-05,1.488480e-02
Vitvi01g00379,49.254348,1.2835037,0.28821314,4.453314,8.455492e-06,2.875413e-03
Vitvi01g00483,19.191186,1.9935880,0.37339862,5.339034,9.344297e-08,5.970156e-05
Vitvi01g00525,10.768849,3.3579382,0.60750768,5.527400,3.250106e-08,2.824786e-05
Vitvi01g00732,26.098861,1.8469687,0.46670564,3.957460,7.575110e-05,1.794535e-02
Vitvi01g00902,94.121029,0.7775530,0.14368297,5.411588,6.246830e-08,4.541661e-05
Vitvi01g01346,303.953771,0.5188454,0.11492524,4.514634,6.342632e-06,2.228801e-03
Vitvi01g01459,44.643616,1.1240660,0.27288939,4.119127,3.803104e-05,1.024216e-02
Vitvi01g01845,38.830075,0.9939048,0.26804219,3.708016,2.088892e-04,3.764290e-02
Vitvi01g02228,4.042936,3.6369581,0.65970627,5.512996,3.527761e-08,2.860743e-05
