# CNV-seq Analysis

This script and its helper functions implement an R-only version of
the workflow from https://github.com/hliang/cnv-seq. It uses
GenomicRanges utilities to count reads across bins, and the
`cnv` package available at the URL above for additional
analysis. Apart from the tabulated count files written to `tab-files/`, no intermediate files are generated.

This R script has been converted into a Jupyter notebook from Bioconductor's RCNV_seq.R code in their copy-number-analysis repository: https://github.com/Bioconductor/copy-number-analysis/wiki/CNV-seq.

## <span style="color:red">User Input (MANDATORY)</span>

---
<span style="color:red">**NOTE:**</span>

     Run the cell below once the config-CNVseq.yml file has been edited accordingly.

---

In [None]:
# Path to the YAML configuration file, given relative to the working directory.
# It is parsed with read.config() in the "CNV Calculations and Plotting" cell;
# edit this value if the config file lives elsewhere.
configPath <- "config/config-CNVseq.yml"

## Installing Required Libraries (Optional)
Run the following cell if the libraries have not been previously installed. Otherwise, skip.

In [None]:
# # install necessary libraries 
# install.packages("configr")
# install.packages("devtools")
# library(devtools)

# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install(c("GenomicAlignments"))

# # install CNV-seq library from Github hliang/cnv-seq
# devtools::install_github("hliang/cnv-seq/cnvHLiang")

## Loading Required Libraries

In [None]:
# load necessary libraries
library(GenomicAlignments)
library(cnvHLiang)
library(configr)

# specify source R script with helper functions
source("helper-functions/RCNV_seq-helper.R")
source("helper-functions/cnvHLiang.R")

## CNV Calculations and Plotting

In [None]:
# Main CNV-seq driver: reads the config file, then for every configured
# comparison counts reads per genomic bin, writes the counts to tab-files/,
# calls CNVs and prints one genome-wide plot plus one plot per chromosome.

# extract config file fields and values
config <- read.config(configPath)

# store comparisons to be done
comparisons <- config$comparisons
comparisonNames <- names(comparisons)

# store chromosomes to subset; split on commas and trim the surrounding
# whitespace so that both "chr1, chr2" and "chr1,chr2" are accepted
chromosomes <- trimws(unlist(strsplit(config$chromosomes, ",")))
chromosomes <- chromosomes[nzchar(chromosomes)]

# store provided parameters
parameters <- config$parameters
parameterNames <- names(parameters)

# Return the value of parameter `name` from the config file, or `default`
# when the config file does not define it.
configParameter <- function(name, default) {
    if (name %in% parameterNames) {
        parameters[[name]]
    } else {
        default
    }
}

# use the configured value when present, otherwise fall back to the default
annotate <- configParameter("annotate", TRUE)
bigger <- configParameter("bigger", 1.5)
log2 <- configParameter("log2", 0.6)
pvalue <- configParameter("pvalue", 0.001)

# NULL means "not configured": the window size is then derived from the
# input files of each comparison inside the loop, because those files are
# not known before the comparison has been read
configuredWindowSize <- configParameter("window_size", NULL)

# make sure the output directory for the tabulated count files exists
dir.create("tab-files", showWarnings = FALSE, recursive = TRUE)

# loop per comparisons
for (comparison in comparisonNames) {
    cat(paste("\nComparison Number:", comparison))
    cat(paste("\nComparing samples:\n"))

    # files to compare; `[[` looks the comparison up by its exact name
    # instead of relying on partial matching of `$`
    control <- comparisons[[comparison]]$control
    mutant <- comparisons[[comparison]]$mutant
    if (is.null(control) || is.null(mutant)) {
        stop("Comparison '", comparison,
             "' must define both 'control' and 'mutant' in ", configPath,
             call. = FALSE)
    }

    cat(paste(control, "\nvs.\n", mutant, "\n"))

    # name mutant as "test" and control as "ref"
    files <- file.path(c(mutant, control))
    names(files) <- c("test", "ref")

    # use the configured window size, or compute one for this pair of files
    if (is.null(configuredWindowSize)) {
        window_size <- windowSize(files, pvalue=pvalue, log2=log2, bigger=bigger)
    } else {
        window_size <- configuredWindowSize
    }

    # calculate overall tiles and hits
    tiles <- tileGenomeOverlap(files, window_size)
    hits <- summarizeOverlaps(tiles, files, binCounter)

    # subset only those chromosomes defined in config file
    hitsChrSubset <- subset(hits, seqnames %in% chromosomes)

    # create tabulated file with dataframe
    hitsPath <- paste0("tab-files/", comparison, "-all-hits.tab")
    hitsFile <- as.countsfile(hitsChrSubset, hitsPath)

    # calculate Copy Number Variations (CNVs)
    cnv <- cnv.cal(hitsFile, log2=log2, annotate=annotate)

    # plot all chromosomes' CNVs
    plotAll <- plot.cnv.all(cnv, title="Copy Number Variants - All",
                            ylim=c(-5,5), colour=length(chromosomes))
    print(plotAll)

    # loop per chromosomes
    for (chrom in chromosomes) {
        # counts restricted to the current chromosome, written to its own file
        chrSubset <- subset(hits, seqnames %in% chrom)
        chrFilePath <- paste0("tab-files/", comparison, "-", chrom, ".tab")
        chrSubsetFile <- as.countsfile(chrSubset, chrFilePath)

        # CNV calls and plot for this chromosome only
        cnv <- cnv.cal(chrSubsetFile, log2=log2, annotate=annotate)
        p <- plot.cnv.chr(cnv, chromosome=chrom, title=chrom, ylim=c(-3,3))
        print(p)
    }
}