In [None]:
#######################################
Input:
*_depth files(files with depth per region and scaffold)

Function: 
Calculate the mean p_values for depth when comparing sexes on a specific base region(500 here).

Output:
[scaffold name]_means files(files of a specific scaffold with a mean -log10P value per 500b region)
#######################################

In [3]:
library(data.table)
library(ggplot2)
library(dplyr)

filenames <- list.files(path="01.depth", pattern="*_depth", full.names = TRUE)


generate_chr_DT <- function(file){   
    depth_data <- fread(file)
    Chr <- gsub("01.depth/","",file)
    #Chr <- gsub("_depth", "",Chr)
    #name columns:
    #First generate the sample names 
    sample_names <- readLines("00.data/samples_males_then_females_in_order")
    #Now the chromosomal regions
    regions <- c('CHROM', 'POS')
    col_names <- c(regions, sample_names)
    #assign CHROM, POS and then the 12 sample name to the columns in the same order as samtools depth was done
    colnames(depth_data) <- col_names

    # make bins with a new column "bin_start"
    with_bins <- mutate(depth_data, bin_start=(ceiling(POS/500)*500)-500)
    # group bin_start
    by_bin <- group_by(with_bins, bin_start)
    #calculate bin means for each sample, for each bin

    bin_means <- summarise(by_bin,
    group_01 = mean(group_01),
    group_03 = mean(group_03),
    group_05 = mean(group_05),
    group_09 = mean(group_09),
    group_11 = mean(group_11),
    group_12 = mean(group_12),
    group_13 = mean(group_13),
    group_02 = mean(group_02),
    group_04 = mean(group_04),
    group_06 = mean(group_06),
    group_07 = mean(group_07),
    group_08 = mean(group_08),
    group_10 = mean(group_10))
        
    #Do a t-test for the means of males (1:7) and females (8:13)
    t.result <- apply(bin_means[1:14], 1, function (x) t.test(x[2:8],x[9:14]))
        
    #extract p values from t.result and place into the bin_means dataframe
    bin_means$p_value <- unlist(lapply(t.result, function(x) x$p.value))
    
    #calculate false discovery rate
    bin_means$fdr <- p.adjust(bin_means$p_value, method = "fdr")

    #prepare data for plotting    
    bin_means <- data.frame(bin_means)

    #generate -log p values (larger = more significant)
    bin_means <- mutate(bin_means, minus_log10_P=-log10(p_value))
    
    #write files for later use and plot
    outfile <- paste("02.means/", Chr, "_means")
    
    write.table(bin_means, file=outfile, row.names=FALSE)
    }
                                       
lapply(filenames,generate_chr_DT)

"type 31 is unimplemented in 'type2char'"

ERROR: Error: CAR/CDR/TAG or similar applied to unknown type #31 object
