In [None]:
# make a data summary function for some data printouts and data visualizations
# helper CV functions
CV <- function(df) {
    # Computes and returns a vector of CV (coefficient of variation) values
    #   df - data frame (or matrix) of numeric values: one CV computed per row
    # Returns a numeric vector with one entry per row: 100 * sd / mean.
    # No NA removal is done, so a row containing NA yields NA; a row whose
    # mean is zero yields NaN or Inf.

    row_means <- rowMeans(df) # compute averages
    # stats::sd is named explicitly so no local variable shadows the function
    row_sds <- apply(df, 1, stats::sd) # compute standard deviations
    # bare final expression so the result is returned visibly (ending on an
    # assignment would return it invisibly)
    100 * row_sds / row_means # CVs in percent
}

check_data <- function(df, title) {
    # Checks data normalizations with distribution summaries and plots
    #   df - data frame of intensities with at least 21 columns; exactly 22
    #        columns are split 11 + 11, otherwise the first 21 columns are
    #        split 10 + 11
    #   title - some text to use in print statements and plot labels
    # Prints per-column totals/medians/IQRs and the median/IQR of the CVs of
    # each column group, draws two boxplots, and invisibly returns the value
    # of the final boxplot() call.

    n_cols <- ncol(df)
    # fail early with a clear message instead of a late
    # "undefined columns selected" after partial output
    if (n_cols < 21) {
        stop(sprintf("check_data() needs at least 21 columns, got %d", n_cols),
             call. = FALSE)
    }

    # column groups used for both the plot colors and the CV computation
    n_first <- if (n_cols == 22) 11 else 10
    first_cols <- seq_len(n_first)
    second_cols <- n_first + seq_len(11)

    # get some summary numbers: column totals, medians and IQRs
    cat(sprintf("\nColumn Summaries (%s):\n", title))
    for (i in seq_len(n_cols)) {
        values <- df[[i]]
        cat(sprintf("  %s - tot: %s med: %s iqr: %s\n", colnames(df)[i],
                format(round(sum(values), digits = 0), big.mark = ","),
                format(round(median(values), digits = 0), big.mark = ","),
                format(round(IQR(values), digits = 0), big.mark = ",")))
    }

    # see what the data distribution boxplots look like; boxes are colored by
    # column group (columns outside both groups, if any, stay grey)
    box_cols <- rep("grey", n_cols)
    box_cols[first_cols] <- "red"
    box_cols[second_cols] <- "blue"
    boxplot(log10(df),
            col = box_cols,
            notch = TRUE, main = sprintf("Intensity distributions (%s)", title))

    # CV distributions, one per column group
    # NOTE(review): the labels exp2_CV / exp4_CV are used for whatever data is
    # passed in - confirm they match the experiments actually being checked
    CVs <- data.frame(exp2_CV = CV(df[first_cols]),
                      exp4_CV = CV(df[second_cols]))
    ymax <- 50 # upper y-axis limit of the CV boxplot

    # print values
    cat(sprintf("\nMedian CVs (%s):\n", title))
    for (i in seq_len(ncol(CVs))) {
        cv_values <- CVs[[i]]
        cat(sprintf("  %s - med: %s iqr: %s\n", colnames(CVs)[i],
                round(median(cv_values), digits = 2),
                round(IQR(cv_values), digits = 2)))
    }

    # show boxplots
    boxplot(CVs, ylim = c(0, ymax), notch = TRUE,
            main = sprintf("CV distributions (%s)", title))
}

In [None]:
# Split the intensity table by center: take the metadata rows of one center,
# then keep only the intensity columns named in Quantitative.column.name.
tmp_meta_1 <- filter(metadata, Center == "Center1")
tmp_center_1 <- select(pg_intensities, tmp_meta_1$Quantitative.column.name)

tmp_meta_2 <- filter(metadata, Center == "Center2")
tmp_center_2 <- select(pg_intensities, tmp_meta_2$Quantitative.column.name)


################################ SL ###################
# Scale every column of a center so that its total equals the mean of that
# center's column totals (NAs are ignored when summing).

# center 1
norm_facs <- colSums(tmp_center_1, na.rm = TRUE)
norm_facs <- mean(norm_facs) / norm_facs
tmp_center_1 <- sweep(tmp_center_1, MARGIN = 2, STATS = norm_facs, FUN = "*")

# center 2
norm_facs <- colSums(tmp_center_2, na.rm = TRUE)
norm_facs <- mean(norm_facs) / norm_facs
tmp_center_2 <- sweep(tmp_center_2, MARGIN = 2, STATS = norm_facs, FUN = "*")




###################################################
# Per center: take two reference columns (presumably the pooled channels of
# the two experiments - confirm), compute their row-wise geometric mean, and
# derive one scaling factor per row and experiment (geomean / reference).
# The factors are then multiplied row-wise into the experiment's columns.
# NOTE(review): the column index ranges assume the columns of each center are
# ordered experiment by experiment - confirm against the metadata order.

# ---- center 1 ----
irs_factor_1 <- data.frame(avePoolExp1 = tmp_center_1$P_1.RIC_1,
                           avePoolExp2 = tmp_center_1$P_2.RIC_1)
irs_factor_1$geomean <- apply(irs_factor_1, 1, function(x) exp(mean(log(x))))
irs_factor_1$fac1 <- with(irs_factor_1, geomean / avePoolExp1)
irs_factor_1$fac2 <- with(irs_factor_1, geomean / avePoolExp2)

all_irs_1 <- cbind(tmp_center_1[1:11] * irs_factor_1$fac1,
                   tmp_center_1[12:22] * irs_factor_1$fac2)

# ---- center 2 ----
irs_factor_2 <- data.frame(avePoolExp3 = tmp_center_2$P_3.RIC_1,
                           avePoolExp4 = tmp_center_2$P_5.RIC_1)
irs_factor_2$geomean <- apply(irs_factor_2, 1, function(x) exp(mean(log(x))))
irs_factor_2$fac3 <- with(irs_factor_2, geomean / avePoolExp3)
irs_factor_2$fac4 <- with(irs_factor_2, geomean / avePoolExp4)

all_irs_2 <- cbind(tmp_center_2[1:10] * irs_factor_2$fac3,
                   tmp_center_2[11:21] * irs_factor_2$fac4)


In [None]:
# Summarize and plot center 1 using only rows without missing values
check_data(df = na.omit(tmp_center_1), title = "tmp_center_1")