first revision

TanerArslan · Mar 27, 2019 · 7769d88 · 7769d88
1 parent 95bd7f4
commit 7769d88
Show file tree

Hide file tree

Showing 22 changed files with 340 additions and 148 deletions.
diff --git a/R/applyThresholdToCompartment.R b/R/applyThresholdToCompartment.R
@@ -27,7 +27,7 @@
 #'all.A <- cls[[1]]$all.prot.pred
 #'all.B <- cls[[2]]$all.prot.pred
 #'
-#'c.cls.df<- applyThresholdCompartment(all.A, all.B, t.c.df)
+#'c.cls.df <- applyThresholdCompartment(all.A[1:300,],all.B[1:300,],t.c.df)
 #'}
 #'@return c.cls.df
 

diff --git a/R/applyThresholdToNeighborhood.R b/R/applyThresholdToNeighborhood.R
@@ -28,61 +28,21 @@
 #'all.A <- cls[[1]]$all.prot.pred
 #'all.B <- cls[[2]]$all.prot.pred
 #'
-#'n.cls.df <- applyThresholdNeighborhood(all.A, all.B, t.n.df)
+#'n.cls.df <- applyThresholdNeighborhood(all.A[1:300,],all.B[1:300,],t.n.df)
 #'}
 #'@return n.cls.df
 
 applyThresholdNeighborhood <- function(all.repA, all.repB, threshold.df){
 
-    couple.lsit <- list(c("Secretory", "S1"), c("Secretory", "S2"),
-                        c("Secretory", "S3"), c("Secretory", "S4"),
-                        c("Nuclear", "N1"), c("Nuclear", "N2"),
-                        c("Nuclear", "N3"), c("Nuclear", "N4"),
-                        c("Cytosol", "C1"), c("Cytosol", "C2"),
-                        c("Cytosol", "C3"), c("Cytosol", "C4"),
-                        c("Cytosol", "C5"), c("Mitochondria", "M1"),
-                        c("Mitochondria", "M2"))
-
-
     #upgrade compartment labels to neighborhood labels for prediction
-    replacePrediction <- function(df, column = "svm.pred.all"){
-        multiple.lst <- lapply(couple.lsit, function(f){
-            temp.df <- df[df[column] == unname(unlist(f[2])), ]
-            temp.df[[column]] <- as.character(unname(unlist(f[1])))
-            temp.df
-        })
-        replaced.df <- do.call("rbind", multiple.lst)
-    }
-
-    all.n.repA <- replacePrediction(df = all.repA, column = "svm.pred.all")
-    all.n.repB <- replacePrediction(df = all.repB, column = "svm.pred.all")
-
-    merge.probability <- function(df){
-
-        t.secretory.df <- data.frame(df[, colnames(df)[2:5]])
-        t.secretory.df$Secretory <- apply(t.secretory.df, 1, sum)
-        t.nuclear.df <- data.frame(df[, colnames(df)[6:9]])
-        t.nuclear.df$Nuclear <- apply(t.nuclear.df, 1, sum)
-        t.cytosol.df <- data.frame(df[, colnames(df)[10:14]])
-        t.cytosol.df$Cytosol <- apply(t.cytosol.df, 1, sum)
-        t.Mitochondria.df <- data.frame(df[, colnames(df)[15:16]])
-        t.Mitochondria.df$Mitochondria <- apply(t.Mitochondria.df, 1, sum)
-
-        merged.df <- data.frame(Proteins = rownames(df),
-                                svm.pred.all = df[,colnames(df)[1]],
-                                Secretory = t.secretory.df$Secretory,
-                                Nuclear = t.nuclear.df$Nuclear,
-                                Cytosol = t.cytosol.df$Cytosol,
-                                Mitochondria = t.Mitochondria.df$Mitochondria)
-        # temp neihborhood df
-        t.n.df <- merged.df[,3:6]
-        merged.df$svm.pred.all <- colnames(t.n.df)[apply(t.n.df, 1, which.max)]
-        rownames(merged.df) <- merged.df$Proteins
-        return(merged.df)
-    }
-
-    m.all.repA <- merge.probability(all.n.repA)
-    m.all.repB <- merge.probability(all.n.repB)
+    all.n.repA <- SubCellBarCode::replacePrediction(df = all.repA,
+                                            column = "svm.pred.all")
+    all.n.repB <- SubCellBarCode::replacePrediction(df = all.repB,
+                                            column = "svm.pred.all")
+
+    #sum up compartment level predictions to neighborhood predictions
+    m.all.repA <- SubCellBarCode::mergeProbability(all.n.repA)
+    m.all.repB <- SubCellBarCode::mergeProbability(all.n.repB)
 
     m.all.repB <- m.all.repB[rownames(m.all.repA), ]
 
@@ -118,7 +78,7 @@ applyThresholdNeighborhood <- function(all.repA, all.repB, threshold.df){
         t.p <- unname(unlist(threshold.df[threshold.df$Neighborhood == m, ][2]))
         #temp recall
         t.r <- unname(unlist(threshold.df[threshold.df$Neighborhood == m, ][3]))
-        if (! is.na(t.p) == TRUE){
+        if (! is.na(t.p)){
             t.value <- max(t.p, t.r)
             temp.df <- combined.rep.A.B[combined.rep.A.B$svm.pred.all == m, ]
             up.threshold.df <- temp.df[temp.df[m] >= t.value, ]
@@ -133,3 +93,5 @@ applyThresholdNeighborhood <- function(all.repA, all.repB, threshold.df){
     n.cls.df <- rbind(conf.df, no.class)
 
 }
+
+
diff --git a/R/calRowMean.R b/R/calRowMean.R
@@ -3,7 +3,7 @@
 #'summarized by taking their mean for each protein.
 #'After taking the mean, the data are log2 transformed.
 #'Further, the 5 main fractions are used to check correlation
-#'between input datas.
+#'between input data sets. It is a helper function.
 #'@param d.df data.frame; A data frame of 10 fraction profiles
 #' consisting of replicate A and B.
 #'@export
@@ -16,15 +16,12 @@
 
 calRowMean <- function(d.df){
     r.means <- lapply(seq_len(5), function(x){
-        k <- 2*x -1
-        t.df <- rowMeans(d.df[,c(k:(k+1))])
-        t.df <- data.frame(Proteins = names(t.df), Fr = unname(t.df))
+        k <- 2 * x -1
+        t.df <- rowMeans(d.df[, c(k:(k+1))])
     })
 
-    r.df <- do.call("cbind", r.means)
-    rownames(r.df) <- r.df$Proteins
-    r.df <- r.df[,c(2,4,6,8,10)]
-    colnames(r.df) <- c(" Cyto", "Nsol", "NucI", "Horg", "Lorg")
+    r.df <- data.frame(do.call("cbind", r.means))
+    colnames(r.df) <- c("Cyto", "Nsol", "NucI", "Horg", "Lorg")
     r.df <- log2(r.df)
     return(r.df)
 }

diff --git a/R/candidateRelocatedProteins.R b/R/candidateRelocatedProteins.R
@@ -48,7 +48,7 @@ candidateRelocatedProteins <- function(sampleCls1, s1PSM,s1Quant, sampleCls2,
                         C.A = df1$NeighborhoodCls,
                         C.B = df2$NeighborhoodCls)
 
-    ###########
+    #calculate the mean of duplicates
     s1Quant <- SubCellBarCode::calRowMean(s1Quant)
     s2Quant <- SubCellBarCode::calRowMean(s2Quant)
 

diff --git a/R/coveredMarkerProtein.R b/R/coveredMarkerProtein.R
@@ -47,15 +47,14 @@ calculateCoveredProtein <- function(proteinIDs, markerproteins){
     #check if there is not enough enrichment in any compartment
     non.enriched.loc <- coverage.df[coverage.df$ProteinCoverage < 20, ]
     if(nrow(non.enriched.loc) == 1){
-        warning(sprintf("There is not enough enrichment at %s localization.
-                    \nWe recommend you to perform the fractionation, again.",
-                        as.character(non.enriched.loc$Compartments)))
+        warning("There is not enough enrichment at: ",
+                as.character(non.enriched.loc$Compartments),
+                "\nWe recommend you to perform the fractionation, again.")
     }else if(nrow(non.enriched.loc) > 1){
         comp <- paste(as.character(non.enriched.loc$Compartments),
                 collapse = ",")
-        warning(sprintf("There are not enough enrichment at %s localizations.
-                \nWe recommend you to perform the fractionation,
-        as we describe at the manuscprit.", comp))
+        warning("There are not enough enrichments at: ",
+                comp, "\nWe recommend you to perform the fractionation!")
     }
 
 
@@ -78,7 +77,7 @@ calculateCoveredProtein <- function(proteinIDs, markerproteins){
         ))
 
     coverage <- round(length(covered.proteins) / length(markerproteins), 2)
-    cat(sprintf("Overall Coverage of marker proteins : %s ", coverage))
+    cat("Overall Coverage of marker proteins : ", coverage)
 
     return (covered.proteins)
 }
diff --git a/R/load.data.R b/R/load.data.R
@@ -15,13 +15,13 @@
 
 loadData <- function(protein.data){
 
-    if( is.data.frame(protein.data) == FALSE )
+    if(! is.data.frame(protein.data))
         stop('Input must be a data frame format! Type ?loadData')
 
     if(! ncol(protein.data) == 10)
         stop('Input data must have 10 columns! Type ?loadData')
 
-    if (! is.character(rownames(protein.data)) == TRUE)
+    if (! is.character(rownames(protein.data)))
         stop('Rownames must be character!')
 
 

diff --git a/R/markerQualityControl.R b/R/markerQualityControl.R
@@ -56,8 +56,7 @@ markerQualityControl <- function(coveredProteins, protein.data){
     #remove replicate-wise marker proteins
     rep.prots <- names(cor.reps.pearson[cor.reps.pearson < 0.8 ])
 
-    message(sprintf("Number of removed replicate-wise proteins: %s",
-                    length(rep.prots)))
+    message("Number of removed replicate-wise proteins: ", length(rep.prots))
 
     # sample-wise correlation marker QC
     prot.names <- setdiff(rownames(m.prot.df), rep.prots)
@@ -123,13 +122,13 @@ markerQualityControl <- function(coveredProteins, protein.data){
     sample.removed.prot <- df[df$Pearson < 0.8 | df$Spearman < 0.599,]
     sample.removed.prot <- as.character(sample.removed.prot$Protein)
 
-    message(sprintf("Number of removed sample-wise proteins: %s",
-                    length(sample.removed.prot)))
+    message("Number of removed sample-wise proteins: ",
+            length(sample.removed.prot))
 
     robustMarkerProteins <- setdiff(prot.names, sample.removed.prot)
 
-    message(sprintf("Number of total removed marker proteins: %s",
-                    length(sample.removed.prot) + length(rep.prots)))
+    message("Number of total removed marker proteins: ",
+                    length(sample.removed.prot) + length(rep.prots))
 
     grid.arrange(p1, p2, ncol=2)
 
@@ -151,15 +150,14 @@ markerQualityControl <- function(coveredProteins, protein.data){
 
     non.enriched.loc <- r.cov.df[r.cov.df$ProteinCoverage < 20, ]
     if(nrow(non.enriched.loc) == 1){
-        warning(sprintf("There is not enough enrichment at %s localization.
-                \nWe recommend you to perform the fractionation, again.",
-                        as.character(non.enriched.loc$Compartments)))
+        warning("There is not enough enrichment at: ",
+                as.character(non.enriched.loc$Compartments),
+                "\nWe recommend you to perform the fractionation, again.")
     }else if(nrow(non.enriched.loc) > 1){
         comp <- paste(as.character(non.enriched.loc$Compartments),
                     collapse = ",")
-        warning(sprintf("There are not enough enrichment at %s localizations.
-                        \nWe recommend you to perform the fractionation,
-    as we describe at the manuscprit.", comp))
+        warning("There are not enough enrichments at: ",
+                comp, "\nWe recommend you to perform the fractionation.")
     }
 
     return(robustMarkerProteins)

diff --git a/R/mergeCompNeigh.R b/R/mergeCompNeigh.R
@@ -28,9 +28,9 @@
 #'all.A <- cls[[1]]$all.prot.pred
 #'all.B <- cls[[2]]$all.prot.pred
 #'
-#'c.cls.df <- applyThresholdCompartment(all.A, all.B, t.c.df)
+#'c.cls.df <- applyThresholdCompartment(all.A[1:300,],all.B[1:300,],t.c.df)
 #'
-#'n.cls.df <- applyThresholdNeighborhood(all.A, all.B, t.n.df)
+#'n.cls.df <- applyThresholdNeighborhood(all.A[1:300,],all.B[1:300,],t.n.df)
 #'
 #'cls.df <- mergeCls(c.cls.df, n.cls.df)
 #'}

diff --git a/R/mergeProbability.R b/R/mergeProbability.R
@@ -0,0 +1,48 @@
+#'@title Merge compartment probabilities to neighborhood probabilities
+#'@description Compartment-level classifications are summed up to
+#'the associated neighborhood levels. It is a helper function.
+#'@param df data.frame; all predictions at the neighborhood level and
+#'probability vectors for each protein
+#'@export
+#'@examples {
+#'
+#'df <- loadData(SubCellBarCode::hcc827Ctrl)
+#'
+#'c.prots <- calculateCoveredProtein(rownames(df), markerProteins[,1])
+#'
+#'set.seed(7)
+#'c.prots <- sample(c.prots, 365)
+#'cls <- svmClassification(c.prots, df, markerProteins)
+#'
+#'all.A <- cls[[1]]$all.prot.pred
+#'
+#'all.n.repA <- replacePrediction(all.A, column = "svm.pred.all")
+#'
+#'m.all.repA <- mergeProbability(all.n.repA)
+#'
+#'}
+#'@return merged.df
+
+mergeProbability <- function(df){
+
+    t.secretory.df <- data.frame(df[, colnames(df)[2:5]])
+    t.secretory.df$Secretory <- apply(t.secretory.df, 1, sum)
+    t.nuclear.df <- data.frame(df[, colnames(df)[6:9]])
+    t.nuclear.df$Nuclear <- apply(t.nuclear.df, 1, sum)
+    t.cytosol.df <- data.frame(df[, colnames(df)[10:14]])
+    t.cytosol.df$Cytosol <- apply(t.cytosol.df, 1, sum)
+    t.Mitochondria.df <- data.frame(df[, colnames(df)[15:16]])
+    t.Mitochondria.df$Mitochondria <- apply(t.Mitochondria.df, 1, sum)
+
+    merged.df <- data.frame(Proteins = rownames(df),
+                            svm.pred.all = df[,colnames(df)[1]],
+                            Secretory = t.secretory.df$Secretory,
+                            Nuclear = t.nuclear.df$Nuclear,
+                            Cytosol = t.cytosol.df$Cytosol,
+                            Mitochondria = t.Mitochondria.df$Mitochondria)
+    # temp neighborhood df
+    t.n.df <- merged.df[,3:6]
+    merged.df$svm.pred.all <- colnames(t.n.df)[apply(t.n.df, 1, which.max)]
+    rownames(merged.df) <- merged.df$Proteins
+    return(merged.df)
+}
diff --git a/R/plotBarcode.R b/R/plotBarcode.R
@@ -27,13 +27,13 @@
 #'all.A <- cls[[1]]$all.prot.pred
 #'all.B <- cls[[2]]$all.prot.pred
 #'
-#'c.cls.df <- applyThresholdCompartment(all.A, all.B, t.c.df)
+#'c.cls.df <- applyThresholdCompartment(all.A[1:300,],all.B[1:300,],t.c.df)
 #'
-#'n.cls.df <- applyThresholdNeighborhood(all.A, all.B, t.n.df)
+#'n.cls.df <- applyThresholdNeighborhood(all.A[1:300,],all.B[1:300,],t.n.df)
 #'
 #'cls.df <- mergeCls(c.cls.df, n.cls.df)
 #'
-#'proteinPlot <- plotBarcode(cls.df, "AAR2", hcc827CtrlPSMCount)
+#'proteinPlot <- plotBarcode(cls.df, "ACAA2", hcc827CtrlPSMCount)
 #'}
 #'@import ggplot2
 #'@importFrom graphics plot
@@ -74,7 +74,7 @@ plotBarcode <- function(sampleClassification, protein, s1PSM){
     #get the PSM count
     psm <- as.numeric(s1PSM[protein,][2])
 
-    if( length(psm) < 1 & is.numeric(psm) == FALSE)
+    if( length(psm) < 1 & !is.numeric(psm))
         stop('PSM count could not obtain properly.
             Please check the PSM input data')