Skip to content

Commit

Permalink
first revision
Browse files Browse the repository at this point in the history
  • Loading branch information
TanerArslan committed Mar 27, 2019
1 parent 95bd7f4 commit 7769d88
Show file tree
Hide file tree
Showing 22 changed files with 340 additions and 148 deletions.
2 changes: 1 addition & 1 deletion R/applyThresholdToCompartment.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#'all.A <- cls[[1]]$all.prot.pred
#'all.B <- cls[[2]]$all.prot.pred
#'
#'c.cls.df<- applyThresholdCompartment(all.A, all.B, t.c.df)
#'c.cls.df <- applyThresholdCompartment(all.A[1:300,],all.B[1:300,],t.c.df)
#'}
#'@return c.cls.df

Expand Down
62 changes: 12 additions & 50 deletions R/applyThresholdToNeighborhood.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,61 +28,21 @@
#'all.A <- cls[[1]]$all.prot.pred
#'all.B <- cls[[2]]$all.prot.pred
#'
#'n.cls.df <- applyThresholdNeighborhood(all.A, all.B, t.n.df)
#'n.cls.df <- applyThresholdNeighborhood(all.A[1:300,],all.B[1:300,],t.n.df)
#'}
#'@return n.cls.df

applyThresholdNeighborhood <- function(all.repA, all.repB, threshold.df){

couple.lsit <- list(c("Secretory", "S1"), c("Secretory", "S2"),
c("Secretory", "S3"), c("Secretory", "S4"),
c("Nuclear", "N1"), c("Nuclear", "N2"),
c("Nuclear", "N3"), c("Nuclear", "N4"),
c("Cytosol", "C1"), c("Cytosol", "C2"),
c("Cytosol", "C3"), c("Cytosol", "C4"),
c("Cytosol", "C5"), c("Mitochondria", "M1"),
c("Mitochondria", "M2"))


#upgrade compartment labels to neighborhood labels for prediction
replacePrediction <- function(df, column = "svm.pred.all"){
multiple.lst <- lapply(couple.lsit, function(f){
temp.df <- df[df[column] == unname(unlist(f[2])), ]
temp.df[[column]] <- as.character(unname(unlist(f[1])))
temp.df
})
replaced.df <- do.call("rbind", multiple.lst)
}

all.n.repA <- replacePrediction(df = all.repA, column = "svm.pred.all")
all.n.repB <- replacePrediction(df = all.repB, column = "svm.pred.all")

merge.probability <- function(df){

t.secretory.df <- data.frame(df[, colnames(df)[2:5]])
t.secretory.df$Secretory <- apply(t.secretory.df, 1, sum)
t.nuclear.df <- data.frame(df[, colnames(df)[6:9]])
t.nuclear.df$Nuclear <- apply(t.nuclear.df, 1, sum)
t.cytosol.df <- data.frame(df[, colnames(df)[10:14]])
t.cytosol.df$Cytosol <- apply(t.cytosol.df, 1, sum)
t.Mitochondria.df <- data.frame(df[, colnames(df)[15:16]])
t.Mitochondria.df$Mitochondria <- apply(t.Mitochondria.df, 1, sum)

merged.df <- data.frame(Proteins = rownames(df),
svm.pred.all = df[,colnames(df)[1]],
Secretory = t.secretory.df$Secretory,
Nuclear = t.nuclear.df$Nuclear,
Cytosol = t.cytosol.df$Cytosol,
Mitochondria = t.Mitochondria.df$Mitochondria)
# temp neihborhood df
t.n.df <- merged.df[,3:6]
merged.df$svm.pred.all <- colnames(t.n.df)[apply(t.n.df, 1, which.max)]
rownames(merged.df) <- merged.df$Proteins
return(merged.df)
}

m.all.repA <- merge.probability(all.n.repA)
m.all.repB <- merge.probability(all.n.repB)
all.n.repA <- SubCellBarCode::replacePrediction(df = all.repA,
column = "svm.pred.all")
all.n.repB <- SubCellBarCode::replacePrediction(df = all.repB,
column = "svm.pred.all")

#sum up compartment level predictions to neighborhood predictions
m.all.repA <- SubCellBarCode::mergeProbability(all.n.repA)
m.all.repB <- SubCellBarCode::mergeProbability(all.n.repB)

m.all.repB <- m.all.repB[rownames(m.all.repA), ]

Expand Down Expand Up @@ -118,7 +78,7 @@ applyThresholdNeighborhood <- function(all.repA, all.repB, threshold.df){
t.p <- unname(unlist(threshold.df[threshold.df$Neighborhood == m, ][2]))
#temp recall
t.r <- unname(unlist(threshold.df[threshold.df$Neighborhood == m, ][3]))
if (! is.na(t.p) == TRUE){
if (! is.na(t.p)){
t.value <- max(t.p, t.r)
temp.df <- combined.rep.A.B[combined.rep.A.B$svm.pred.all == m, ]
up.threshold.df <- temp.df[temp.df[m] >= t.value, ]
Expand All @@ -133,3 +93,5 @@ applyThresholdNeighborhood <- function(all.repA, all.repB, threshold.df){
n.cls.df <- rbind(conf.df, no.class)

}


13 changes: 5 additions & 8 deletions R/calRowMean.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#'summarized by taking their mean for each protein.
#'After taking the mean, the data log2 transformed.
#'Further, the 5 main fractions are used to check correlation
#'between input datas.
#'between input data sets. It is a helper function.
#'@param d.df data.frame; A data frame of 10 fraction profiles
#' consisting of replicate A and B.
#'@export
Expand All @@ -16,15 +16,12 @@

calRowMean <- function(d.df){
    # Average each pair of adjacent columns: (1,2), (3,4), ..., (9,10).
    # drop = FALSE keeps a two-dimensional object for rowMeans even when
    # the input has a single row.
    r.means <- lapply(seq_len(5), function(x){
        k <- 2 * x - 1
        rowMeans(d.df[, c(k, k + 1), drop = FALSE])
    })

    # cbind keeps the row names carried by the rowMeans results, so no
    # separate protein-name column is needed.
    r.df <- data.frame(do.call("cbind", r.means))
    colnames(r.df) <- c("Cyto", "Nsol", "NucI", "Horg", "Lorg")

    # log2 transform of the averaged values
    r.df <- log2(r.df)
    return(r.df)
}
Expand Down
2 changes: 1 addition & 1 deletion R/candidateRelocatedProteins.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ candidateRelocatedProteins <- function(sampleCls1, s1PSM,s1Quant, sampleCls2,
C.A = df1$NeighborhoodCls,
C.B = df2$NeighborhoodCls)

###########
#calculate the mean of duplicates
s1Quant <- SubCellBarCode::calRowMean(s1Quant)
s2Quant <- SubCellBarCode::calRowMean(s2Quant)

Expand Down
13 changes: 6 additions & 7 deletions R/coveredMarkerProtein.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,14 @@ calculateCoveredProtein <- function(proteinIDs, markerproteins){
#check if there is not enough enrichment in any compartment
non.enriched.loc <- coverage.df[coverage.df$ProteinCoverage < 20, ]
if(nrow(non.enriched.loc) == 1){
warning(sprintf("There is not enough enrichment at %s localization.
\nWe recommend you to perform the fractionation, again.",
as.character(non.enriched.loc$Compartments)))
warning("There is not enough enrichment at: ",
as.character(non.enriched.loc$Compartments),
"\nWe recommend you to perform the fractionation, again.")
}else if(nrow(non.enriched.loc) > 1){
comp <- paste(as.character(non.enriched.loc$Compartments),
collapse = ",")
warning(sprintf("There are not enough enrichment at %s localizations.
\nWe recommend you to perform the fractionation,
as we describe at the manuscprit.", comp))
warning("There are not enough enrichments at: ",
comp, "\nWe recommend you to perform the fractionation!")
}


Expand All @@ -78,7 +77,7 @@ calculateCoveredProtein <- function(proteinIDs, markerproteins){
))

coverage <- round(length(covered.proteins) / length(markerproteins), 2)
cat(sprintf("Overall Coverage of marker proteins : %s ", coverage))
cat("Overall Coverage of marker proteins : ", coverage)

return (covered.proteins)
}
4 changes: 2 additions & 2 deletions R/load.data.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@

loadData <- function(protein.data){

if( is.data.frame(protein.data) == FALSE )
if(! is.data.frame(protein.data))
stop('Input must be a data frame format! Type ?loadData')

if(! ncol(protein.data) == 10)
stop('Input data must have 10 columns! Type ?loadData')

if (! is.character(rownames(protein.data)) == TRUE)
if (! is.character(rownames(protein.data)))
stop('Rownames must be character!')


Expand Down
22 changes: 10 additions & 12 deletions R/markerQualityControl.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ markerQualityControl <- function(coveredProteins, protein.data){
#remove replicate-wise marker proteins
rep.prots <- names(cor.reps.pearson[cor.reps.pearson < 0.8 ])

message(sprintf("Number of removed replicate-wise proteins: %s",
length(rep.prots)))
message("Number of removed replicate-wise proteins: ", length(rep.prots))

# sample-wise correlation marker QC
prot.names <- setdiff(rownames(m.prot.df), rep.prots)
Expand Down Expand Up @@ -123,13 +122,13 @@ markerQualityControl <- function(coveredProteins, protein.data){
sample.removed.prot <- df[df$Pearson < 0.8 | df$Spearman < 0.599,]
sample.removed.prot <- as.character(sample.removed.prot$Protein)

message(sprintf("Number of removed sample-wise proteins: %s",
length(sample.removed.prot)))
message("Number of removed sample-wise proteins: ",
length(sample.removed.prot))

robustMarkerProteins <- setdiff(prot.names, sample.removed.prot)

message(sprintf("Number of total removed marker proteins: %s",
length(sample.removed.prot) + length(rep.prots)))
message("Number of total removed marker proteins: ",
length(sample.removed.prot) + length(rep.prots))

grid.arrange(p1, p2, ncol=2)

Expand All @@ -151,15 +150,14 @@ markerQualityControl <- function(coveredProteins, protein.data){

non.enriched.loc <- r.cov.df[r.cov.df$ProteinCoverage < 20, ]
if(nrow(non.enriched.loc) == 1){
warning(sprintf("There is not enough enrichment at %s localization.
\nWe recommend you to perform the fractionation, again.",
as.character(non.enriched.loc$Compartments)))
warning("There is not enough enrichment at: ",
as.character(non.enriched.loc$Compartments),
"\nWe recommend you to perform the fractionation, again.")
}else if(nrow(non.enriched.loc) > 1){
comp <- paste(as.character(non.enriched.loc$Compartments),
collapse = ",")
warning(sprintf("There are not enough enrichment at %s localizations.
\nWe recommend you to perform the fractionation,
as we describe at the manuscprit.", comp))
warning("There are not enough enrichments at: ",
comp, "\nWe recommend you to perform the fractionation.")
}

return(robustMarkerProteins)
Expand Down
4 changes: 2 additions & 2 deletions R/mergeCompNeigh.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
#'all.A <- cls[[1]]$all.prot.pred
#'all.B <- cls[[2]]$all.prot.pred
#'
#'c.cls.df <- applyThresholdCompartment(all.A, all.B, t.c.df)
#'c.cls.df <- applyThresholdCompartment(all.A[1:300,],all.B[1:300,],t.c.df)
#'
#'n.cls.df <- applyThresholdNeighborhood(all.A, all.B, t.n.df)
#'n.cls.df <- applyThresholdNeighborhood(all.A[1:300,],all.B[1:300,],t.n.df)
#'
#'cls.df <- mergeCls(c.cls.df, n.cls.df)
#'}
Expand Down
48 changes: 48 additions & 0 deletions R/mergeProbability.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#'@title Merge compartment probabilities to neighborhood probabilities
#'@description Compartment level classification probabilities are summed
#'up to their associated neighborhood levels, and each protein is
#'assigned to the neighborhood with the highest summed probability.
#'It is a helper function.
#'@param df data.frame; one row per protein (row names are used as the
#'protein identifiers). The first column holds the prediction and
#'columns 2-16 hold the compartment probabilities, read by position as
#'four Secretory, four Nuclear, five Cytosol and two Mitochondria columns.
#'@export
#'@examples {
#'
#'df <- loadData(SubCellBarCode::hcc827Ctrl)
#'
#'c.prots <- calculateCoveredProtein(rownames(df), markerProteins[,1])
#'
#'set.seed(7)
#'c.prots <- sample(c.prots, 365)
#'cls <- svmClassification(c.prots, df, markerProteins)
#'
#'all.A <- cls[[1]]$all.prot.pred
#'
#'all.n.repA <- replacePrediction(all.A, column = "svm.pred.all")
#'
#'m.all.repA <- mergeProbability(all.n.repA)
#'
#'}
#'@return merged.df data.frame; columns Proteins, svm.pred.all (the
#'neighborhood with the highest summed probability, NA when no summed
#'probability is available), Secretory, Nuclear, Cytosol and Mitochondria.

mergeProbability <- function(df){

    # the column positions below are fixed, so fail early with a clear
    # message instead of an opaque "undefined columns selected"
    if (is.null(ncol(df)) || ncol(df) < 16)
        stop('Input must have a prediction column followed by 15 ',
            'compartment probability columns!')

    # sum the compartment probabilities found at the given positions
    sumCompartments <- function(idx){
        unname(rowSums(df[, idx, drop = FALSE]))
    }

    merged.df <- data.frame(Proteins = rownames(df),
                            svm.pred.all = df[, 1],
                            Secretory = sumCompartments(2:5),
                            Nuclear = sumCompartments(6:9),
                            Cytosol = sumCompartments(10:14),
                            Mitochondria = sumCompartments(15:16))

    # temp neighborhood matrix: one column per neighborhood
    neighborhoods <- colnames(merged.df)[3:6]
    t.n.mat <- as.matrix(merged.df[, neighborhoods, drop = FALSE])

    # pick the most probable neighborhood per protein; which.max returns
    # an empty result when every value is NA, which is mapped to NA here
    top.idx <- vapply(seq_len(nrow(t.n.mat)), function(i){
        hit <- which.max(t.n.mat[i, ])
        if (length(hit) == 0) NA_integer_ else unname(hit)
    }, integer(1))

    merged.df$svm.pred.all <- neighborhoods[top.idx]
    rownames(merged.df) <- merged.df$Proteins
    return(merged.df)
}
8 changes: 4 additions & 4 deletions R/plotBarcode.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
#'all.A <- cls[[1]]$all.prot.pred
#'all.B <- cls[[2]]$all.prot.pred
#'
#'c.cls.df <- applyThresholdCompartment(all.A, all.B, t.c.df)
#'c.cls.df <- applyThresholdCompartment(all.A[1:300,],all.B[1:300,],t.c.df)
#'
#'n.cls.df <- applyThresholdNeighborhood(all.A, all.B, t.n.df)
#'n.cls.df <- applyThresholdNeighborhood(all.A[1:300,],all.B[1:300,],t.n.df)
#'
#'cls.df <- mergeCls(c.cls.df, n.cls.df)
#'
#'proteinPlot <- plotBarcode(cls.df, "AAR2", hcc827CtrlPSMCount)
#'proteinPlot <- plotBarcode(cls.df, "ACAA2", hcc827CtrlPSMCount)
#'}
#'@import ggplot2
#'@importFrom graphics plot
Expand Down Expand Up @@ -74,7 +74,7 @@ plotBarcode <- function(sampleClassification, protein, s1PSM){
#get the PSM count
psm <- as.numeric(s1PSM[protein,][2])

if( length(psm) < 1 & is.numeric(psm) == FALSE)
if( length(psm) < 1 & !is.numeric(psm))
stop('PSM count could not obtain properly.
Please check the PSM input data')

Expand Down
Loading

0 comments on commit 7769d88

Please sign in to comment.