-
Notifications
You must be signed in to change notification settings - Fork 6
/
formatSubmissionFunctions.0.1.R
122 lines (104 loc) · 4.5 KB
/
formatSubmissionFunctions.0.1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# formatSubmissionFunctions.0.1.R
#
# Purpose: Provides functions to format a prediction
# dataframe into a valid DREAM challenge submission.
#
# This file can be loaded as a FORMATTER asset in
# the DREAM_Combi_main.R workflow and is
# therefore expected to provide a
# format_prediction() function and a
# format_confidence() function.
#
# This file should contain only assets of functions
# and constants. Sourcing this file must not have
# any side-effects. Functions should not have side-
# effects either.
#
# Version: 0.1
#
# Date: Jan 24 2016
# Author: Boris and DREAM team UofT
#
# V 0.1 First code
#
# TODO:
#
# ==========================================================
format_prediction <- function(type, pred) {
  # Purpose: Format a prediction dataframe as a DREAM challenge
  #          synergy-prediction submission table.
  #
  # Parameters:
  #   type: challenge type; only "1A" and "1B" are supported.
  #   pred: dataframe with columns C (copied to CELL_LINE), A and B
  #         (joined with a dot into COMBINATION_ID) and S (copied to
  #         PREDICTION).
  #
  # Value: dataframe with columns CELL_LINE, COMBINATION_ID and
  #        PREDICTION, sorted by COMBINATION_ID. A zero-row pred
  #        yields a zero-row result.
  #
  # From https://www.synapse.org/#!Synapse:syn4231880/wiki/235659
  # Constraints:
  #   CELL_LINE column contains the cell line identifier. Those IDs
  #     are the normalized cell line names (controlled vocabulary).
  #   COMBINATION_ID column contains the combination identifier,
  #     which consists of both drug names separated by a dot.
  #     Drug names are controlled vocabulary too and must be
  #     alphabetically sorted.
  #   PREDICTION column contains scalar values of the predicted
  #     synergy: positive values indicate synergy, values around
  #     zero additivity, and negative values antagonism.
  #   NA (i.e. null) predictions are not accepted.
  # NOTE: this function does not itself check vocabulary, drug-name
  #       order within a pair, or NA predictions.

  # Scalar test: %in% never yields NA and || short-circuits.
  if (!(length(type) == 1L && type %in% c("1A", "1B"))) {
    stop("PANIC: type \"", type, "\" not yet supported.")
  }

  missing_cols <- setdiff(c("C", "A", "B", "S"), names(pred))
  if (length(missing_cols) > 0L) {
    stop("PANIC: pred is missing column(s): ",
         paste(missing_cols, collapse = ", "))
  }

  # Build all rows at once. Unlike a 1:N loop this is correct for
  # zero rows, and as.character() keeps factor labels rather than
  # their integer codes.
  sub <- data.frame(
    "CELL_LINE" = as.character(pred$C),
    "COMBINATION_ID" = sprintf("%s.%s",
                               as.character(pred$A),
                               as.character(pred$B)),
    "PREDICTION" = as.numeric(pred$S),
    stringsAsFactors = FALSE
  )

  sub <- sub[order(sub$COMBINATION_ID), ]
  return(sub)
}
format_confidence <- function(type,              # Challenge type
                              pred,              # Prediction input
                              method = "quantile", # Method used to
                                                 # reduce the values
                                                 # of one combination
                                                 # to a single score
                              pQuant = 0.8       # Quantile returned by
                                                 # method "quantile"
                              ) {
  # Purpose: Collapse per-row confidence values into one ranked,
  #          normalized confidence per drug combination.
  #
  # Parameters:
  #   type:   challenge type; only "1A" and "1B" are supported.
  #   pred:   dataframe with columns A and B (joined with a dot into
  #           COMBINATION_ID) and conf (numeric values to summarize).
  #   method: summary method; only "quantile" is supported.
  #   pQuant: single probability passed to quantile().
  #
  # Value: dataframe with columns COMBINATION_ID and CONFIDENCE, one
  #        row per distinct (A, B) pair, sorted by COMBINATION_ID.
  #        CONFIDENCE is rank(score) / number of combinations, so it
  #        lies in (0, 1]. A zero-row pred yields a zero-row result.
  #
  # From https://www.synapse.org/#!Synapse:syn4231880/wiki/235659
  # Constraints:
  #   - Confidence is in a range from 0 to 1, where high values
  #     correspond to high confidence in that particular combination.
  #   - Confidence value cannot be null or NA.
  #   - Only valid COMBINATION_IDs, meaning controlled-vocabulary
  #     drug names separated by a dot and alphabetically ordered.
  #   - List must contain all combinations from challenge 1.
  #   - No duplicated COMBINATION_IDs allowed!
  # NOTE: this function does not itself check vocabulary, drug-name
  #       order within a pair, or completeness of the list.

  # Validate scalar arguments once, up front, so that a bad method is
  # reported even when there are no combinations to process.
  if (!(length(type) == 1L && type %in% c("1A", "1B"))) {
    stop("PANIC: type \"", type, "\" not yet supported.")
  }
  if (!(length(method) == 1L && method %in% "quantile")) {
    stop("PANIC: method \"", method, "\" not supported.")
  }
  stopifnot(is.numeric(pQuant), length(pQuant) == 1L)

  missing_cols <- setdiff(c("A", "B", "conf"), names(pred))
  if (length(missing_cols) > 0L) {
    stop("PANIC: pred is missing column(s): ",
         paste(missing_cols, collapse = ", "))
  }

  # as.character() keeps factor labels rather than integer codes, so
  # the per-combination matching below works for factor columns too.
  drug_a <- as.character(pred$A)
  drug_b <- as.character(pred$B)

  # One row per distinct (A, B) pair, ordered by combination ID.
  conf <- unique(data.frame(
    "A" = drug_a,
    "B" = drug_b,
    "COMBINATION_ID" = sprintf("%s.%s", drug_a, drug_b),
    stringsAsFactors = FALSE
  ))
  conf <- conf[order(conf$COMBINATION_ID), ]

  # Summarize the confidence values of each combination.
  # seq_len() makes this a no-op for zero combinations.
  scores <- vapply(seq_len(nrow(conf)), function(i) {
    pVals <- pred$conf[drug_a == conf$A[i] & drug_b == conf$B[i]]
    unname(quantile(pVals, probs = pQuant))
  }, numeric(1))

  # rank and normalize
  conf$CONFIDENCE <- rank(scores) / nrow(conf)

  # drop A and B columns
  conf <- conf[, c("COMBINATION_ID", "CONFIDENCE")]
  return(conf)
}
# [END]