In [1]:
# Must be executed with SigMat/R_code as the working directory.

# Number of top-ranked classes to report for each signature
topK <- 10

# Print the version banner and a short usage summary in a single call;
# sep = "" keeps the pieces from being joined with spaces.
cat(
  "\n========================================\n",
  "Welcome to SigMat v1.0.1 (Updated Feb. 12, 2018).\n",
  "SigMat matches gene expression signatures to experimental conditions, i.e. classes.\n",
  "Documentation is available at https://github.com/JinfengXiao/SigMat.\n",
  "You are running the iPython Notebook version.\n",
  "You can also run in command line: Rscript sigmat_main.R [-k int]\n",
  "k is a small positive integer indicating how many classes you want a signature to be matched to.\n",
  "Default: k=10.\n",
  paste0("You input k=", topK, ".\n"),
  "========================================\n\n",
  sep = ""
)

# Load the helper functions used by the rest of the script
source("functions.R")

# Load the signature matrices (comma-separated, no header row, numeric columns)
cat("Reading data...\n")
X_train <- as.matrix(read.table(file = "../data/train/sig_train.csv", sep = ",", header = FALSE, colClasses = "numeric"))
X_tune <- as.matrix(read.table(file = "../data/tune/sig_tune.csv", sep = ",", header = FALSE, colClasses = "numeric"))
X_test <- as.matrix(read.table(file = "../data/test/sig_test.csv", sep = ",", header = FALSE, colClasses = "numeric"))

# Load the drug IDs that accompany the training and tuning signatures
drug_train <- unlist(read.table(file = "../data/train/class_train.csv", header = FALSE, colClasses = "character"))
drug_tune <- unlist(read.table(file = "../data/tune/class_tune.csv", header = FALSE, colClasses = "character"))

# Build the drug ID -> class label lookup from the distinct training drug IDs:
# the names of class_conv are drug IDs, its values are the labels 1, 2, ...
drug_uniq <- unique(drug_train)
class_conv <- setNames(seq(1, length(drug_uniq), 1), drug_uniq)

# Translate drug IDs into class labels by name lookup
class_train <- class_conv[drug_train]
class_tune <- class_conv[drug_tune]

# Build the kernels as exp() of the Spearman correlation between rows.
# Tuning and test rows are each correlated against the training rows; the raw
# correlation matrices are kept because the scoring steps further down use them.
corr_tune <- cor(t(X_tune), t(X_train), method = "spearman")
corr_test <- cor(t(X_test), t(X_train), method = "spearman")
kernel_train <- exp(cor(t(X_train), method = "spearman"))
kernel_tune <- exp(corr_tune)
kernel_test <- exp(corr_test)

# Fit SigMat on the training kernel, then let kSVM_scale() choose alpha from
# the tuning set and multiply every element of the model's coef slot by it.
sigmat <- kSVM_train(class_train, kernel_train)
alpha <- kSVM_scale(sigmat, class_tune, kernel_tune)
cat(paste0("SigMat found best alpha = ", alpha, ".\n"))
sigmat@coef <- lapply(sigmat@coef, function(coef_k) coef_k * alpha)

# Gather the scaled model's votes for the tuning and test kernels
cat("Collecting votes...\n")
vote_tune <- kSVM_vote_m(sigmat, kernel_tune)
vote_test <- kSVM_vote_m(sigmat, kernel_test)

# Tune beta: the weight that blends the rescaled votes with the
# correlation-based scores. Candidates are 0, 0.1, ..., 1; the candidate with
# the highest success_at_K() value on the tuning set is kept.
cat("Searching for the best beta value...\n")

# Neither term depends on beta, so compute each once instead of on every
# iteration of the search.
# NOTE(review): assumes num_rescale() and cor_score_m() (from functions.R) are
# deterministic and free of side effects — confirm.
vote_score_tune <- num_rescale(vote_tune, -1, 1)
cor_score_tune <- cor_score_m(corr_tune, class_train)

ac_tune <- rep(0, 11)
for (beta_i in 1:11) {
  beta <- 0.1 * (beta_i - 1)
  score_tune <- beta * vote_score_tune + (1 - beta) * cor_score_tune
  ac_tune[beta_i] <- success_at_K(score_tune, class_tune, K = topK)
}

# which.max() returns integer(0) when every entry is NA, which would leave
# beta_best empty and make the later steps fail with an unrelated error.
# Stop here with a clear message instead.
best_i <- which.max(ac_tune)
if (length(best_i) == 0) {
  stop("Could not select beta: success_at_K() returned NA for every candidate.")
}
beta_best <- 0.1 * (best_i - 1)
cat(paste0("SigMat found best beta = ", beta_best, ".\n"))

# Rank classes on test data and output the topK classes for each test row.
cat("Matching test signatures to classes...\n")

# topK is used both as an index range and as a column count below.
stopifnot(is.numeric(topK), length(topK) == 1, topK >= 1)

# Blend the rescaled votes with the correlation-based scores using the tuned beta
score_test <- beta_best * num_rescale(vote_test, -1, 1) + (1 - beta_best) * cor_score_m(corr_test, class_train)

# For each row of score_test, take the drug IDs of the topK highest scores.
# Positions past the end of the ranking come out as NA, so every row always
# yields exactly topK entries.
top_drugs <- apply(score_test, 1, function(row_score) {
  names(class_conv)[order(row_score, decreasing = TRUE)[1:topK]]
})

# apply() returns a topK x n matrix when topK > 1 but a plain vector when
# topK == 1, so t() would produce a 1 x n matrix in that case and the CSV
# would be written as a single row. Filling by row with ncol = topK gives one
# row per test signature in both cases.
pred_drug <- matrix(top_drugs, ncol = topK, byrow = TRUE)

write.table(pred_drug, file = "../data/test/class_pred.csv", sep = ",", quote = FALSE, row.names = FALSE, col.names = FALSE)
cat("All done!\n")



Welcome to SigMat v1.0.0 (Updated Feb. 12, 2018).
SigMat matches gene expression signatures to experimental conditions, i.e. classes.
Documentation is available at https://github.com/JinfengXiao/SigMat.
You are running the iPython Notebook version.
You can also run in command line: Rscript sigmat_main.R [-k int]
k is a small positive integer indicating how many classes you want a signature to be matched to.
Default: k=10.
You input k=10.

Reading data...
Training a KSVM ensemble...
Searching for the best alpha value between 0.2 and 7. This can take long...
  Trying alpha = 0.2 ...
  Trying alpha = 0.4 ...
  Trying alpha = 0.6 ...
  Trying alpha = 0.8 ...
  Trying alpha = 1 ...
  Trying alpha = 1.5 ...
  Trying alpha = 2 ...
  Trying alpha = 2.5 ...
  Trying alpha = 3 ...
  Trying alpha = 3.5 ...
  Trying alpha = 4 ...
  Trying alpha = 4.5 ...
  Trying alpha = 5 ...
  Trying alpha = 5.5 ...
  Trying alpha = 6 ...
  Trying alpha = 6.5 ...
  Trying alpha = 7 ...
SigMat found best alpha =