In [9]:
# Fix the RNG seed so that the random steps later in this script
# (e.g. the sample() call used for the train/test split) are reproducible.
set.seed(1919)
library(e1071)  # provides svm()
library(dplyr)  # provides arrange()
library(ROCR)   # provides prediction() and performance()

In [10]:
# Read the tab-separated table in RFE_data.txt and the comma-separated
# sample annotation in scirep_classes.txt (columns used here: sample_id, label).
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
dat <- read.table("RFE_data.txt", header = TRUE, sep = "\t")
sample_dat <- read.table("scirep_classes.txt", header = TRUE, sep = ",",
                         stringsAsFactors = FALSE)
# Class labels ordered by sample_id.
# NOTE(review): this assumes the rows of `dat` are already in ascending
# sample_id order so that labels line up with rows -- confirm against the input.
sample_class <- as.factor(arrange(sample_dat, sample_id)$label)

In [11]:
# Append the class label as the last column of the data table.
# Guard first: cbind() on a data frame silently recycles a shorter vector when
# its length divides the row count, which would attach wrong labels.
stopifnot(nrow(dat) == length(sample_class))
merge_dat <- cbind(dat, sample_class)

In [12]:
# List the column names of the merged data frame.
names(merge_dat)

In [13]:
# Random 80/20 split of the rows of merge_dat into training and test sets.
n_samples <- nrow(merge_dat)
n_train <- floor(n_samples * 0.8)
# seq_len() instead of 1:n so that an empty table yields an empty permutation
# (1:0 would produce c(1, 0)). For n >= 1 the shuffled order is unchanged.
indices <- sample(seq_len(n_samples))
indices <- indices[seq_len(n_train)]
SVM_train_sample <- merge_dat[indices, ]
# Pick the held-out rows by set difference rather than negative indexing:
# merge_dat[-indices, ] returns zero rows when `indices` is empty.
# Rows come out in their original order, exactly as with negative indexing.
SVM_test_sample <- merge_dat[setdiff(seq_len(n_samples), indices), ]

In [14]:
# Fit an SVM classifier for sample_class on the training split, using ten
# named feature columns, with probability estimation switched on.
svm.prob.fit<-svm(sample_class ~ ENST00000623130.1 + ENST00000383869.1 + piR.hsa.23317 + ENST00000516053.2 +
            ENST00000626826.1 + ENST00000362154.1 + ENST00000536684.2 + ENST00000384886.3 + ENST00000384278.1+
            ENST00000385273.1, data = SVM_train_sample, probability = TRUE)
# Predicted classes for the test split; the per-class probabilities are read
# from the "probabilities" attribute of the returned object below.
predicted_probs <- predict(svm.prob.fit, SVM_test_sample, probability = TRUE)
# Confusion table for the test split: rows = predicted class, columns = true class.
stat_res <- table(predicted_probs, SVM_test_sample$sample_class)
pre_prob <- attr(predicted_probs, "probabilities")
# Same three steps on the training split (predictions, confusion table,
# probability matrix), used later for the training-set ROC curves.
train_predicted_probs <- predict(svm.prob.fit, SVM_train_sample, probability = TRUE)
train_stat_res <- table(train_predicted_probs, SVM_train_sample$sample_class)
train_pre_prob <- attr(train_predicted_probs, "probabilities")

In [15]:
# Display the test-set confusion table (rows = predicted class, columns = true
# class) and the test-set prediction object.
stat_res
predicted_probs

                   
predicted_probs     Colorectal Cancer Healthy Control Pancreatic Cancer
  Colorectal Cancer                18               2                 0
  Healthy Control                   5               5                 1
  Pancreatic Cancer                 0               0                 0
  Prostate Cancer                   2               1                 0
                   
predicted_probs     Prostate Cancer
  Colorectal Cancer               0
  Healthy Control                 0
  Pancreatic Cancer               0
  Prostate Cancer                 5

In [16]:
# Overall test-set accuracy: correctly classified samples (the diagonal of the
# confusion table) divided by the number of test predictions.
# sum(diag()) replaces the hard-coded [1,1] + ... + [4,4], so the computation
# no longer assumes exactly four classes.
# The variable name (sic, "accurary") is kept so any code referring to it still works.
accurary <- sum(diag(stat_res)) / length(predicted_probs)
accurary

In [25]:
## Test-set ROC for Colorectal Cancer (one-vs-rest)
positive_class <- 'Colorectal Cancer'
# Build the 0/1 labels in one step from the test split: 1 = positive class,
# 0 = any other class.
test_labels <- as.integer(SVM_test_sample$sample_class == positive_class)
pred <- prediction(pre_prob[, positive_class], test_labels)
roc <- performance(pred, 'tpr', 'fpr')
# Compute the AUC before plotting so the subtitle always shows the actual
# value instead of a hand-typed number that can go stale.
auc <- performance(pred, 'auc')
pdf("Col_Can_ROC_SVM_test.pdf")
plot(roc, main = 'ROC Curve for Colorectal Cancer',
     sub = sprintf('AUC = %.3f', auc@y.values[[1]]))
dev.off()
cat('auc =', auc@y.values[[1]], '\n')

auc = 0.8371429 


In [26]:
## Test-set ROC for Healthy Control (one-vs-rest)
positive_class <- 'Healthy Control'
# Rebuild the 0/1 labels from scratch (1 = positive class, 0 = otherwise)
# instead of overwriting parts of a vector left over from a previous cell,
# so the result does not depend on which cells were run before this one.
test_labels <- as.integer(SVM_test_sample$sample_class == positive_class)
pred <- prediction(pre_prob[, positive_class], test_labels)
roc <- performance(pred, 'tpr', 'fpr')
# Compute the AUC before plotting so the subtitle always shows the actual
# value instead of a hand-typed number that can go stale.
auc <- performance(pred, 'auc')
pdf("Healthy_Control_ROC_SVM_test.pdf")
plot(roc, main = 'ROC Curve for Healthy Control',
     sub = sprintf('AUC = %.3f', auc@y.values[[1]]))
dev.off()
cat('auc =', auc@y.values[[1]], '\n')

auc = 0.7298387 


In [27]:
## Test-set ROC for Prostate Cancer (one-vs-rest)
positive_class <- 'Prostate Cancer'
# Rebuild the 0/1 labels from scratch (1 = positive class, 0 = otherwise)
# instead of overwriting parts of a vector left over from a previous cell,
# so the result does not depend on which cells were run before this one.
test_labels <- as.integer(SVM_test_sample$sample_class == positive_class)
pred <- prediction(pre_prob[, positive_class], test_labels)
roc <- performance(pred, 'tpr', 'fpr')
# Compute the AUC before plotting so the subtitle always shows the actual
# value instead of a hand-typed number that can go stale.
auc <- performance(pred, 'auc')
pdf("Pro_Can_ROC_SVM_test.pdf")
plot(roc, main = 'ROC Curve for Prostate Cancer',
     sub = sprintf('AUC = %.3f', auc@y.values[[1]]))
dev.off()
cat('auc =', auc@y.values[[1]], '\n')

auc = 0.9529412 


In [20]:
## The Pancreatic Cancer sample size is too small, so its ROC curve is not
## plotted separately on the test set.
positive_class <- 'Pancreatic Cancer'
# The labels are still updated in place: entries of the existing test_labels
# vector are set to 0 for other classes and 1 for the positive class.
test_labels[SVM_test_sample$sample_class != positive_class] <- 0
test_labels[SVM_test_sample$sample_class == positive_class] <- 1
# The ROC/AUC steps below are intentionally disabled (see the note above).
#pred <- prediction(pre_prob[, positive_class], test_labels)
#roc <- performance(pred, 'tpr', 'fpr') 
#pdf("Pan_Can_ROC_SVM_test.pdf")
#plot(roc, main = 'ROC Curve for Pancreatic Cancer', sub = 'AUC = 0.989')
#dev.off()
#auc <- performance(pred, 'auc')
#cat('auc =', auc@y.values[[1]], '\n')

In [28]:
## Training-set ROC for Healthy Control (one-vs-rest)
positive_class <- 'Healthy Control'
# Build the 0/1 labels directly from the training split (1 = positive class,
# 0 = otherwise). Previously a vector sized for the test split was overwritten
# with a longer logical index, relying on R silently extending it.
# The variable keeps the name test_labels used throughout this script, even
# though it holds training-set labels here.
test_labels <- as.integer(SVM_train_sample$sample_class == positive_class)
pred <- prediction(train_pre_prob[, positive_class], test_labels)
roc <- performance(pred, 'tpr', 'fpr')
# Compute the AUC before plotting so the subtitle shows the actual value; the
# hand-typed subtitle did not match the AUC printed by this cell.
auc <- performance(pred, 'auc')
pdf("Healthy_Control_ROC_SVM_train.pdf")
# Title completed to "in Train Set", matching the other training-set plots.
plot(roc, main = 'ROC Curve for Healthy Control in Train Set',
     sub = sprintf('AUC = %.3f', auc@y.values[[1]]))
dev.off()
cat('auc =', auc@y.values[[1]], '\n')

auc = 0.9532468 


In [29]:
## Training-set ROC for Colorectal Cancer (one-vs-rest)
positive_class <- 'Colorectal Cancer'
# Build the 0/1 labels directly from the training split (1 = positive class,
# 0 = otherwise) rather than overwriting parts of a vector left over from a
# previous cell, so the result does not depend on execution order.
# The variable keeps the name test_labels used throughout this script, even
# though it holds training-set labels here.
test_labels <- as.integer(SVM_train_sample$sample_class == positive_class)
pred <- prediction(train_pre_prob[, positive_class], test_labels)
roc <- performance(pred, 'tpr', 'fpr')
# Compute the AUC before plotting so the subtitle always shows the actual
# value instead of a hand-typed number that can go stale.
auc <- performance(pred, 'auc')
pdf("Col_Can_ROC_SVM_train.pdf")
plot(roc, main = 'ROC Curve for Colorectal Cancer in Train Set',
     sub = sprintf('AUC = %.3f', auc@y.values[[1]]))
dev.off()
cat('auc =', auc@y.values[[1]], '\n')

auc = 0.9152807 


In [30]:
## Training-set ROC for Prostate Cancer (one-vs-rest)
positive_class <- 'Prostate Cancer'
# Build the 0/1 labels directly from the training split (1 = positive class,
# 0 = otherwise) rather than overwriting parts of a vector left over from a
# previous cell, so the result does not depend on execution order.
# The variable keeps the name test_labels used throughout this script, even
# though it holds training-set labels here.
test_labels <- as.integer(SVM_train_sample$sample_class == positive_class)
pred <- prediction(train_pre_prob[, positive_class], test_labels)
roc <- performance(pred, 'tpr', 'fpr')
# Compute the AUC before plotting so the subtitle always shows the actual
# value instead of a hand-typed number that can go stale.
auc <- performance(pred, 'auc')
pdf("Pro_Can_ROC_SVM_train.pdf")
plot(roc, main = 'ROC Curve for Prostate Cancer in Train Set',
     sub = sprintf('AUC = %.3f', auc@y.values[[1]]))
dev.off()
cat('auc =', auc@y.values[[1]], '\n')

auc = 0.9928019 


In [31]:
## Training-set ROC for Pancreatic Cancer (one-vs-rest)
positive_class <- 'Pancreatic Cancer'
# Build the 0/1 labels directly from the training split (1 = positive class,
# 0 = otherwise) rather than overwriting parts of a vector left over from a
# previous cell, so the result does not depend on execution order.
# The variable keeps the name test_labels used throughout this script, even
# though it holds training-set labels here.
test_labels <- as.integer(SVM_train_sample$sample_class == positive_class)
pred <- prediction(train_pre_prob[, positive_class], test_labels)
roc <- performance(pred, 'tpr', 'fpr')
# Compute the AUC before plotting so the subtitle always shows the actual
# value instead of a hand-typed number that can go stale.
auc <- performance(pred, 'auc')
pdf("Pan_Can_ROC_SVM_train.pdf")
plot(roc, main = 'ROC Curve for Pancreatic Cancer in Train Set',
     sub = sprintf('AUC = %.3f', auc@y.values[[1]]))
dev.off()
cat('auc =', auc@y.values[[1]], '\n')

auc = 0.8353741 
