In [25]:
library(ggplot2)
library(precrec)
library(readr)
library(reticulate)
sklearn <- import("sklearn.metrics")

In [26]:
#' Compute per-group and overall binary-classification metrics and save them.
#'
#' For every distinct value of `data[[column]]`, and once more for the whole
#' data set (row labelled "all_values"), this computes ROC AUC and PRC AUC
#' (via precrec) plus accuracy, precision, recall, specificity, MCC and F1
#' (via the file-level `sklearn` object, i.e. sklearn.metrics through
#' reticulate). The table is written to `paste0(result_path, "all_result.csv")`.
#'
#' @param data A data frame with the columns `y_true`, `y_pred`, `y_prob` and
#'   the grouping column named by `column`, or a single string giving the path
#'   of a CSV file with those columns (it is then read with readr).
#' @param result_path Prefix of the output file. "all_result.csv" is appended
#'   verbatim, so no path separator is inserted.
#' @param column Name of the column whose values define the groups.
#' @return Invisibly, the "Saved File" status message that is also printed.
calculate <- function(data, result_path, column) {
  # The driver code in this file hands over a file path, so accept a path in
  # addition to an already-loaded data frame.
  if (is.character(data) && length(data) == 1L) {
    data <- readr::read_csv(data)
  }

  # Fail early with a readable message instead of an opaque downstream error.
  required_cols <- c(column, "y_true", "y_pred", "y_prob")
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s): ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Build the one-row metric summary for a set of rows; used for every group
  # and for the full data set so both are computed identically.
  metrics_row <- function(label, rows) {
    y_true <- rows$y_true
    y_pred <- rows$y_pred

    if (length(unique(y_true)) < 2L) {
      # ROC/PRC AUC are undefined when only one class is present; record NA
      # rather than aborting the whole run on such a group.
      warning(
        "Fewer than two classes in y_true for '", label,
        "'; ROC/PRC AUC set to NA.",
        call. = FALSE
      )
      roc_auc <- NA_real_
      prc_auc <- NA_real_
    } else {
      curves <- precrec::evalmod(scores = rows$y_prob, labels = y_true)
      auc_table <- precrec::auc(curves)
      roc_auc <- auc_table$aucs[auc_table$curvetypes == "ROC"]
      prc_auc <- auc_table$aucs[auc_table$curvetypes == "PRC"]
    }

    data.frame(
      epitope = label,
      roc_auc = roc_auc,
      prc_auc = prc_auc,
      accuracy = sklearn$accuracy_score(y_true, y_pred),
      precision = sklearn$precision_score(y_true, y_pred),
      recall = sklearn$recall_score(y_true, y_pred),
      # Specificity is the recall of the negative class (label 0).
      specificity = sklearn$recall_score(y_true, y_pred, pos_label = 0),
      mcc = sklearn$matthews_corrcoef(y_true, y_pred),
      f1 = sklearn$f1_score(y_true, y_pred),
      stringsAsFactors = FALSE
    )
  }

  epitope_groups <- split(data, data[[column]])

  # Collect rows in a list and bind once instead of growing a data frame
  # with rbind() on every iteration.
  group_rows <- lapply(
    names(epitope_groups),
    function(group_name) metrics_row(group_name, epitope_groups[[group_name]])
  )
  overall_row <- metrics_row("all_values", data)
  roc_prc_values <- do.call(rbind, c(group_rows, list(overall_row)))

  output_file_path <- paste0(result_path, "all_result.csv")
  write.csv(roc_prc_values, file = output_file_path, row.names = FALSE)
  print(paste("Saved File：", output_file_path))
}



In [None]:
# data_path is the path to a CSV file containing the prediction results of a
# model. The file must include four columns: epitope, y_true, y_pred, and y_prob.

In [None]:
# Input CSV with the model predictions, and the prefix for the output file.
# calculate() appends "all_result.csv" directly to result_path, so the value
# below acts as a file-name prefix rather than a directory.
data_path <- "result_path/predition.csv"
result_path <- "result_path/predition"
column <- "epitope"

# calculate() splits and indexes its first argument as a data frame, so load
# the predictions from disk before calling it.
predictions <- read_csv(data_path)
calculate(predictions, result_path, column)