In [None]:
library(ggplot2)
library(precrec)
library(readr)
library(reticulate)
library(dplyr)
sklearn <- import("sklearn.metrics")

In [None]:
# Build a one-row data frame of binary-classification metrics.
#
# Args:
#   df:    data frame with columns y_true, y_pred and y_prob.
#   label: value stored in the Epitope column of the returned row.
#
# Returns a one-row data frame with the columns Epitope, roc_auc, prc_auc,
# accuracy, precision, recall, specificity, mcc, f1, TP, FP, TN, FN.
#
# Relies on the file-level `sklearn` object (reticulate import of
# sklearn.metrics) and on precrec's evalmod()/auc().
.calculate_metrics_row <- function(df, label) {
  y_true <- df$y_true
  y_pred <- df$y_pred

  # Tally the confusion counts in R with 1 as the positive class (the same
  # positive label the sklearn scores below use by default). Indexing
  # sklearn's confusion matrix as a fixed 2x2 fails with "subscript out of
  # bounds" when a group only contains one class, because the matrix is 1x1.
  true_pos <- y_true == 1
  pred_pos <- y_pred == 1
  TP <- sum(true_pos & pred_pos)
  FP <- sum(!true_pos & pred_pos)
  TN <- sum(!true_pos & !pred_pos)
  FN <- sum(true_pos & !pred_pos)

  accuracy <- sklearn$accuracy_score(y_true, y_pred)
  precision <- sklearn$precision_score(y_true, y_pred)
  recall <- sklearn$recall_score(y_true, y_pred)
  # NaN when the subset holds no negative samples (0 / 0).
  specificity <- TN / (TN + FP)
  mcc <- sklearn$matthews_corrcoef(y_true, y_pred)
  f1 <- sklearn$f1_score(y_true, y_pred)

  if (length(unique(y_true)) < 2) {
    # ROC / PR curves need both classes; report NA instead of aborting the
    # whole run because of one single-class group.
    warning(
      "Only one class present in y_true for '", label,
      "'; roc_auc and prc_auc are set to NA.",
      call. = FALSE
    )
    roc_auc <- NA_real_
    prc_auc <- NA_real_
  } else {
    sscurves <- evalmod(scores = df$y_prob, labels = y_true)
    auc_values <- auc(sscurves)
    roc_auc <- auc_values$aucs[auc_values$curvetypes == "ROC"]
    prc_auc <- auc_values$aucs[auc_values$curvetypes == "PRC"]
  }

  data.frame(
    Epitope = label,
    roc_auc = roc_auc,
    prc_auc = prc_auc,
    accuracy = accuracy,
    precision = precision,
    recall = recall,
    specificity = specificity,
    mcc = mcc,
    f1 = f1,
    TP = TP,
    FP = FP,
    TN = TN,
    FN = FN,
    stringsAsFactors = FALSE
  )
}

# Compute per-epitope and overall classification metrics and save them as CSV.
#
# Args:
#   data:        data frame of predictions with the columns y_true, y_pred,
#                y_prob and the grouping column named by `Epitope`.
#   result_path: prefix of the output file; "all_result.csv" is appended to it
#                verbatim (no path separator is inserted).
#   Epitope:     name (string) of the column in `data` to group by.
#
# One row is produced per non-empty group, followed by a final row labelled
# "all_values" computed over the whole data set. The table is written to
# paste0(result_path, "all_result.csv") and the saved path is printed.
#
# Stops if `data` is not a non-empty data frame or a required column is
# missing.
calculate <- function(data, result_path, Epitope) {
  if (!is.data.frame(data)) {
    stop(
      "`data` must be a data frame of predictions, not an object of class '",
      class(data)[1], "'. Read the file first (e.g. with read_csv()).",
      call. = FALSE
    )
  }
  if (!Epitope %in% colnames(data)) {
    stop("Error: The specified Epitope column does not exist in the data.")
  }
  missing_cols <- setdiff(c("y_true", "y_pred", "y_prob"), colnames(data))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s) in the data: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(data) == 0) {
    stop("`data` contains no rows.", call. = FALSE)
  }

  Epitope_groups <- split(data, data[[Epitope]])
  # Unused factor levels yield empty groups; skip them.
  has_rows <- vapply(Epitope_groups, nrow, integer(1)) > 0
  Epitope_groups <- Epitope_groups[has_rows]

  # Build every row first and bind once instead of growing a data frame
  # with rbind() inside a loop.
  group_rows <- Map(
    .calculate_metrics_row,
    Epitope_groups,
    names(Epitope_groups)
  )
  overall_row <- .calculate_metrics_row(data, "all_values")
  roc_prc_values <- do.call(rbind, c(unname(group_rows), list(overall_row)))
  rownames(roc_prc_values) <- NULL

  output_file_path <- paste0(result_path, "all_result.csv")
  write.csv(roc_prc_values, file = output_file_path, row.names = FALSE)
  print(paste("Saved File：", output_file_path))
}


# data_path refers to the prediction results of each model, which must include four columns: Epitope, y_true, y_pred, and y_prob

In [None]:
# CSV file holding the model's prediction results.
data_path <- "result_path/predition.csv"
# Output prefix: calculate() appends "all_result.csv" to this string verbatim.
result_path <- "result_path/predition"
# Name of the column used to group the predictions.
column <- "Epitope"

# calculate() operates on a data frame (it inspects colnames(data)), so the
# predictions have to be loaded before the call; passing the path string
# itself makes the column check fail.
predictions <- read_csv(data_path)
calculate(predictions, result_path, column)