# Result Analysis

## import libraries

In [1]:
# import libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import Counter

import sklearn
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, mean_squared_error, mean_absolute_error

from catboost import CatBoostRegressor

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim

from transformers import BertTokenizerFast, BertModel


# import project-local modules
from extract_features import load_features
from models import BioNN, BioDeepNN, BioResNet
from Model_Training import PeptidesDataLoader

# Notebook-wide constants.
# NOTE(review): SAVE presumably toggles writing results to disk — confirm against the cells that read it.
SAVE = True
# Use the CUDA device when PyTorch reports one as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Analyze the metrics of all models

In [2]:
# Directory that holds the saved per-model metric arrays (*.npy).
RESULT_DIR = "./Result"


def _load_mean_error(model_tag, task):
    """Load one saved metric array and average it over its first axis.

    Parameters
    ----------
    model_tag : str
        File-name prefix identifying the model, e.g. ``"lr"`` or ``"biores"``.
    task : str
        Middle part of the file name, ``"rg"`` or ``"cl"``.
        NOTE(review): presumably regression / classification metrics, judging
        by the column names used for the summary tables — confirm.

    Returns
    -------
    numpy.ndarray
        Contents of ``<RESULT_DIR>/<model_tag>_<task>_error.npy`` after
        ``.mean(axis=0)``.
        NOTE(review): axis 0 is presumably the repeat/fold axis — confirm
        against the code that writes these files.
    """
    return np.load(f"{RESULT_DIR}/{model_tag}_{task}_error.npy").mean(axis=0)


def _load_model_errors(model_tag):
    """Return the ``(rg, cl)`` pair of axis-0-averaged metric arrays for one model."""
    return _load_mean_error(model_tag, "rg"), _load_mean_error(model_tag, "cl")


# The per-model names are kept so that any other cell referring to them still works.
lr_rg_error, lr_cl_error = _load_model_errors("lr")
dtr_rg_error, dtr_cl_error = _load_model_errors("dtr")
cat_rg_error, cat_cl_error = _load_model_errors("cat")

nn_rg_error, nn_cl_error = _load_model_errors("nn")
bionn_rg_error, bionn_cl_error = _load_model_errors("bionn")
biodnn_rg_error, biodnn_cl_error = _load_model_errors("biodnn")
biores_rg_error, biores_cl_error = _load_model_errors("biores")

# Stack one entry per model along a new leading axis. The order here must stay
# in sync with `model_names` in the cell that builds the summary tables.
rg_errors = np.stack(
    [lr_rg_error, dtr_rg_error, cat_rg_error, nn_rg_error, bionn_rg_error, biodnn_rg_error, biores_rg_error],
    axis=0,
)
cl_errors = np.stack(
    [lr_cl_error, dtr_cl_error, cat_cl_error, nn_cl_error, bionn_cl_error, biodnn_cl_error, biores_cl_error],
    axis=0,
)

In [3]:
# Row labels for the summary tables; the order must match the order in which
# the per-model arrays were stacked into `rg_errors` / `cl_errors`.
model_names = ["Linear Regression", "Decision Tree", "CatBoost", "Neural Network", "Bert+NN", "Bert+DNN", "Bert+ResNet"]


def _to_metric_table(errors, metric_names):
    """Turn stacked per-model metrics into a DataFrame indexed by model name.

    Parameters
    ----------
    errors : numpy.ndarray or pandas.DataFrame
        Stacked metric array with one leading entry per model; it is averaged
        over axis 1 before being tabulated. If a DataFrame is passed (i.e. this
        cell has already been run once), it is returned unchanged.
    metric_names : list of str
        Column labels, one per metric.

    Returns
    -------
    pandas.DataFrame
        One row per entry of `model_names`, one column per metric.
    """
    # This cell rebinds `rg_errors` / `cl_errors` from arrays to DataFrames.
    # Without this guard a second execution would average the DataFrame again
    # and hand the constructor data that no longer matches `metric_names`.
    if isinstance(errors, pd.DataFrame):
        return errors
    return pd.DataFrame(errors.mean(axis=1), columns=metric_names, index=model_names)


rg_errors = _to_metric_table(rg_errors, ["MSE", "MAE", "RMSE"])
cl_errors = _to_metric_table(cl_errors, ["AUC", "F1", "Precision", "Recall"])
rg_errors

Unnamed: 0,MSE,MAE,RMSE
Linear Regression,0.512977,0.55764,0.712983
Decision Tree,0.668079,0.626936,0.815707
CatBoost,0.44205,0.513623,0.661144
Neural Network,0.467357,0.528035,0.67954
Bert+NN,0.498872,0.546401,0.703091
Bert+DNN,0.496257,0.544362,0.701144
Bert+ResNet,0.569136,0.585,0.751848


In [4]:
# Display the per-model table with the AUC, F1, Precision and Recall columns.
cl_errors

Unnamed: 0,AUC,F1,Precision,Recall
Linear Regression,0.648518,0.408949,0.637565,0.307108
Decision Tree,0.604278,0.289247,0.442474,0.224564
CatBoost,0.643197,0.394149,0.669256,0.294923
Neural Network,0.643944,0.395146,0.663755,0.297064
Bert+NN,0.634408,0.358905,0.587865,0.279696
Bert+DNN,0.654857,0.387605,0.553524,0.324327
Bert+ResNet,0.699796,0.431274,0.458345,0.429237
