In [23]:
import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, matthews_corrcoef, recall_score, precision_score
from sklearn.metrics import confusion_matrix, f1_score
import torch.nn.functional as F
from sklearn.metrics import roc_curve

In [11]:
loaded_datasets_info = torch.load('/root/autodl-tmp/data/saved_datasets.pth')
train_dataset = loaded_datasets_info['train_dataset']
test_dataset = loaded_datasets_info['test_dataset']

  loaded_datasets_info = torch.load('/root/autodl-tmp/data/saved_datasets.pth')


In [12]:
batch_size = 10
loaded_train_dataset = DataLoader(train_dataset, batch_size = batch_size, shuffle = False)
loaded_test_dataset = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

In [76]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
        )
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
        )
        self.fcs = nn.Sequential(
            nn.Linear(2304, 1152),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1152, 576),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(576, 10)
        )
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

In [77]:
device = "cuda"
model = Net()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
num_epochs = 10

In [78]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_indx, (inputs, labels) in enumerate(loaded_train_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    # Print average loss for the epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / (len(loaded_train_dataset) / batch_size)}")  

Epoch 1/10, Loss: 2.0136951094567874
Epoch 2/10, Loss: 0.9024833912942127
Epoch 3/10, Loss: 0.6816384742372905
Epoch 4/10, Loss: 0.6298759774011101
Epoch 5/10, Loss: 0.5439084368601116
Epoch 6/10, Loss: 0.5121743980479171
Epoch 7/10, Loss: 0.47307930462214187
Epoch 8/10, Loss: 0.44432091532405554
Epoch 9/10, Loss: 0.4294630671802565
Epoch 10/10, Loss: 0.44201848253771403


In [122]:
predicted_probabilities = []
true_labels = []
with torch.set_grad_enabled(False):
    for batch_indx, (inputs, labels) in enumerate(loaded_train_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)      
        outputs = model(inputs)
        predicted_probabilities.extend(outputs.tolist())
        true_labels.extend(labels.tolist())

In [80]:
# auc (micro version)
predicted_probabilities_r = np.array(predicted_probabilities).ravel()
true_labels_r = np.array(true_labels).ravel()
fpr, tpr, thresholds = roc_curve(true_labels_r, predicted_probabilities_r)
micro_auc = auc(fpr, tpr)
print(f"auc: {micro_auc}")

auc: 0.9893394501080247


In [81]:
# accuracy
predicted_labels_wo_oh = np.argmax(np.array(predicted_probabilities), axis=1)
true_labels_wo_oh = np.argmax(np.array(true_labels), axis=1)
print(true_labels_wo_oh)
print(predicted_labels_wo_oh)
accuracy = accuracy_score(true_labels_wo_oh, predicted_labels_wo_oh)
print(accuracy)

[5 0 4 ... 5 6 8]
[5 0 4 ... 5 6 8]
0.9868


In [136]:
# f1-score (micro)
preds = torch.tensor(predicted_probabilities)
preds = F.softmax(preds, dim=-1)
predicted_labels = (preds >= 0.5).int().numpy()
predicted_labels_r = predicted_labels.ravel()
f1_micro = f1_score(true_labels_r, predicted_labels_r, average='macro')
print(f"f1: {f1_micro}")

f1: 0.9924841842731837


In [None]:
# sen


In [None]:
# sp

In [99]:
# mcc
mcc = matthews_corrcoef(true_labels, predicted_labels)

In [123]:
predicted_labels = np.argmax(predicted_probabilities, axis=-1)
predicted_labels

array([5, 0, 4, ..., 5, 6, 8])

In [124]:
true_labels_ = np.argmax(true_labels, axis=-1)
true_labels_

array([5, 0, 4, ..., 5, 6, 8])

In [119]:
result = calculate_multiclass_metrics(true_labels_, predicted_labels, 10)

Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      5923
           1       0.98      1.00      0.99      6742
           2       0.99      0.99      0.99      5958
           3       1.00      0.97      0.99      6131
           4       1.00      0.96      0.98      5842
           5       0.97      1.00      0.98      5421
           6       0.99      0.99      0.99      5918
           7       1.00      0.99      0.99      6265
           8       0.98      0.99      0.99      5851
           9       0.97      0.99      0.98      5949

    accuracy                           0.99     60000
   macro avg       0.99      0.99      0.99     60000
weighted avg       0.99      0.99      0.99     60000

Matthews Correlation Coefficient (MCC): 0.9853
Class 0: Sensitivity = 0.9954, Specificity = 0.9991
Class 1: Sensitivity = 0.9985, Specificity = 0.9978
Class 2: Sensitivity = 0.9856, Specificity = 0.9989
Class 3: Sensit

In [134]:
import numpy as np
from sklearn.metrics import confusion_matrix, matthews_corrcoef, classification_report, roc_auc_score, accuracy_score

np.set_printoptions(precision=4, suppress=True)

def calculate_multiclass_metrics(true_labels, predicted_labels, predicted_probabilities, num_classes):
    # 初始化指标存储
    sensitivity_per_class = []
    specificity_per_class = []
    auc_per_class = []

    # 计算 Accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)

    # 计算 MCC
    mcc = matthews_corrcoef(true_labels, predicted_labels)

    # 针对每个类别计算 Sensitivity, Specificity, AUC
    for i in range(num_classes):
        # 将当前类别设置为正类，其余类别为负类 (One-vs-All)
        true_binary = (np.array(true_labels) == i).astype(int)
        pred_binary = (np.array(predicted_labels) == i).astype(int)

        # 计算混淆矩阵并解包 TN, FP, FN, TP
        cm = confusion_matrix(true_binary, pred_binary, labels=[0, 1])
        tn, fp, fn, tp = cm.ravel()

        # 计算 Sensitivity 和 Specificity
        sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

        sensitivity_per_class.append(sensitivity)
        specificity_per_class.append(specificity)

        # 计算 AUC（需要预测概率）
        if predicted_probabilities is not None:
            auc = roc_auc_score(true_binary, predicted_probabilities[:, i]) if len(np.unique(true_binary)) > 1 else 0
            auc_per_class.append(auc)

    # 平均指标计算
    avg_sensitivity = np.mean(sensitivity_per_class)
    avg_specificity = np.mean(specificity_per_class)
    avg_auc = np.mean(auc_per_class) if auc_per_class else 0

    # 打印逐类别指标
    print("Classification Report:")
    print(classification_report(true_labels, predicted_labels, digits=4))  # 设置四位小数
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Matthews Correlation Coefficient (MCC): {mcc:.4f}")
    print(f"Average Sensitivity: {avg_sensitivity:.4f}")
    print(f"Average Specificity: {avg_specificity:.4f}")
    print(f"Average AUC: {avg_auc:.4f}")
    
    for i in range(num_classes):
        print(f"Class {i}: Sensitivity = {sensitivity_per_class[i]:.4f}, Specificity = {specificity_per_class[i]:.4f}")
        if predicted_probabilities is not None:
            print(f"Class {i}: AUC = {auc_per_class[i]:.4f}")

In [129]:
import numpy as np
from sklearn.metrics import confusion_matrix, matthews_corrcoef, classification_report, roc_auc_score, accuracy_score

np.set_printoptions(precision=4, suppress=True)

def calculate_multiclass_metrics(true_labels, predicted_labels, predicted_probabilities, num_classes):
    # 初始化指标存储
    sensitivity_per_class = []
    specificity_per_class = []
    auc_per_class = []

    # 计算 Accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)

    # 计算 MCC
    mcc = matthews_corrcoef(true_labels, predicted_labels)

    # 针对每个类别计算 Sensitivity, Specificity, AUC
    for i in range(num_classes):
        # 将当前类别设置为正类，其余类别为负类 (One-vs-All)
        true_binary = (np.array(true_labels) == i).astype(int)
        pred_binary = (np.array(predicted_labels) == i).astype(int)

        # 计算混淆矩阵并解包 TN, FP, FN, TP
        tn, fp, fn, tp = confusion_matrix(true_binary, pred_binary, labels=[0, 1]).ravel()

        # 计算 Sensitivity 和 Specificity
        sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

        sensitivity_per_class.append(sensitivity)
        specificity_per_class.append(specificity)

        # 计算 AUC（需要预测概率）
        if predicted_probabilities is not None:
            auc = roc_auc_score(true_binary, predicted_probabilities[:, i]) if len(np.unique(true_binary)) > 1 else 0
            auc_per_class.append(auc)

    # 打印逐类别指标
    print("Classification Report:")
    print(classification_report(true_labels, predicted_labels, digits=4))  # 设置四位小数
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Matthews Correlation Coefficient (MCC): {mcc:.4f}")
    
    for i in range(num_classes):
        print(f"Class {i}: Sensitivity = {sensitivity_per_class[i]:.4f}, Specificity = {specificity_per_class[i]:.4f}")
        if predicted_probabilities is not None:
            print(f"Class {i}: AUC = {auc_per_class[i]:.4f}")

In [135]:
# 调用函数
calculate_multiclass_metrics(true_labels_, predicted_labels, preds, num_classes=10)


Classification Report:
              precision    recall  f1-score   support

           0     0.9929    0.9946    0.9938      5923
           1     0.9842    0.9970    0.9906      6742
           2     0.9907    0.9871    0.9889      5958
           3     0.9980    0.9739    0.9858      6131
           4     0.9989    0.9596    0.9789      5842
           5     0.9635    0.9969    0.9799      5421
           6     0.9907    0.9900    0.9904      5918
           7     0.9944    0.9837    0.9890      6265
           8     0.9814    0.9928    0.9871      5851
           9     0.9687    0.9877    0.9781      5949

    accuracy                         0.9864     60000
   macro avg     0.9863    0.9863    0.9862     60000
weighted avg     0.9866    0.9864    0.9864     60000

Accuracy: 0.9864
Matthews Correlation Coefficient (MCC): 0.9849
Average Sensitivity: 0.9863
Average Specificity: 0.9985
Average AUC: 0.9998
Class 0: Sensitivity = 0.9946, Specificity = 0.9992
Class 0: AUC = 1.0000
Clas

[5 2 8 ... 3 1 5]
[5 2 8 ... 3 1 5]
0.984920634920635


In [159]:
predicted_probabilities = []  
true_labels = []  
with torch.set_grad_enabled(False): 
    for batch_indx, (inputs, labels) in enumerate(loaded_test_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)    
        outputs = model(inputs)
        predicted_probabilities.extend(outputs.tolist())
        true_labels.extend(labels.tolist())

In [160]:
predicted_labels_wo_oh = np.argmax(np.array(predicted_probabilities), axis=1)
true_labels_wo_oh = np.argmax(np.array(true_labels), axis=1)
print(true_labels_wo_oh)
print(predicted_labels_wo_oh)
accuracy = accuracy_score(true_labels_wo_oh, predicted_labels_wo_oh)
print(accuracy)

[2 1 8 ... 6 5 1]
[2 1 8 ... 6 5 1]
0.9863492063492063


In [None]:
# calculation by hand
#correct_predictions = np.sum(true_labels == predicted_labels)
#accuracy = correct_predictions / len(true_labels)
#print(accuracy)

In [103]:
def metrics_output(predicted_probabilities, true_labels, num_classes=10):
    
    predicted_labels = np.argmax(np.array(predicted_probabilities), axis=1)
    print(predicted_labels)
    
    true_labels = np.argmax(np.array(true_labels), axis=1)
    print(true_labels)
    
    # Confusion matrix: for multi-class, we get a confusion matrix for all classes
    conf_matrix = confusion_matrix(true_labels, predicted_labels)
    
    # Sensitivity (Recall for each class): TP / (TP + FN) for each class
    sensitivity = np.diag(conf_matrix) / np.sum(conf_matrix, axis=1)
    
    # Specificity for each class: TN / (TN + FP) for each class
    tn = np.sum(conf_matrix) - np.sum(conf_matrix, axis=0) - np.sum(conf_matrix, axis=1) + np.diag(conf_matrix)
    specificity = tn / (tn + np.sum(conf_matrix, axis=0) - np.diag(conf_matrix))
    
    # Accuracy: overall accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    # F1-score (macro average): F1 for each class, then average
    f1 = f1_score(true_labels, predicted_labels, average='macro', labels=np.arange(num_classes))
    
    # Matthews Correlation Coefficient (MCC) for multi-class classification
    mcc = matthews_corrcoef(true_labels, predicted_labels)
    
    # AUC: Use roc_auc_score with multi_class='ovr' for one-vs-rest ROC AUC calculation
    auc = roc_auc_score(true_labels, predicted_probabilities, multi_class='ovr', average='macro', labels=np.arange(num_classes))
    
    #return auc, sensitivity, specificity, accuracy, f1, mcc
    return sensitivity, specificity, accuracy, f1, mcc

In [104]:
# Example use:
# predicted_probs should be the probability predictions from your model (shape: [batch_size, num_classes])
# predicted_labels should be the hard predictions (shape: [batch_size,])

#sensitivity, specificity, accuracy, f1, mcc, auc = metrics_output(predicted_probabilities, true_labels, num_classes=10)
sensitivity, specificity, accuracy, f1, mcc = metrics_output(predicted_probabilities, true_labels, num_classes=10)
print(sensitivity, specificity, accuracy, f1, mcc)


[3 3 3 ... 3 3 3]
[5 2 8 ... 3 1 5]


ValueError: Target scores need to be probabilities for multiclass roc_auc, i.e. they should sum up to 1.0 over classes

In [67]:
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = metrics_output(predicted_probabilities, true_labels)
print(roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[2.00770688e-33 3.91716668e-36 3.45217724e-39 ... 0.00000000e+00
  3.09459973e-36 2.29368335e-36]
 [3.22450259e-33 3.61976413e-40 1.80637865e-37 ... 0.00000000e+00
  3.00996668e-37 9.06860110e-39]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 1.12103877e-44]
 ...
 [1.31520689e-39 7.43793362e-35 2.91722314e-41 ... 1.33123354e-43
  4.94043748e-38 1.20058015e-36]
 [1.40129846e-45 2.53635022e-43 6.87196767e-42 ... 1.26116862e-44
  3.61619082e-41 1.38125990e-40]
 [0.00000000e+00 0.00000000e+00 1.40129846e-44 ... 0.00000000e+00
  0.00000000e+00 3.82554481e-42]]


ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets

In [29]:
np.save('/root/autodl-tmp/ROC/CNN/y_val_pred.npy', predicted_probabilities)
np.save('/root/autodl-tmp/ROC/CNN/y_val.npy', true_labels)

FileNotFoundError: [Errno 2] No such file or directory: '/root/autodl-tmp/ROC/CNN/y_val_pred.npy'

In [None]:
predicted_probabilities = []  
true_labels = []  
with torch.set_grad_enabled(False): 
    for batch_indx, (inputs, labels) in enumerate(loaded_test_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)    
        outputs = model(inputs)
        predicted_probabilities.extend(outputs.tolist())
        true_labels.extend(labels.tolist())

In [None]:
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = metrics_output(predicted_probabilities, true_labels)
print(roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC)

In [None]:
np.save('/root/autodl-tmp/ROC/CNN/y_test_pred.npy', predicted_probabilities)
np.save('/root/autodl-tmp/ROC/CNN/y_test.npy', true_labels)