In [60]:
import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, matthews_corrcoef, recall_score, precision_score
from sklearn.metrics import confusion_matrix, f1_score
import torch.nn.functional as F


In [4]:
loaded_datasets_info = torch.load('/root/autodl-tmp/data/saved_datasets_imgs.pth')
train_dataset = loaded_datasets_info['train_dataset']
val_dataset = loaded_datasets_info['val_dataset']
test_dataset = loaded_datasets_info['test_dataset']

  loaded_datasets_info = torch.load('/root/autodl-tmp/data/saved_datasets_imgs.pth')


In [7]:
batch_size = 10
loaded_train_dataset = DataLoader(train_dataset, batch_size = batch_size, shuffle = False)
loaded_val_dataset = DataLoader(val_dataset, batch_size = batch_size, shuffle = False)
loaded_test_dataset = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

In [118]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
        )
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
        )
        self.fcs = nn.Sequential(
            nn.Linear(2304, 1152),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1152, 576),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(576, 10)
        )
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

In [123]:
device = "cuda"
model = Net()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
num_epochs = 10

In [124]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_indx, (inputs, labels) in enumerate(loaded_train_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    # Print average loss for the epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / (len(loaded_train_dataset) / batch_size)}")  

Epoch 1/10, Loss: 2.4467889076195686
Epoch 2/10, Loss: 1.0620415796572902
Epoch 3/10, Loss: 0.7876996425374206
Epoch 4/10, Loss: 0.6862258213725735
Epoch 5/10, Loss: 0.6090972748177458
Epoch 6/10, Loss: 0.5475551188204418
Epoch 7/10, Loss: 0.4607432547646114
Epoch 8/10, Loss: 0.4555582572555397
Epoch 9/10, Loss: 0.42493565610237777
Epoch 10/10, Loss: 0.43536393710843496


In [173]:
predicted_probabilities = []
true_labels = []
with torch.set_grad_enabled(False):
    for batch_indx, (inputs, labels) in enumerate(loaded_val_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)      
        outputs = model(inputs)
        predicted_probabilities.extend(outputs.tolist())
        true_labels.extend(labels.tolist())

In [174]:
np.array(predicted_probabilities).shape

(10080, 10)

In [175]:
np.array(true_labels).shape

(10080, 10)

In [176]:
predicted_labels_wo_oh = np.argmax(np.array(predicted_probabilities), axis=1)
true_labels_wo_oh = np.argmax(np.array(true_labels), axis=1)
print(true_labels_wo_oh)
print(predicted_labels_wo_oh)
accuracy = accuracy_score(true_labels_wo_oh, predicted_labels_wo_oh)
print(accuracy)

[5 2 8 ... 3 1 5]
[5 2 8 ... 3 1 5]
0.984920634920635


In [159]:
predicted_probabilities = []  
true_labels = []  
with torch.set_grad_enabled(False): 
    for batch_indx, (inputs, labels) in enumerate(loaded_test_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)    
        outputs = model(inputs)
        predicted_probabilities.extend(outputs.tolist())
        true_labels.extend(labels.tolist())

In [160]:
predicted_labels_wo_oh = np.argmax(np.array(predicted_probabilities), axis=1)
true_labels_wo_oh = np.argmax(np.array(true_labels), axis=1)
print(true_labels_wo_oh)
print(predicted_labels_wo_oh)
accuracy = accuracy_score(true_labels_wo_oh, predicted_labels_wo_oh)
print(accuracy)

[2 1 8 ... 6 5 1]
[2 1 8 ... 6 5 1]
0.9863492063492063


In [None]:
# calculation by hand
#correct_predictions = np.sum(true_labels == predicted_labels)
#accuracy = correct_predictions / len(true_labels)
#print(accuracy)

In [103]:
def metrics_output(predicted_probabilities, true_labels, num_classes=10):
    
    predicted_labels = np.argmax(np.array(predicted_probabilities), axis=1)
    print(predicted_labels)
    
    true_labels = np.argmax(np.array(true_labels), axis=1)
    print(true_labels)
    
    # Confusion matrix: for multi-class, we get a confusion matrix for all classes
    conf_matrix = confusion_matrix(true_labels, predicted_labels)
    
    # Sensitivity (Recall for each class): TP / (TP + FN) for each class
    sensitivity = np.diag(conf_matrix) / np.sum(conf_matrix, axis=1)
    
    # Specificity for each class: TN / (TN + FP) for each class
    tn = np.sum(conf_matrix) - np.sum(conf_matrix, axis=0) - np.sum(conf_matrix, axis=1) + np.diag(conf_matrix)
    specificity = tn / (tn + np.sum(conf_matrix, axis=0) - np.diag(conf_matrix))
    
    # Accuracy: overall accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    # F1-score (macro average): F1 for each class, then average
    f1 = f1_score(true_labels, predicted_labels, average='macro', labels=np.arange(num_classes))
    
    # Matthews Correlation Coefficient (MCC) for multi-class classification
    mcc = matthews_corrcoef(true_labels, predicted_labels)
    
    # AUC: Use roc_auc_score with multi_class='ovr' for one-vs-rest ROC AUC calculation
    auc = roc_auc_score(true_labels, predicted_probabilities, multi_class='ovr', average='macro', labels=np.arange(num_classes))
    
    #return auc, sensitivity, specificity, accuracy, f1, mcc
    return sensitivity, specificity, accuracy, f1, mcc

In [104]:
# Example use:
# predicted_probs should be the probability predictions from your model (shape: [batch_size, num_classes])
# predicted_labels should be the hard predictions (shape: [batch_size,])

#sensitivity, specificity, accuracy, f1, mcc, auc = metrics_output(predicted_probabilities, true_labels, num_classes=10)
sensitivity, specificity, accuracy, f1, mcc = metrics_output(predicted_probabilities, true_labels, num_classes=10)
print(sensitivity, specificity, accuracy, f1, mcc)


[3 3 3 ... 3 3 3]
[5 2 8 ... 3 1 5]


ValueError: Target scores need to be probabilities for multiclass roc_auc, i.e. they should sum up to 1.0 over classes

In [67]:
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = metrics_output(predicted_probabilities, true_labels)
print(roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[2.00770688e-33 3.91716668e-36 3.45217724e-39 ... 0.00000000e+00
  3.09459973e-36 2.29368335e-36]
 [3.22450259e-33 3.61976413e-40 1.80637865e-37 ... 0.00000000e+00
  3.00996668e-37 9.06860110e-39]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 1.12103877e-44]
 ...
 [1.31520689e-39 7.43793362e-35 2.91722314e-41 ... 1.33123354e-43
  4.94043748e-38 1.20058015e-36]
 [1.40129846e-45 2.53635022e-43 6.87196767e-42 ... 1.26116862e-44
  3.61619082e-41 1.38125990e-40]
 [0.00000000e+00 0.00000000e+00 1.40129846e-44 ... 0.00000000e+00
  0.00000000e+00 3.82554481e-42]]


ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets

In [29]:
np.save('/root/autodl-tmp/ROC/CNN/y_val_pred.npy', predicted_probabilities)
np.save('/root/autodl-tmp/ROC/CNN/y_val.npy', true_labels)

FileNotFoundError: [Errno 2] No such file or directory: '/root/autodl-tmp/ROC/CNN/y_val_pred.npy'

In [None]:
predicted_probabilities = []  
true_labels = []  
with torch.set_grad_enabled(False): 
    for batch_indx, (inputs, labels) in enumerate(loaded_test_dataset):
        inputs = inputs.to(device)
        labels = labels.to(device)    
        outputs = model(inputs)
        predicted_probabilities.extend(outputs.tolist())
        true_labels.extend(labels.tolist())

In [None]:
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = metrics_output(predicted_probabilities, true_labels)
print(roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC)

In [None]:
np.save('/root/autodl-tmp/ROC/CNN/y_test_pred.npy', predicted_probabilities)
np.save('/root/autodl-tmp/ROC/CNN/y_test.npy', true_labels)