# Script for ```Testing```

In [1]:
import os
import sys
import re
import traceback
from typing import List, Dict
from datetime import datetime
from glob import glob
import json
import argparse

from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import cv2

import torch
from torch import nn, utils
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

sys.path.append(r"C:\Users\confocal_microscope\Desktop\ZebraFish_AP_POS\modules") # add path to scan customized module
from logger import init_logger
from fileop import create_new_dir
from dl_utils import set_gpu, ImgDataset, caulculate_metrics, save_model, plot_training_trend, \
                     confusion_matrix_with_class, get_sortedClassMapper_from_dir

# print("="*100, "\n")

In [2]:
# Logger shared by every cell of this script; 'init_logger' is imported from the
# custom 'logger' module made importable by the 'sys.path.append' above.
testing_logger = init_logger(r"Testing")

Constant paths (dataset root and model-history root)

In [3]:
# Root folder of the datasets; 'test_selected_dir' is resolved beneath it.
ap_dataset_root = r"C:\Users\confocal_microscope\Desktop\{Test}_DataSet"
# Root folder of the saved training runs; 'load_dir' is resolved beneath it.
load_dir_root = r"C:\Users\confocal_microscope\Desktop\{Test}_Model_history"

Arguments (run configuration)

In [4]:
# Dataset selection: the three names below are joined, in this order, under 'ap_dataset_root'.
dataset_name = r"{20230305_NEW_STRUCT}_Academia_Sinica_i409"
dataset_gen_method = "fish_dataset_horiz_cut_1l2_Mix_AP"
dataset_param_name = "DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022"
# Passed to 'set_gpu' to choose the device.
cuda_idx = 1
# Forwarded to 'ImgDataset'; presumably the position of the class label inside the file name -- TODO confirm.
label_in_filename = 0
# Batch size of the test 'DataLoader'.
batch_size = 32
# Entry point name given to 'torch.hub.load'; also the sub-folder of 'load_dir_root' holding the runs.
model_name = "vit_b_16"
# Folder name of the training run to evaluate; it is renamed at the end of the script.
model_history = r"20230403_03_01_49_{EarlyStop}_{84_epochs_AugOnFly}"
# Selects which checkpoint file '{model_desc}_model.pth' is loaded.
model_desc = "best" # best / final
use_hsv = False # using 'HSV' when getting images from the 'ImgDataset'

# When True, only a random subset of the test images is used and one image is displayed with OpenCV.
debug_mode = False
rand_seed = 2022 # only for debug_mode

# Create path var
# 'load_dir' is the run folder that holds the checkpoint and receives the test logs.
load_dir = os.path.join(load_dir_root, model_name, model_history)
# Folder scanned for class sub-folders and '*.tiff' test images.
test_selected_dir = os.path.join(ap_dataset_root, dataset_name, dataset_gen_method, dataset_param_name, "test", "selected")

# Set GPU
device, device_name = set_gpu(cuda_idx)
testing_logger.info(f"Using '{device}', device_name = '{device_name}'")

| 2023-04-03 11:21:27,724 | Testing | INFO | Using 'cuda', device_name = 'NVIDIA GeForce RTX 2080 Ti'


In [5]:
# Get datetime
## timestamp of this test run; recorded in 'test_log' under the "Test" key
time_stamp = datetime.now().strftime('%Y%m%d_%H_%M_%S')


# Set 'np.random.seed'
## makes the debug-mode sampling below reproducible
np.random.seed(rand_seed)


# Scan classes to create 'class_mapper'
num2class_list, class2num_dict = get_sortedClassMapper_from_dir(test_selected_dir)
testing_logger.info(f"num2class_list = {num2class_list}, class2num_dict = {class2num_dict}")


# Scan tiff
## one sub-folder per class, images directly inside it
test_img_list = glob(os.path.normpath(f"{test_selected_dir}/*/*.tiff"))
testing_logger.info(f"total = {len(test_img_list)}")
## debug mode: randomly select at most 200 images
if debug_mode:
    ## 'np.random.choice(..., replace=False)' raises ValueError when asked for
    ## more samples than available, so cap the sample size at the population size
    debug_sample_size = min(200, len(test_img_list))
    test_img_list = np.random.choice(test_img_list, size=debug_sample_size, replace=False)
    testing_logger.info(f"Debug mode, randomly selected {len(test_img_list)}")


# Save 'testing_amount'
## an empty marker file inside 'load_dir' whose name records how many test images were used
testing_amount = f"{{ datatest_{len(test_img_list)} }}_{{ test_{len(test_img_list)} }}"
with open(os.path.normpath(f"{load_dir}/{testing_amount}"), mode="w") as f_writer: pass


# Create 'test_set', 'test_dataloader'
testing_logger.info(f"test_data ({len(test_img_list)})")
## preview at most the first 5 paths; slicing avoids an IndexError on test sets
## with fewer than 5 images, and a plain loop replaces the side-effect-only comprehension
for i, img_path in enumerate(test_img_list[:5]):
    testing_logger.info(f"{i} : img_path = {img_path}")
test_set = ImgDataset(test_img_list, class_mapper=class2num_dict, label_in_filename=label_in_filename,
                      use_hsv=use_hsv)
## shuffle=False keeps predictions in the same order as 'test_img_list'
test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False, pin_memory=True)
testing_logger.info(f"※ : total test batches: {len(test_dataloader)}")


# Read test ( debug mode only )
## sanity check that OpenCV can decode the images: display the last one and wait for a key press
if debug_mode:
    read_test = cv2.imread(test_img_list[-1])
    testing_logger.info(f"Read Test: {test_img_list[-1]}")
    ## 'cv2.imread' returns None instead of raising when the file cannot be read;
    ## fail here with a clear message rather than with an opaque error inside 'cv2.imshow'
    if read_test is None:
        raise OSError(f"Read Test failed, OpenCV cannot read: {test_img_list[-1]}")
    cv2.imshow("Read Test", read_test)
    cv2.waitKey(0)
    ## close the preview window once the key has been pressed
    cv2.destroyAllWindows()


# Create model
testing_logger.info((f"load model using 'torch.hub.load()', "
                     f"model_name: '{model_name}', weights: '{model_name}/{model_history}/{model_desc}_model.pth'"))
## architecture only ( weights=None ); the trained weights are loaded from the checkpoint below
model = torch.hub.load('pytorch/vision', model_name, weights=None)
## modify model structure
## replace the classification head so that it outputs one logit per class;
## the input width is read from the stock head instead of hard-coding 768,
## so a 'model_name' with a different hidden size still gets a matching head
model.heads.head = nn.Linear(in_features=model.heads.head.in_features,
                             out_features=len(class2num_dict), bias=True)
model.to(device)
# print(model)
## load 'model_state_dict'
model_path = os.path.join(load_dir, f"{model_desc}_model.pth")
## NOTE(review): 'torch.load' unpickles the file, only load checkpoints from a trusted source
pth_file = torch.load(model_path, map_location=device) # unpack to device directly
model.load_state_dict(pth_file["model_state_dict"])


# Testing
## testing variable
test_log = { "Test": time_stamp, "model_desc": f"{model_desc}_model.pth" }
pred_list = []  # predicted class index of every test image, in dataloader order
gt_list = []    # ground-truth class index of every test image, in dataloader order
## loop invariants, computed once instead of on every batch
total_batches = len(test_dataloader)
batch_digits = len(str(total_batches))
## progress bar
pbar_n_test = tqdm(total=total_batches, desc="Test ")
## start testing
## set to evaluation mode
model.eval()
try:
    with torch.no_grad():
        for batch, (x_test, y_test) in enumerate(test_dataloader, start=1):
            ## only the images are needed on the GPU; the labels are compared on the CPU,
            ## which avoids a host -> device -> host round trip
            preds = model(x_test.to(device))
            _, pred_test = torch.max(preds, 1)
            pred_test = pred_test.cpu()

            ## extend 'pred_list', 'gt_list'
            pred_list.extend(pred_test.tolist())
            gt_list.extend(y_test.tolist())

            ## show predict_status of current_batch in CLI
            n_correct = (pred_test == y_test).sum().item()
            testing_logger.info((f"Batch[ {batch:0{batch_digits}} / {total_batches} ], "
                                 f"# of (ground truth == prediction) in_this_batch： "
                                 f"{n_correct:{len(str(len(y_test)))}}/{len(y_test)}"))

            ## update 'pbar_n_test'
            pbar_n_test.update(1)
            pbar_n_test.refresh()
finally:
    ## always release the progress bar, even if a batch raises
    pbar_n_test.close()

## fills 'test_log' in place; the 'average_f1' entry is read later when renaming 'load_dir'
caulculate_metrics(test_log, None,
                   gt_list, pred_list, class2num_dict)
# print(json.dumps(test_log, indent=4))
## end testing


# Save information to a file
with open(os.path.normpath(f"{load_dir}/{{Logs}}_test.log"), mode="w") as f_writer:

    ## change direction of 'sys.stdout'
    orig_stdout = sys.stdout # store original 'sys.stdout'
    sys.stdout = f_writer
    try:
        ## write 'test_log'
        print(json.dumps(test_log, indent=4), "\n\n")

        ## write 'classification_report'
        ## map class indices back to class names so the report is readable
        gt_list_to_name = [ num2class_list[i] for i in gt_list ]
        pred_list_to_name = [ num2class_list[i] for i in pred_list ]
        cls_report = classification_report(y_true=gt_list_to_name, y_pred=pred_list_to_name)
        print("Classification Report:\n\n", cls_report, "\n")

        ## write 'confusion_matrix'
        #   row: Ground truth
        #   column: predict
        #  * 0  1  2
        #  0 [] [] []
        #  1 [] [] []
        #  2 [] [] []
        #
        ## NOTE(review): presumably 'confusion_matrix_with_class' prints the matrix itself,
        ## which is why it is called while stdout is redirected -- confirm
        confusion_mat = confusion_matrix_with_class(ground_truth=gt_list_to_name, prediction=pred_list_to_name)
    finally:
        ## recover direct of 'sys.stdout', even if one of the steps above raises;
        ## otherwise every later 'print' would target the already closed log file
        sys.stdout = orig_stdout


# Rename 'load_dir'
## new_name_format = {time_stamp}_{state}_{target_epochs_with_ImgLoadOptions}_{test_f1}
## state = {EarlyStop, Interrupt, Completed, Tested, etc.}
## splitting on every brace gives: [ time_stamp prefix, state, "_", epochs/options, "" ]
model_history_list = re.split(r"[{}]", model_history)
## brace-wrapped tags that follow the time_stamp prefix, joined with "_"
tested_tags = (
    "{Tested}",
    f"{{{model_history_list[3]}}}",
    f"{{{model_desc}}}",
    f"{{avg_f1_{test_log['average_f1']}}}",
)
new_name = model_history_list[0] + "_".join(tested_tags)
renamed_dir = os.path.join(load_dir_root, model_name, new_name)
os.rename(load_dir, renamed_dir)

| 2023-04-03 11:21:27,831 | Testing | INFO | {'L': 0, 'M': 1, 'S': 2}
| 2023-04-03 11:21:27,844 | Testing | INFO | total = 1980
| 2023-04-03 11:21:27,845 | Testing | INFO | test_data (1980)
| 2023-04-03 11:21:27,846 | Testing | INFO | 0 : img_path = C:\Users\confocal_microscope\Desktop\{Test}_DataSet\{20230305_NEW_STRUCT}_Academia_Sinica_i409\fish_dataset_horiz_cut_1l2_Mix_AP\DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022\test\selected\L\L_fish_111_A_selected_0.tiff
| 2023-04-03 11:21:27,846 | Testing | INFO | 1 : img_path = C:\Users\confocal_microscope\Desktop\{Test}_DataSet\{20230305_NEW_STRUCT}_Academia_Sinica_i409\fish_dataset_horiz_cut_1l2_Mix_AP\DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022\test\selected\L\L_fish_111_A_selected_1.tiff
| 2023-04-03 11:21:27,847 | Testing | INFO | 2 : img_path = C:\Users\confocal_microscope\Desktop\{Test}_DataSet\{20230305_NEW_STRUCT}_Academia_Sinica_i409\fish_dataset_horiz_cut_1l2_Mix_AP\DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022\test\selected\L\L_fish_1

Test :   0%|          | 0/62 [00:00<?, ?it/s]

| 2023-04-03 11:21:33,362 | Testing | INFO | Batch[ 01 / 62 ], # of (ground truth == prediction) in_this_batch： 31/32
| 2023-04-03 11:21:33,693 | Testing | INFO | Batch[ 02 / 62 ], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-04-03 11:21:34,011 | Testing | INFO | Batch[ 03 / 62 ], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-04-03 11:21:34,339 | Testing | INFO | Batch[ 04 / 62 ], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-04-03 11:21:34,661 | Testing | INFO | Batch[ 05 / 62 ], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-04-03 11:21:34,981 | Testing | INFO | Batch[ 06 / 62 ], # of (ground truth == prediction) in_this_batch： 30/32
| 2023-04-03 11:21:35,287 | Testing | INFO | Batch[ 07 / 62 ], # of (ground truth == prediction) in_this_batch： 31/32
| 2023-04-03 11:21:35,611 | Testing | INFO | Batch[ 08 / 62 ], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-04-03 11:21:35,943 | Testing | INFO | Batch[ 09 /