# Script for ```Testing```

In [1]:
import os
import sys
import re
import traceback
from typing import List, Dict
from datetime import datetime
from glob import glob
import json
import yaml

from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import cv2

import torch
from torch import nn, utils
from torch.utils.data import Dataset, DataLoader
import torchvision

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


sys.path.append("/home/rime97410000/ZebraFish_Code/ZebraFish_AP_POS/modules") # add path to scan customized module
from logger import init_logger
from fileop import create_new_dir
from dl_utils import set_gpu, ImgDataset, caulculate_metrics, save_model, plot_training_trend, \
                     confusion_matrix_with_class, get_sortedClassMapper_from_dir

# print("="*100, "\n")

In [2]:
# Logger used by every cell below; "Testing" is the name handed to `init_logger`.
testingByImg_logger = init_logger("Testing")

Load `vit_b_16_PredByImg.yaml`

In [3]:
# Read the configuration of this test run from the YAML file in the working directory.
with open("vit_b_16_PredByImg.yaml", mode="r") as f_reader:
    config = yaml.safe_load(f_reader)

# Options of the test run itself (batch size and debug-mode switches)
test_opts = config["test_opts"]
batch_size = test_opts["base"]["batch_size"]
debug_mode = test_opts["debug_mode"]["enable"]
debug_rand_select = test_opts["debug_mode"]["rand_select"]

# Description of the trained model to load (these values are combined into paths in later cells)
model_opts = config["model"]
load_dir_root = model_opts["history_root"]
model_name = model_opts["model_name"]
model_history = model_opts["history"]
model_desc = model_opts["desc"]

Load `train_config.yaml`

In [4]:
# The training run stored its own configuration next to the model;
# read it back so testing uses the same dataset and options.
load_dir = os.path.join(load_dir_root, model_name, model_history)
train_config_path = os.path.join(load_dir, "train_config.yaml")

with open(train_config_path, mode="r") as f_reader:
    train_config = yaml.safe_load(f_reader)

# Dataset description (each value becomes one path component of the dataset directory)
dataset_opts = train_config["dataset"]
dataset_root = os.path.normpath(dataset_opts["root"])
dataset_name = dataset_opts["name"]
dataset_gen_method = dataset_opts["gen_method"]
dataset_stdev = dataset_opts["stdev"]
dataset_param_name = dataset_opts["param_name"]

# Training options that are reused for testing
train_opts = train_config["train_opts"]
rand_seed = train_opts["random_seed"]
cuda_idx = train_opts["cuda"]["index"]
use_hsv = train_opts["data"]["use_hsv"]

Generate `path_vars`

In [5]:
# Dataset directory layout: <root>/<name>/<gen_method>/<stdev>/<param_name>
dataset_dir = os.path.join(
    dataset_root,
    dataset_name,
    dataset_gen_method,
    dataset_stdev,
    dataset_param_name,
)
# Test images are searched under "<dataset_dir>/test/selected"
test_selected_dir = os.path.join(dataset_dir, "test", "selected")

Run

In [6]:
# Pick the compute device from the CUDA index stored in the training config
device, device_name = set_gpu(cuda_idx)
testingByImg_logger.info(f"Using '{device}', device_name = '{device_name}'")


# Timestamp of this test run (format: YYYYMMDD_HH_MM_SS)
time_stamp = datetime.now().strftime("%Y%m%d_%H_%M_%S")


# Seed NumPy's global random generator with the seed used for training
np.random.seed(rand_seed)


# Build both directions of the class mapping from the test directory:
#   'num2class_list' : index -> class name
#   'class2num_dict' : class name -> index
class_mappers = get_sortedClassMapper_from_dir(test_selected_dir)
num2class_list, class2num_dict = class_mappers
testingByImg_logger.info(f"num2class_list = {num2class_list}, class2num_dict = {class2num_dict}")


# Scan tiff
#   Collect every "<class_dir>/<image>.tiff" under 'test_selected_dir'.
#   NOTE: `glob` returns paths in arbitrary (filesystem-dependent) order.
test_img_list = glob(os.path.normpath(f"{test_selected_dir}/*/*.tiff"))
testingByImg_logger.info(f"total = {len(test_img_list)}")
## debug mode: randomly select (without replacement) at most [debug_rand_select] images
if debug_mode:
    # Clamp the sample size: `np.random.choice(..., replace=False)` raises ValueError
    # when asked for more items than the population contains.
    debug_sample_size = min(debug_rand_select, len(test_img_list))
    test_img_list = np.random.choice(test_img_list, size=debug_sample_size, replace=False)
    # The selection is random, so the message must not claim it is the "first" N images.
    testingByImg_logger.info(f"Debug mode, randomly selected {len(test_img_list)}")


# Save 'testing_amount'
#   An empty marker file is created inside 'load_dir'; only its name,
#   which embeds the number of test images, carries information.
num_test_imgs = len(test_img_list)
testing_amount = f"{{ datatest_{num_test_imgs} }}_{{ test_{num_test_imgs} }}"
testing_amount_path = os.path.normpath(f"{load_dir}/{testing_amount}")
with open(testing_amount_path, mode="w") as f_writer:
    pass  # nothing to write


# Create 'test_set', 'test_dataloader'
testingByImg_logger.info(f"test_data ({len(test_img_list)})")
## preview at most the first 5 image paths
##   (slicing instead of `range(5)` avoids an IndexError when fewer than 5 images are available)
for preview_idx, preview_path in enumerate(test_img_list[:5]):
    testingByImg_logger.info(f"{preview_idx} : img_path = {preview_path}")
## images are resized to (224, 224); labels are looked up through 'class2num_dict'
test_set = ImgDataset(test_img_list, class_mapper=class2num_dict, resize=(224, 224),
                      use_hsv=use_hsv)
## shuffle=False keeps the batches in the order of 'test_img_list'
test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False, pin_memory=True)
testingByImg_logger.info(f"※ : total test batches: {len(test_dataloader)}")


# Read test ( debug mode only )
#   Load the last entry of 'test_img_list' with OpenCV and display it
#   in a window; `cv2.waitKey(0)` blocks until a key is pressed.
if debug_mode:
    read_test_path = test_img_list[-1]
    read_test = cv2.imread(read_test_path)
    testingByImg_logger.info(f"Read Test: {read_test_path}")
    cv2.imshow("Read Test", read_test)
    cv2.waitKey(0)


# Create model ( ref: https://github.com/pytorch/vision/issues/7397 )
testingByImg_logger.info((f"load model from `torchvision`, "
                          f"model_name: '{model_name}', weights: '{model_name}/{model_history}/{model_desc}_model.pth'"))
## look up the model builder by name and build it without pretrained weights
## (the trained weights are loaded from the '.pth' file below)
model_builder = getattr(torchvision.models, model_name)
model = model_builder(weights=None)
## modify model structure
##   Replace the classification head so that it outputs one score per class.
##   The input width is read from the existing head instead of being hard-coded to 768,
##   so the variant selected by 'model_name' keeps a head of the matching size.
head_in_features = model.heads.head.in_features
model.heads.head = nn.Linear(in_features=head_in_features, out_features=len(class2num_dict), bias=True)
model.to(device)
# print(model)
## load 'model_state_dict'
model_path = os.path.join(load_dir, f"{model_desc}_model.pth")
## NOTE(review): `torch.load` unpickles the file; only load checkpoints from a trusted source.
pth_file = torch.load(model_path, map_location=device) # unpack to device directly
model.load_state_dict(pth_file["model_state_dict"])


# Testing
## 'test_log' collects the information that is written to the log file afterwards
test_log = {"Test": time_stamp, "model_desc": f"{model_desc}_model.pth"}
## predicted / ground-truth class indices of every tested image, in dataloader order
pred_list, gt_list = [], []
## values that stay constant during the loop
n_batches = len(test_dataloader)
batch_no_width = len(str(n_batches))  # zero-padding width of the batch counter
## progress bar
pbar_n_test = tqdm(total=n_batches, desc="Test ")
## start testing: evaluation mode, gradients disabled
model.eval()
with torch.no_grad():
    for batch_no, (x_test, y_test, crop_name_batch) in enumerate(test_dataloader, start=1):
        ## move the batch to the selected device
        x_test, y_test = x_test.to(device), y_test.to(device)
        ## predicted class = index of the largest output along dim 1
        preds = model(x_test)
        pred_test = torch.max(preds, 1).indices

        ## extend 'pred_list', 'gt_list'
        pred_list.extend(pred_test.tolist())
        gt_list.extend(y_test.tolist())

        ## show predict_status of current_batch in CLI
        num_correct = (pred_test == y_test).sum().item()
        num_in_batch = len(y_test)
        testingByImg_logger.info((f"Batch[ {batch_no:0{batch_no_width}} / {n_batches} ], "
                                  f"# of (ground truth == prediction) in this batch : "
                                  f"{num_correct:{len(str(num_in_batch))}}/{num_in_batch}"))

        ## update 'pbar_n_test'
        pbar_n_test.update(1)
        pbar_n_test.refresh()

pbar_n_test.close()
## end testing


# Compute the metrics from the collected labels.
#   NOTE(review): presumably the results are stored into 'test_log' in place
#   ( 'test_log["average_f1"]' is read further below ) — confirm in `dl_utils`.
caulculate_metrics(test_log, None,
                   gt_list, pred_list, class2num_dict)
# print(json.dumps(test_log, indent=4))


# Save information to a file
with open(os.path.normpath(f"{load_dir}/{{Logs}}_test.log"), mode="w") as f_writer:

    ## redirect 'sys.stdout' into the file, so everything printed below is written to it
    orig_stdout = sys.stdout # store original 'sys.stdout'
    sys.stdout = f_writer
    try:
        ## write 'test_log'
        print(json.dumps(test_log, indent=4), "\n\n")

        ## write 'classification_report' ( class indices are converted back to class names )
        gt_list_to_name = [ num2class_list[i] for i in gt_list ]
        pred_list_to_name = [ num2class_list[i] for i in pred_list ]
        cls_report = classification_report(y_true=gt_list_to_name, y_pred=pred_list_to_name)
        print("Classification Report:\n\n", cls_report, "\n")

        ## write 'confusion_matrix'
        #   row: Ground truth
        #   column: predict
        #  *  0  1  2
        #  0 [] [] []
        #  1 [] [] []
        #  2 [] [] []
        #
        #   NOTE(review): `confusion_matrix_with_class` presumably prints the matrix itself
        #   ( nothing else here writes 'confusion_mat' ) — confirm in `dl_utils`.
        confusion_mat = confusion_matrix_with_class(ground_truth=gt_list_to_name, prediction=pred_list_to_name)
    finally:
        ## always restore 'sys.stdout', even if one of the steps above raises;
        ## otherwise later output would go to the already closed file
        sys.stdout = orig_stdout


# Rename 'load_dir'
## new_name_format = {time_stamp}_{state}_{target_epochs_with_ImgLoadOptions}_{test_f1}
## state = {EarlyStop, Interrupt, Completed, Tested, etc.}
## Splitting on '{' or '}' yields the pieces of the old directory name:
##   index 0 is the text before the first '{', index 3 is the content of the second '{...}' field.
model_history_list = re.split(r"[{}]", model_history)
history_prefix = model_history_list[0]
history_second_field = model_history_list[3]
new_name = "_".join([
    f"{history_prefix}{{Tested}}",
    f"{{{history_second_field}}}",
    f"{{{model_desc}}}",
    f"{{avg_f1_{test_log['average_f1']}}}",
])
renamed_load_dir = os.path.join(load_dir_root, model_name, new_name)
os.rename(load_dir, renamed_load_dir)

| 2023-04-27 00:30:04,055 | Testing | INFO | Using 'cuda', device_name = 'NVIDIA GeForce RTX 4090'
| 2023-04-27 00:30:04,055 | Testing | INFO | num2class_list = ['L', 'M', 'S'], class2num_dict = {'L': 0, 'M': 1, 'S': 2}
| 2023-04-27 00:30:04,058 | Testing | INFO | total = 2460
| 2023-04-27 00:30:04,058 | Testing | INFO | test_data (2460)
| 2023-04-27 00:30:04,058 | Testing | INFO | 0 : img_path = /home/rime97410000/ZebraFish_DB/{Dataset}_Cropped/{20230424_Update}_Academia_Sinica_i505/fish_dataset_horiz_cut_1l2_Mix_AP/0.75_STDEV/DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022/test/selected/S/S_fish_132_P_selected_3.tiff
| 2023-04-27 00:30:04,059 | Testing | INFO | 1 : img_path = /home/rime97410000/ZebraFish_DB/{Dataset}_Cropped/{20230424_Update}_Academia_Sinica_i505/fish_dataset_horiz_cut_1l2_Mix_AP/0.75_STDEV/DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022/test/selected/S/S_fish_135_A_selected_4.tiff
| 2023-04-27 00:30:04,059 | Testing | INFO | 2 : img_path = /home/rime97410000/ZebraFish_DB/{Dat

Test :   0%|          | 0/20 [00:00<?, ?it/s]

| 2023-04-27 00:30:06,818 | Testing | INFO | Batch[ 01 / 20 ], # of (ground truth == prediction) in this batch : 108/128
| 2023-04-27 00:30:07,498 | Testing | INFO | Batch[ 02 / 20 ], # of (ground truth == prediction) in this batch : 117/128
| 2023-04-27 00:30:08,158 | Testing | INFO | Batch[ 03 / 20 ], # of (ground truth == prediction) in this batch : 115/128
| 2023-04-27 00:30:08,812 | Testing | INFO | Batch[ 04 / 20 ], # of (ground truth == prediction) in this batch : 116/128
| 2023-04-27 00:30:09,479 | Testing | INFO | Batch[ 05 / 20 ], # of (ground truth == prediction) in this batch : 108/128
| 2023-04-27 00:30:10,129 | Testing | INFO | Batch[ 06 / 20 ], # of (ground truth == prediction) in this batch : 112/128
| 2023-04-27 00:30:10,784 | Testing | INFO | Batch[ 07 / 20 ], # of (ground truth == prediction) in this batch :  91/128
| 2023-04-27 00:30:11,455 | Testing | INFO | Batch[ 08 / 20 ], # of (ground truth == prediction) in this batch :  98/128
| 2023-04-27 00:30:12,113 | Test