In [1]:
import os
import sys
import re
import traceback
from typing import List, Dict
from datetime import datetime
from glob import glob
import json
import argparse

from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import cv2

import torch
from torch import nn, utils
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

sys.path.append(r"C:\Users\confocal_microscope\Desktop\ZebraFish_AP_POS\modules") # add path to scan customized module
from logger import init_logger
from fileop import create_new_dir
from dl_utils import set_gpu, ImgDataset, caulculate_metrics, save_model, plot_training_trend, confusion_matrix_with_class

# print("="*100, "\n")

In [2]:
# Logger used for every console message in this notebook. It is built by the
# project's 'logger.init_logger' helper; "Testing" is the name that appears in
# the name field of each logged line.
cli_logger = init_logger(r"Testing")

Constant paths (dataset root and model-history root)

In [3]:
# Root directories of the datasets and of the saved model histories.
# The defaults are the paths of the original workstation; set the environment
# variables 'ZEBRAFISH_AP_DATASET_ROOT' / 'ZEBRAFISH_AP_MODEL_HISTORY_ROOT' to run
# the notebook on another machine without editing this cell.
ap_dataset_root = os.environ.get(
    "ZEBRAFISH_AP_DATASET_ROOT",
    r"C:\Users\confocal_microscope\Desktop\{Test}_DataSet",
)
load_dir_root = os.environ.get(
    "ZEBRAFISH_AP_MODEL_HISTORY_ROOT",
    r"C:\Users\confocal_microscope\Desktop\{Test}_Model_history",
)

Arguments (dataset, model, and run settings)

In [4]:
# --- which dataset to test on ---
dataset_name = r"{20230305_NEW_STRUCT}_Academia_Sinica_i409"
dataset_gen_method = "fish_dataset_horiz_cut_1l2_Mix_AP"
dataset_param_name = "DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022"

# --- which trained model to load ---
model_name = "vit_b_16"
model_history = r"20230322_02_35_45_{Completed}_{20_epochs}"
model_desc = "best" # best / final

# --- run settings ---
cuda_idx = 1
label_in_filename = 0
batch_size = 32
debug_mode = False
rand_seed = 2022 # only for debug_mode

# Create path var
## directory holding the checkpoint of the selected training run
load_dir = os.path.join(load_dir_root, model_name, model_history)
## directory holding the selected test images, one sub-folder per class
_dataset_param_dir = os.path.join(ap_dataset_root, dataset_name, dataset_gen_method, dataset_param_name)
test_selected_dir = os.path.join(_dataset_param_dir, "test", "selected")

# Set GPU
## 'set_gpu' is a project helper; it returns the torch device and a readable device name
device, device_name = set_gpu(cuda_idx)
cli_logger.info(f"Using '{device}', device_name = '{device_name}'")

| 2023-03-22 03:59:22,420 | Testing | INFO | Using 'cuda', device_name = 'NVIDIA GeForce RTX 2080 Ti'


In [5]:
# Get datetime
## timestamp identifying this test run, formatted as YYYYmmdd_HH_MM_SS
time_stamp = datetime.now().strftime('%Y%m%d_%H_%M_%S')


## set random seed
## seeds NumPy's global RNG so that random sampling of images is repeatable
np.random.seed(rand_seed)


# Scan classes to create 'class_map'
## Every sub-directory of 'test_selected_dir' is one class. Plain files lying
## next to the class folders are skipped: they would otherwise become bogus
## classes, shifting the label indices and the size of the classifier head.
all_class_list = sorted(
    os.path.basename(class_dir)
    for class_dir in glob(os.path.normpath(f"{test_selected_dir}/*"))
    if os.path.isdir(class_dir)
)
## class name -> integer label, following the sorted order of the class names
class_map = {cls: i for i, cls in enumerate(all_class_list)}
cli_logger.info(class_map)


# Scan tiff
## collect every '.tiff' file found one level below the class folders
test_img_list = glob(os.path.normpath(f"{test_selected_dir}/*/*.tiff"))
cli_logger.info(f"total = {len(test_img_list)}")
## debug mode: randomly sample at most 200 images (without replacement).
## The sample size is capped at the number of images available, because
## 'np.random.choice(..., replace=False)' raises ValueError when asked for more
## items than exist. '.tolist()' keeps 'test_img_list' a plain list of 'str',
## the same type it has when debug mode is off.
if debug_mode:
    debug_sample_size = min(200, len(test_img_list))
    test_img_list = np.random.choice(test_img_list, size=debug_sample_size, replace=False).tolist()
    cli_logger.info(f"Debug mode, only select first {len(test_img_list)}")


# Preview test data
cli_logger.info(f"test_data ({len(test_img_list)})")
## log up to the first 5 paths; slicing avoids an IndexError on tiny datasets
for i, img_path in enumerate(test_img_list[:5]):
    cli_logger.info(f"{i}：img_path = {img_path}")
## debug mode: read test
## opens the last image in a window and blocks until a key is pressed
if debug_mode and len(test_img_list) > 0:
    reat_test = cv2.imread(test_img_list[-1])
    cli_logger.info(f"Read Test: {test_img_list[-1]}")
    cv2.imshow("Read Test", reat_test)
    cv2.waitKey(0)
    cv2.destroyAllWindows() # release the preview window once the key is pressed
## save 'training_amount'
## creates an empty marker file inside 'load_dir' whose file name records the number of test images
training_amount = f"{{ datatest_{len(test_img_list)} }}_{{ test_{len(test_img_list)} }}"
with open(os.path.normpath(f"{load_dir}/{training_amount}"), mode="w") as f_writer:
    pass


# Create dataSets
## 'ImgDataset' is the project's dataset class (from 'dl_utils'); it receives the
## image paths together with the class-name -> label mapping
test_set = ImgDataset(
    test_img_list,
    class_map=class_map,
    label_in_filename=label_in_filename,
)


# Initial dataLoader
## no shuffling: batches follow the order of 'test_img_list'
test_dataloader = DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=True,
)
cli_logger.info(f"total test batches: {len(test_dataloader)}")


# Create model
cli_logger.info(("load model using 'torch.hub.load()', "
                 "model_name: '{}', weights: '{}/{}/{}_model.pth'").format(model_name, model_name, model_history, model_desc))
## build the architecture only (weights=None); the trained weights are loaded below
model = torch.hub.load('pytorch/vision', model_name, weights=None)
## modify model structure
## Replace the classification head with one output per class. The input width is
## read from the existing head instead of being hard-coded to 768, so the cell
## keeps working when 'model_name' selects a ViT variant with another hidden size.
head_in_features = model.heads.head.in_features
model.heads.head = nn.Linear(in_features=head_in_features, out_features=len(class_map), bias=True)
model.to(device)
## load model_state_dict
model_path = os.path.join(load_dir, f"{model_desc}_model.pth")
## NOTE(review): 'torch.load' unpickles the file, which can execute arbitrary code;
## only load checkpoints produced by a trusted training run.
pth_file = torch.load(model_path, map_location=device) # unpack to device directly
model.load_state_dict(pth_file["model_state_dict"])


# Testing
## testing variable
test_log = { "Test": time_stamp, "model_desc": f"{model_desc}_model.pth" }
pred_list = [] # predicted label of every test image, in dataloader order
gt_list = []   # ground-truth label of every test image, in dataloader order
n_test_batches = len(test_dataloader)
## start testing
## set to evaluation mode
model.eval()
## The progress bar is used as a context manager so it is always closed, even
## when a batch raises; gradients are disabled for the whole loop.
with tqdm(total=n_test_batches, desc="Test ") as pbar_n_test, torch.no_grad():
    for batch, data in enumerate(test_dataloader):
        x_test, y_test = data
        x_test, y_test = x_test.to(device), y_test.to(device) # move to GPU
        preds = model(x_test)
        ## predicted class = index of the largest output along dim 1
        _, pred_test = torch.max(preds, 1)

        ## extend 'pred_list', 'gt_list'
        ## ('Tensor.tolist()' copies to the CPU by itself when needed)
        pred_list.extend(pred_test.tolist())
        gt_list.extend(y_test.tolist())

        ## show predict_status of current_batch in CLI
        ## both tensors are on 'device', so they are compared there and only
        ## the resulting count is brought back
        cli_logger.info("Batch[{}/{}], # of (ground truth == prediction) in_this_batch： {}/{}".format(
                             (batch+1), n_test_batches,
                             (pred_test == y_test).sum().item(),
                             len(y_test)
                         ))

        ## update 'pbar_n_test'
        pbar_n_test.update(1)
        pbar_n_test.refresh()

## 'caulculate_metrics' is a project helper from 'dl_utils'; the f1 entries are
## later read back from 'test_log', so it presumably fills the dict in place
## -- TODO confirm against dl_utils
caulculate_metrics(test_log, None,
                   gt_list, pred_list, class_map)
## end testing


# Save infomations to a file
## Writes '{Logs}_test.log' into 'load_dir': the JSON dump of 'test_log', then the
## classification report, then whatever the confusion-matrix helper prints.
with open(os.path.normpath(f"{load_dir}/{{Logs}}_test.log"), mode="w") as f_writer:

    ## change direction of 'sys.stdout'
    orig_stdout = sys.stdout # store original 'sys.stdout'
    sys.stdout = f_writer
    try:
        ## write 'test_log'
        print(json.dumps(test_log, indent=4), "\n\n")

        ## write 'classification_report'
        ## map integer labels back to class names so the report is readable
        gt_list_to_name = [ all_class_list[i] for i in gt_list ]
        pred_list_to_name = [ all_class_list[i] for i in pred_list ]
        cls_report = classification_report(y_true=gt_list_to_name, y_pred=pred_list_to_name)
        print("Classification Report:\n\n", cls_report, "\n")

        ## write 'confusion_matrix'
        #   row: Ground truth
        #   column: predict
        #  *  0  1  2
        #  0 [] [] []
        #  1 [] [] []
        #  2 [] [] []
        #
        ## NOTE(review): the returned matrix is not used afterwards, so the helper
        ## presumably prints the matrix itself (hence the stdout redirection) -- confirm.
        confusion_mat = confusion_matrix_with_class(ground_truth=gt_list_to_name, prediction=pred_list_to_name)
    finally:
        ## recover direct of 'sys.stdout'
        ## done in 'finally' so that an exception above cannot leave the notebook
        ## printing into a file that is about to be closed
        sys.stdout = orig_stdout


# Rename 'load_dir' so that the folder name records the test result.
# new_name_format = {time_stamp}_{status}_{target_epoch}_{test_f1}
# status = {early_stop, interrupt, Completed, Tested, etc.}
## splitting on '{' and '}' puts the timestamp prefix at index 0 and the epoch tag at index 3
model_history_list = re.split("{|}", model_history)
_epoch_tag = f"{{{model_history_list[3]}}}"
_desc_tag = f"{{{model_desc}}}"
new_name = "{}{{Tested}}_{}_{}_{{avg_f1_{}}}".format(
    model_history_list[0],
    _epoch_tag,
    _desc_tag,
    test_log["average_f1"],
)
_tested_dir = os.path.join(load_dir_root, model_name, new_name)
os.rename(load_dir, _tested_dir)

| 2023-03-22 03:59:23,552 | Testing | INFO | {'L': 0, 'M': 1, 'S': 2}
| 2023-03-22 03:59:23,565 | Testing | INFO | total = 1990
| 2023-03-22 03:59:23,566 | Testing | INFO | test_data (1990)
| 2023-03-22 03:59:23,566 | Testing | INFO | 0：img_path = C:\Users\confocal_microscope\Desktop\{Test}_DataSet\{20230305_NEW_STRUCT}_Academia_Sinica_i409\fish_dataset_horiz_cut_1l2_Mix_AP\DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022\test\selected\L\L_fish_100_A_selected_0.tiff
| 2023-03-22 03:59:23,567 | Testing | INFO | 1：img_path = C:\Users\confocal_microscope\Desktop\{Test}_DataSet\{20230305_NEW_STRUCT}_Academia_Sinica_i409\fish_dataset_horiz_cut_1l2_Mix_AP\DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022\test\selected\L\L_fish_100_A_selected_1.tiff
| 2023-03-22 03:59:23,567 | Testing | INFO | 2：img_path = C:\Users\confocal_microscope\Desktop\{Test}_DataSet\{20230305_NEW_STRUCT}_Academia_Sinica_i409\fish_dataset_horiz_cut_1l2_Mix_AP\DS_SURF3C_CRPS512_SF14_INT20_DRP100_RS2022\test\selected\L\L_fish_100_A_s

Test :   0%|          | 0/63 [00:00<?, ?it/s]

| 2023-03-22 03:59:28,588 | Testing | INFO | Batch[1/63], # of (ground truth == prediction) in_this_batch： 26/32
| 2023-03-22 03:59:28,934 | Testing | INFO | Batch[2/63], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-03-22 03:59:29,249 | Testing | INFO | Batch[3/63], # of (ground truth == prediction) in_this_batch： 29/32
| 2023-03-22 03:59:29,571 | Testing | INFO | Batch[4/63], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-03-22 03:59:29,903 | Testing | INFO | Batch[5/63], # of (ground truth == prediction) in_this_batch： 31/32
| 2023-03-22 03:59:30,245 | Testing | INFO | Batch[6/63], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-03-22 03:59:30,566 | Testing | INFO | Batch[7/63], # of (ground truth == prediction) in_this_batch： 31/32
| 2023-03-22 03:59:30,893 | Testing | INFO | Batch[8/63], # of (ground truth == prediction) in_this_batch： 32/32
| 2023-03-22 03:59:31,224 | Testing | INFO | Batch[9/63], # of (ground truth == prediction) in_t

使用正則表達式從文件中提取JSON字符串 ( by ChatGPT )

In [27]:
import json
import re


file_path = r"C:\Users\confocal_microscope\Desktop\{Test}_Model_history\vit_b_16\20230321_12_49_04_{Completed}_{10_epochs}\{Logs}_test.log"

# Read the whole log file
with open(file_path, 'r') as f:
    content = f.read()

# Parse the JSON object that starts at the first '{' in the file.
# 'raw_decode' consumes exactly one JSON value and ignores the text after it, so
# braces appearing later in the log cannot break the parse (a greedy
# '\{.*\}' pattern would stretch from the first '{' to the last '}' of the file).
json_start = content.find("{")
if json_start == -1:
    raise ValueError(f"No JSON object found in '{file_path}'")
data, _json_end = json.JSONDecoder().raw_decode(content, json_start)


print(json.dumps(data, indent=4))

{
    "Test": "20230322_00_53_20",
    "L_f1": "0.94961",
    "M_f1": "0.71855",
    "S_f1": "0.78155",
    "macro_f1": "0.81657",
    "weighted_f1": "0.79613",
    "micro_f1": "0.79648",
    "average_f1": "0.80653"
}


In [34]:
# Dry run of the folder renaming: builds the new folder name and prints it next
# to the current path, without touching the file system.
model_history_list = re.split("{|}", model_history)
print(model_desc, model_history_list, sep="\n")
epochs = 10
_epoch_tag = f"{{{model_history_list[3]}}}"
_desc_tag = f"{{{model_desc}}}"
new_name = "{}{{Tested}}_{}_{}_{{avg_f1_{}}}".format(
    model_history_list[0],
    _epoch_tag,
    _desc_tag,
    test_log["average_f1"],
)
print(new_name)
name = os.path.join(load_dir_root, model_name, new_name)
print(load_dir, name, sep="\n")

best
['20230322_02_35_45_', 'Completed', '_', '20_epochs', '']
20230322_02_35_45_{Tested}_{20_epochs}_{best}_{avg_f1_0.79508}
C:\Users\confocal_microscope\Desktop\{Test}_Model_history\vit_b_16\20230322_02_35_45_{Completed}_{20_epochs}
C:\Users\confocal_microscope\Desktop\{Test}_Model_history\vit_b_16\20230322_02_35_45_{Tested}_{20_epochs}_{best}_{avg_f1_0.79508}
