# 环境准备

In [6]:
import os
import sys
import json
import paddle
import paddle.nn as nn
from paddle.io import DataLoader
import pandas as pd

# Project root, relative to the notebook's working directory. It is put on
# sys.path so that the `src.*` project imports in this cell can be resolved.
BASE_DIR = "../"
# Guarded so that re-running this cell does not append duplicate entries.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

from src.dataset import TomatoDataset
from src.models.resnet_se import ResNet50_SE
from src.runner import Runner
from src.features import create_dataloader
from src.models.baseline_lr import ImprovedLogisticRegressionCV
from src.tools import plot_confusion_matrix, plot_metrics_bar

# Locations of the processed dataset and of the saved model checkpoints.
# Both models currently read their checkpoints from the same directory.
PROCESSED_DATA_DIR = os.path.join(BASE_DIR, "data/processed")
CKPT_DIR_CNN = os.path.join(BASE_DIR, "checkpoints")
CKPT_DIR_LR = os.path.join(BASE_DIR, "checkpoints")
TEST_DIR = os.path.join(PROCESSED_DATA_DIR, "test")

# `is_compiled_with_cuda()` only reports how Paddle was built. Also require at
# least one visible GPU, so that a CUDA build running on a machine without a
# usable GPU falls back to the CPU.
has_gpu = paddle.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0
device = "gpu" if has_gpu else "cpu"
# Kept as the last expression so the selected Place is shown as the cell output.
paddle.set_device(device)

Place(cpu)

# 数据准备

In [7]:
# Load split_metadata.json from the processed-data directory. The encoding is
# given explicitly so that parsing does not depend on the platform's default
# locale encoding.
with open(os.path.join(PROCESSED_DATA_DIR, "split_metadata.json"), "r", encoding="utf-8") as f:
    split_metadata = json.load(f)

# Build the test DataLoader consumed by the logistic-regression model.
# The recorded run shows this call extracting features for 3207 test items,
# which took about six minutes.
test_loader_lr, feature_dim = create_dataloader(
    PROCESSED_DATA_DIR, split='test', img_size=224, batch_size=32,
    split_metadata=split_metadata
)

test 特征提取: 100%|██████████| 3207/3207 [06:13<00:00,  8.59it/s]


In [8]:
# Image dataset and loader used to evaluate the CNN on the test directory.
# NOTE(review): mode="val" is passed for the test split -- presumably it selects
# the evaluation-time preprocessing; confirm in src.dataset.
test_dataset_cnn = TomatoDataset(
    TEST_DIR,
    mode="val",
)
test_loader_cnn = DataLoader(
    test_dataset_cnn,
    batch_size=16,
)

# 模型准备

In [9]:
# Restore the logistic-regression baseline from its saved checkpoint.
# NOTE(review): the file is a .pkl and the recorded output links to
# scikit-learn's model-persistence security/maintainability notes -- presumably
# the model is unpickled, so only load checkpoints from a trusted source and
# with a matching scikit-learn version; confirm in src.models.baseline_lr.
MODEL_PATH = os.path.join(
    CKPT_DIR_LR,
    "improved_logistic_regression_cv_model.pkl",
)
model_lr = ImprovedLogisticRegressionCV(
    num_classes=10,
    class_weights=split_metadata.get('class_weights'),
)
model_lr.load_model(MODEL_PATH)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [10]:
# Build the CNN without pretrained weights; its parameters are taken from the
# "model" entry of the saved checkpoint instead.
model_cnn = ResNet50_SE(
    num_classes=10,
    pretrained=False,
)

# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'numpy._core'". Presumably it is raised
# while reading the checkpoint because it was saved under a different NumPy
# major version than the one installed here -- confirm and align the versions.
ckpt_path = os.path.join(CKPT_DIR_CNN, "best.ckpt")
ckpt = paddle.load(ckpt_path)
model_cnn.set_state_dict(ckpt["model"])

# Switch to evaluation mode before running inference.
model_cnn.eval()


ModuleNotFoundError: No module named 'numpy._core'

# 模型测试

In [None]:
# Evaluate the logistic-regression model on its test loader. The returned
# `results` is indexed later in the notebook by 'accuracy', 'f1_score',
# 'precision', 'recall' and 'predictions'.
results = model_lr.evaluate_on_test(test_loader_lr)

In [None]:
# Class names in the order used for the per-class loss weights here and for the
# confusion-matrix labels later in the notebook.
# NOTE(review): presumably this order must match the label indices produced by
# TomatoDataset -- confirm against src.dataset.
class_order = [
    "Tomato_Bacterial_spot",
    "Tomato_Early_blight",
    "Tomato_healthy",
    "Tomato_Late_blight",
    "Tomato_Leaf_Mold",
    "Tomato_Septoria_leaf_spot",
    "Tomato_Spider_mites_Two_spotted_spider_mite",
    "Tomato__Target_Spot",
    "Tomato__Tomato_mosaic_virus",
    "Tomato__Tomato_YellowLeaf__Curl_Virus",
]

# Per-class weights looked up from the split metadata, arranged to follow
# class_order, then converted to a float32 tensor for the loss.
weights_by_class = split_metadata["class_weights"]
class_weights = [weights_by_class[name] for name in class_order]
class_weights_tensor = paddle.to_tensor(class_weights, dtype="float32")

# Weighted cross-entropy loss. No optimizer is handed to the Runner here.
loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)
runner = Runner(model=model_cnn, optimizer=None, loss_fn=loss_fn, device=device)

# 模型评估

## 指标对比柱状图

In [None]:
# Logistic-regression test results: pick the headline metrics out of `results`.
lr_metrics = {
    "accuracy": results['accuracy'],
    "f1": results['f1_score'],
    "precision": results['precision'],
    "recall": results['recall']
}
lr_preds = results['predictions']


# CNN test results: one evaluation pass over the CNN test loader.
test_loss, test_acc, test_f1, test_p, test_r = runner.evaluate_loader(test_loader_cnn)
print("\n===== Test Results (CNN) =====")
print(f"Loss       : {test_loss:.4f}")
print(f"Accuracy   : {test_acc:.4f}")
print(f"F1-weighted: {test_f1:.4f}")
print(f"Precision  : {test_p:.4f}")
print(f"Recall     : {test_r:.4f}")

cnn_metrics = {
    "accuracy": test_acc,
    "f1": test_f1,
    "precision": test_p,
    "recall": test_r
}
# Predictions for drawing a confusion matrix (alternatively, reuse the
# all_preds collected inside evaluate_loader).
cnn_preds = runner.predict(test_loader_cnn)

metrics_dict = {
    "CNN": cnn_metrics,
    "LogisticRegression": lr_metrics
}
# Transpose so that each row is a model and each column is a metric.
df = pd.DataFrame(metrics_dict).T
# A bare `df` is rendered only when it is the last expression of a cell. It is
# followed by the plot call here, so it is displayed explicitly instead.
# `display` is provided by the IPython kernel.
display(df)

plot_metrics_bar(metrics_dict)

## 混淆矩阵

In [None]:
# Confusion matrix for the logistic-regression model on its feature loader.
plot_confusion_matrix(
    model_lr,
    test_loader_lr,
    model_name="LogisticRegression",
    class_names=class_order,
)

# Confusion matrix for the CNN held by the runner, on the image loader.
plot_confusion_matrix(
    runner.model,
    test_loader_cnn,
    model_name="CNN",
    class_names=class_order,
)