In [None]:
# 导入必要的库
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import sys

# Add the project root to sys.path so the project's own modules can be imported.
# NOTE(review): this is a machine-specific absolute path; adjust it before running the notebook elsewhere.
sys.path.append('d:/document/unsw/9517/9517mango')
from dataset.mango_dataset import MangoTestDataset  # Assumes this class exists; adapt to the actual project


In [None]:
# Pick the compute device: CUDA when a GPU is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")


In [None]:
# Build the DataLoader for the test set
def load_test_dataset(batch_size=32):
    """Create the DataLoader that feeds the test set to every model.

    Each image is resized to 224x224, converted to a tensor and normalized
    with mean [0.485, 0.456, 0.406] and std [0.229, 0.224, 0.225].

    Args:
        batch_size: Number of samples per batch.

    Returns:
        A DataLoader over MangoTestDataset that iterates without shuffling.
    """
    # NOTE(review): these look like the usual ImageNet statistics — confirm they match training
    preprocessing_steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    preprocessing = transforms.Compose(preprocessing_steps)

    # MangoTestDataset comes from the project package; adapt this call to the real class
    dataset = MangoTestDataset(transform=preprocessing)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    sample_count = len(dataset)
    print(f"测试数据集加载完成，共 {sample_count} 个样本")
    return loader


In [None]:
# Load every saved model found in a directory
def load_models(models_dir='../models'):
    """Load all PyTorch and pickled models stored directly inside ``models_dir``.

    Files ending in ``.pth``/``.pt`` are loaded with ``torch.load`` onto the
    module-level ``device`` and switched to eval mode. Files ending in
    ``.pkl``/``.joblib`` are loaded with ``pickle``. Every other directory
    entry is skipped and reported as skipped (it is no longer announced as
    successfully loaded). A file that fails to load is reported and skipped
    so that the remaining models can still be used.

    SECURITY: unpickling can execute arbitrary code contained in the file.
    Only point this function at model files from a trusted source.

    Args:
        models_dir: Directory containing the serialized model files.

    Returns:
        Dict mapping each file name without its extension to
        ``{'model': <loaded object>, 'type': 'pytorch' | 'sklearn'}``.
        Files sharing the same stem (e.g. ``a.pth`` and ``a.pkl``) map to the
        same key, so the one loaded last replaces the earlier one.
    """
    pytorch_suffixes = ('.pth', '.pt')
    sklearn_suffixes = ('.pkl', '.joblib')
    models = {}

    # Sort the listing so the load order is the same on every run and platform
    for model_file in sorted(os.listdir(models_dir)):
        model_path = os.path.join(models_dir, model_file)
        model_name = os.path.splitext(model_file)[0]

        # Decide the loader up front; unsupported entries must not be reported as loaded
        if model_file.endswith(pytorch_suffixes):
            model_type = 'pytorch'
        elif model_file.endswith(sklearn_suffixes):
            model_type = 'sklearn'
        else:
            print(f"跳过不支持的文件: {model_file}")
            continue

        try:
            if model_type == 'pytorch':
                model = torch.load(model_path, map_location=device)
                # A checkpoint holding only a state_dict cannot be evaluated directly;
                # fail with an explicit message instead of an AttributeError on .eval()
                if not isinstance(model, torch.nn.Module):
                    raise TypeError(
                        f"期望 torch.nn.Module，实际得到 {type(model).__name__}"
                        "（文件可能只保存了 state_dict）"
                    )
                model.eval()  # inference mode for layers such as dropout / batch norm
            else:
                # NOTE(review): .joblib files are read with pickle here; archives written by
                # joblib.dump may require joblib.load instead — confirm how they were saved
                with open(model_path, 'rb') as f:
                    model = pickle.load(f)
        except Exception as e:
            # Best effort: report the failure and carry on with the other files
            print(f"加载模型 {model_name} 时出错: {e}")
            continue

        models[model_name] = {'model': model, 'type': model_type}
        print(f"成功加载模型: {model_name}")

    return models


In [None]:
# Evaluate a PyTorch model
def evaluate_pytorch_model(model, test_loader, device):
    """Run a PyTorch classifier over the test set and compute its metrics.

    Args:
        model: Module whose forward pass returns one score per class for each
            sample; the predicted class is the index of the maximum along
            dimension 1.
        test_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        device: Device on which inference runs. The model is moved there so
            that it always matches the inputs.

    Returns:
        The metrics dict produced by ``calculate_metrics``.
    """
    # Inputs are sent to `device` below, so the weights have to live there too
    model.to(device)
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.to(device))
            _, predicted = torch.max(outputs, 1)

            all_predictions.extend(predicted.cpu().numpy())
            # Labels are only used on the CPU for scoring, so they are never sent to the device
            all_labels.extend(labels.cpu().numpy())

    return calculate_metrics(all_labels, all_predictions)


In [None]:
# Evaluate a scikit-learn style model
def evaluate_sklearn_model(model, test_loader):
    """Score a model exposing ``predict`` on the flattened test batches.

    Every batch from ``test_loader`` is flattened to one row per sample, the
    rows of all batches are stacked into a single 2-D array, and the model
    predicts on that array in one call.

    Args:
        model: Object with a ``predict(features)`` method.
        test_loader: Iterable yielding ``(inputs, labels)`` tensor batches.

    Returns:
        The metrics dict produced by ``calculate_metrics``.
    """
    feature_batches = []
    true_labels = []

    for inputs, labels in test_loader:
        # Collapse everything after the batch dimension into a single feature axis
        flat_inputs = inputs.view(inputs.size(0), -1)
        feature_batches.append(flat_inputs.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

    # Predict once over the whole test set rather than batch by batch
    feature_matrix = np.concatenate(feature_batches, axis=0)
    predicted_labels = model.predict(feature_matrix)

    return calculate_metrics(true_labels, predicted_labels)


In [None]:
# Compute the evaluation metrics
def calculate_metrics(y_true, y_pred):
    """Compute accuracy plus weighted precision, recall and F1.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        Dict with the keys ``accuracy``, ``precision``, ``recall`` and
        ``f1_score``.
    """
    # Shared options: weighted averaging across classes, and zero_division=0
    # is forwarded to every averaged metric
    averaging = {'average': 'weighted', 'zero_division': 0}

    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, **averaging),
        'recall': recall_score(y_true, y_pred, **averaging),
        'f1_score': f1_score(y_true, y_pred, **averaging),
    }


In [None]:
# Main entry point: evaluate every saved model and compare the results
def _plot_metric_comparison(results_df, metric_columns):
    """Draw a grouped bar chart and a heatmap comparing the models per metric.

    Args:
        results_df: Non-empty DataFrame with a ``model`` column and one column
            per entry of ``metric_columns``.
        metric_columns: List of metric column names to plot.
    """
    fig, (bar_ax, heat_ax) = plt.subplots(2, 1, figsize=(14, 10))

    # Long format (one row per model/metric pair) lets seaborn group the bars by metric
    results_melted = pd.melt(results_df, id_vars=['model'],
                             value_vars=metric_columns,
                             var_name='指标', value_name='分数')
    sns.barplot(x='model', y='分数', hue='指标', data=results_melted, ax=bar_ax)
    bar_ax.set_title('各模型评估指标比较')
    bar_ax.tick_params(axis='x', labelrotation=45)
    bar_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    sns.heatmap(results_df.set_index('model')[metric_columns],
                annot=True, cmap='YlGnBu', fmt='.4f', ax=heat_ax)
    heat_ax.set_title('模型性能热力图')

    fig.tight_layout()
    plt.show()


def main():
    """Evaluate every model from ``load_models`` on the test set.

    Loads the test data and the models, evaluates each model with the
    evaluator matching its type, shows the metrics as a table sorted by
    accuracy (descending) and plots a bar chart plus a heatmap.

    Returns:
        DataFrame with the columns ``model``, ``accuracy``, ``precision``,
        ``recall`` and ``f1_score``, sorted by accuracy in descending order.
        When no model could be loaded, an empty DataFrame with those columns
        is returned and nothing is displayed or plotted.
    """
    metric_columns = ['accuracy', 'precision', 'recall', 'f1_score']

    test_loader = load_test_dataset()

    models = load_models()
    print(f"共加载 {len(models)} 个模型进行评估")

    results = []
    for model_name, model_info in models.items():
        print(f"正在评估模型: {model_name}")
        model = model_info['model']

        if model_info['type'] == 'pytorch':
            metrics = evaluate_pytorch_model(model, test_loader, device)
        else:  # scikit-learn style model
            metrics = evaluate_sklearn_model(model, test_loader)

        metrics['model'] = model_name
        results.append(metrics)

    # Passing explicit columns puts the model name first and keeps the frame
    # well-formed (instead of column-less) when there are no results
    results_df = pd.DataFrame(results, columns=['model'] + metric_columns)

    # Nothing to rank or plot; the table and heatmap would fail on empty data
    if results_df.empty:
        print("没有可评估的模型，跳过结果展示")
        return results_df

    results_df = results_df.sort_values(by='accuracy', ascending=False)

    print("\n模型评估结果:")
    display(results_df.style.format({column: '{:.4f}' for column in metric_columns}))

    _plot_metric_comparison(results_df, metric_columns)

    return results_df


In [None]:
# Run the evaluation; the returned table is kept for the CSV export that follows
results_table = main()


In [None]:
# Save the results to a CSV file
results_path = '../results/model_evaluation_results.csv'
# to_csv does not create missing parent directories, so make sure the folder exists first
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results_table.to_csv(results_path, index=False)
print("评估结果已保存到CSV文件")
