# 3D打印质量评估 - 数据探索与模型演示

这个 notebook 演示了 3D 打印质量评估项目的数据探索与可视化流程，以及如何使用已训练的模型进行推理和性能评估。

## 导入必要的库

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torchvision
import cv2
from PIL import Image
from tqdm.notebook import tqdm

# 添加项目根目录到路径
sys.path.append(os.path.abspath('..'))

# 设置显示参数
pd.set_option('display.max_columns', None)
plt.rcParams['figure.figsize'] = (12, 8)

%matplotlib inline

## 加载和探索元数据

In [None]:
# Read the dataset metadata table from disk
metadata_path = '../data/metadata.csv'
metadata = pd.read_csv(filepath_or_buffer=metadata_path)

# Report the record count, then preview the first rows as the cell output
print(f"数据集大小: {len(metadata)} 条记录")
metadata.head(n=5)

In [None]:
# Summary statistics of the metadata table, shown as the cell output
summary_stats = metadata.describe()
summary_stats

In [None]:
# Count the missing entries in every column
print("缺失值统计:")
missing_per_column = metadata.isna().sum()
missing_per_column

## 数据可视化

In [None]:
# Histogram of the quality scores with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(metadata['quality_score'], bins=20, kde=True, ax=ax)
ax.set_title('质量分数分布', fontsize=14)
ax.set_xlabel('质量分数', fontsize=12)
ax.set_ylabel('频率', fontsize=12)
plt.show()

In [None]:
# Bar chart of how many records fall under each defect type
defect_counts = metadata['defect_type'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=defect_counts.index, y=defect_counts.values, ax=ax)
ax.set_title('缺陷类型分布', fontsize=14)
ax.set_xlabel('缺陷类型', fontsize=12)
ax.set_ylabel('数量', fontsize=12)
# Tilt the category labels so long defect names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Scatter every printing parameter against the quality score, one panel per
# parameter, with the points coloured by defect type
params = ['layer_thickness', 'exposure_time', 'intensity', 'temperature']
param_names = ['层厚', '曝光时间', '强度', '温度']

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16, 12))
axes = axes.ravel()

for ax, param, name in zip(axes, params, param_names):
    sns.scatterplot(
        data=metadata,
        x=param,
        y='quality_score',
        hue='defect_type',
        palette='viridis',
        ax=ax,
    )
    ax.set_title(f'{name} vs 质量分数', fontsize=14)
    ax.set_xlabel(name, fontsize=12)
    ax.set_ylabel('质量分数', fontsize=12)

fig.tight_layout()
plt.show()

## 参数之间的相关性

In [None]:
# Pairwise correlation between the printing parameters and the quality score,
# drawn as an annotated heatmap
corr_columns = ['layer_thickness', 'exposure_time', 'intensity', 'temperature', 'quality_score']
numeric_data = metadata[corr_columns]
correlation = numeric_data.corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation, annot=True, fmt='.2f', cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('参数相关性矩阵', fontsize=14)
fig.tight_layout()
plt.show()

## 查看图像样本

In [None]:
# Image display helper
def show_images(image_dir, image_ids, titles=None, rows=1, cols=3, figsize=(15, 5)):
    """Display a grid of images loaded from ``image_dir``.

    Each id in ``image_ids`` is looked up as ``<image_dir>/<id>.jpg`` and drawn
    into the next cell of a ``rows`` x ``cols`` grid, filled row by row. Ids
    beyond ``rows * cols`` are not drawn. A cell whose file is missing or
    cannot be decoded shows a short message instead of an image.

    Args:
        image_dir: Directory that contains the ``.jpg`` files.
        image_ids: Image identifiers (file names without the extension).
        titles: Optional titles parallel to ``image_ids``. Images without a
            matching entry are drawn untitled.
        rows: Number of grid rows.
        cols: Number of grid columns.
        figsize: Figure size passed to ``plt.subplots``.
    """
    # squeeze=False always returns a 2-D array of axes, so a 1x1 grid needs no
    # special handling.
    fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    for i, (ax, img_id) in enumerate(zip(axes, image_ids)):
        img_path = os.path.join(image_dir, f"{img_id}.jpg")

        if not os.path.exists(img_path):
            ax.text(0.5, 0.5, f'图像不存在: {img_id}',
                    horizontalalignment='center', verticalalignment='center')
            continue

        # cv2.imread does not raise on failure; it returns None for a file it
        # cannot decode, which would make cvtColor fail below.
        img = cv2.imread(img_path)
        if img is None:
            ax.text(0.5, 0.5, f'图像无法读取: {img_id}',
                    horizontalalignment='center', verticalalignment='center')
            continue

        # OpenCV loads images as BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if titles is not None and i < len(titles):
            ax.set_title(titles[i])

    # Hide ticks and frames on every cell, including those left unused when
    # there are fewer images than grid cells.
    for ax in axes:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Preview a handful of sample images
raw_image_dir = '../data/raw'
processed_image_dir = '../data/processed'

# Take the first five records once and derive both ids and titles from them
sample_rows = metadata.iloc[:5]
sample_ids = sample_rows['image_id'].tolist()
sample_titles = [
    f"ID: {img_id}, 质量: {score:.2f}, 缺陷: {defect}"
    for img_id, score, defect in zip(
        sample_rows['image_id'],
        sample_rows['quality_score'],
        sample_rows['defect_type'],
    )
]

# Try to show the raw images. show_images defaults to a 1x3 grid, which would
# silently drop every sample after the third, so size the grid to the number
# of samples (at least one column, since an empty grid is not allowed).
print("原始图像:")
n_samples = max(len(sample_ids), 1)
show_images(raw_image_dir, sample_ids, sample_titles,
            cols=n_samples, figsize=(5 * n_samples, 5))

In [None]:
# Try to show the processed versions of the same samples. The grid is sized to
# the number of samples because the 1x3 default of show_images would silently
# drop every sample after the third.
print("处理后的图像:")
n_cols = max(len(sample_ids), 1)
show_images(processed_image_dir, sample_ids, sample_titles,
            cols=n_cols, figsize=(5 * n_cols, 5))

## 按缺陷类型查看样本

In [None]:
# Show up to three processed sample images for every defect type.
# Missing defect types are excluded: an equality filter never matches NaN, so
# such a group would only produce an empty figure.
n_missing_defect = int(metadata['defect_type'].isna().sum())
if n_missing_defect:
    print(f"跳过 {n_missing_defect} 条缺陷类型缺失的记录")

defect_types = metadata['defect_type'].dropna().unique()

for defect_type in defect_types:
    print(f"缺陷类型: {defect_type}")
    # Filter once and reuse the rows for both the ids and the scores
    defect_rows = metadata[metadata['defect_type'] == defect_type].iloc[:3]
    defect_samples = defect_rows['image_id'].tolist()
    defect_scores = defect_rows['quality_score'].tolist()
    defect_titles = [f"ID: {img_id}, 质量: {score:.2f}" for img_id, score in zip(defect_samples, defect_scores)]

    show_images(processed_image_dir, defect_samples, defect_titles)

## 模型推理演示

这部分代码展示如何使用训练好的模型对图像进行推理。

In [None]:
from inference.inference import PrintQualityInference

# Checkpoint of the trained model (assumes a trained model already exists).
# NOTE: replace this with the actual model path.
model_path = '../models/checkpoints/best_model.pth'

try:
    inferencer = PrintQualityInference(model_path)
except Exception as e:
    # Loading failed: record that no model is available, then explain why
    model_loaded = False
    print(f"模型加载失败: {str(e)}")
    print("请先训练模型或提供正确的模型路径")
else:
    model_loaded = True

In [None]:
if model_loaded:
    # Pick a few samples (records 5 to 7) to run predictions on
    test_samples = metadata['image_id'].iloc[5:8].tolist()

    for sample_id in test_samples:
        image_path = os.path.join(processed_image_dir, f"{sample_id}.jpg")
        if not os.path.exists(image_path):
            print(f"图像不存在: {sample_id}")
            continue

        print(f"\n预测图像: {sample_id}")
        result = inferencer.predict(image_path, visualize=True)

        # Ground-truth record for this image (first row with a matching id)
        matching_rows = metadata[metadata['image_id'] == sample_id]
        true_sample = matching_rows.iloc[0]
        print(f"真实质量分数: {true_sample['quality_score']:.2f}")
        print(f"真实缺陷类型: {true_sample['defect_type']}")
        print(
            f"真实参数: 层厚={true_sample['layer_thickness']}, "
            f"曝光时间={true_sample['exposure_time']}, "
            f"强度={true_sample['intensity']}, "
            f"温度={true_sample['temperature']}"
        )

## 模型性能分析

In [None]:
if model_loaded:
    # Run inference on a capped number of images and collect each prediction
    # next to its ground-truth values.
    all_results = []

    # Cap the number of images so the cell stays fast
    max_samples = 20
    # Kept under its own name so the `sample_ids` list built by the image
    # preview cell is not overwritten.
    eval_sample_ids = metadata['image_id'].iloc[:max_samples].tolist()

    for sample_id in tqdm(eval_sample_ids, desc="处理图像"):
        image_path = os.path.join(processed_image_dir, f"{sample_id}.jpg")
        if not os.path.exists(image_path):
            continue

        result = inferencer.predict(image_path)

        # Ground-truth record for this image (first row with a matching id)
        true_sample = metadata[metadata['image_id'] == sample_id].iloc[0]

        # One flat record per image: predictions first, then ground truth
        result_record = {
            'image_id': sample_id,
            'pred_quality': result['quality']['class_name'],
            'pred_quality_conf': result['quality']['confidence'],
            'pred_defect': result['defect']['class_name'],
            'pred_defect_conf': result['defect']['confidence'],
            'true_quality_score': true_sample['quality_score'],
            'true_defect': true_sample['defect_type']
        }

        # Predicted parameters: drop the unit suffix that follows '(' in the
        # key, plus any whitespace left in front of it, so the column name is
        # exactly 'pred_<name>'.
        for name, value in result['parameters'].items():
            param_name = name.split('(')[0].strip()
            result_record[f'pred_{param_name}'] = value

        # Ground-truth parameters
        result_record['true_层厚'] = true_sample['layer_thickness']
        result_record['true_曝光时间'] = true_sample['exposure_time']
        result_record['true_强度'] = true_sample['intensity']
        result_record['true_温度'] = true_sample['temperature']

        all_results.append(result_record)

    # Convert to a DataFrame
    results_df = pd.DataFrame(all_results)

# Jupyter only renders the value of the last top-level expression in a cell;
# a bare `results_df.head()` inside the `if` body above would show nothing.
results_df.head() if model_loaded else None

In [None]:
if model_loaded and 'results_df' in locals() and len(results_df) > 0:
    # Parameter display name -> unit used when printing its error
    param_units = {'层厚': 'mm', '曝光时间': 's', '强度': '%', '温度': '°C'}

    # The pred_* column names are derived from the keys of the inference
    # output, so confirm each one exists instead of failing with a KeyError.
    params = [p for p in param_units if f'pred_{p}' in results_df.columns]
    missing_params = [p for p in param_units if p not in params]
    if missing_params:
        print(f"警告: 预测结果中缺少以下参数, 已跳过: {', '.join(missing_params)}")

    # Mean absolute error of each predicted parameter
    print("参数预测平均绝对误差 (MAE):")
    for param in params:
        mae = np.mean(np.abs(results_df[f'pred_{param}'] - results_df[f'true_{param}']))
        print(f"{param}: {mae:.4f} {param_units[param]}")

    # Predicted vs. true value, one panel per parameter
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    axes = axes.flatten()

    for ax, param in zip(axes, params):
        true_values = results_df[f'true_{param}']
        ax.scatter(true_values, results_df[f'pred_{param}'])
        # Dashed y = x reference line spanning the range of the true values
        value_range = [true_values.min(), true_values.max()]
        ax.plot(value_range, value_range, 'r--')
        ax.set_xlabel(f'真实{param}')
        ax.set_ylabel(f'预测{param}')
        ax.set_title(f'{param} 预测 vs 真实值')
        ax.grid(True)

    # Blank the panels of any parameter that had to be skipped
    for ax in axes[len(params):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

## 总结与观察

在这个notebook中，我们：

1. 探索了3D打印数据集的结构和统计特征
2. 可视化了不同参数之间的关系
3. 查看了不同缺陷类型的示例
4. 使用训练好的模型进行了推理演示
5. 分析了模型的性能和预测准确度

主要发现：
- [在实际使用时填写观察结果]
- [模型优势和不足]
- [可能的改进方向]