In [None]:
# 从 partial 续跑/合并 - 检查和恢复之前的生成进度
def check_existing_progress():
    """Report how many samples each task already has in its partial file.

    For each task type (ALC, AR, RSD) this looks for
    ``runs_dir / <task> / 'partial.jsonl'``, counts its non-blank lines as
    already-generated samples, and compares that count with the module-level
    ``TARGET_<task>`` constant. A summary is printed, followed by resume
    hints when any samples exist.

    Returns:
        dict: maps each task type to a dict with the keys ``'existing'``,
        ``'target'``, ``'remaining'`` and ``'status'``.
    """

    print("\n📋 检查现有生成进度:")

    total_existing = 0
    progress_info = {}

    for task_type in ('ALC', 'AR', 'RSD'):
        partial_file = runs_dir / task_type / 'partial.jsonl'
        # The TARGET_* constants live at module (notebook) level. They are
        # never local to this function, so locals() could not find them and
        # every target silently fell back to 0. A missing constant is still
        # treated as a target of 0.
        target = globals().get(f'TARGET_{task_type}', 0)

        if partial_file.exists():
            # Count non-blank lines; the context manager closes the handle
            # deterministically instead of leaving it to the garbage collector.
            with open(partial_file, 'r', encoding='utf-8') as fh:
                existing_count = sum(1 for line in fh if line.strip())
            remaining = max(0, target - existing_count)
            total_existing += existing_count

            progress_info[task_type] = {
                'existing': existing_count,
                'target': target,
                'remaining': remaining,
                'status': '完成' if existing_count >= target else f'需生成{remaining}个'
            }

            print(f"  {task_type}: {existing_count}/{target} 已完成 ({progress_info[task_type]['status']})")
        else:
            progress_info[task_type] = {
                'existing': 0,
                'target': target,
                'remaining': target,
                'status': f'需生成{target}个'
            }
            print(f"  {task_type}: 0/{target} 未开始 ({progress_info[task_type]['status']})")

    # Sum the targets that were actually looked up so the overall figure
    # always agrees with the per-task lines printed above.
    total_target = sum(entry['target'] for entry in progress_info.values())
    print(f"\n📊 总体进度: {total_existing}/{total_target} 样本")

    if total_existing > 0:
        print("\n💡 续跑选项:")
        print("  - 继续生成剩余样本：直接运行下面的生成cell")
        print("  - 重新开始：先运行下面的'清理partial目录'cell")

    return progress_info

# Check the existing progress and keep the per-task summary for later cells.
progress_info = check_existing_progress()

In [None]:
# 清理 partial 目录 - 安全删除中断的残片文件
def cleanup_partial_files():
    """Delete every task's partial file so generation can start from scratch.

    For each task type (ALC, AR, RSD) this checks
    ``runs_dir / <task> / 'partial.jsonl'``. If the file exists, its
    non-blank lines are counted as samples and the file is then deleted.
    Progress and a final summary are printed.

    Returns:
        list[str]: one ``"<task>: <n> samples"`` entry per deleted file;
        empty when no partial file was found.
    """

    print("\n🧹 清理partial目录:")

    cleaned_files = []
    total_samples_removed = 0

    for task_type in ('ALC', 'AR', 'RSD'):
        partial_file = runs_dir / task_type / 'partial.jsonl'

        if partial_file.exists():
            # Count the samples before deleting. The handle is closed
            # explicitly so no open descriptor remains when unlink() runs
            # (deleting an open file fails on some platforms).
            with open(partial_file, 'r', encoding='utf-8') as fh:
                sample_count = sum(1 for line in fh if line.strip())
            total_samples_removed += sample_count

            # Remove the file itself
            partial_file.unlink()
            cleaned_files.append(f"{task_type}: {sample_count} samples")

            print(f"  ✅ 删除 {task_type}/partial.jsonl ({sample_count} 样本)")
        else:
            print(f"  ⏭️ {task_type}/partial.jsonl 不存在，跳过")

    if cleaned_files:
        print(f"\n🗑️ 已清理: {total_samples_removed} 个样本")
        print("💡 提示: 现在可以重新运行生成cell")
    else:
        print("\n✨ 无需清理: 没有找到partial文件")

    return cleaned_files

# Important reminder: show the warning up front; nothing is deleted by
# running this cell as-is.
print(
    "⚠️  警告: 此操作将删除所有partial文件!",
    "   如果你想保留进度，请不要运行此cell",
    "   只有在需要完全重新开始时才运行",
    sep="\n",
)

# Uncomment the line below to actually perform the cleanup:
# cleanup_partial_files()