In [4]:
import json
import re
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import numpy as np
from utils.dsl import *
from utils.constants import *

# Configure paths
logs_dir = Path('../logs/gemini3flashpreview_similar_notrainrepair_2a2b_prog_k4_82869')

def calculate_grid_similarity(actual, expected):
    """Calculate similarity between actual and expected grids."""
    if actual is None or expected is None:
        return 0.0
    
    try:
        actual_arr = np.array(actual)
        expected_arr = np.array(expected)
        
        if actual_arr.shape != expected_arr.shape:
            return 0.0
        
        matches = np.sum(actual_arr == expected_arr)
        total = actual_arr.size
        return matches / total if total > 0 else 0.0
    except:
        return 0.0

def open_and_solve_example(task_id, example_index, solver, set='train'):
    """Open task file and solve a specific example."""
    path = Path(f'../data_v2/evaluation/{task_id}.json')
    if not path.exists():
        return None, None, None
    
    try:
        with path.open('r', encoding='utf-8') as f:
            task = json.load(f)
        I = tuple(tuple(row) for row in task[set][example_index]['input'])
        expected = task[set][example_index]['output']
        output = solver(I)
        return I, expected, output
    except Exception as e:
        print(f"Error solving {task_id} idx {example_index}: {e}")
        return None, None, None

def extract_code_from_file(file_path: Path) -> Optional[str]:
    """Extract solver code from summary file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        
        # Try to find "FINAL CODE:" first
        final_code_idx = content.find('FINAL CODE:')
        if final_code_idx != -1:
            code_section = content[final_code_idx:]
        else:
            gen_code_idx = content.find('CODE:')
            if gen_code_idx == -1:
                return None
            code_section = content[gen_code_idx:]
        
        # Look for code in markdown block first
        python_blocks = re.findall(r'```python\n(.*?)```', code_section, re.DOTALL)
        if python_blocks:
            for block in python_blocks:
                if 'def solve' in block:
                    return block.strip()
            return python_blocks[0].strip()
        
        # Find def solve
        def_match = re.search(r'\bdef\s+solve\s*\(', code_section)
        if not def_match:
            return None
        
        # NEW: Look backwards for imports from def solve position
        def_start = def_match.start()
        
        # Search backwards for import statements
        lines_before_def = code_section[:def_start].split('\n')
        import_start_idx = None
        
        for i in range(len(lines_before_def) - 1, -1, -1):
            line = lines_before_def[i].strip()
            if line.startswith('import ') or line.startswith('from '):
                import_start_idx = i
            elif line and not line.startswith('#') and import_start_idx is not None:
                # Found a non-import, non-empty, non-comment line
                break
        
        if import_start_idx is not None:
            # Start from the first import found
            code_start = sum(len(line) + 1 for line in lines_before_def[:import_start_idx])
        else:
            # No imports, start from def
            code_start = def_start
        
        # Find end (last return statement)
        return_matches = list(re.finditer(r'\n\s*return\s+.*', code_section[def_start:]))
        if not return_matches:
            return None
        
        last_return = return_matches[-1]
        function_end = def_start + last_return.end()
        
        code = code_section[code_start:function_end]
        return code
        
    except Exception as e:
        return None

import signal

def test_solver_code(solver_code: str, task_id: str, dataset_type: str = 'train', debug: bool = False) -> Tuple[bool, Dict]:
    """Test solver code on all examples of a task."""
    exec_namespace = dict(globals())
    
    def timeout_handler(signum, frame):
        raise TimeoutError("Execution timed out")
    
    try:
        exec(solver_code, exec_namespace)
        
        if 'solve' not in exec_namespace:
            if debug:
                print(f"  No 'solve' function found after exec for {task_id}")
            return False, {}
        
        solver_func = exec_namespace['solve']
        
        # Load task
        with open(f'../data_v2/evaluation/{task_id}.json') as f:
            task = json.load(f)
        
        if dataset_type not in task or len(task[dataset_type]) == 0:
            if debug:
                print(f"  No {dataset_type} data for {task_id}")
            return False, {}
        
        results = {}
        all_correct = True
        
        for idx in range(len(task[dataset_type])):
            try:
                # Set up the timeout (1 second per example)
                signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(1)
                
                I, expected, output = open_and_solve_example(task_id, idx, solver_func, set=dataset_type)
                
                # Cancel the alarm
                signal.alarm(0)
                
                if expected is None or output is None:
                    results[idx] = {'correct': False, 'similarity': 0.0}
                    all_correct = False
                else:
                    similarity = calculate_grid_similarity(output, expected)
                    is_correct = similarity == 1.0
                    results[idx] = {'correct': is_correct, 'similarity': similarity}
                    if not is_correct:
                        all_correct = False
            except TimeoutError:
                signal.alarm(0)  # Cancel the alarm
                if debug:
                    print(f"  Timeout on {task_id} {dataset_type}[{idx}]")
                results[idx] = {'correct': False, 'similarity': 0.0, 'error': 'Timeout'}
                all_correct = False
            except Exception as e:
                signal.alarm(0)  # Cancel the alarm
                if debug:
                    print(f"  Error on {task_id} {dataset_type}[{idx}]: {e}")
                results[idx] = {'correct': False, 'similarity': 0.0, 'error': str(e)}
                all_correct = False
        
        if debug and all_correct:
            print(f"  ‚úì {task_id} {dataset_type}: ALL CORRECT!")
        
        return all_correct, results
    except Exception as e:
        if debug:
            print(f"  Exec error for {task_id}: {e}")
        return False, {'error': str(e)}

def analyze_task_files(logs_dir: Path, debug: bool = False) -> Dict:
    """Analyze all task files to categorize solutions."""
    # Find all files
    all_files = list(logs_dir.glob('*.txt'))
    
    # Separate summary and repair files
    summary_files = [f for f in all_files if 'summary' in f.name and 'repair' not in f.name]
    repair_files = [f for f in all_files if 'repair' in f.name and 'summary' in f.name]
    
    print(f"Found {len(summary_files)} summary files, {len(repair_files)} repair files")
    
    # Track results by task_id
    task_results = {}
    
    # Process summary files (initial attempts)
    print("\nProcessing initial attempts...")
    for file_path in summary_files:
        task_id = file_path.name.split('_')[0]
        if debug:
            print(f"\nProcessing {task_id} (initial)...")
        
        solver_code = extract_code_from_file(file_path)
        
        if solver_code:
            train_success, train_details = test_solver_code(solver_code, task_id, 'train', debug)
            test_success, test_details = test_solver_code(solver_code, task_id, 'test', debug)
            
            task_results[task_id] = {
                'initial': {
                    'train': train_success,
                    'test': test_success,
                    'train_details': train_details,
                    'test_details': test_details,
                    'file': file_path.name
                }
            }
        else:
            if debug:
                print(f"  Failed to extract code")
    
    # Process repair files
    print("\nProcessing repair attempts...")
    for file_path in repair_files:
        task_id = file_path.name.split('_')[0]
        if debug:
            print(f"\nProcessing {task_id} (repair)...")
        
        solver_code = extract_code_from_file(file_path)
        
        if solver_code and task_id in task_results:
            train_success, train_details = test_solver_code(solver_code, task_id, 'train', debug)
            test_success, test_details = test_solver_code(solver_code, task_id, 'test', debug)
            
            task_results[task_id]['repair'] = {
                'train': train_success,
                'test': test_success,
                'train_details': train_details,
                'test_details': test_details,
                'file': file_path.name
            }
        elif solver_code and task_id not in task_results:
            if debug:
                print(f"  Repair found but no initial attempt for {task_id}")
    
    return task_results

def categorize_tasks(task_results: Dict) -> Dict[str, List[str]]:
    """Categorize tasks based on solution status."""
    categories = {
        'initially_solved_both': [],
        'initially_solved_train_only': [],
        'initially_solved_test_only': [],
        'repaired_to_both': [],
        'repaired_train_only': [],
        'repaired_test_only': [],
        'still_unsolved': [],
        'no_repair_attempted': []
    }
    
    for task_id, results in task_results.items():
        initial_train = results['initial']['train']
        initial_test = results['initial']['test']
        
        # Check if repair exists
        if 'repair' in results:
            repair_train = results['repair']['train']
            repair_test = results['repair']['test']
            
            # Initially solved
            if initial_train and initial_test:
                categories['initially_solved_both'].append(task_id)
            elif initial_train and not initial_test:
                # Check if repair helped test
                if repair_test:
                    categories['repaired_to_both'].append(task_id)
                else:
                    categories['initially_solved_train_only'].append(task_id)
            elif not initial_train and initial_test:
                # Check if repair helped train
                if repair_train:
                    categories['repaired_to_both'].append(task_id)
                else:
                    categories['initially_solved_test_only'].append(task_id)
            else:
                # Neither initially solved - check if repair helped
                if repair_train and repair_test:
                    categories['repaired_to_both'].append(task_id)
                elif repair_train and not repair_test:
                    categories['repaired_train_only'].append(task_id)
                elif not repair_train and repair_test:
                    categories['repaired_test_only'].append(task_id)
                else:
                    categories['still_unsolved'].append(task_id)
        else:
            # No repair attempted
            if initial_train and initial_test:
                categories['initially_solved_both'].append(task_id)
            elif initial_train and not initial_test:
                categories['initially_solved_train_only'].append(task_id)
            elif not initial_train and initial_test:
                categories['initially_solved_test_only'].append(task_id)
            else:
                categories['no_repair_attempted'].append(task_id)
    
    return categories

def print_analysis(categories: Dict[str, List[str]]):
    """Print analysis results."""
    print("\n" + "="*80)
    print("TASK SOLUTION ANALYSIS")
    print("="*80)
    
    print(f"\n‚úÖ INITIALLY SOLVED (TRAIN + TEST): {len(categories['initially_solved_both'])} tasks")
    for task_id in sorted(categories['initially_solved_both']):
        print(f"  - {task_id}")
    
    print(f"\nüîß SUCCESSFULLY REPAIRED (TRAIN + TEST): {len(categories['repaired_to_both'])} tasks")
    for task_id in sorted(categories['repaired_to_both']):
        print(f"  - {task_id}")
    
    print(f"\n‚ö†Ô∏è  INITIALLY SOLVED TRAIN ONLY: {len(categories['initially_solved_train_only'])} tasks")
    for task_id in sorted(categories['initially_solved_train_only']):
        print(f"  - {task_id}")
    
    print(f"\n‚ö†Ô∏è  REPAIRED TRAIN ONLY: {len(categories['repaired_train_only'])} tasks")
    for task_id in sorted(categories['repaired_train_only']):
        print(f"  - {task_id}")
    
    print(f"\n‚ö†Ô∏è  INITIALLY SOLVED TEST ONLY: {len(categories['initially_solved_test_only'])} tasks")
    for task_id in sorted(categories['initially_solved_test_only']):
        print(f"  - {task_id}")
    
    print(f"\n‚ö†Ô∏è  REPAIRED TEST ONLY: {len(categories['repaired_test_only'])} tasks")
    for task_id in sorted(categories['repaired_test_only']):
        print(f"  - {task_id}")
    
    print(f"\n‚ùå STILL UNSOLVED (after repair): {len(categories['still_unsolved'])} tasks")
    
    print(f"\n‚è≠Ô∏è  NO REPAIR ATTEMPTED: {len(categories['no_repair_attempted'])} tasks")
    
    print("\n" + "="*80)
    print("SUMMARY STATISTICS")
    print("="*80)
    total_initially_solved = len(categories['initially_solved_both'])
    total_repaired = len(categories['repaired_to_both'])
    total_solved = total_initially_solved + total_repaired
    
    total_tasks = sum(len(v) for v in categories.values())
    attempted_repairs = len(categories['repaired_to_both']) + len(categories['repaired_train_only']) + len(categories['repaired_test_only']) + len(categories['still_unsolved'])
    test_passed = total_solved + len(categories['repaired_test_only']) + len(categories['initially_solved_test_only'])
    train_passed = total_solved + len(categories['repaired_train_only']) + len(categories['initially_solved_train_only'])
    
    print(f"Total tasks analyzed: {total_tasks}")
    print(f"Fully solved initially: {total_initially_solved}")
    print(f"Fully solved after repair: {total_repaired}")
    print(f"Total fully solved: {total_solved}")
    print(f"Tasks passing test: {test_passed}")
    print(f"Tasks passing train: {train_passed}")

if __name__ == "__main__":
    import sys
    debug = '--debug' in sys.argv
    
    print("Analyzing task solutions...")
    task_results = analyze_task_files(logs_dir, debug=debug)
    categories = categorize_tasks(task_results)
    print_analysis(categories)

Analyzing task solutions...
Found 120 summary files, 78 repair files

Processing initial attempts...
Error solving 20270e3b idx 2: list index out of range

Processing repair attempts...
Error solving 4c7dc4dd idx 0: Execution timed out
Error solving 4c7dc4dd idx 1: Execution timed out
Error solving 4c7dc4dd idx 0: Execution timed out
Error solving 4c7dc4dd idx 1: Execution timed out

TASK SOLUTION ANALYSIS

‚úÖ INITIALLY SOLVED (TRAIN + TEST): 30 tasks
  - 136b0064
  - 13e47133
  - 1818057f
  - 221dfab4
  - 2ba387bc
  - 31f7f899
  - 38007db0
  - 45a5af55
  - 53fb4810
  - 58490d8a
  - 58f5dbd5
  - 5961cc34
  - 6e453dd6
  - 7b5033c1
  - 7c66cb00
  - 7ed72f31
  - 97d7923e
  - 9aaea919
  - a251c730
  - aa4ec2a5
  - b0039139
  - b10624e5
  - b5ca7ac4
  - b99e7126
  - bf45cf4b
  - cbebaa4b
  - d59b0160
  - d8e07eb2
  - db695cfb
  - e3721c99

üîß SUCCESSFULLY REPAIRED (TRAIN + TEST): 10 tasks
  - 16de56c4
  - 409aa875
  - 4c3d4a41
  - 62593bfd
  - 7b0280bc
  - a395ee82
  - cb2d8a2c
  - db0c5

‚úÖ INITIALLY SOLVED (TRAIN + TEST): 19 tasks
  - 136b0064
  - 1818057f
  - 291dc1e1
  - 2ba387bc
  - 36a08778
  - 38007db0
  - 3e6067c3
  - 58490d8a
  - 58f5dbd5
  - 6e453dd6
  - 7c66cb00
  - 981571dc
  - a251c730
  - aa4ec2a5
  - b5ca7ac4
  - bf45cf4b
  - c4d067a0
  - db0c5428
  - db695cfb

‚ö†Ô∏è  INITIALLY SOLVED TEST ONLY: 2 tasks
  - 332f06d7
  - 8f3a5a89

‚úÖ INITIALLY SOLVED (TRAIN + TEST): 14 tasks
  - 1818057f
  - 291dc1e1
  - 2ba387bc
  - 36a08778
  - 38007db0
  - 4c3d4a41
  - 58f5dbd5
  - 7c66cb00
  - 981571dc
  - a395ee82
  - aa4ec2a5
  - bf45cf4b
  - d8e07eb2
  - db695cfb

üîß SUCCESSFULLY REPAIRED (TRAIN + TEST): 4 tasks
  - 16de56c4
  - 45a5af55
  - c4d067a0
  - db0c5428

‚ö†Ô∏è  INITIALLY SOLVED TEST ONLY: 2 tasks
  - 7b3084d4
  - 8f3a5a89

‚ö†Ô∏è  REPAIRED TEST ONLY: 1 tasks
  - 332f06d7

exp_name:testing_noreasoning_nodsl_k4_similar_fewshot_repair
================================================================================
TASK SOLUTION ANALYSIS
================================================================================

‚úÖ INITIALLY SOLVED (TRAIN + TEST): 16 tasks
  - 136b0064
  - 1818057f
  - 291dc1e1
  - 2ba387bc
  - 36a08778
  - 38007db0
  - 45a5af55
  - 4c3d4a41
  - 58f5dbd5
  - 6e453dd6
  - 7c66cb00
  - 7ed72f31
  - aa4ec2a5
  - bf45cf4b
  - db0c5428
  - db695cfb

üîß SUCCESSFULLY REPAIRED (TRAIN + TEST): 2 tasks
  - 3e6067c3
  - 8f3a5a89

‚ö†Ô∏è  INITIALLY SOLVED TRAIN ONLY: 25 tasks
  - 135a2760
  - 16de56c4
  - 1ae2feb7
  - 20270e3b
  - 247ef758
  - 28a6681f
  - 2b83f449
  - 31f7f899
  - 3dc255db
  - 53fb4810
  - 7491f3cf
  - 78332cb0
  - 7b5033c1
  - 80a900e0
  - 88e364bc
  - 8b9c3697
  - 97d7923e
  - b10624e5
  - c4d067a0
  - d35bdbdc
  - dbff022c
  - dfadab01
  - e376de54
  - eee78d87
  - fc7cae8d

‚ö†Ô∏è  REPAIRED TRAIN ONLY: 16 tasks
  - 221dfab4
  - 35ab12c3
  - 409aa875
  - 5545f144
  - 65b59efc
  - 89565ca0
  - 8e5c0c38
  - 9bbf930d
  - a395ee82
  - b0039139
  - c7f57c3e
  - cb2d8a2c
  - dd6b8c4b
  - e8686506
  - f560132c
  - f931b4a8

‚ö†Ô∏è  INITIALLY SOLVED TEST ONLY: 3 tasks
  - 4a21e3da
  - 58490d8a
  - 981571dc

‚ö†Ô∏è  REPAIRED TEST ONLY: 0 tasks

‚ùå STILL UNSOLVED (after repair): 57 tasks

‚è≠Ô∏è  NO REPAIR ATTEMPTED: 1 tasks

================================================================================
SUMMARY STATISTICS
================================================================================
Total tasks analyzed: 120
Fully solved initially: 16
Fully solved after repair: 2
Total fully solved: 18
Tasks passing test: 21
Tasks passing train: 59

exp_name:testing_noreasoning_dsl_k4_similar_fewshot_repair
================================================================================
TASK SOLUTION ANALYSIS
================================================================================

‚úÖ INITIALLY SOLVED (TRAIN + TEST): 14 tasks
  - 1818057f
  - 2ba387bc
  - 31f7f899
  - 36a08778
  - 45a5af55
  - 4a21e3da
  - 4c3d4a41
  - 58490d8a
  - 58f5dbd5
  - 7c66cb00
  - 8f3a5a89
  - aa4ec2a5
  - bf45cf4b
  - db695cfb

üîß SUCCESSFULLY REPAIRED (TRAIN + TEST): 3 tasks
  - 3e6067c3
  - 7ed72f31
  - db0c5428

‚ö†Ô∏è  INITIALLY SOLVED TRAIN ONLY: 18 tasks
  - 16de56c4
  - 1ae2feb7
  - 2b83f449
  - 3dc255db
  - 53fb4810
  - 5545f144
  - 6e453dd6
  - 78332cb0
  - 7b5033c1
  - 80a900e0
  - 88e364bc
  - 97d7923e
  - 9bbf930d
  - c7f57c3e
  - d35bdbdc
  - dfadab01
  - eee78d87
  - f931b4a8

‚ö†Ô∏è  REPAIRED TRAIN ONLY: 11 tasks
  - 135a2760
  - 136b0064
  - 221dfab4
  - 247ef758
  - 28a6681f
  - 409aa875
  - 62593bfd
  - 7491f3cf
  - a395ee82
  - b0039139
  - dd6b8c4b

‚ö†Ô∏è  INITIALLY SOLVED TEST ONLY: 1 tasks
  - 981571dc

‚ö†Ô∏è  REPAIRED TEST ONLY: 1 tasks
  - fc7cae8d

‚ùå STILL UNSOLVED (after repair): 68 tasks

‚è≠Ô∏è  NO REPAIR ATTEMPTED: 1 tasks

================================================================================
SUMMARY STATISTICS
================================================================================
Total tasks analyzed: 117
Fully solved initially: 14
Fully solved after repair: 3
Total fully solved: 17
Tasks passing test: 19
Tasks passing train: 46

grok4.1fast_dsl_similar_fewshot_repair
================================================================================
TASK SOLUTION ANALYSIS
================================================================================

‚úÖ INITIALLY SOLVED (TRAIN + TEST): 9 tasks
  - 136b0064
  - 2ba387bc
  - 36a08778
  - 45a5af55
  - 58f5dbd5
  - 7c66cb00
  - b10624e5
  - c4d067a0
  - db695cfb

üîß SUCCESSFULLY REPAIRED (TRAIN + TEST): 7 tasks
  - 1818057f
  - 4c3d4a41
  - 58490d8a
  - 7ed72f31
  - a395ee82
  - b99e7126
  - bf45cf4b

‚ö†Ô∏è  INITIALLY SOLVED TRAIN ONLY: 13 tasks
  - 221dfab4
  - 28a6681f
  - 31f7f899
  - 3dc255db
  - 409aa875
  - 6e453dd6
  - 78332cb0
  - 7b5033c1
  - 80a900e0
  - 97d7923e
  - b0039139
  - d35bdbdc
  - dfadab01

‚ö†Ô∏è  REPAIRED TRAIN ONLY: 3 tasks
  - 135a2760
  - 1ae2feb7
  - 53fb4810

‚ö†Ô∏è  INITIALLY SOLVED TEST ONLY: 2 tasks
  - 4a21e3da
  - e376de54

‚ö†Ô∏è  REPAIRED TEST ONLY: 1 tasks
  - 9aaea919

‚ùå STILL UNSOLVED (after repair): 30 tasks

‚è≠Ô∏è  NO REPAIR ATTEMPTED: 8 tasks

================================================================================
SUMMARY STATISTICS
================================================================================
Total tasks analyzed: 73
Fully solved initially: 9
Fully solved after repair: 7
Total fully solved: 16
Tasks passing test: 19
Tasks passing train: 32

In [4]:
data = """
‚úÖ INITIALLY SOLVED (TRAIN + TEST): 14 tasks
  - 1818057f
  - 2ba387bc
  - 31f7f899
  - 36a08778
  - 45a5af55
  - 4a21e3da
  - 4c3d4a41
  - 58490d8a
  - 58f5dbd5
  - 7c66cb00
  - 8f3a5a89
  - aa4ec2a5
  - bf45cf4b
  - db695cfb

üîß SUCCESSFULLY REPAIRED (TRAIN + TEST): 3 tasks
  - 3e6067c3
  - 7ed72f31
  - db0c5428

‚ö†Ô∏è  INITIALLY SOLVED TRAIN ONLY: 18 tasks
  - 16de56c4
  - 1ae2feb7
  - 2b83f449
  - 3dc255db
  - 53fb4810
  - 5545f144
  - 6e453dd6
  - 78332cb0
  - 7b5033c1
  - 80a900e0
  - 88e364bc
  - 97d7923e
  - 9bbf930d
  - c7f57c3e
  - d35bdbdc
  - dfadab01
  - eee78d87
  - f931b4a8

‚ö†Ô∏è  REPAIRED TRAIN ONLY: 11 tasks
  - 135a2760
  - 136b0064
  - 221dfab4
  - 247ef758
  - 28a6681f
  - 409aa875
  - 62593bfd
  - 7491f3cf
  - a395ee82
  - b0039139
  - dd6b8c4b

‚ö†Ô∏è  INITIALLY SOLVED TEST ONLY: 1 tasks
  - 981571dc

‚ö†Ô∏è  REPAIRED TEST ONLY: 1 tasks
  - fc7cae8d

‚ùå STILL UNSOLVED (after repair): 68 tasks

‚è≠Ô∏è  NO REPAIR ATTEMPTED: 1 tasks
"""

# Extract task IDs from a section
def extract_tasks(data, section_name):
    tasks = []
    lines = data.split('\n')
    in_section = False
    
    for line in lines:
        if section_name in line:
            in_section = True
            continue
        if in_section:
            if line.strip().startswith('-'):
                task_id = line.strip().lstrip('- ').strip()
                if task_id:
                    tasks.append(task_id)
            elif line.strip() and not line.strip().startswith('-'):
                # Hit next section
                break
    
    return tasks

# Get tasks from each section
train_only = extract_tasks(data, "INITIALLY SOLVED TRAIN ONLY")
repaired_both = extract_tasks(data, "SUCCESSFULLY REPAIRED (TRAIN + TEST)")

# Find intersection
improved_tasks = [task for task in train_only if task in repaired_both]

# Display results
print(f"Tasks initially train-only: {len(train_only)}")
print(f"Tasks repaired to train+test: {len(repaired_both)}")
print(f"\nTasks that went from train-only to train+test: {len(improved_tasks)}")

if improved_tasks:
    print("\nTask IDs:")
    for task in improved_tasks:
        print(f"  - {task}")
else:
    print("(None found)")

Tasks initially train-only: 18
Tasks repaired to train+test: 3

Tasks that went from train-only to train+test: 0
(None found)
