In [5]:
import json
from pathlib import Path
from tqdm import tqdm
import operator # For sorting tuples

# --- Configuration ---
# Ensure this path points to your augmented tasks directory
output_dir_path = "modded_tasks"
top_n = 5  # How many top tasks to rank
max_errors_shown = 10  # How many failing files to list in the final report
# --- End Configuration ---

output_dir = Path(output_dir_path)


def measure_task(task_data):
    """Compute the ranking metrics for one parsed task.

    Only the first entry of ``task_data['test']`` is inspected; any further
    test entries are ignored.

    Args:
        task_data: The object produced by ``json.load`` for one task file.

    Returns:
        A tuple ``(metric1_len, metric2_len, dims)`` where

        * ``metric1_len`` is ``len(json.dumps(train)) +
          len(json.dumps(first test input))``, or ``None`` when ``'train'``
          or the first test input is missing/null;
        * ``metric2_len`` is ``len(json.dumps(first test output))``, or
          ``None`` when that output is missing/null;
        * ``dims`` is ``(height, width)`` of the first test output, with the
          width taken from its first row only, or ``None`` when the output is
          not a non-empty list whose first element is a non-empty list.

        All three are ``None`` when ``'test'`` is absent or empty.

    Raises:
        ValueError: If the top-level value is not a JSON object, or ``'test'``
            is non-empty but is not a list whose first entry is an object.
    """
    if not isinstance(task_data, dict):
        raise ValueError(
            f"top-level JSON value is {type(task_data).__name__}, expected an object"
        )

    train_data = task_data.get('train')
    test_examples = task_data.get('test', [])
    if not test_examples:
        # No test section at all: nothing can be measured, but not an error.
        return None, None, None
    if not isinstance(test_examples, list) or not isinstance(test_examples[0], dict):
        raise ValueError("'test' must be a list whose first entry is an object")

    first_test = test_examples[0]

    metric1_len = None
    test_input_grid = first_test.get('input')
    if train_data is not None and test_input_grid is not None:
        metric1_len = len(json.dumps(train_data)) + len(json.dumps(test_input_grid))

    metric2_len = None
    dims = None
    test_output_grid = first_test.get('output')
    if test_output_grid is not None:
        metric2_len = len(json.dumps(test_output_grid))
        # Dimensions are only recorded for a list-of-lists with a non-empty
        # first row; rows after the first are not checked.
        if isinstance(test_output_grid, list) and test_output_grid:
            first_row = test_output_grid[0]
            if isinstance(first_row, list) and first_row:
                dims = (len(test_output_grid), len(first_row))

    return metric1_len, metric2_len, dims


# --- Data Structures for Rankings ---
# Store tuples: (total_char_length, filename)
metric1_results = []  # For Train (JSON) + Test Input (JSON) length
metric2_results = []  # For Test Output (JSON) length

# --- Counters & Statistics ---
files_scanned = 0
files_with_errors = 0
files_without_metric1_data = 0
files_without_metric2_data = 0
error_details = []  # (filename, "ExceptionType: message") for every failed file

# For average test output dimensions
total_test_out_width = 0
total_test_out_height = 0
valid_test_out_grid_count = 0


print(f"\n--- Finding Top {top_n} Tasks by JSON String Length in '{output_dir.name}' ---")
print("  - Metric 1: Length(JSON(Train)) + Length(JSON(Test Input))")
print("  - Metric 2: Length(JSON(Test Output))")

if not output_dir.exists():
    print(f"Error: Output directory '{output_dir}' does not exist.")
else:
    task_files = sorted(output_dir.glob("*.json"))
    if not task_files:
        print("No .json files found in the output directory.")
    else:
        print(f"Scanning {len(task_files)} task files...")
        for task_path in tqdm(task_files, desc="Scanning tasks"):
            files_scanned += 1

            try:
                # JSON text is UTF-8; do not depend on the platform default.
                with open(task_path, 'r', encoding='utf-8') as f:
                    task_data = json.load(f)
                metric1_len, metric2_len, dims = measure_task(task_data)
            except (OSError, ValueError, RecursionError) as exc:
                # ValueError covers json.JSONDecodeError, UnicodeDecodeError
                # and the structural errors raised by measure_task. A failed
                # file contributes to neither ranking, so it also counts as
                # missing data for both metrics.
                files_with_errors += 1
                files_without_metric1_data += 1
                files_without_metric2_data += 1
                error_details.append(
                    (task_path.name, f"{type(exc).__name__}: {exc}")
                )
                continue

            # --- Store results if calculated ---
            if metric1_len is None:
                files_without_metric1_data += 1
            else:
                metric1_results.append((metric1_len, task_path.name))

            if metric2_len is None:
                # Either there is no test entry or its output is missing/null
                files_without_metric2_data += 1
            else:
                metric2_results.append((metric2_len, task_path.name))

            if dims is not None:
                height, width = dims
                total_test_out_height += height
                total_test_out_width += width
                valid_test_out_grid_count += 1

        print("\n--- Scan Complete ---")

        # --- Sort and Rank ---
        # list.sort is stable, so equal lengths keep filename order.
        metric1_results.sort(key=operator.itemgetter(0), reverse=True)
        metric2_results.sort(key=operator.itemgetter(0), reverse=True)

        top_metric1 = metric1_results[:top_n]
        top_metric2 = metric2_results[:top_n]

        # --- Report Rankings (Filenames Only) ---
        for heading, ranking in (
            ("Longest JSON Length (Train + Test Input)", top_metric1),
            ("Longest JSON Length (Test Output)", top_metric2),
        ):
            print(f"\n--- Top {len(ranking)} Task Files: {heading} ---")
            if ranking:
                for rank, (_, filename) in enumerate(ranking, start=1):
                    print(f"  {rank}. {filename}")
            else:
                print("  No suitable tasks found for this metric.")

        # --- Calculate and Report Average Test Output Dimensions ---
        avg_width = 0
        avg_height = 0
        if valid_test_out_grid_count > 0:
            avg_width = total_test_out_width / valid_test_out_grid_count
            avg_height = total_test_out_height / valid_test_out_grid_count

        # --- Report Stats ---
        print("\n--- Scan Statistics ---")
        print(f"- Files scanned: {files_scanned}")
        print(f"- Files missing data for Metric 1 (train/test-in): {files_without_metric1_data}")
        print(f"- Files missing data for Metric 2 (test-out):      {files_without_metric2_data}")
        print(f"- Files with read/parse errors:                   {files_with_errors}")
        # Name the failing files so the error count is actionable.
        for failed_name, reason in error_details[:max_errors_shown]:
            print(f"    ! {failed_name}: {reason}")
        if len(error_details) > max_errors_shown:
            print(f"    ... and {len(error_details) - max_errors_shown} more")
        print(f"- Valid test output grids found for stats:        {valid_test_out_grid_count}")
        if valid_test_out_grid_count > 0:
            print(f"- Average test output grid dimensions (W x H):    {avg_width:.2f} x {avg_height:.2f}")
        else:
            print("- Average test output grid dimensions (W x H):    N/A (No valid grids found)")


--- Finding Top 5 Tasks by JSON String Length in 'modded_tasks' ---
  - Metric 1: Length(JSON(Train)) + Length(JSON(Test Input))
  - Metric 2: Length(JSON(Test Output))
Scanning 30138 task files...


Scanning tasks: 100%|██████████| 30138/30138 [00:32<00:00, 914.23it/s] 


--- Scan Complete ---

--- Top 5 Task Files: Longest JSON Length (Train + Test Input) ---
  1. f9d67f8b_r270_mh_c_p2c1_p2c8.json
  2. f9d67f8b_mh_p2c5_p1c4.json
  3. f9d67f8b_r180_mh_p2c3_p1c0.json
  4. f9d67f8b_c_p2c4.json
  5. f9d67f8b_r270_c_p1c5_p1c9.json

--- Top 5 Task Files: Longest JSON Length (Test Output) ---
  1. 05a7bcf2_c_p2c9.json
  2. 05a7bcf2_mh_c_p2c1.json
  3. 05a7bcf2_r180_mh_c_p2c7.json
  4. 09c534e7_r90_mh_p2c8.json
  5. 25094a63_p1c0_p2c5.json

--- Scan Statistics ---
- Files scanned: 30138
- Files missing data for Metric 1 (train/test-in): 0
- Files missing data for Metric 2 (test-out):      0
- Files with read/parse errors:                   0
- Valid test output grids found for stats:        30138
- Average test output grid dimensions (W x H):    12.93 x 12.94



