### Quick Run Block

In [1]:
# import torch
# from pytorch_lightning.callbacks import ModelCheckpoint

# # Path to the problematic checkpoint
# checkpoint_path = r"c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\pre_trained_models\reid\dukemtmcreid_resnet50_256_128_epoch_120.ckpt"

# # Allow loading of ModelCheckpoint objects (since PyTorch 2.6 blocks this by default)
# torch.serialization.add_safe_globals([ModelCheckpoint])

# # Load the checkpoint (weights_only=False performs a full unpickle, so only do this with a trusted file)
# checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)

# # Print out keys to inspect conflicting ones
# print("Checkpoint keys:", checkpoint.keys())

# # Remove ModelCheckpoint states (if they exist)
# keys_to_remove = [key for key in checkpoint.keys() if "ModelCheckpoint" in key]
# for key in keys_to_remove:
#     del checkpoint[key]

# # Save the cleaned checkpoint
# fixed_checkpoint_path = checkpoint_path.replace(".ckpt", "_fixed.ckpt")
# torch.save(checkpoint, fixed_checkpoint_path)

# print(f"Fixed checkpoint saved to: {fixed_checkpoint_path}")

## Code

In [2]:
import sys
from pathlib import Path
import os

# Resolve the repository root (two directory levels above the notebook's working
# directory) exactly once, so the path that is registered and the path that is
# printed are guaranteed to be the same string.
project_root = str(Path.cwd().parent.parent)

# Guard the append: without it, every re-run of this cell stacks another
# duplicate entry onto sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

print(project_root)
print("Current working directory: ", os.getcwd())

from ModelDevelopment.CentralPipeline import CentralPipeline
from ModelDevelopment.ImageBatchPipeline import ImageBatchPipeline
from DataProcessing.DataPreProcessing import DataPaths

c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition
Current working directory:  c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\ModelDevelopment\experiments


## Multithreaded Studies Research Findings
- ThreadPoolExecutor works best for CentralPipeline pre-processing step (i.e. everything before run_pose)
- Time taken to process one batch of 32 tracklets: 251 seconds => ceil(1210/32) = 38 batches, (251*38)/3600 ≈ 2.65 hours ETA for the full 1210-tracklet run @ 200 images per batch cap, num_threads = 6x3 = 18
- Note on the GPU_SEMAPHORE constant inside ImageFeatureTransformPipeline: it is currently capped at 1, so only 1 batch of images is offloaded to the GPU at a time
- Because this gate is capped at 1, the amount of data offloaded to the GPU is controlled solely by the image_batch_size param passed to CentralPipeline
- Since caching is implemented, you can try raising the gate to 2 for even faster results; if the process crashes, keep restarting with use_cache=True

In [3]:
# Build the CentralPipeline for the SoccerNet test split. The keyword arguments
# are grouped by concern and unpacked in the same order as before.

# Which tracklets to process, and where data is read from / written to.
data_kwargs = dict(
    # tracklets_to_process_override=["1210"],
    num_tracklets=1210,
    # num_images_per_tracklet=50,
    input_data_path=DataPaths.TEST_DATA_DIR.value,
    output_processed_data_path=DataPaths.PROCESSED_DATA_OUTPUT_DIR_TEST.value,
    common_processed_data_dir=DataPaths.COMMON_PROCESSED_OUTPUT_DATA_TEST.value,
    gt_data_path=DataPaths.TEST_DATA_GT.value,
)

# Display, caching and logging switches.
runtime_kwargs = dict(
    # NOTE: DO NOT USE (keep False). The code is parallelized, so image samples
    # cannot be shown anymore; with True the first one shows and then the code breaks.
    display_transformed_image_sample=False,
    num_image_samples=1,
    use_cache=False,  # Set to False to rebuild the cache
    suppress_logging=False,
)

# --- PARALLELIZATION PARAMS ---
# Author's note: these settings are optimal for an NVIDIA RTX 3070 Ti Laptop GPU.
# All four are CRITICAL optimisation params; adjust them to your hardware.
# The original inline comments also listed 6 / 32 / 200 / 3 for these four params
# (presumably the values from the benchmark noted in this notebook - TODO confirm).
parallel_kwargs = dict(
    num_workers=2,
    tracklet_batch_size=32,
    image_batch_size=100,
    num_threads_multiplier=10,  # Interpretation: num_threads = num_workers * num_threads_multiplier
)

pipeline = CentralPipeline(**data_kwargs, **runtime_kwargs, **parallel_kwargs)

2025-03-29 12:53:57 [INFO] DataPreProcessing initialized. Universe of available data paths:
2025-03-29 12:53:57 [INFO] ROOT_DATA_DIR: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\extracted
2025-03-29 12:53:57 [INFO] TEST_DATA_GT: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\extracted\test\test_gt.json
2025-03-29 12:53:57 [INFO] TRAIN_DATA_GT: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\extracted\train\train_gt.json
2025-03-29 12:53:57 [INFO] TEST_DATA_DIR: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\extracted\test\images
2025-03-29 12:53:57 [INFO] TRAIN_DATA_DIR: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\extracted\train\images
2025-03-29 12:53:57 [INFO] CHALLENGE_DATA_DIR: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\extracted\challenge\images
2

### To Do
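- Investigate why last tracklet is not generated (1210 is skipped, probably something to do with batching). Hypothesis to check first: tracklet IDs start at 0, so IDs 0–1210 make 1211 tracklets, and `num_tracklets=1210` may simply cap the run at IDs 0–1209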

In [None]:
# The stage switches this cell passes to run_soccernet, in call order.
stage_names = (
    "run_soccer_ball_filter",
    "generate_features",
    "run_filter",
    "run_legible",
    "run_legible_eval",
    "run_pose",
    "run_crops",
    "run_str",
    "run_combine",
    "run_eval",
)

# Only the stages named here are passed as True; every other switch is passed as False.
enabled_stages = {"run_pose"}

pipeline.run_soccernet(**{stage: stage in enabled_stages for stage in stage_names})

2025-03-29 12:53:57 [INFO] Running the SoccerNet pipeline.
2025-03-29 12:53:57 [INFO] Generating json for pose
2025-03-29 12:53:57 [INFO] Aggregating legible & illegible results (cache not used or only one file is missing).
2025-03-29 12:53:57 [INFO] Saved global legible results to: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\processed_data\test\common_data\legible_results.json
2025-03-29 12:53:57 [INFO] Saved global illegible results to: c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\processed_data\test\common_data\illegible_results.json
2025-03-29 12:53:57 [INFO] Legible  tracklets list: 0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 26, 27, 28, 30, 31, 32, 33, 35, 36, 37, 39, 42, 43, 44, 45, 46, 48, 52, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 69, 70, 72, 73, 77, 80, 83, 84, 86, 87, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 105, 106, 107, 111, 112, 113, 114, 115, 116,

Generating pose JSON:   0%|          | 0/1210 [00:00<?, ?it/s]

2025-03-29 12:54:10 [INFO] Completed generating JSON for pose
2025-03-29 12:54:10 [INFO] Detecting pose
2025-03-29 12:54:10 [INFO] Legible tracklets list: 0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 26, 27, 28, 30, 31, 32, 33, 35, 36, 37, 39, 42, 43, 44, 45, 46, 48, 52, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 69, 70, 72, 73, 77, 80, 83, 84, 86, 87, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 105, 106, 107, 111, 112, 113, 114, 115, 116, 120, 123, 125, 126, 127, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 148, 149, 150, 152, 153, 154, 156, 157, 159, 160, 161, 162, 163, 164, 166, 167, 168, 169, 170, 171, 172, 173, 179, 180, 181, 185, 186, 187, 188, 190, 191, 192, 193, 197, 198, 199, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 212, 213, 215, 219, 220, 221, 222, 223, 227, 228, 229, 232, 236, 237, 238, 239, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 256, 260, 261, 263, 264, 266, 267, 268, 269, 272, 

Running pose estimation:   0%|          | 0/836 [00:00<?, ?it/s]

2025-03-29 12:54:10 [INFO] Using conda run for pose estimation
2025-03-29 12:54:10 [INFO] [0] Running command: conda run -n vitpose python -u c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\StreamlinedPipelineScripts\pose.py c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\pose\ViTPose\configs\body\2d_kpt_sview_rgb_img\topdown_heatmap\coco\ViTPose_huge_coco_256x192.py c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\pose\ViTPose\checkpoints\vitpose-h.pth --img-root / --json-file c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\processed_data\test\0\pose_input.json --out-json c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\data\SoccerNet\jersey-2023\processed_data\test\0\pose_results.json --image-batch-size 100
2025-03-29 12:54:10 [INFO] [1] Running command: conda run -n vitpose python -u c:\Users\colin\OneDrive\Desktop\Jersey-Number-Recognition\StreamlinedPipelineScripts\pose.py c:\Users\colin\OneDrive\Desktop\Je