In [None]:
from google.colab import drive
drive.mount("/content/drive")
!rm -r -f /content/sample_data
!cp -r /content/drive/MyDrive/IIT-Projects/third-umpire-decision-automation/.creds/.aws ~/.aws
!pip install -qq sagemaker ultralytics onnx onnxruntime
results_drive_path = "/content/drive/MyDrive/IIT-Projects/third-umpire-decision-automation/backend/training-jobs-aws"

In [2]:
import boto3

# Bucket that holds the training-job outputs and the exported model weights.
bucket_name = "third-umpire-decision-automation-osura"

# Client handle: used further down for file downloads and uploads.
s3_cli = boto3.client("s3")

# Resource handle plus a Bucket object: used for listing the bucket's objects.
s3_res = boto3.resource("s3")
bucket = s3_res.Bucket(bucket_name)

In [3]:
# Collect the key of every object in the bucket whose key starts with the
# training-jobs prefix; later cells filter this list per task.
tj_key = "training-jobs"
obj_keys = [s3_obj.key for s3_obj in bucket.objects.filter(Prefix=tj_key)]

In [None]:
from datetime import datetime
import os, shutil
from ultralytics import YOLO
import pandas as pd

# Each task is paired with the results.csv column used to rank its training jobs.
# Keeping the pairs side by side makes it harder for the two lists to drift apart.
_task_loss_pairs = [
    ("wicket-classification", "val/loss"),
    ("batsman-segmentation", "val/dfl_loss"),
    ("cricket-object-detect", "val/dfl_loss"),
]
tasks = [task_name for task_name, _ in _task_loss_pairs]
loss_names = [loss_col for _, loss_col in _task_loss_pairs]

# Version tag embedded in the name of the exported ONNX weights file.
model_version = 2

def _job_timestamp(key):
  """Return the timestamp embedded in a training job's S3 key, as epoch seconds.

  The third-from-last path segment of ``key`` is treated as the job name, which
  must end in ``YYYY-MM-DD-HH-MM-SS-<suffix>``. The six date/time tokens are
  taken relative to the END of the name, so the prefix in front of them may
  contain any number of hyphens. (The trailing suffix is presumably
  milliseconds - it is dropped either way.)

  Raises:
    ValueError: if the job name does not end in the expected pattern.
  """
  job_name = key.split("/")[-3]
  stamp = "-".join(job_name.split("-")[-7:-1])
  return int(datetime.strptime(stamp, "%Y-%m-%d-%H-%M-%S").timestamp())


def _min_val_loss(results_csv, loss_name):
  """Return the smallest value of column ``loss_name`` in ``results_csv``.

  Column headers are stripped of surrounding whitespace before the lookup.
  A column that was read as text is converted to numbers, with unparseable
  entries (e.g. "nan") becoming NaN, which ``min`` ignores.

  Returns:
    The minimum loss (NaN if the column holds no numeric value), or ``None``
    when the column is absent from the file.
  """
  res_df = pd.read_csv(results_csv)
  res_df.columns = [col.strip() for col in res_df.columns]
  if loss_name not in res_df.columns:
    return None
  losses = res_df[loss_name]
  if losses.dtype == "object":
    # Convert a fresh Series instead of assigning into a filtered slice of the
    # frame; astype(str) also copes with columns mixing strings and floats.
    losses = pd.to_numeric(losses.astype(str).str.strip(), errors="coerce")
  return losses.min()


# For every task: download all of its training-job outputs, pick the job with the
# lowest validation loss, copy that job's results to Drive and upload its weights
# to S3 in ONNX format.
for task, loss_name in zip(tasks, loss_names):
  # Newest job first: together with the strict `<` below, the most recent job
  # wins when two jobs reach the same minimum loss.
  task_jobs = sorted(
      (
          {"key": key, "time": _job_timestamp(key)}
          for key in obj_keys
          if task in key and key.endswith("output.tar.gz")
      ),
      key=lambda job: -job["time"],
  )

  global_min_loss = float("inf")
  best_tj = None
  for job in task_jobs:
    job_ts = job["time"]
    dst_dir = f"./tmp/{task}/{job_ts}"
    os.makedirs(dst_dir, exist_ok=True)
    archive_path = f"{dst_dir}/output.tar.gz"
    s3_cli.download_file(Bucket=bucket_name, Key=job["key"], Filename=archive_path)
    shutil.unpack_archive(archive_path, dst_dir)

    # Jobs whose archive contains no best.pt are not candidates.
    if not os.path.exists(f"{dst_dir}/{task}/weights/best.pt"):
      continue

    min_loss = _min_val_loss(f"{dst_dir}/{task}/results.csv", loss_name)
    if min_loss is None:
      print(f"[{task}] job {job_ts}: column '{loss_name}' not in results.csv; skipping")
      continue
    # A NaN minimum compares False here, so such a job is never selected.
    if min_loss < global_min_loss:
      global_min_loss = min_loss
      best_tj = job_ts

  if best_tj is None:
    print(f"[{task}] no training job with usable weights found; nothing exported")
    continue

  job_path = f"./tmp/{task}/{best_tj}/{task}"

  # copy results to drive (replacing whatever an earlier run left there)
  copy_path = f"{results_drive_path}/{task}"
  if os.path.exists(copy_path):
    shutil.rmtree(copy_path)
  shutil.copytree(job_path, copy_path)

  # export the best weights to ONNX and upload them to s3
  model = YOLO(f"{job_path}/weights/best.pt")
  onnx_src_path = model.export(format='onnx')
  weights_name = f"yolov8_{task.replace('-','_')}-v{model_version}.onnx"
  dst_key = f"model-weights/{weights_name}"
  s3_cli.upload_file(Filename=onnx_src_path, Bucket=bucket_name, Key=dst_key)
  