In [None]:
# --- 1. Setup
from google.colab import auth
auth.authenticate_user()
from google.cloud import bigquery
import gcsfs
import cv2
import tempfile
import pandas as pd
import numpy as np
!pip install decord
from torchvision import models, transforms
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from transformers import VideoMAEForVideoClassification, pipeline
import gcsfs
from transformers import TrainingArguments, Trainer
from transformers import VideoMAEImageProcessor
import tempfile
from transformers import VideoMAEModel
import torch.nn as nn
from torch.utils.data import Dataset
import torch
import decord
import json




Add checkpoints in case the session times out.

Optimal batch size: 16-32.

Use the fp16 datatype.



In [None]:
# Initialize the BigQuery and GCS clients against one shared project ID
PROJECT_ID = 'sccm-datathon-2025-participant'
client = bigquery.Client(project=PROJECT_ID)
fs = gcsfs.GCSFileSystem(project=PROJECT_ID)

In [None]:
# --- 2. BigQuery data
# Simulation rows that reference a video file.
query_sim = """
SELECT sim_fileref_filename, *
FROM `sccm-discovery.AutoDoc.Simulation Data`
WHERE sim_fileref_filename IS NOT NULL
"""
sim_df = client.query(query_sim).to_dataframe()

# Every row of the annotations table.
query_ann = """
SELECT *
FROM `sccm-discovery.AutoDoc.Annotations`
"""
ann_df = client.query(query_ann).to_dataframe()


In [None]:
# --- 3. Merge and prepare labels
# The join keys are cast to str in place on purpose: a later cell merges on
# sim_df['ad_id'] again and relies on it already being a string column.
sim_df['ad_id'] = sim_df['ad_id'].astype(str)
ann_df['file_id'] = ann_df['file_id'].astype(str)
merged_df = sim_df.merge(ann_df, left_on='ad_id', right_on='file_id', how='inner')

# Create multi-label table: one row per ad_id holding its distinct tasks.
# sorted() keeps every task list in a deterministic order between runs
# (the iteration order of a plain set of strings is not stable).
multi_label_df = (
    merged_df[merged_df['task'].notna()]
    .groupby('ad_id')['task']
    .apply(lambda x: sorted(set(x)))
    .reset_index()
)

In [None]:
# Stringify the time columns so every annotation carries plain text;
# values that are not valid "hh:mm:ss" are rejected later by the dataset's parser.
TIME_COLUMNS = ["skill_start_time_hh:mm:ss", "skill_end_time_hh:mm:ss"]
for time_column in TIME_COLUMNS:
    merged_df[time_column] = merged_df[time_column].astype(str)

# One record per video file: its reference name plus the list of
# {task, start, end} annotation dicts. Iterating the groupby directly avoids
# groupby.apply on the grouping column and the slow row-wise iterrows().
grouped = [
    {
        "file_refname": file_refname,
        "annotations": video_rows[["task", *TIME_COLUMNS]].to_dict("records"),
    }
    for file_refname, video_rows in merged_df.groupby('sim_fileref_filename')
]


  .apply(lambda df: {


In [None]:
# --- 3. Merge and prepare labels (same statements as the earlier merge cell)
# NOTE(review): when run in document order this rebuilds merged_df from
# sim_df/ann_df, so the str-cast applied to merged_df's skill time columns in
# the preceding cell is not present on the new frame.
sim_df['ad_id'] = sim_df['ad_id'].astype(str)
ann_df['file_id'] = ann_df['file_id'].astype(str)
merged_df = sim_df.merge(ann_df, left_on='ad_id', right_on='file_id', how='inner')
# Last expression of the cell: displays (rows, columns) of the merged frame.
merged_df.shape

(1442, 103)

In [None]:
from torch.utils.data import Dataset
import decord
from decord import VideoReader, cpu
import torch
import json
import tempfile
import os
import re
from torchvision import transforms

# Select decord's "torch" bridge; the dataset below calls tensor methods
# (.permute/.to) on the frames returned by VideoReader.get_batch.
decord.bridge.set_bridge("torch")

def download_gcs_video(gcs_uri, fs):
    """Download a video from GCS to a temporary local file and return its path.

    The object is streamed to disk in chunks instead of being read fully into
    memory, and the temporary file is removed again if the copy fails or is
    interrupted.

    Args:
        gcs_uri: Object location in the form ``gs://bucket/path/to/file.mp4``.
        fs: File-system object whose ``open(path, 'rb')`` returns a readable
            binary file usable as a context manager.

    Returns:
        Path of the local ``.mp4`` temporary file. It is created with
        ``delete=False``, so the caller is responsible for deleting it.

    Raises:
        ValueError: If ``gcs_uri`` does not look like ``gs://bucket/path``.
    """
    import shutil  # local import so this cell does not depend on another import cell

    match = re.match(r'gs://([^/]+)/(.+)', gcs_uri)
    if not match:
        raise ValueError("Invalid GCS URI")

    bucket, path = match.groups()

    with fs.open(f"{bucket}/{path}", 'rb') as remote:
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            try:
                shutil.copyfileobj(remote, tmp)
            except BaseException:
                # Also covers KeyboardInterrupt: never leave a partial file behind.
                tmp.close()
                os.remove(tmp.name)
                raise
    return tmp.name  # local file path; the file is closed and flushed here


class VideoMultiLabelJSONDataset(Dataset):
    """Multi-label video dataset built from per-video annotation records.

    Each element of ``data`` is a dict with a ``"file_refname"`` and a list of
    ``"annotations"``; every annotation holds a ``"task"`` and the strings
    ``"skill_start_time_hh:mm:ss"`` / ``"skill_end_time_hh:mm:ss"``.

    ``__getitem__`` downloads the video, collects frames from every valid
    annotated interval, reduces them to ``frames_to_select`` frames and returns
    ``{"pixel_values": ..., "labels": ...}`` where ``labels`` is a float
    multi-hot vector indexed by ``label2id``. Videos that are missing or fail
    to process yield black dummy frames with an all-zero label vector so a
    training loop is not interrupted.
    """

    def __init__(self, data, processor, label2id, fs, gcs_base_path="gs://sccm--autodoc2025/migrated_video/"):
        self.data = data
        self.processor = processor
        self.label2id = label2id
        self.fs = fs
        self.gcs_base_path = gcs_base_path
        self.frames_to_select = 16 # Set the number of frames to 16

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _time_to_seconds(time_str):
        """Convert ``"hh:mm:ss"`` (optionally ``"hh:mm:ss.ffffff"``) to seconds.

        Returns -1 for anything that is not a string in exactly that format,
        e.g. ``None`` or the text ``"None"``.
        """
        if not isinstance(time_str, str):
            return -1
        # fullmatch: trailing junk must not reach int() and abort the whole video.
        match = re.fullmatch(r'(\d{2}):(\d{2}):(\d{2})(\.\d+)?', time_str)
        if match is None:
            return -1
        hours, minutes, seconds, fraction = match.groups()
        total = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
        if fraction:
            total += float(fraction)
        return total

    @staticmethod
    def _blank_frame():
        """Return one black 224x224 RGB frame used for padding and dummy items."""
        return transforms.ToPILImage()(torch.zeros(3, 224, 224, dtype=torch.uint8))

    def _dummy_item(self):
        """Return (pixel_values, labels) made of black frames and all-zero labels."""
        dummy_frames = [self._blank_frame() for _ in range(self.frames_to_select)]
        pixel_values = self.processor(dummy_frames, return_tensors="pt")["pixel_values"]
        pixel_values = pixel_values.squeeze(0)
        labels = torch.zeros(len(self.label2id), dtype=torch.float)
        return pixel_values, labels

    def __getitem__(self, idx):
        entry = self.data[idx]
        file_refname = entry["file_refname"]
        annotations = entry["annotations"]

        # Build the URI with "/" explicitly; os.path.join is meant for local paths.
        gcs_path = f"{self.gcs_base_path.rstrip('/')}/{file_refname}.mp4"
        local_video_path = None

        try:
            # Download video
            local_video_path = download_gcs_video(gcs_path, self.fs)

            # Load video with Decord
            vr = VideoReader(local_video_path, ctx=cpu(0))
            num_frames = len(vr)
            fps = vr.get_avg_fps()
            to_pil = transforms.ToPILImage()

            # Collect frames and labels for all annotations in the video
            frames_for_video = []
            labels_for_video = [0] * len(self.label2id)

            for annotation in annotations:
                task = annotation["task"]
                start_sec = self._time_to_seconds(annotation["skill_start_time_hh:mm:ss"])
                end_sec = self._time_to_seconds(annotation["skill_end_time_hh:mm:ss"])

                if start_sec == -1 or end_sec == -1 or start_sec >= end_sec:
                    print(f"Skipping invalid annotation times for video: {file_refname}, task: {task}")
                    continue

                # Convert seconds to frame indices, clamped to the video length
                start_frame = max(0, int(start_sec * fps))
                end_frame = min(num_frames - 1, int(end_sec * fps))

                if start_frame > end_frame:
                    # The interval lies past the end of the video. Skip only this
                    # annotation instead of letting torch.arange raise and turn
                    # the whole video into dummy data.
                    print(f"Skipping annotation outside video bounds for video: {file_refname}, task: {task}")
                    continue

                # Select a fixed number of frames within the interval
                if end_frame - start_frame + 1 < self.frames_to_select:
                    # Not enough frames in interval, select all available and pad later
                    indices = torch.arange(start_frame, end_frame + 1)
                else:
                    # Select evenly spaced frames
                    indices = torch.linspace(start_frame, end_frame, self.frames_to_select).long()

                video_segment = vr.get_batch(indices)
                video_segment = video_segment.permute(0, 3, 1, 2).to(torch.uint8)
                frames_for_video.extend(to_pil(frame.contiguous().cpu()) for frame in video_segment)

                # Update labels for this task
                if task in self.label2id:
                    labels_for_video[self.label2id[task]] = 1

            if not frames_for_video:
                # Handle cases where no valid frames are extracted for any annotation
                print(f"No valid frames extracted for video: {file_refname}. Returning dummy data.")
                pixel_values, labels = self._dummy_item()
            else:
                if len(frames_for_video) > self.frames_to_select:
                    # Randomly choose which frames to keep, then sort the chosen
                    # positions so the kept frames stay in their collected order.
                    keep = torch.randperm(len(frames_for_video))[:self.frames_to_select].sort().values
                    sampled_frames = [frames_for_video[i] for i in keep.tolist()]
                else:
                    # Use all collected frames and pad with black frames if short
                    missing = self.frames_to_select - len(frames_for_video)
                    sampled_frames = frames_for_video + [self._blank_frame() for _ in range(missing)]

                pixel_values = self.processor(sampled_frames, return_tensors="pt")["pixel_values"]
                pixel_values = pixel_values.squeeze(0) # Remove batch dimension

                labels = torch.tensor(labels_for_video, dtype=torch.float)

        except FileNotFoundError:
            print(f"Skipping missing video file during processing: {gcs_path}")
            # Return dummy data to avoid crashing the training loop
            pixel_values, labels = self._dummy_item()

        except Exception as e:
            print(f"An error occurred while processing video {file_refname}: {e}")
            # Best-effort: fall back to dummy data rather than aborting training
            pixel_values, labels = self._dummy_item()

        finally:
            # Clean up the temporary local file
            if local_video_path and os.path.exists(local_video_path):
                os.remove(local_video_path)

        return {"pixel_values": pixel_values, "labels": labels}

In [None]:
# --- 4. Multiclass

# class VideoMultiLabelDataset(Dataset):
#     def __init__(self, df, processor, fs, label_columns):
#         self.df = df.reset_index(drop=True)  # Ensure index is clean
#         self.processor = processor
#         self.fs = fs
#         self.label_columns = label_columns

#     def __len__(self):
#         return len(self.df)

#     def __getitem__(self, idx):
#         import decord
#         from decord import VideoReader, cpu
#         decord.bridge.set_bridge("torch")

#         for _ in range(5):  # Try a few times to find a valid sample
#             try:
#                 row = self.df.iloc[idx]
#                 gcs_path = f"gs://sccm--autodoc2025/migrated_video/{row['sim_fileref_filename']}.mp4"
#                 local_path = download_gcs_video(gcs_path)

#                 vr = VideoReader(local_path, ctx=cpu(0))
#                 num_frames = 32
#                 indices = torch.linspace(0, len(vr) - 1, num_frames).long()
#                 # Get video frames from Decord
#                 video = vr.get_batch(indices)
#                 video = video.permute(0, 3, 1, 2).to(torch.uint8)

#                 # Convert each frame to PIL
#                 frames = [transforms.ToPILImage()(video[i].contiguous().cpu()) for i in range(video.shape[0])]

#                 # Pass PIL frames to processor
#                 pixel_values = self.processor(frames, return_tensors="pt")["pixel_values"]


#                 # Remove batch dim if needed
#                 pixel_values = pixel_values.squeeze(0)  # shape: (num_frames, C, H, W)

#                 # Labels
#                 labels = torch.tensor(row[self.label_columns].values.astype(float), dtype=torch.float)

#                 return {"pixel_values": pixel_values, "labels": labels}


#             except FileNotFoundError:
#                 print(f"Skipping missing video: {row['sim_fileref_filename']}")
#                 idx = (idx + 1) % len(self.df)  # try next sample

#         raise RuntimeError("Too many missing videos in a row")


# Binarize the per-video task lists into one 0/1 indicator column per task.
mlb = MultiLabelBinarizer()
label_matrix = mlb.fit_transform(multi_label_df['task'])
label_columns = mlb.classes_

# Rebuild from the two base columns so re-running this cell does not fail on
# label columns that were already joined; the explicit index keeps rows aligned.
multi_label_df = multi_label_df[['ad_id', 'task']].join(
    pd.DataFrame(label_matrix, columns=label_columns, index=multi_label_df.index)
)


In [None]:
# Forward map (task label -> column position) and its inverse.
label2id = dict(zip(label_columns, range(len(label_columns))))
id2label = dict((position, name) for name, position in label2id.items())


In [None]:
# --- 5. Merge labels with simulation data (to get video path info)
# Inner join on ad_id: only simulation rows that also appear in multi_label_df are kept.
full_df = sim_df.merge(multi_label_df, on='ad_id', how='inner')


In [None]:
# Preview the first rows of the merged frame.
full_df.head()

Unnamed: 0,sim_fileref_filename,ad_id,redcap_repeat_instrument,redcap_repeat_instance,sim_migrated,sim_migrated_name,sim_mascal,ad_starttime,ad_endtime,ad_usecontext,...,Nasopharyngeal Airway,Pressure Dressing Application,StartEx,Surgical cricothyrotomy,Time Sync,Tourniquet Application,Treat a Casualty for a Cold Injury,Treat a Casualty with Burns,Treat a Casualty with a Pelvic Fracture,Uses Sensor
0,44e44a8b-3a17-4c36-bca9-14d3a5b26358,1,file_reference,1,,,,,,,...,1,1,1,0,1,0,1,0,0,1
1,ca36e521-3e9b-4bcd-9f06-579b361ecf8a,2,file_reference,1,,,,,,,...,1,1,1,0,1,0,1,0,0,0
2,f628cf42-cbde-4973-993e-8f2c45eaadc9,3,file_reference,1,,,,,,,...,1,1,1,0,1,0,0,0,0,0
3,31e5ca7d-67d1-42bc-9699-a975f860c997,4,file_reference,1,,,,,,,...,1,1,1,0,1,1,0,0,1,0
4,e03d79a0-f71b-4db8-9cec-76542d472cb8,5,file_reference,1,,,,,,,...,0,1,1,0,1,1,0,0,1,0


In [None]:
def download_gcs_video(gcs_uri, fs):
    """Download a video from GCS to a temporary local file and return its path.

    Redefines the helper of the same name from the dataset cell. The object is
    streamed to disk in chunks instead of being read fully into memory, and the
    temporary file is removed again if the copy fails or is interrupted.

    Args:
        gcs_uri: Object location, e.g. ``gs://bucket_name/path/to/file.mp4``.
        fs: File-system object whose ``open(path, 'rb')`` returns a readable
            binary file usable as a context manager.

    Returns:
        Path of the local ``.mp4`` temporary file. It is created with
        ``delete=False``, so the caller is responsible for deleting it.

    Raises:
        ValueError: If ``gcs_uri`` does not look like ``gs://bucket/path``.
    """
    import os
    import re
    import shutil

    match = re.match(r'gs://([^/]+)/(.+)', gcs_uri)
    if not match:
        raise ValueError("Invalid GCS URI")

    bucket, path = match.groups()

    with fs.open(f"{bucket}/{path}", 'rb') as remote:
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            try:
                shutil.copyfileobj(remote, tmp)
            except BaseException:
                # Also covers KeyboardInterrupt: never leave a partial file behind.
                tmp.close()
                os.remove(tmp.name)
                raise
    return tmp.name  # local file path; the file is closed and flushed here

In [None]:
def multilabel_collate_fn(batch):
    """Collate per-sample dicts into a single batch dict.

    Every sample's "pixel_values" and "labels" tensors are stacked along a new
    leading batch dimension.
    """
    stacked = {}
    for key in ("pixel_values", "labels"):
        stacked[key] = torch.stack([sample[key] for sample in batch])
    return stacked


In [None]:
from sklearn.model_selection import train_test_split
# Count label occurrences
label_counts = full_df[label_columns].sum()

# Keep only labels that appear at least 2 times
valid_labels = label_counts[label_counts >= 2].index.tolist()

# Keep only videos that carry at least one of the valid labels
filtered_df = full_df[full_df[valid_labels].sum(axis=1) > 0].copy()

def video_exists(file_id):
    """Return whether ``<file_id>.mp4`` exists under the migrated_video prefix."""
    gcs_path = f"sccm--autodoc2025/migrated_video/{file_id}.mp4"
    return fs.exists(gcs_path)

# Apply check
filtered_df['exists'] = filtered_df['sim_fileref_filename'].apply(video_exists)

# Keep only rows with existing videos
filtered_df = filtered_df[filtered_df['exists']]

aligned_label_matrix = filtered_df[valid_labels].values

# Small smoke-test subset; capped at the number of available rows because
# DataFrame.sample raises when asked for more rows than exist.
SUBSET_SIZE = 10
subset_df = filtered_df.sample(n=min(SUBSET_SIZE, len(filtered_df)), random_state=42)
train_df, val_df = train_test_split(subset_df, test_size=0.2, random_state=42)


In [None]:
# from transformers import VideoMAEModel
# import torch.nn as nn

# class MultiLabelVideoMAE(torch.nn.Module):
#     def __init__(self, num_labels, label2id, id2label):
#         super().__init__()
#         self.backbone = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")
#         hidden_size = self.backbone.config.hidden_size
#         self.classifier = nn.Linear(hidden_size, num_labels)

#         # Update config
#         self.config = self.backbone.config
#         self.config.num_labels = num_labels
#         self.config.label2id = label2id
#         self.config.id2label = id2label

#     def forward(self, pixel_values, labels=None):
#         outputs = self.backbone(pixel_values=pixel_values)
#         cls_token = outputs.last_hidden_state[:, 0]  # (batch_size, hidden_size)
#         logits = self.classifier(cls_token)

#         if labels is not None:
#             loss_fn = nn.BCEWithLogitsLoss()
#             loss = loss_fn(logits, labels.float())
#             return {"loss": loss, "logits": logits}
#         return {"logits": logits}

# processor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")

# label_columns = valid_labels
# label2id = {label: i for i, label in enumerate(label_columns)}
# id2label = {i: label for label, i in label2id.items()}

# train_dataset = VideoMultiLabelDataset(train_df, processor, fs, label_columns)
# val_dataset = VideoMultiLabelDataset(val_df, processor, fs, label_columns)

# args = TrainingArguments(
#     output_dir="./videomae-multilabel",
#     save_strategy="epoch",
#     eval_strategy="epoch",
#     per_device_train_batch_size=2,
#     per_device_eval_batch_size=2,
#     num_train_epochs=5,
#     learning_rate=2e-5,
#     remove_unused_columns=False,
#     logging_steps=10,
# )

# trainer = Trainer(
#     model=MultiLabelVideoMAE(num_labels=len(label_columns), label2id=label2id, id2label=id2label),
#     args=args,
#     train_dataset=train_dataset,
#     eval_dataset=val_dataset,
#     data_collator=multilabel_collate_fn,
# )


In [None]:
from transformers import VideoMAEModel, VideoMAEImageProcessor, Trainer, TrainingArguments
import torch
import torch.nn as nn
import os
import shutil
from google.colab import drive
from datetime import datetime

# Mount Google Drive; CheckpointCallback below writes under /content/drive/MyDrive by default.
drive.mount('/content/drive')

class MultiLabelVideoMAE(torch.nn.Module):
    """Pretrained VideoMAE backbone followed by one linear layer of per-label logits.

    ``forward`` returns ``{"logits": ...}`` and, when ``labels`` is given, also
    a ``"loss"`` computed with ``BCEWithLogitsLoss``.
    """

    def __init__(self, num_labels, label2id, id2label):
        super().__init__()
        backbone = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")
        self.backbone = backbone
        self.classifier = nn.Linear(backbone.config.hidden_size, num_labels)

        # Re-use the backbone's config object and attach the label metadata to it
        config = backbone.config
        config.num_labels = num_labels
        config.label2id = label2id
        config.id2label = id2label
        self.config = config

    def forward(self, pixel_values, labels=None):
        hidden_states = self.backbone(pixel_values=pixel_values).last_hidden_state
        # NOTE(review): the token at position 0 is used as the pooled feature;
        # confirm the backbone really emits a CLS-style token there.
        logits = self.classifier(hidden_states[:, 0])  # (batch_size, num_labels)

        if labels is None:
            return {"logits": logits}

        criterion = nn.BCEWithLogitsLoss()
        return {"loss": criterion(logits, labels.float()), "logits": logits}

class CheckpointCallback:
    """Writes per-epoch training checkpoints into a (Google Drive) folder.

    Each call to ``on_epoch_end`` creates ``<drive_path>/epoch_<n>`` holding the
    model weights, the training arguments and, when available, the optimizer
    state, the scheduler state and the trainer's log history.
    """

    def __init__(self, drive_path="/content/drive/MyDrive/videomae_checkpoints"):
        self.drive_path = drive_path
        os.makedirs(drive_path, exist_ok=True)

    def on_epoch_end(self, trainer, epoch):
        """Save a checkpoint for the zero-based ``epoch`` that just finished.

        Args:
            trainer: Object exposing ``model``, ``args``, ``optimizer``,
                ``lr_scheduler`` and ``state`` (a ``transformers.Trainer`` here).
            epoch: Zero-based epoch index; the folder is named ``epoch_<epoch + 1>``.
        """
        print(f"\n🔄 Saving checkpoint for epoch {epoch + 1}...")

        # Create epoch-specific folder
        epoch_folder = os.path.join(self.drive_path, f"epoch_{epoch + 1}")
        os.makedirs(epoch_folder, exist_ok=True)

        # Save the model weights. A plain torch.nn.Module has no
        # save_pretrained(), so fall back to a state dict named like the file
        # load_checkpoint_from_drive() reads.
        model = trainer.model
        if hasattr(model, "save_pretrained"):
            model.save_pretrained(epoch_folder)
        else:
            torch.save(model.state_dict(), os.path.join(epoch_folder, "pytorch_model.bin"))

        # Save training arguments
        torch.save(trainer.args, os.path.join(epoch_folder, "training_args.bin"))

        # Save optimizer and scheduler states (either may not exist yet)
        optimizer = getattr(trainer, "optimizer", None)
        if optimizer is not None:
            torch.save(optimizer.state_dict(), os.path.join(epoch_folder, "optimizer.bin"))
        lr_scheduler = getattr(trainer, "lr_scheduler", None)
        if lr_scheduler is not None:
            torch.save(lr_scheduler.state_dict(), os.path.join(epoch_folder, "scheduler.bin"))

        # Save training metrics if available
        log_history = getattr(trainer.state, "log_history", None)
        if log_history:
            torch.save(log_history, os.path.join(epoch_folder, "training_log.bin"))

        print(f"✅ Checkpoint saved to {epoch_folder}")

def setup_training_with_checkpoints(val_fraction=0.2, seed=42):
    """Build the Trainer and the Drive checkpoint callback for multi-label training.

    Reads the notebook-level names ``valid_labels``, ``grouped``, ``fs`` and
    ``multilabel_collate_fn``. The per-video records in ``grouped`` are split
    into disjoint training and validation sets so that the evaluation loss is
    not computed on the training videos.

    Args:
        val_fraction: Share of the records held out for validation.
        seed: Random seed of the train/validation split.

    Returns:
        ``(trainer, checkpoint_callback)``.
    """

    # Initialize processor and data
    processor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")

    # Setup labels (valid_labels comes from the label-filtering cell)
    label_columns = valid_labels
    label2id = {label: i for i, label in enumerate(label_columns)}
    id2label = {i: label for label, i in label2id.items()}

    # Create unique output directory with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    local_output_dir = f"./videomae-multilabel_{timestamp}"

    # Training arguments with checkpoint configuration
    args = TrainingArguments(
        output_dir=local_output_dir,
        save_strategy="epoch",
        save_total_limit=2,  # Keep only last 2 checkpoints locally to save space
        eval_strategy="epoch",
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        num_train_epochs=5,
        learning_rate=2e-5,
        remove_unused_columns=False,
        logging_steps=10,
        logging_dir=f"{local_output_dir}/logs",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        dataloader_pin_memory=False,  # Helps with memory issues
    )

    # Initialize model
    model = MultiLabelVideoMAE(
        num_labels=len(label_columns),
        label2id=label2id,
        id2label=id2label
    )

    # Disjoint train/validation records; evaluating on the training videos
    # would make eval_loss (and therefore best-model selection) meaningless.
    train_records, val_records = train_test_split(
        grouped, test_size=val_fraction, random_state=seed
    )
    train_dataset = VideoMultiLabelJSONDataset(
        data=train_records,
        processor=processor,
        label2id=label2id,
        fs=fs # Pass the fs object here
    )
    val_dataset = VideoMultiLabelJSONDataset(
        data=val_records,
        processor=processor,
        label2id=label2id,
        fs=fs # Pass the fs object here
    )

    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=multilabel_collate_fn,
    )

    # Initialize checkpoint callback
    checkpoint_callback = CheckpointCallback()

    return trainer, checkpoint_callback

def custom_training_loop(trainer, checkpoint_callback, resume_from_checkpoint=None):
    """Run training once and copy a checkpoint to Google Drive after every epoch.

    ``trainer.train()`` already iterates over all ``num_train_epochs`` epochs,
    so it is called exactly once. The per-epoch Drive checkpoint is driven by
    the Trainer's own epoch-end event, forwarded to
    ``checkpoint_callback.on_epoch_end(trainer, zero_based_epoch)``.

    Args:
        trainer: The ``transformers.Trainer`` to run.
        checkpoint_callback: Object with ``on_epoch_end(trainer, epoch)`` and a
            ``drive_path`` attribute.
        resume_from_checkpoint: Passed through to ``trainer.train`` so an
            interrupted run can be continued; ``None`` starts from scratch.
    """
    from transformers import TrainerCallback  # local import keeps this function self-contained

    total_epochs = trainer.args.num_train_epochs

    class _DriveCheckpointAdapter(TrainerCallback):
        """Bridges Trainer epoch events to the Drive checkpoint callback."""

        def on_epoch_begin(self, args, state, control, **kwargs):
            current = int(round(state.epoch or 0)) + 1
            print(f"\n📈 Starting Epoch {current}/{total_epochs}")

        def on_epoch_end(self, args, state, control, **kwargs):
            # state.epoch counts completed epochs; the callback wants a zero-based index.
            finished = int(round(state.epoch or 0))
            checkpoint_callback.on_epoch_end(trainer, max(finished - 1, 0))

    print("🚀 Starting training with Google Drive checkpoints...")

    adapter = _DriveCheckpointAdapter()
    trainer.add_callback(adapter)
    try:
        trainer.train(resume_from_checkpoint=resume_from_checkpoint)
    finally:
        # Do not leave the adapter attached if training fails or is interrupted,
        # otherwise a second call would save every checkpoint twice.
        trainer.remove_callback(adapter)

    # Evaluate the model the trainer ended up with
    eval_results = trainer.evaluate()
    print(f"📊 Final Evaluation Results: {eval_results}")

    # Save final model to Google Drive
    final_model_path = os.path.join(checkpoint_callback.drive_path, "final_best_model")
    os.makedirs(final_model_path, exist_ok=True)
    trainer.save_model(final_model_path)
    print(f"🎯 Final best model saved to {final_model_path}")

def load_checkpoint_from_drive(checkpoint_path, model_class, num_labels, label2id, id2label,
                               optimizer=None, lr_scheduler=None, map_location="cpu"):
    """Load a checkpoint folder from Google Drive and return the restored model.

    Args:
        checkpoint_path: Folder containing ``pytorch_model.bin`` and optionally
            ``optimizer.bin`` / ``scheduler.bin``.
        model_class: Callable building the model from
            ``num_labels``, ``label2id`` and ``id2label`` keyword arguments.
        num_labels, label2id, id2label: Forwarded to ``model_class``.
        optimizer: Optional optimizer that receives the saved optimizer state.
        lr_scheduler: Optional scheduler that receives the saved scheduler state.
        map_location: Device mapping for ``torch.load``; the default ``"cpu"``
            lets a checkpoint written on a GPU load on a CPU-only runtime.

    Returns:
        The model with the checkpoint weights loaded.
    """
    print(f"📥 Loading checkpoint from {checkpoint_path}...")

    # Initialize model
    model = model_class(num_labels=num_labels, label2id=label2id, id2label=id2label)

    # Load model weights.
    # NOTE: torch.load unpickles data; only load checkpoints from a trusted source.
    weights_path = os.path.join(checkpoint_path, "pytorch_model.bin")
    model.load_state_dict(torch.load(weights_path, map_location=map_location))

    # Restore optimizer state only when there is an optimizer to receive it
    optimizer_path = os.path.join(checkpoint_path, "optimizer.bin")
    if optimizer is not None and os.path.exists(optimizer_path):
        optimizer.load_state_dict(torch.load(optimizer_path, map_location=map_location))
        print("✅ Optimizer state loaded")

    # Restore scheduler state only when there is a scheduler to receive it
    scheduler_path = os.path.join(checkpoint_path, "scheduler.bin")
    if lr_scheduler is not None and os.path.exists(scheduler_path):
        lr_scheduler.load_state_dict(torch.load(scheduler_path, map_location=map_location))
        print("✅ Scheduler state loaded")

    print("✅ Checkpoint loaded successfully!")
    return model

# Example usage:
# NOTE(review): inside a notebook cell __name__ is "__main__", so this guard
# does not prevent the training run from starting when the cell is executed.
if __name__ == "__main__":
    # Setup and run training; both names stay defined at notebook level afterwards
    trainer, checkpoint_callback = setup_training_with_checkpoints()
    custom_training_loop(trainer, checkpoint_callback)

    print("🎉 Training completed! All checkpoints saved to Google Drive.")

    # Example of loading a checkpoint
    # model = load_checkpoint_from_drive(
    #     "/content/drive/MyDrive/videomae_checkpoints/epoch_3",
    #     MultiLabelVideoMAE,
    #     len(label_columns),
    #     label2id,
    #     id2label
    # )

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
🚀 Starting training with Google Drive checkpoints...

📈 Starting Epoch 1/5
Skipping missing video file during processing: gs://sccm--autodoc2025/migrated_video/e68b7fd3-1581-43bb-b744-52b0421d49dc.mp4
Skipping invalid annotation times for video: 65fa3944-ed6c-4ae7-9f96-5df76b0396f6, task: Time Sync
Skipping missing video file during processing: gs://sccm--autodoc2025/migrated_video/f2b4b63e-d0dd-43ef-ba96-6824802510b5.mp4
Skipping missing video file during processing: gs://sccm--autodoc2025/migrated_video/7a42c3ab-c927-432e-8815-cd03aec2aa9d.mp4


Epoch,Training Loss,Validation Loss


Skipping invalid annotation times for video: 0eb95908-f038-4ac2-bcee-1ab7b4ab4068, task: Time Sync
Skipping invalid annotation times for video: 31931d51-7792-4d6b-b859-14e7562b7879, task: Time Sync
Skipping invalid annotation times for video: ec7538e7-1813-4f3f-b76a-1667453caba9, task: Time Sync
Skipping missing video file during processing: gs://sccm--autodoc2025/migrated_video/8f6e91f1-cfda-46a0-ba46-6f2004b49c6b.mp4
Skipping invalid annotation times for video: f628cf42-cbde-4973-993e-8f2c45eaadc9, task: Time Sync
Skipping invalid annotation times for video: 79aeb0ba-04c7-40a6-b033-939c002c2c9a, task: Time Sync
Skipping invalid annotation times for video: 6a81d79f-f45a-4362-af09-946c423553a6, task: Time Sync
Skipping invalid annotation times for video: ff42a428-999e-48ce-ad97-14eedd580395, task: Time Sync
Skipping invalid annotation times for video: a0dc9042-28b9-4452-9a82-f04ba5d5e0f9, task: Time Sync
Skipping invalid annotation times for video: 25eab334-1a93-48db-82be-6f2a6003750a,

KeyboardInterrupt: 

48fda5ccea388be96efda74ea0a13779796a8d8d

In [None]:
# NOTE(review): custom_training_loop() in the previous cell already calls
# trainer.train(); running this cell starts another training run on the same trainer.
trainer.train()


In [None]:
# test on 5

