In [1]:
!pip install -r requirements.txt

Collecting torch==2.0.1 (from -r requirements.txt (line 1))
  Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl.metadata (24 kB)
Collecting torchvision==0.15.2 (from -r requirements.txt (line 2))
  Downloading torchvision-0.15.2-cp310-cp310-manylinux1_x86_64.whl.metadata (11 kB)
Collecting opencv-python==4.11.0.86 (from -r requirements.txt (line 3))
  Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting huggingface-hub==0.29.3 (from -r requirements.txt (line 4))
  Downloading huggingface_hub-0.29.3-py3-none-any.whl.metadata (13 kB)
Collecting tqdm==4.67.1 (from -r requirements.txt (line 7))
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.7/57.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ultralytics==8.3.94 (from -r requirements.txt (line 8))
  Downloading ultralytics-8.3.94-py3-none-any.whl.metadata (35 kB)
Collec

In [2]:
import torch
import numpy as np
import random

# One seed shared by every random number generator used in this notebook
# (Python's `random`, NumPy, and PyTorch on CPU and all CUDA devices).
seed = 0
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_rng(seed)

# cuDNN: turn off the benchmark autotuner and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
from pathlib import Path

from utils.data_preparation import DataPreparer
from da_fine_tuning.dataset_preparer import DatasetPreparer
from da_fine_tuning.fine_tune_depth_anything import DepthAnythingFineTuner

In [4]:
# Important! Set the directory where files will be stored
workspace_path = Path('/workspace/output')
# parents=True creates missing ancestor directories; exist_ok=True makes the
# cell safe to re-run when the directory is already there.
workspace_path.mkdir(parents=True, exist_ok=True)

In [5]:
# Download and extract the test, valid and train archives into the workspace.
# According to the recorded output, each split is then processed per game and a
# "Fix depths" pass runs over <workspace>/<split>_depths.
# The three archives are reported as 6.78G, 6.96G and 19.9G, so this cell is slow.
data_preparer = DataPreparer(workspace_path)
data_preparer.prepare()

Downloading test dataset


test.zip:   0%|          | 0.00/6.78G [00:00<?, ?B/s]

Extracting test dataset


Process games: 100%|██████████| 20/20 [00:03<00:00,  5.50it/s]
Fix depths in /workspace/output/test_depths: 100%|██████████| 1423/1423 [00:33<00:00, 42.13it/s]


Downloading valid dataset


valid.zip:   0%|          | 0.00/6.96G [00:00<?, ?B/s]

Extracting valid dataset


Process games: 100%|██████████| 21/21 [00:03<00:00,  5.76it/s]
Fix depths in /workspace/output/valid_depths: 100%|██████████| 1441/1441 [00:30<00:00, 46.60it/s]


Downloading train dataset


train.zip:   0%|          | 0.00/19.9G [00:00<?, ?B/s]

Extracting train dataset


Process games: 100%|██████████| 21/21 [00:21<00:00,  1.01s/it]
Fix depths in /workspace/output/train_depths: 100%|██████████| 3864/3864 [01:25<00:00, 45.09it/s]


In [6]:
# Run the "Prepare masks" step for every split, in train -> valid -> test order.
# Each call receives the split's image directory and its mask directory.
dataset_preparer = DatasetPreparer()
for split_name in ('train', 'valid', 'test'):
    split_images_dir = workspace_path / f'{split_name}_images'
    split_masks_dir = workspace_path / f'{split_name}_masks'
    dataset_preparer.prepare_dataset(split_images_dir, split_masks_dir)

Prepare masks: 100%|██████████| 3864/3864 [14:49<00:00,  4.34it/s]
Prepare masks: 100%|██████████| 1441/1441 [05:27<00:00,  4.40it/s]
Prepare masks: 100%|██████████| 1423/1423 [05:30<00:00,  4.30it/s]


In [7]:
# Build the depth fine-tuner from the train/valid/test depth directories that the
# data preparation step populated in the workspace.
# Per the recorded output, construction downloads a config.json and a 1.34G
# pytorch_model.bin, and prints "xFormers not available".
da_fine_tuner = DepthAnythingFineTuner(
    depth_train_path=workspace_path / 'train_depths',
    depth_val_path=workspace_path / 'valid_depths',
    depth_test_path=workspace_path / 'test_depths',
)

config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

xFormers not available
xFormers not available


pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


In [8]:
# Runs the "Adjust to pred" pass over the 3864 training samples (about 17 minutes
# in the recorded run).
# NOTE(review): presumably aligns the training depth maps to the model's own
# predictions before fine-tuning — confirm in DepthAnythingFineTuner.
da_fine_tuner.adjust_depths_to_pred()

Adjust to pred: 100%|██████████| 3864/3864 [17:15<00:00,  3.73it/s]


In [9]:
# Fine-tune for 24 epochs. In the recorded run each epoch takes ~30 minutes of
# training plus a ~6 minute validation pass that reports `silog` and `sq` errors.
# NOTE(review): checkpoint_path is the workspace directory; presumably checkpoints
# are written there — confirm in DepthAnythingFineTuner.fine_tune.
da_fine_tuner.fine_tune(n_epochs=24, checkpoint_path=workspace_path)

Epoch 1/24: 100%|██████████| 3864/3864 [30:45<00:00,  2.09it/s, avg_loss=0.148, loss=0.0858] 


Epoch 1/24, Loss: 0.1476


Val error: 100%|██████████| 1441/1441 [06:37<00:00,  3.63it/s, rse=0.855, silog_err=0.418]


Val loss: silog: 0.4183, sq: 0.8547


Epoch 2/24: 100%|██████████| 3864/3864 [30:42<00:00,  2.10it/s, avg_loss=0.0918, loss=0.0411] 


Epoch 2/24, Loss: 0.0918


Val error: 100%|██████████| 1441/1441 [06:15<00:00,  3.84it/s, rse=0.719, silog_err=0.389]


Val loss: silog: 0.3893, sq: 0.7189


Epoch 3/24: 100%|██████████| 3864/3864 [30:17<00:00,  2.13it/s, avg_loss=0.0776, loss=0.0236] 


Epoch 3/24, Loss: 0.0776


Val error: 100%|██████████| 1441/1441 [06:27<00:00,  3.72it/s, rse=0.682, silog_err=0.374]


Val loss: silog: 0.3735, sq: 0.6820


Epoch 4/24: 100%|██████████| 3864/3864 [30:55<00:00,  2.08it/s, avg_loss=0.068, loss=0.151]   


Epoch 4/24, Loss: 0.0680


Val error: 100%|██████████| 1441/1441 [06:28<00:00,  3.71it/s, rse=0.761, silog_err=0.405]


Val loss: silog: 0.4046, sq: 0.7612


Epoch 5/24: 100%|██████████| 3864/3864 [30:32<00:00,  2.11it/s, avg_loss=0.0634, loss=0.0348] 


Epoch 5/24, Loss: 0.0634


Val error: 100%|██████████| 1441/1441 [06:18<00:00,  3.81it/s, rse=0.647, silog_err=0.365]


Val loss: silog: 0.3655, sq: 0.6473


Epoch 6/24: 100%|██████████| 3864/3864 [30:51<00:00,  2.09it/s, avg_loss=0.0558, loss=0.0319] 


Epoch 6/24, Loss: 0.0558


Val error: 100%|██████████| 1441/1441 [06:30<00:00,  3.69it/s, rse=0.627, silog_err=0.362]


Val loss: silog: 0.3617, sq: 0.6272


Epoch 7/24: 100%|██████████| 3864/3864 [30:39<00:00,  2.10it/s, avg_loss=0.0498, loss=0.0325] 


Epoch 7/24, Loss: 0.0498


Val error: 100%|██████████| 1441/1441 [06:26<00:00,  3.73it/s, rse=0.687, silog_err=0.373]


Val loss: silog: 0.3726, sq: 0.6873


Epoch 8/24: 100%|██████████| 3864/3864 [30:45<00:00,  2.09it/s, avg_loss=0.0426, loss=0.0144] 


Epoch 8/24, Loss: 0.0426


Val error: 100%|██████████| 1441/1441 [06:42<00:00,  3.58it/s, rse=0.629, silog_err=0.356]


Val loss: silog: 0.3557, sq: 0.6292


Epoch 9/24: 100%|██████████| 3864/3864 [30:47<00:00,  2.09it/s, avg_loss=0.0388, loss=0.0385] 


Epoch 9/24, Loss: 0.0388


Val error: 100%|██████████| 1441/1441 [06:22<00:00,  3.77it/s, rse=0.668, silog_err=0.377]


Val loss: silog: 0.3771, sq: 0.6675


Epoch 10/24: 100%|██████████| 3864/3864 [30:31<00:00,  2.11it/s, avg_loss=0.0319, loss=0.0283] 


Epoch 10/24, Loss: 0.0319


Val error: 100%|██████████| 1441/1441 [06:22<00:00,  3.76it/s, rse=0.574, silog_err=0.342]


Val loss: silog: 0.3424, sq: 0.5744


Epoch 11/24: 100%|██████████| 3864/3864 [30:20<00:00,  2.12it/s, avg_loss=0.029, loss=0.0164]  


Epoch 11/24, Loss: 0.0290


Val error: 100%|██████████| 1441/1441 [06:12<00:00,  3.87it/s, rse=0.593, silog_err=0.349]


Val loss: silog: 0.3487, sq: 0.5931


Epoch 12/24: 100%|██████████| 3864/3864 [30:42<00:00,  2.10it/s, avg_loss=0.0257, loss=0.0176] 


Epoch 12/24, Loss: 0.0257


Val error: 100%|██████████| 1441/1441 [06:20<00:00,  3.79it/s, rse=0.566, silog_err=0.336]


Val loss: silog: 0.3360, sq: 0.5661


Epoch 13/24: 100%|██████████| 3864/3864 [30:44<00:00,  2.10it/s, avg_loss=0.0231, loss=0.0135] 


Epoch 13/24, Loss: 0.0231


Val error: 100%|██████████| 1441/1441 [06:31<00:00,  3.68it/s, rse=0.541, silog_err=0.331]


Val loss: silog: 0.3307, sq: 0.5411


Epoch 14/24: 100%|██████████| 3864/3864 [30:55<00:00,  2.08it/s, avg_loss=0.0222, loss=0.0267] 


Epoch 14/24, Loss: 0.0222


Val error: 100%|██████████| 1441/1441 [06:29<00:00,  3.70it/s, rse=0.544, silog_err=0.325]


Val loss: silog: 0.3250, sq: 0.5435


Epoch 15/24: 100%|██████████| 3864/3864 [30:49<00:00,  2.09it/s, avg_loss=0.021, loss=0.0208]  


Epoch 15/24, Loss: 0.0210


Val error: 100%|██████████| 1441/1441 [06:21<00:00,  3.78it/s, rse=0.544, silog_err=0.324]


Val loss: silog: 0.3240, sq: 0.5436


Epoch 16/24: 100%|██████████| 3864/3864 [30:33<00:00,  2.11it/s, avg_loss=0.0197, loss=0.00329]


Epoch 16/24, Loss: 0.0197


Val error: 100%|██████████| 1441/1441 [06:28<00:00,  3.71it/s, rse=0.537, silog_err=0.321]


Val loss: silog: 0.3205, sq: 0.5365


Epoch 17/24: 100%|██████████| 3864/3864 [30:30<00:00,  2.11it/s, avg_loss=0.0181, loss=0.0107] 
Val error: 100%|██████████| 1441/1441 [06:19<00:00,  3.80it/s, rse=0.526, silog_err=0.316]


Val loss: silog: 0.3157, sq: 0.5265


Epoch 18/24: 100%|██████████| 3864/3864 [30:22<00:00,  2.12it/s, avg_loss=0.017, loss=0.0183]  


Epoch 18/24, Loss: 0.0170


Val error: 100%|██████████| 1441/1441 [06:23<00:00,  3.76it/s, rse=0.52, silog_err=0.316] 


Val loss: silog: 0.3159, sq: 0.5195


Epoch 19/24: 100%|██████████| 3864/3864 [30:26<00:00,  2.12it/s, avg_loss=0.016, loss=0.00541] 


Epoch 19/24, Loss: 0.0160


Val error: 100%|██████████| 1441/1441 [06:26<00:00,  3.73it/s, rse=0.531, silog_err=0.315]


Val loss: silog: 0.3147, sq: 0.5315


Epoch 20/24: 100%|██████████| 3864/3864 [30:43<00:00,  2.10it/s, avg_loss=0.0153, loss=0.0101] 


Epoch 20/24, Loss: 0.0153


Val error: 100%|██████████| 1441/1441 [06:22<00:00,  3.76it/s, rse=0.514, silog_err=0.311]


Val loss: silog: 0.3105, sq: 0.5136


Epoch 21/24: 100%|██████████| 3864/3864 [30:30<00:00,  2.11it/s, avg_loss=0.0146, loss=0.00576]


Epoch 21/24, Loss: 0.0146


Val error: 100%|██████████| 1441/1441 [06:19<00:00,  3.80it/s, rse=0.498, silog_err=0.309]


Val loss: silog: 0.3092, sq: 0.4977


Epoch 22/24: 100%|██████████| 3864/3864 [30:36<00:00,  2.10it/s, avg_loss=0.014, loss=0.00945] 


Epoch 22/24, Loss: 0.0140


Val error: 100%|██████████| 1441/1441 [06:26<00:00,  3.73it/s, rse=0.498, silog_err=0.308]


Val loss: silog: 0.3076, sq: 0.4984


Epoch 23/24: 100%|██████████| 3864/3864 [30:30<00:00,  2.11it/s, avg_loss=0.0135, loss=0.025]  


Epoch 23/24, Loss: 0.0135


Val error: 100%|██████████| 1441/1441 [06:21<00:00,  3.78it/s, rse=0.49, silog_err=0.305] 


Val loss: silog: 0.3050, sq: 0.4899


Epoch 24/24: 100%|██████████| 3864/3864 [30:21<00:00,  2.12it/s, avg_loss=0.0131, loss=0.0397] 


Epoch 24/24, Loss: 0.0131


Val error: 100%|██████████| 1441/1441 [06:24<00:00,  3.75it/s, rse=0.493, silog_err=0.305]

Val loss: silog: 0.3054, sq: 0.4929





In [10]:
# Write predictions

import cv2
from tqdm import tqdm
import os
from pathlib import Path


def _write_split_predictions(split):
    """Predict a depth map for every image of one split and write it to disk.

    Reads images from ``<workspace>/<split>_images`` and writes each prediction,
    under the same filename, to ``<workspace>/<split>_pred``.

    Args:
        split: Split name used as the directory prefix ('train', 'valid' or 'test').

    Returns:
        Tuple ``(images_dir, predictions_dir)`` of the two directories used.

    Raises:
        OSError: If OpenCV reports that a prediction could not be written.
    """
    images_dir = workspace_path / f'{split}_images'
    predictions_dir = workspace_path / f'{split}_pred'
    # exist_ok=True keeps the cell re-runnable (a bare mkdir() raises
    # FileExistsError on the second run).
    predictions_dir.mkdir(exist_ok=True)
    for filename in tqdm(os.listdir(images_dir), desc=f"Pred {split}", leave=True):
        depth = da_fine_tuner.predict(images_dir / filename)
        # cv2.imwrite signals failure through its return value rather than an
        # exception; fail loudly so later cells never read a missing prediction.
        if not cv2.imwrite(str(predictions_dir / filename), depth):
            raise OSError(f'Failed to write prediction: {predictions_dir / filename}')
    return images_dir, predictions_dir


# The *_pred_path names are reused by later cells, so keep them at notebook scope.
train_img_path, train_pred_path = _write_split_predictions('train')
valid_img_path, valid_pred_path = _write_split_predictions('valid')
test_img_path, test_pred_path = _write_split_predictions('test')

Pred train: 100%|██████████| 3864/3864 [20:03<00:00,  3.21it/s]
Pred valid: 100%|██████████| 1441/1441 [07:30<00:00,  3.20it/s]
Pred test: 100%|██████████| 1423/1423 [07:37<00:00,  3.11it/s]


## Fine-tune predictions for players

In [11]:
from pathlib import Path

In [12]:
from players_fine_tuning.segment import PlayerSegmenter

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [13]:
# Per the recorded output, creating the segmenter downloads the 137M
# yolov8x-seg.pt weights file from the Ultralytics assets release.
segmenter = PlayerSegmenter()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-seg.pt to 'yolov8x-seg.pt'...


100%|██████████| 137M/137M [00:02<00:00, 68.3MB/s] 


In [14]:
train_segments_path = workspace_path / 'train_segments'
val_segments_path = workspace_path / 'valid_segments'
test_segments_path = workspace_path / 'test_segments'

# For every split: make sure the output directory exists, then segment the
# split's images into it. exist_ok=True keeps the cell re-runnable; a bare
# mkdir() raises FileExistsError on the second run.
for split_name, segments_dir in (
    ('train', train_segments_path),
    ('valid', val_segments_path),
    ('test', test_segments_path),
):
    segments_dir.mkdir(exist_ok=True)
    segmenter.segment(workspace_path / f'{split_name}_images', segments_dir)

Process segmentation: 100%|██████████| 3864/3864 [24:49<00:00,  2.59it/s]
Process segmentation: 100%|██████████| 1441/1441 [09:22<00:00,  2.56it/s]
Process segmentation: 100%|██████████| 1423/1423 [09:16<00:00,  2.56it/s]


In [15]:
# Imported under an alias on purpose: `da_fine_tuning.dataset_preparer` also
# exports a class called `DatasetPreparer`, which an earlier cell of this notebook
# imports and uses. A plain import here would silently rebind that name and break
# any re-run of the earlier cell within the same kernel session.
from players_fine_tuning.dataset_preparer import DatasetPreparer as PlayerDatasetPreparer

# Per split, the preparer is given three directories: the player segments, the
# original depth maps, and the depth predictions written by the fine-tuned model.
# The *_segments_path values are already Path objects, so they are passed as-is.
player_ds_preparer = PlayerDatasetPreparer(
    train_segments_path=train_segments_path,
    val_segments_path=val_segments_path,
    test_segments_path=test_segments_path,

    train_orig_depth_path=workspace_path / 'train_depths',
    val_orig_depth_path=workspace_path / 'valid_depths',
    test_orig_depth_path=workspace_path / 'test_depths',

    train_pred_path=train_pred_path,
    val_pred_path=valid_pred_path,
    test_pred_path=test_pred_path,
)

In [16]:
# Per the recorded output, this processes the train, valid and test segment
# directories in turn (3864 / 1441 / 1423 items).
# NOTE(review): presumably builds per-player samples from the original and
# predicted depth maps — confirm in players_fine_tuning.dataset_preparer.
player_ds_preparer.prepare_dataset()

Process train_segments: 100%|██████████| 3864/3864 [04:41<00:00, 13.74it/s]
Process valid_segments: 100%|██████████| 1441/1441 [01:51<00:00, 12.94it/s]
Process test_segments: 100%|██████████| 1423/1423 [01:50<00:00, 12.89it/s]


In [17]:
from pathlib import Path

from players_fine_tuning.player_fine_tuner import PlayerFineTuner

# The player fine-tuner takes the train segments directory first and the
# validation segments directory second.
player_fine_tuner = PlayerFineTuner(
    workspace_path / 'train_segments',
    workspace_path / 'valid_segments',
)

In [18]:
# Train the player model for 10 epochs; every epoch ends with a validation pass
# that prints the validation loss together with the running `self._best_score`.
# In the recorded run the best validation loss is reached at epoch 6 and does not
# improve afterwards. Part of the log (end of epoch 3 through epoch 4) is missing
# because the Jupyter IOPub message rate limit was exceeded.
player_fine_tuner.fine_tune(n_epochs=10)

Epoch 1/10: 100%|██████████| 3389/3389 [03:04<00:00, 18.33it/s, avg_loss=0.000139, loss=0.000114]


Epoch 1/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:47<00:00,  7.10it/s, loss=0.000134]


Val loss: 0.00013426, self._best_score=0.00013425593441028098


Epoch 2/10: 100%|██████████| 3389/3389 [03:02<00:00, 18.58it/s, avg_loss=0.000119, loss=8.4e-5]  


Epoch 2/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:45<00:00,  7.30it/s, loss=0.000127]


Val loss: 0.00012673, self._best_score=0.00012672815675265854


Epoch 3/10:  71%|███████▏  | 2422/3389 [02:09<00:55, 17.45it/s, avg_loss=0.000112, loss=0.000127]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Epoch 5/10: 100%|██████████| 3389/3389 [03:02<00:00, 18.53it/s, avg_loss=0.000102, loss=0.000119]


Epoch 5/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:45<00:00,  7.36it/s, loss=0.000121]


Val loss: 0.00012060, self._best_score=0.00012060074767144536


Epoch 6/10: 100%|██████████| 3389/3389 [02:59<00:00, 18.88it/s, avg_loss=9.8e-5, loss=0.00013]  


Epoch 6/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:45<00:00,  7.26it/s, loss=0.000119]


Val loss: 0.00011936, self._best_score=0.00011935952308791282


Epoch 7/10: 100%|██████████| 3389/3389 [03:02<00:00, 18.60it/s, avg_loss=9.41e-5, loss=7.38e-5] 


Epoch 7/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:49<00:00,  6.81it/s, loss=0.00012] 


Val loss: 0.00011965, self._best_score=0.00011935952308791282


Epoch 8/10: 100%|██████████| 3389/3389 [02:55<00:00, 19.29it/s, avg_loss=8.99e-5, loss=8.17e-5] 


Epoch 8/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:47<00:00,  6.98it/s, loss=0.00012] 


Val loss: 0.00012020, self._best_score=0.00011935952308791282


Epoch 9/10: 100%|██████████| 3389/3389 [02:56<00:00, 19.23it/s, avg_loss=8.66e-5, loss=7.42e-5] 


Epoch 9/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:50<00:00,  6.67it/s, loss=0.000121]


Val loss: 0.00012109, self._best_score=0.00011935952308791282


Epoch 10/10: 100%|██████████| 3389/3389 [03:04<00:00, 18.36it/s, avg_loss=8.27e-5, loss=9.36e-5] 


Epoch 10/10, Loss: 0.0001


Val error: 100%|██████████| 334/334 [00:50<00:00,  6.61it/s, loss=0.000121]


Val loss: 0.00012139, self._best_score=0.00011935952308791282


In [19]:
# Save the player fine-tuner to <workspace>/player_ft.pth.
# NOTE(review): whether this stores the best-validation weights or the weights of
# the final epoch is not visible from this notebook — confirm in PlayerFineTuner.save.
player_fine_tuner.save(workspace_path / 'player_ft.pth')

## Apply fine-tuned player depths

In [20]:
import os
import numpy as np
import torch
import tempfile
from torch.nn import functional as F

from players_fine_tuning.dataset_loader import PlayerDataset

# Depth PNGs are divided by 255 ** 2 when read and multiplied by it again before
# being written back as uint16.
DEPTH_SCALE = 255 ** 2

masks_path = workspace_path / 'test_segments' / 'masks'
player_depths_path = workspace_path / 'test_segments' / 'depths'

results_path = workspace_path / 'results'
# exist_ok=True keeps the cell re-runnable (a bare mkdir() raises FileExistsError).
results_path.mkdir(exist_ok=True)

# Group the player mask files by frame. The first three '_'-separated tokens of a
# mask filename identify the frame; the remaining four are the player's box.
group_by_name = {}
for filename in os.listdir(masks_path):
    name = '_'.join(filename.split('_')[:3])
    group_by_name.setdefault(name, []).append(filename)

for group_name, filenames in tqdm(group_by_name.items(), desc='Predict', leave=True):

    frame_pred_path = workspace_path / 'test_pred' / f'{group_name}.png'
    depth = cv2.imread(str(frame_pred_path), cv2.IMREAD_UNCHANGED)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if depth is None:
        raise FileNotFoundError(f'Could not read predicted depth map: {frame_pred_path}')

    # Min-max normalise the frame prediction to [0, 1].
    depth = depth / DEPTH_SCALE
    depth = depth - depth.min()
    depth_peak = depth.max()
    if depth_peak > 0:  # a constant map would otherwise divide by zero and yield NaNs
        depth = depth / depth_peak

    for filename in filenames:
        mask = np.load(masks_path / filename)
        # Box corners are encoded in the filename after the frame identifier.
        x1, y1, x2, y2 = map(int, filename.replace('.npy', '').split('_')[3:])

        player_depth = PlayerDataset.load_image(player_depths_path / filename, PlayerDataset.SIZE)
        # Add two leading singleton dimensions before handing the image to the model.
        player_depth = player_depth.unsqueeze(0).unsqueeze(0).to('cuda')
        with torch.no_grad():
            pred = player_fine_tuner.predict(player_depth)

        # Resize the model output back to the size of the player's box.
        pred = F.interpolate(pred, (y2 - y1, x2 - x1), mode='bilinear', align_corners=False)

        pred = pred.squeeze(0).squeeze(0).cpu().numpy()
        # Add half of the predicted value to the masked pixels of the box.
        # The slice is a view, so the in-place update lands in `depth`.
        depth[y1:y2, x1:x2][mask] += pred[mask] / 2

    # The per-player update can push values outside [0, 1]. Clip to the uint16
    # range before casting; an unclipped cast would wrap around silently.
    depth = np.clip(np.round(depth * DEPTH_SCALE), 0, np.iinfo(np.uint16).max).astype(np.uint16)

    _, game_id, idx = group_name.split('_')
    filename = f'foot_game_{game_id}_video_1_depth_r_{idx}.png'
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(str(results_path / filename), depth):
        raise OSError(f'Failed to write result: {results_path / filename}')

Predict: 100%|██████████| 1423/1423 [04:17<00:00,  5.53it/s]


## Evaluate on the test set

In [21]:
from utils.misc import compute_scale_and_shift_np

gt_path = workspace_path / 'test_depths'
pred_path = workspace_path / 'results'
mask_path = workspace_path / 'test_masks'

# Running sums of the per-image errors and the number of images evaluated.
mse_err, silog_err, cnt = 0.0, 0.0, 0

for filename in tqdm(os.listdir(gt_path), desc='Final val', leave=True):
    # Paths are converted to str for OpenCV, matching the other cv2 calls in this notebook.
    gt = cv2.imread(str(gt_path / filename), cv2.IMREAD_UNCHANGED) / 255 ** 2
    _, game_id, idx = filename.replace('.png', '').split('_')
    pred_filename = f'foot_game_{game_id}_video_1_depth_r_{idx}.png'
    pred = cv2.imread(str(pred_path / pred_filename), cv2.IMREAD_UNCHANGED) / 255 ** 2
    # The stored mask is inverted before use.
    # NOTE(review): presumably the file marks pixels to exclude — confirm.
    mask = ~np.load(mask_path / filename.replace('.png', '.npy'))
    # Align the prediction to the ground truth with a scale and shift fitted on the masked pixels.
    scale, shift = compute_scale_and_shift_np(pred, gt, mask.astype(np.float32))
    pred = scale * pred + shift
    mse_err += ((pred - gt) ** 2)[mask].mean()
    # NOTE(review): unlike the MSE, the silog term is computed over ALL pixels,
    # not only the masked ones — confirm this is the intended protocol.
    log_err = np.log(pred) - np.log(gt)
    silog_err += np.sqrt(np.mean(log_err ** 2) - np.mean(log_err) ** 2) * 100
    cnt += 1

if cnt == 0:
    raise RuntimeError(f'No ground-truth depth maps found in {gt_path}')

# Average over the images actually evaluated instead of re-listing the directory.
print(f'MSE: {(mse_err / cnt):.8f}, silog: {(silog_err / cnt):.8f}')

Final val: 100%|██████████| 1423/1423 [02:10<00:00, 10.90it/s]

MSE: 0.00000301, silog: 0.25562687



