# Dump visualisation
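Load a training dump and visualise its DINOv3 latents alongside the model predictions: both are projected to RGB with a shared 3-component PCA and written, next to the corresponding video frames, to `comparison.gif`.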

In [None]:
from pathlib import Path
import torch
import numpy as np
from einops import rearrange
from sklearn.decomposition import PCA
import imageio.v2 as imageio
import matplotlib.pyplot as plt

# Pick the dump directory to visualise: the first '*_epoch_*' entry in sorted
# order. Path sorting is lexicographic, so e.g. 'epoch_10' precedes 'epoch_2'.
dump_root = Path('experiments/dumps')
dump_dirs = sorted(dump_root.glob('*_epoch_*'))
if not dump_dirs:
    # Fail with an actionable message instead of an opaque IndexError on [0].
    raise FileNotFoundError(
        f"No '*_epoch_*' dump directories found under {dump_root}"
    )
sample_root = dump_dirs[0]


def _load_cpu(name):
    """Load the object stored as ``name`` in the selected dump onto the CPU.

    ``map_location='cpu'`` keeps the later ``.numpy()`` conversions working
    even when the dump was written from an accelerator device.
    """
    return torch.load(sample_root / name, map_location='cpu')


video = _load_cpu('video.pt').numpy()
context_latents = _load_cpu('context_latents.pt')
target_latents = _load_cpu('target_latents.pt')
prediction = _load_cpu('prediction.pt')

stride = 2
# Move axis 1 to the end; presumably (frames, channels, height, width) ->
# (frames, height, width, channels) — TODO confirm against the dump writer.
video = np.transpose(video, (0, 2, 3, 1))
# Keep every `stride`-th frame along the first axis.
video_stride = video[::stride]

def flatten(x):
    """Turn a ``(d, t, h, w)`` array into a ``(t*h*w, d)`` matrix.

    Each row of the result is the length-``d`` vector found at one
    ``(t, h, w)`` position of ``x``; rows are ordered with ``t`` varying
    slowest and ``w`` fastest.
    """
    pattern = 'd t h w -> (t h w) d'
    return rearrange(x, pattern)

# Pool every per-location feature row from the three tensors so that a single
# PCA basis is shared by context, target and prediction.
all_latents = torch.cat(
    [flatten(context_latents), flatten(target_latents), flatten(prediction)],
    dim=0,
)
# random_state pins the projection when scikit-learn selects a randomised SVD
# solver; without it the colours can change between runs on the same dump.
pca = PCA(n_components=3, random_state=0)
pca.fit(all_latents.numpy())

def to_rgb(x):
    """Project a latent volume onto the fitted PCA basis and scale it to RGB.

    The colour range comes from the projection of ``all_latents`` (every row
    the PCA was fitted on) instead of from ``x`` alone. Normalising each
    tensor by its own min/max would give target and prediction different
    colour scales, so equal colours would not mean equal latents and a
    low-variance prediction would be stretched to full contrast.

    Args:
        x: 4-D tensor accepted by ``flatten``; its last three dimensions are
            read as ``(t, h, w)``.

    Returns:
        Array of shape ``(t, h, w, 3)`` with values in ``[0, 1]``.
    """
    # Shared bounds over the pooled projection. Recomputed per call: it is a
    # single small matrix product and avoids caching state that could go stale
    # if the PCA is refitted.
    pooled = pca.transform(all_latents.numpy())
    lo, hi = pooled.min(), pooled.max()

    flat = pca.transform(flatten(x).numpy())
    # The epsilon guards against a zero range; the clip keeps inputs that were
    # not part of the pooled set (or rounding noise) inside [0, 1].
    flat = np.clip((flat - lo) / (hi - lo + 1e-8), 0.0, 1.0)
    t, h, w = x.shape[1:]
    return flat.reshape(t, h, w, 3)

# Colourise each latent tensor with the shared PCA projection.
context_rgb = to_rgb(context_latents)
target_rgb = to_rgb(target_latents)
pred_rgb = to_rgb(prediction)


def _as_uint8(rgb):
    """Scale an image with values in [0, 1] by 255 and cast it to uint8."""
    return (rgb * 255).astype(np.uint8)


gif_path = sample_root / 'comparison.gif'
# Target step i is paired with strided video frame `context_len + i`.
context_len = context_latents.shape[1]
num_target_steps = target_rgb.shape[0]

frames = []
for step in range(num_target_steps):
    # NOTE(review): concatenating along axis 1 needs the video frame and the
    # latent images to agree on every other axis, and the video is used as
    # loaded (no dtype/range conversion) — confirm against the dump format.
    panels = [
        video_stride[context_len + step],
        _as_uint8(target_rgb[step]),
        _as_uint8(pred_rgb[step]),
    ]
    frames.append(np.concatenate(panels, axis=1))

imageio.mimsave(gif_path, frames, fps=4)
gif_path


In [None]:
from IPython.display import Image

# Show the GIF written by the previous cell inline in the notebook output.
gif_file = sample_root / 'comparison.gif'
Image(filename=str(gif_file))