## JUPYTER MULTIPLIER

In [None]:

######################
import rp

from IPython import get_ipython


# --- Global state to track the currently registered handler ---
# This is the key to preventing multiple hooks from being registered.
# Holds the wrapper callable that set_jupyter_code_hook last registered on
# IPython's 'pre_run_cell' event, or None when no hook is active.
_CURRENT_JUPYTER_HOOK_HANDLER = None

def set_jupyter_code_hook(handler):
    """
    Registers a function to run before a code cell is executed.

    Any hook previously registered through this function is unregistered
    first, so at most one hook is active at a time. The handler is wrapped so
    that it receives only the stripped source text of the cell.

    The handler is invoked at most once per cell, when either:
      * the cell is non-empty and `rp.is_valid_python_syntax` accepts it, or
      * the cell starts with '#WORKER' and the global `WORKER` flag is falsy.
    In the second case, a handler return value of exactly True additionally
    replaces the next input with an empty string.

    Args:
        handler (callable): A function to be called before cell execution.
                            It must accept one argument: the code string.
    """
    global _CURRENT_JUPYTER_HOOK_HANDLER
    ipython_shell = get_ipython()

    # If a handler was previously registered by this framework, unregister it first.
    if _CURRENT_JUPYTER_HOOK_HANDLER:
        try:
            ipython_shell.events.unregister('pre_run_cell', _CURRENT_JUPYTER_HOOK_HANDLER)
        except ValueError:
            # This can happen if the hook was manually removed. It's safe to ignore.
            pass

    def _wrapper_handler(info):
        """The actual function registered with IPython's event system."""
        code_string = info.raw_cell.strip()

        # Track whether the handler already ran for this cell so that a cell
        # matching both conditions below is not handed to it twice.
        handler_called = False
        handler_result = None

        # --- Filter for Python code ---
        # Ignore empty cells, shell commands (like !pip), and cell magics (like %%time).
        if code_string and rp.is_valid_python_syntax(code_string):
            # If it's Python code, call the user's handler with the code string.
            rp.fansi_print("--- RUNNING CODE ON ALL THE PARROTS ---",'yellow bold')
            handler_result = handler(code_string)
            handler_called = True

        # WORKER is a notebook-level flag defined outside this function; it is
        # looked up when a cell runs, not when the hook is registered.
        global WORKER
        if code_string.startswith('#WORKER') and not WORKER:
            # If we're not a WORKER, don't run cells labeled #WORKER at the top.
            rp.fansi_print("--- SKIPPING CELL AS I AM NOT A WORKER ---",'yellow bold')
            if not handler_called:
                handler_result = handler(code_string)
            # The handler returns True to request that the cell be blanked.
            # NOTE(review): confirm that set_next_input from a pre_run_cell
            # callback really prevents the current cell from executing.
            if handler_result is True:
                ipython_shell.set_next_input("", replace=True)

    # Register the new wrapper handler and store a reference to it.
    ipython_shell.events.register('pre_run_cell', _wrapper_handler)
    _CURRENT_JUPYTER_HOOK_HANDLER = _wrapper_handler
    # Not every callable has __name__ (e.g. functools.partial objects).
    handler_name = getattr(handler, '__name__', repr(handler))
    print(f"🦆 Hook Activated: Handler '{handler_name}' is now active.")



def deactivate_jupyter_code_hook():
    """
    Unregisters the currently active cell execution hook, if any.

    Prints a short status message in every case. The stored handler reference
    is cleared both when the hook is removed and when IPython reports that it
    was no longer registered, so a stale reference is not kept around.
    """
    global _CURRENT_JUPYTER_HOOK_HANDLER
    ipython_shell = get_ipython()

    if not _CURRENT_JUPYTER_HOOK_HANDLER:
        print("No active hook was found to deactivate.")
        return

    try:
        ipython_shell.events.unregister('pre_run_cell', _CURRENT_JUPYTER_HOOK_HANDLER)
    except ValueError:
        print("Could not find the registered hook. It may have already been removed.")
        # The reference is stale: forget it so later calls report "no active hook".
        _CURRENT_JUPYTER_HOOK_HANDLER = None
    else:
        print("🔇 Hook Deactivated: Handler was removed.")
        _CURRENT_JUPYTER_HOOK_HANDLER = None

#############################

# Toggle by reordering/commenting these two lines: the last assignment wins,
# so as written LOCAL_ONLY is True and no cell hook is installed below.
LOCAL_ONLY = False
LOCAL_ONLY = True

#DELEGATE ALL CODE FROM THIS NOTEBOOK TO MANY WORKERS SO THEY DUPLICATE IT ALL

#Do we delegate tasks? If so we're the master
DELEGATOR = rp.running_in_ipython()
WORKER = not DELEGATOR

if DELEGATOR:
    import rp.web_evaluator
    
    # Launches 8 workers in a tmux session named 'JupyterParrots'.
    # NOTE(review): this runs even when LOCAL_ONLY is True - confirm that is intended.
    cluster_info = rp.web_evaluator.launch_tmux_delegation_cluster(8,session_name='JupyterParrots',if_exists='replace')
    
    def do_all(code, **vars):
        """Send `code` (with a trailing "pass;" statement appended) to every worker."""
        # NOTE(review): the appended "pass;" presumably stops the last expression's
        # value from being returned - confirm against rp.web_evaluator.
        return cluster_info.delegator.evaluate_all(code + "\npass;", **vars)

    if not LOCAL_ONLY:
        # Mirror every executed cell onto the workers.
        set_jupyter_code_hook(do_all)
    else:
        # No delegation: this process also acts as a worker, so '#WORKER' cells run here.
        WORKER=True

## TRACKING HELPERS

#### Preamble

In [None]:
# FIX GLITCHY SCROLLING
# https://github.com/jupyterlab/jupyterlab/issues/15968
from IPython.display import HTML,display
display(HTML("<style>.jp-WindowedPanel-viewport { contain: layout }</style>"))

import os
# Set before torch is imported below.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK']='1' #For if using cotracker + mac + MPS

import rp
import einops
import numpy as np
import sys
import torch
from icecream import ic
import functools

# Resolve the path two directories up (rp.printed also echoes it) and put it,
# plus its source/gaussblobs subfolder, on sys.path so the import below resolves.
das_root = rp.printed(rp.get_absolute_path('../..'))
sys.path.append(das_root)
sys.path.append(rp.printed(rp.get_absolute_path(f'{das_root}/source/gaussblobs')))
from source.gaussblobs.render_tracks import draw_blobs_videos, col26 as ordered_colors

In [None]:
#WORKER
# NOTE: the '#WORKER' marker must stay the first line of this cell; the cell
# hook checks whether the cell text starts with it.
if rp.currently_running_mac(): 
    device = torch.device('cpu') #Don't use MPS - we're using TapNext because it's a lot better
else:
    device = rp.select_torch_device(reserve=True, prefer_used=True)
    
ic(das_root, device);

# TAPNEXT SETUP FOR XCLOUD
# ON XCLOUD, IF I NEED TO MODIFY IT LATER, USE THIS INSTEAD:
# The tapnet checkout is added to sys.path so that `run_tapnet` can be imported.
sys.path += rp.get_absolute_paths(
    "/home/jupyter/CleanCode/Github/tapnet.git",
)
from run_tapnet import run_tapnet
tapnet_model_dir = "/home/jupyter/CleanCode/Github/tapnet.git/model"
# Rebind run_tapnet with the model directory pre-filled.
run_tapnet = functools.partial(run_tapnet, model_dir=tapnet_model_dir)

In [None]:
# Presumably frame count, height and width of the working clip; the same
# numbers (49, 480, 720) are hard-coded in init_input_video - keep them in sync.
T,H,W=49,480,720

In [None]:
@rp.globalize_locals
def init_input_video():
    """
    Load the video at the global `input_video_path`, normalise it and preview it.

    The clip is resampled to 49 frames, resized and center-cropped to 480x720,
    converted to float images and then to a numpy array. Reads the globals
    `input_video_path`, `prompt` and `TITLE`.

    NOTE(review): `rp.globalize_locals` presumably publishes this function's
    locals (here `input_video`) as globals - other cells read `input_video`
    directly. Do not rename locals without confirming.
    """
    # /Users/burgert/Downloads/MotionEditIPYBundle/MakeItNotBounce.mp4
    input_video = rp.load_video(input_video_path, use_cache=True)
    input_video = rp.resize_list(input_video, 49)
    input_video = rp.resize_images_to_hold(input_video, height=480, width=720)
    input_video = rp.crop_images(input_video, height=480, width=720, origin='center')
    input_video = rp.as_float_images(input_video)
    input_video = rp.as_numpy_array(input_video)
    
    # Echo the run configuration, then show the clip with a coordinate grid.
    ic(input_video_path, prompt, TITLE)
    rp.display_video(gridded_video(input_video))

In [None]:
def gridded_video(input_video):
    """
    Overlay a coordinate grid, frame numbers and a progress bar on a video.

    Grid lines are blended at 50% strength: every 20th index along the second
    and third axes is mixed with the value .5, then every 100th index is mixed
    with the value 1. Each frame is then labelled with its index and a green
    progress bar is added at the top.

    NOTE(review): if rp.as_float_images returns its input unchanged for float
    videos, the grid is painted into the caller's array - confirm.
    """
    frames = rp.as_float_images(input_video)
    strength = .5

    # Minor lines first, then major lines over them (the order matters where
    # they overlap).
    for spacing, shade in ((20, .5), (100, 1)):
        frames[:, ::spacing, :] = rp.blend(shade, frames[:, ::spacing, :], strength)
        frames[:, :, ::spacing] = rp.blend(shade, frames[:, :, ::spacing], strength)

    frame_numbers = range(len(frames))
    frames = rp.labeled_images(frames, frame_numbers, size=30, font='Arial')
    return rp.video_with_progress_bar(frames, bar_color='green', position='top', size=5)

In [None]:
def draw_points_on_video(video, points, colors=None, visible=None):
    """
    Draw one circle per point onto the frames of a video.

    Note: a function with the same name is defined again later in this file,
    and that later definition replaces this one once the cell has run.

    Args:
        video: Iterable of frames.
        points: Sequence of (t, x, y) entries; t selects the frame to draw on.
        colors: One color per point. Defaults to the global `colors`.
        visible: One flag per point; falsy entries are skipped. Defaults to
            all True.

    Returns:
        The frames converted with rp.as_byte_images and stacked with np.stack.
    """
    palette = globals()['colors'] if colors is None else colors
    shown = [True] * len(points) if visible is None else visible

    frames = list(video)
    for (t, x, y), color, is_shown in zip(points, palette, shown):
        if not is_shown:
            continue
        frames[t] = rp.cv_draw_circle(frames[t], x, y, radius=15, rim=3, color=color, copy=False)

    return np.stack(rp.as_byte_images(frames, copy=False))



import numpy as np
import cv2
# Assuming 'rp' is a pre-existing library with drawing utilities 
# and 'colors' is a globally defined list.

def _index_label_color(color):
    """Return black or white, whichever contrasts better with `color`."""
    channels = color
    # Heuristic: all components <= 1.0 means a 0-1 float color; rescale to 0-255.
    if max(color) <= 1.0:
        channels = [int(c * 255) for c in color]

    # Luma weights applied as if the channel order were B, G, R.
    # NOTE(review): other cells validate frames as RGB - confirm the channel
    # order of the colors passed in.
    luminance = 0.114 * channels[0] + 0.587 * channels[1] + 0.299 * channels[2]
    return (0, 0, 0) if luminance > 140 else (255, 255, 255)


def draw_points_on_video(video, points, colors=None, visible=None):
    """
    Draw a circle plus the point's index number for every visible point.

    The index label is centered on the circle and drawn in black or white,
    chosen from the circle color's luminance.

    Args:
        video: Iterable of frames.
        points: Sequence of (t, x, y) entries; t selects the frame to draw on.
        colors (list, optional): One color per point, as 0-255 or 0-1
            components. Defaults to the global `colors`.
        visible (list, optional): One flag per point; falsy entries are
            skipped. Defaults to all True.

    Returns:
        The frames converted with rp.as_byte_images and stacked with np.stack.
    """
    palette = globals()['colors'] if colors is None else colors
    shown = [True] * len(points) if visible is None else visible

    frames = list(video)

    for index, ((t, x, y), color, is_shown) in enumerate(zip(points, palette, shown)):
        if not is_shown:
            continue

        frames[t] = rp.cv_draw_circle(frames[t], x, y, radius=15, rim=3, color=color, copy=False)

        label = str(index)
        font, scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2
        label_color = _index_label_color(color)

        # putText anchors text at its bottom-left corner, so shift by half the
        # text box to center the label on (x, y). The origin must be ints.
        (label_w, label_h), _baseline = cv2.getTextSize(label, font, scale, thickness)
        origin = (int(x - label_w // 2), int(y + label_h // 2))

        # cv2.putText draws into frames[t] in place.
        cv2.putText(
            frames[t],
            label,
            origin,
            font,
            scale,
            label_color,
            thickness,
            lineType=cv2.LINE_AA,
        )

    return np.stack(rp.as_byte_images(frames, copy=False))

@rp.globalize_locals
def init_tracks():
    """
    Track the global `init_points` through the global `input_video` and preview the result.

    CoTracker is always run; TAPNet is then tried and, if it raises, the
    CoTracker result is used as `tracks` / `visibles` instead. The shapes of
    all results are validated and a preview with the tracked points is shown.

    NOTE(review): `rp.globalize_locals` presumably publishes the locals defined
    here (`tracks`, `visibles`, `colors`, `draw_tracks`, `T`, `N`,
    `blob_colors`, ...) as globals - other cells use those names, so do not
    rename them without confirming.
    """
    global init_points
    # colors = 'white red green blue cyan magenta yellow'.split()
    colors = ordered_colors[:len(init_points)]
    # Rebinds the global init_points to a single stacked array.
    init_points = np.stack(init_points)
    
    cotracker_tracks, cotracker_visibles = rp.run_cotracker(
        input_video, 
        device = device,
        queries = init_points,
    )

    try:
        tracks, visibles = run_tapnet(
            input_video, 
            device = device,
            queries = init_points,
        )
    except Exception:
        # NOTE(review): any failure is swallowed here without showing the error.
        print("FAILED TO RUN TAPNET, FALLING BACK TO COTRACKER")
        tracks, visibles = cotracker_tracks, cotracker_visibles

    
    # tracks   = torch.tensor(tracks  ).to(device=device)
    # visibles = torch.tensor(visibles).to(device=device)
    def draw_tracks(input_video, tracks=tracks, visibles=visibles):
        """Draw every track point of every frame onto `input_video` and return the result."""
        track_preview_video = input_video
        
        # T is assigned further down in init_tracks, before draw_tracks is first called.
        for t  in rp.eta(range(T), 'draw_tracks'):
            track = tracks[t]
            visible = visibles[t]
        
            points = [[t, x, y] for x,y in track]
            
            track_preview_video = draw_points_on_video(track_preview_video, points, colors, visible)
    
        return track_preview_video
    # visibles[:]=1
    
    # Check that all tensors agree on the frame count T and track count N, and return both.
    T, N = rp.validate_tensor_shapes(
        tracks             = 'torch: T N XY',
        cotracker_tracks   = 'torch: T N XY',
        visibles           = 'torch: T N',
        cotracker_visibles = 'torch: T N',
        init_points = 'numpy:   N TXY',
        input_video = 'numpy: T H W RGB',
        TXY=3,
        XY=2,
        RGB=3,
        return_dims='T N',
    )
    
    track_preview_video = draw_tracks(input_video)
    
    rp.display_video(gridded_video(track_preview_video))

    # One color per track, taken from the front of the ordered palette.
    num_tracks = tracks.shape[1]
    blob_colors = ordered_colors[:num_tracks]

#### Track Modifiers

In [None]:
def add_drift(tracks, *i, dx=0, dy=0, t_origin=0, do_before=True, do_after=True):
    """
    Adds a linear drift to the tracks at the selected indices i.

    At frame t the selected tracks are shifted by (t - t_origin) * (dx, dy),
    so frame t_origin is left unchanged and the shift grows by (dx, dy) per
    frame in both directions.

    Args:
        tracks: Tensor indexed as [frame, track, xy].
        *i: Indices of the tracks to modify. An index listed twice is
            drifted twice.
        dx, dy: Per-frame drift along x and y.
        t_origin: Frame at which the drift is zero.
        do_before: If False, frames before t_origin are restored from `tracks`.
        do_after: If False, frames from t_origin onward are restored from `tracks`.

    Returns:
        A new tensor; `tracks` itself is not modified.
    """
    new_tracks = tracks + 0

    # Take the frame count from the input rather than from a notebook global,
    # so the function works for any clip length.
    num_frames = tracks.shape[0]
    ts = torch.arange(num_frames).to(tracks.device, tracks.dtype)

    # Drift for each timestep relative to t_origin
    drift_x = (ts - t_origin) * dx
    drift_y = (ts - t_origin) * dy

    for idx in i:
        new_tracks[:, idx, 0] += drift_x
        new_tracks[:, idx, 1] += drift_y

    if not do_before:
        new_tracks[:t_origin] = tracks[:t_origin]

    if not do_after:
        new_tracks[t_origin:] = tracks[t_origin:]

    return new_tracks


def tween(tracks, i, txy0, txy1):
    """
    Overwrite track i between two keyframes with blended positions.

    Each keyframe is either a (t, x, y) triple or a plain int frame index, in
    which case the track's own position at that frame is used. For every frame
    from t0 through t1 inclusive, the position is set to rp.blend of the two
    keyframe positions, weighted by rp.iblend(t, t0, t1).

    Returns a new tensor; `tracks` itself is not modified.
    """
    result = tracks + 0

    def as_keyframe(key):
        # An int names a frame: look up track i's position there.
        if isinstance(key, int):
            return [key, *result[key, i]]
        return key

    t0, x0, y0 = as_keyframe(txy0)
    t1, x1, y1 = as_keyframe(txy1)

    for t in range(t0, t1 + 1):
        weight = rp.iblend(t, t0, t1)
        result[t, i, 0] = rp.blend(x0, x1, weight)
        result[t, i, 1] = rp.blend(y0, y1, weight)

    return result

def zoom_tracks(tracks, *i, t_origin=0, x_origin=None, y_origin=None, d_scale=1.03):
    """
    Scale the selected tracks about a fixed point, growing geometrically in time.

    At frame t the selected tracks are scaled about (x_origin, y_origin) by
    d_scale ** (t - t_origin), so frame t_origin is left unchanged. A missing
    x_origin / y_origin defaults to the mean x / y over all tracks (not only
    the selected ones) at frame t_origin.

    Returns a new tensor; `tracks` itself is not modified.
    """
    import torch

    if x_origin is None:
        x_origin = tracks[t_origin, :, 0].mean().item()
    if y_origin is None:
        y_origin = tracks[t_origin, :, 1].mean().item()

    # Per-frame scale factor relative to t_origin.
    frame_offsets = torch.arange(tracks.shape[0]).to(tracks.device, tracks.dtype) - t_origin
    growth = d_scale ** frame_offsets

    zoomed = tracks + 0
    for idx in i:
        for axis, pivot in ((0, x_origin), (1, y_origin)):
            # Shift to the pivot, scale, shift back.
            zoomed[:, idx, axis] = (zoomed[:, idx, axis] - pivot) * growth + pivot

    return zoomed



def horz_mirror(tracks, *i, t_origin=0, x_origin=None):
    """
    Reflect the x coordinates of the selected tracks about a vertical axis.

    The axis is x = x_origin. When x_origin is None it defaults to the mean x
    over all tracks (not only the selected ones) at frame t_origin.

    Returns a new tensor; `tracks` itself is not modified.
    """
    import torch

    axis_x = tracks[t_origin, :, 0].mean().item() if x_origin is None else x_origin

    mirrored = tracks + 0
    for idx in i:
        # Reflection about axis_x: x -> 2 * axis_x - x
        mirrored[:, idx, 0] = 2 * axis_x - mirrored[:, idx, 0]

    return mirrored


def horz_mirror_origins(tracks, *i, x_origin=None):
    """
    Horizontally mirrors the centroid of the selected tracks, keeping their layout.

    For each frame, the mean x of the selected tracks is reflected about the
    mirror axis, and every selected track is shifted by the resulting
    displacement. The selected tracks therefore move as a rigid group and are
    not individually flipped.

    Args:
        tracks: Tensor indexed as [frame, track, xy].
        *i: Indices of the tracks to move. With no indices, an unchanged copy
            is returned.
        x_origin: x position of the mirror axis. When None, the axis is the
            per-frame mean x over all tracks.

    Returns:
        A new tensor; `tracks` itself is not modified.
    """
    import torch

    new_tracks = tracks + 0

    # Nothing selected: an empty index list would build a float index tensor
    # (rejected by indexing) and an undefined centroid.
    if not i:
        return new_tracks

    if x_origin is None:
        # Per-frame mean x of all tracks is the mirror axis
        x_mirror = tracks[:, :, 0].mean(dim=1)  # Shape: [T]
    else:
        # Use custom x_origin as mirror axis
        x_mirror = torch.full((tracks.shape[0],), x_origin, device=tracks.device, dtype=tracks.dtype)

    # Centroid of the selected points for each frame
    selected_indices = torch.tensor(list(i), device=tracks.device, dtype=torch.long)
    centroid_x = tracks[:, selected_indices, 0].mean(dim=1)  # Shape: [T]

    # Displacement that carries the centroid onto its mirror image
    delta_x = (2 * x_mirror - centroid_x) - centroid_x

    for idx in i:
        new_tracks[:, idx, 0] += delta_x

    return new_tracks




def reverse_tracks(tracks, visibles, *indices):
    """
    Play the selected tracks backwards in time.

    For every index given, both the positions and the visibility flags of that
    track are flipped along the frame axis. An index listed twice is flipped
    twice, i.e. ends up unchanged.

    Returns new (tracks, visibles) tensors; the inputs are not modified.
    """
    flipped_tracks, flipped_visibles = tracks + 0, visibles + 0
    for index in indices:
        for tensor in (flipped_tracks, flipped_visibles):
            tensor[:, index] = tensor[:, index].flip(0)
    return flipped_tracks, flipped_visibles


def speed_tracks(tracks, visibles, *i, factor=1.0, t_origin=0, do_before=True, do_after=True):
    """
    Changes the playback speed of the selected tracks around t_origin.

    Output frame t of a selected track is sampled from the original track at
    time (t - t_origin) * factor + t_origin, clamped to the valid frame range
    and linearly interpolated between neighbouring frames. Hence factor > 1.0
    speeds the motion up, factor < 1.0 slows it down, and frame t_origin is
    left unchanged.

    Args:
        tracks: Tensor indexed as [frame, track, xy].
        visibles: Tensor indexed as [frame, track].
        *i: Indices of the tracks to retime.
        factor: Speed multiplier.
        t_origin: Frame that stays fixed.
        do_before: If False, frames before t_origin are restored from the inputs.
        do_after: If False, frames from t_origin onward are restored from the inputs.

    Returns:
        (new_tracks, new_visibles); the inputs are not modified. Visibility is
        interpolated the same way and counts as visible where the interpolated
        value exceeds 0.5.
    """
    import torch

    new_tracks = tracks + 0
    new_visibles = visibles + 0
    T = tracks.shape[0]

    # Sampling positions are the same for every selected track, so compute
    # them once.
    ts = torch.arange(T, device=tracks.device, dtype=torch.float32)
    sample_ts = torch.clamp((ts - t_origin) * factor + t_origin, 0, T - 1)

    floor_indices = torch.floor(sample_ts).long()
    ceil_indices = torch.clamp(floor_indices + 1, 0, T - 1)
    alpha = sample_ts - floor_indices.float()

    for idx in i:
        original_x = tracks[:, idx, 0]
        original_y = tracks[:, idx, 1]

        new_tracks[:, idx, 0] = (1 - alpha) * original_x[floor_indices] + alpha * original_x[ceil_indices]
        new_tracks[:, idx, 1] = (1 - alpha) * original_y[floor_indices] + alpha * original_y[ceil_indices]

        # Interpolate visibility in floating point and threshold BEFORE
        # storing: writing the fractional value into an integer tensor first
        # would truncate e.g. 0.6 to 0.
        original_vis = visibles[:, idx].float()
        blended_vis = (1 - alpha) * original_vis[floor_indices] + alpha * original_vis[ceil_indices]
        new_visibles[:, idx] = (blended_vis > 0.5).to(new_visibles.dtype)

    if not do_before:
        new_tracks[:t_origin] = tracks[:t_origin]
        new_visibles[:t_origin] = visibles[:t_origin]

    if not do_after:
        new_tracks[t_origin:] = tracks[t_origin:]
        new_visibles[t_origin:] = visibles[t_origin:]

    return new_tracks, new_visibles


    
def resize_list_linterp(values, length: int):
    """Resize a tensor along its first dimension using linear interpolation.

    The first and last entries are preserved (align_corners=True) and the
    entries in between are linearly interpolated. Works on non-contiguous
    inputs.

    Args:
        values: Input tensor of any dimensionality >=1
        length: Target length for first dimension

    Returns:
        Tensor with shape (length, *values.shape[1:]) and the dtype of
        `values`. Special cases: length 0 gives an empty tensor of that shape,
        length 1 gives the LAST entry of `values`, and a single-entry input is
        repeated `length` times.
    """
    import torch

    if length == 0:
        # Empty slice keeps the trailing shape, dtype and device.
        return values[:0]
    if length == 1:
        return values[-1:]
    if len(values) == 1:
        return values.repeat([length] + [1] * (values.ndim - 1))

    # Flatten all dimensions except the first, interpolate, then reshape back.
    # reshape (rather than view) also accepts non-contiguous inputs.
    original_shape = values.shape
    values_flat = values.reshape(original_shape[0], -1)

    # interpolate wants (batch, channels, length): treat each flattened
    # feature as a channel and the first dimension as the length.
    interpolated = torch.nn.functional.interpolate(
        values_flat.t().unsqueeze(0).float(),
        size=length,
        mode='linear',
        align_corners=True
    ).squeeze(0).t().to(values.dtype)

    return interpolated.reshape(length, *original_shape[1:])


#### Drawing

In [None]:
def draw_arrows(video, old_tracks, new_tracks, old_visibles, new_visibles):
    """
    Draw, on every frame, a line from each old track position to its new one.

    The inputs are iterated in lockstep, one entry per frame. A line is marked
    visible only where the product of the old and new visibility is truthy.
    Colors come from the global `blob_colors`; tip_length=0 is passed to
    rp.cv_draw_arrows.

    Returns the drawn frames passed through rp.as_numpy_array.
    """
    def annotate(frame, before, after, before_vis, after_vis):
        # Transposing the per-frame points splits them into x and y sequences.
        x_from, y_from = before.T
        x_to, y_to = after.T
        both_visible = [b * a for b, a in zip(before_vis, after_vis)]
        return rp.cv_draw_arrows(frame, x_from, y_from, x_to, y_to, color=blob_colors, tip_length=0, visible=both_visible)

    per_frame = zip(video, old_tracks, new_tracks, old_visibles, new_visibles)
    return rp.as_numpy_array([annotate(*frame_args) for frame_args in per_frame])
        

In [None]:
@rp.globalize_locals
def display_tracks_diff(_secondary_video=None):
    """
    Show the original tracks next to the edited tracks as one side-by-side video.

    Left ("Counterfactual Input"): the global `input_video` with the global
    `tracks` / `visibles` drawn on it. Right ("Target"): `_secondary_video`
    (defaulting to `input_video`) with `new_tracks` / `new_visibles` drawn on
    it, plus lines from each old position to its new one. A '>' glyph is
    placed between the two, and the video path and prompt are added as a
    caption at the bottom.

    NOTE(review): `rp.globalize_locals` presumably publishes this function's
    locals (`before_preview`, `after_preview`, ...) as globals - confirm
    before renaming any of them.
    """
    _secondary_video = _secondary_video if _secondary_video is not None else input_video
    before_preview, after_preview = rp.labeled_videos(
        [
            gridded_video(draw_tracks(input_video, tracks, visibles)),
            # gridded_video(draw_tracks(input_video, new_tracks, new_visibles)),
            gridded_video(draw_arrows(draw_tracks(_secondary_video, new_tracks, new_visibles), tracks, new_tracks, visibles, new_visibles))
        ],
        ["Counterfactual Input", "Target"],
        size=30,
        font="Futura",
        
    )
    
    rp.display_video(
        rp.labeled_images(
        rp.horizontally_concatenated_videos(
            before_preview,
            # Single-image "video": a bordered '>' glyph scaled to one third, used as a separator.
            [rp.cv_resize_image(rp.bordered_image_solid_color((rp.pil_text_to_image('\n>',font='Menlo',size=200)),color='black',thickness=30,),1/3)],
            after_preview,
            origin='center',
        ),
            f'{input_video_path}\n{prompt}',font='Futura',position='bottom',size=20,size_by_lines=True,text_color='light blue',
        )
    )

In [None]:
def tracks_to_xyzv(tracks, visibles):
    """
    Pack tracks and visibility into one tensor: T N XY -> T N XYZV.

    The z channel is filled with ones (depth is not used here). The shapes of
    the inputs and of the packed result are validated before returning.
    """
    # Constant depth of 1 for every point.
    depth = torch.ones_like(tracks[:, :, 0])

    packed = einops.pack([tracks, depth, visibles], 'T N *')[0]

    rp.validate_tensor_shapes(
        tracks      = 'torch: T N XY',
        visibles    = 'torch: T N',
        z           = depth,
        tracks_xyzv = packed,
        XYZV=4,
        XY=2,
    ) if False else rp.validate_tensor_shapes(
        tracks      = 'torch: T N XY',
        visibles    = 'torch: T N',
        z           = 'torch: T N',
        tracks_xyzv = 'torch: T N XYZV',
        XYZV=4,
        XY=2,
    )

    return packed

@rp.globalize_locals
def init_blob_videos():
    """
    Render blob videos for the edited and the original tracks and preview them.

    Shows the chosen and the full color palettes, calls `draw_blobs_videos`
    with `new_tracks` / `new_visibles` as the video tracks and `tracks` /
    `visibles` as the counter tracks (both over the global `input_video`),
    drops everything past the first three channels, checks shapes and the
    [0, 1] value range, and displays both results tiled together.

    NOTE(review): `rp.globalize_locals` presumably publishes the locals
    (`blobs_videos`, `video_gaussians`, `counter_video_gaussians`) as globals -
    confirm before renaming any of them.
    """
    global visibles, new_visibles
    
    rp.display_image(rp.labeled_image(rp.tiled_images([rp.uniform_float_color_image(64,64,color) for color in blob_colors   ],border_thickness=0), "Chosen Blob Colors", size=30))
    rp.display_image(rp.labeled_image(rp.tiled_images([rp.uniform_float_color_image(64,64,color) for color in ordered_colors],border_thickness=0), "All Blob Colors", size=30))
                                                                
    blobs_videos = draw_blobs_videos(
        video         = rp.as_torch_video(input_video),
        counter_video = rp.as_torch_video(input_video),
        video_tracks         = tracks_to_xyzv(new_tracks, new_visibles),
        counter_tracks = tracks_to_xyzv(tracks, visibles),
        sigma = 10,
        blob_colors = blob_colors,
    )
    
    video_gaussians, counter_video_gaussians = rp.destructure(blobs_videos)
    
    #RGBA -> RGB
    video_gaussians         = video_gaussians        [:,:3]
    counter_video_gaussians = counter_video_gaussians[:,:3]
    
    rp.validate_tensor_shapes(
        video_gaussians         = 'torch: T 3 H W',
        counter_video_gaussians = 'torch: T 3 H W',
        input_video             = 'numpy: T H W 3',
    )
    
    #In range [0, 1]
    # NOTE(review): these asserts are skipped when Python runs with -O.
    assert 0<=counter_video_gaussians.min()<=counter_video_gaussians.max()<=1
    assert 0<=video_gaussians        .min()<=video_gaussians        .max()<=1
    
    rp.display_video(rp.tiled_videos(rp.as_numpy_videos([video_gaussians, counter_video_gaussians]),border_color='white',border_thickness=1))



def uncamera(points: torch.Tensor, ref_points: torch.Tensor, origin_frame: int = 0) -> torch.Tensor:
    """
    Stabilise points by mapping every frame onto the origin frame.

    For each frame other than `origin_frame`, a transform taking that frame's
    reference points onto the origin frame's reference points is estimated and
    applied to that frame's `points`. The estimator depends on how many
    reference points there are:
    - 4+ points: cv2.findHomography (method 0) + cv2.perspectiveTransform
    - 3 points: cv2.getAffineTransform + cv2.transform
    - 2 points: cv2.estimateAffinePartial2D + cv2.transform
    - 1 point: plain translation by the reference point's displacement

    When an estimate is unavailable (None, or a NaN translation), the most
    recent good transform is reused, starting from the identity / zero shift.

    Args:
        points (torch.Tensor): Shape [T, N, 2], the points to stabilise.
        ref_points (torch.Tensor): Shape [T, M, 2] with M >= 1, the reference
            points describing the camera motion.
        origin_frame (int, optional): Frame all others are aligned to; its
            points are copied through unchanged. Defaults to 0.

    Returns:
        torch.Tensor: Shape [T, N, 2], on the device and with the dtype of `points`.

    Raises:
        TypeError: If either input is not a torch.Tensor.
        ValueError: If the shapes are wrong, the frame counts differ, or there
            are no reference points.
    """
    # --- Input validation ---
    if not (isinstance(points, torch.Tensor) and isinstance(ref_points, torch.Tensor)):
        raise TypeError("Inputs 'points' and 'ref_points' must be PyTorch tensors.")
    shapes_ok = (
        points.dim() == 3
        and ref_points.dim() == 3
        and points.shape[2] == 2
        and ref_points.shape[2] == 2
    )
    if not shapes_ok:
        raise ValueError("Inputs must be of shape [T, N, 2] or [T, M, 2].")
    if points.shape[0] != ref_points.shape[0]:
        raise ValueError("Both 'points' and 'ref_points' must have the same number of frames (T).")

    num_ref_points = ref_points.shape[1]
    if num_ref_points < 1:
        raise ValueError(
            f"At least 1 reference point is required to calculate a transformation, "
            f"but got {num_ref_points} points."
        )

    # --- Setup ---
    device, dtype = points.device, points.dtype
    stabilized = torch.empty_like(points)
    refs_np = ref_points.cpu().numpy().astype(np.float32)
    anchor_refs = refs_np[origin_frame]

    # Last successfully estimated transforms, reused when an estimate fails.
    last_H = np.identity(3, dtype=np.float32)
    last_A = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype=np.float32)
    last_T = np.zeros(2, dtype=np.float32)

    # --- Per-frame alignment ---
    for t in range(points.shape[0]):
        if t == origin_frame:
            stabilized[t] = points[t].clone()
            continue

        frame_refs = refs_np[t]
        frame_points = points[t].cpu().numpy().astype(np.float32)
        aligned = None

        if num_ref_points >= 4:
            H, _ = cv2.findHomography(frame_refs, anchor_refs, 0)
            if H is None:
                H = last_H
            else:
                last_H = H
            aligned = cv2.perspectiveTransform(frame_points.reshape(1, -1, 2), H)

        elif num_ref_points in (2, 3):
            if num_ref_points == 3:
                A = cv2.getAffineTransform(frame_refs, anchor_refs)
            else:
                A, _ = cv2.estimateAffinePartial2D(frame_refs, anchor_refs)
            if A is None:
                A = last_A
            else:
                last_A = A
            aligned = cv2.transform(frame_points.reshape(1, -1, 2), A)

        elif num_ref_points == 1:
            # Shift by how far the single reference point moved.
            shift = anchor_refs[0] - frame_refs[0]
            if np.isnan(shift).any():
                shift = last_T
            else:
                last_T = shift
            aligned = frame_points + shift

        # --- Store result ---
        if aligned is None:
            # Defensive fallback: pass the frame through untouched.
            stabilized[t] = points[t].clone()
        else:
            # The cv2 transforms return an extra leading axis; flatten to [N, 2].
            stabilized[t] = torch.from_numpy(aligned.reshape(-1, 2)).to(device, dtype)

    return stabilized

def recamera(points: torch.Tensor, ref_points: torch.Tensor, origin_frame: int = 0) -> torch.Tensor:
    """
    Re-applies camera motion to points, acting as the inverse of uncamera.

    This function transforms points from the coordinate system of a stable
    origin frame back into the coordinate system of each original frame. For
    every frame ``t`` it estimates the transform that maps the reference
    points of ``origin_frame`` onto the reference points of frame ``t`` (the
    source and destination roles are swapped relative to `uncamera`) and
    applies it to ``points[t]``. The transform model is chosen from the number
    of reference points M:

        M >= 4   homography            (cv2.findHomography, method 0)
        M == 3   affine                (cv2.getAffineTransform)
        M == 2   4-DOF partial affine  (cv2.estimateAffinePartial2D)
        M == 1   pure translation

    When an estimate is unavailable for a frame (OpenCV returns ``None``, or
    the single-point translation contains NaN) the most recent usable
    transform of the same kind is reused; before any success that is the
    identity / zero translation.

    Args:
        points (torch.Tensor): A tensor of shape [T, N, 2] containing the
            stabilized points (presumably from `uncamera`). May live on any
            device, may require grad and may be a dtype NumPy cannot
            represent (e.g. bfloat16); the math is done on a detached
            float32 CPU copy.
        ref_points (torch.Tensor): A tensor of shape [T, M, 2] containing
            the same reference points used for the original `uncamera` call.
        origin_frame (int, optional): The index of the frame that was used as
            the stable reference in the `uncamera` call. Negative values
            count from the end, like normal Python indexing. Defaults to 0.

    Returns:
        torch.Tensor: A new tensor of shape [T, N, 2], on the device and with
            the dtype of `points`, with the original camera motion
            re-introduced. The origin frame is copied through unchanged.

    Raises:
        TypeError: If either input is not a torch.Tensor.
        ValueError: If the shapes are not [T, N, 2] / [T, M, 2], the frame
            counts differ, or there are no reference points.
        IndexError: If `origin_frame` is outside [-T, T).
    """
    # --- Input Validation and Setup ---
    if not isinstance(points, torch.Tensor) or not isinstance(ref_points, torch.Tensor):
        raise TypeError("Inputs 'points' and 'ref_points' must be PyTorch tensors.")
    if points.dim() != 3 or ref_points.dim() != 3 or points.shape[2] != 2 or ref_points.shape[2] != 2:
        raise ValueError("Inputs must be of shape [T, N, 2] or [T, M, 2].")
    if points.shape[0] != ref_points.shape[0]:
        raise ValueError("Both 'points' and 'ref_points' must have the same number of frames (T).")

    num_ref_points = ref_points.shape[1]
    if num_ref_points < 1:
        raise ValueError(
            f"At least 1 reference point is required to calculate a transformation, "
            f"but got {num_ref_points} points."
        )

    num_frames = points.shape[0]
    if not -num_frames <= origin_frame < num_frames:
        raise IndexError(
            f"origin_frame {origin_frame} is out of range for {num_frames} frame(s)."
        )
    # Normalise negative indices so the `t == origin_frame` pass-through below
    # actually matches the origin frame.
    if origin_frame < 0:
        origin_frame += num_frames

    device, dtype = points.device, points.dtype
    new_points = torch.empty_like(points)

    # Convert both inputs to host float32 arrays ONCE (instead of once per
    # frame). detach() allows tensors that require grad, and the float32 cast
    # happens in torch so dtypes without a NumPy equivalent still work.
    ref_points_np = np.ascontiguousarray(ref_points.detach().cpu().to(torch.float32).numpy())
    points_np = np.ascontiguousarray(points.detach().cpu().to(torch.float32).numpy())

    # The source for the inverse transform is the origin frame
    ref_points_src_inv = ref_points_np[origin_frame]

    # --- Fallback transformations for failed calculations ---
    last_H = np.identity(3, dtype=np.float32)
    last_A = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype=np.float32)
    last_T = np.zeros(2, dtype=np.float32)

    # --- Main Loop: Iterate Through Frames ---
    for t in range(num_frames):
        if t == origin_frame:
            # The origin frame is its own reference: nothing to re-apply.
            new_points[t] = points[t].clone()
            continue

        # The destination for the inverse transform is the current frame t
        ref_points_dst_inv = ref_points_np[t]
        points_t_np = points_np[t]

        # --- Find and Apply Inverse Transformation ---
        # We find the inverse by swapping the source and destination points
        if num_ref_points >= 4:
            H, _ = cv2.findHomography(ref_points_src_inv, ref_points_dst_inv, 0)
            if H is None:
                H = last_H
            else:
                last_H = H
            # OpenCV expects points shaped (1, N, 2) for these calls.
            transformed_points_t_np = cv2.perspectiveTransform(points_t_np.reshape(1, -1, 2), H)

        elif num_ref_points == 3:
            A = cv2.getAffineTransform(ref_points_src_inv, ref_points_dst_inv)
            if A is None:
                A = last_A
            else:
                last_A = A
            transformed_points_t_np = cv2.transform(points_t_np.reshape(1, -1, 2), A)

        elif num_ref_points == 2:
            A, _ = cv2.estimateAffinePartial2D(ref_points_src_inv, ref_points_dst_inv)
            if A is None:
                A = last_A
            else:
                last_A = A
            transformed_points_t_np = cv2.transform(points_t_np.reshape(1, -1, 2), A)

        else:
            # Exactly one reference point (validated above): the inverse
            # translation is the displacement from origin to current frame.
            translation = ref_points_dst_inv[0] - ref_points_src_inv[0]
            if np.isnan(translation).any():
                translation = last_T
            else:
                last_T = translation
            transformed_points_t_np = points_t_np + translation

        # --- Store Result ---
        # Matrix-based transforms return (1, N, 2); flatten back to (N, 2).
        new_points[t] = torch.from_numpy(transformed_points_t_np.reshape(-1, 2)).to(device, dtype)

    return new_points



## DIFFUSION HELPERS

In [None]:
@rp.globalize_locals
def init_sample():
    """
    Create a fresh output folder for this run and bundle the model inputs into `sample`.

    Reads the notebook globals `input_video_path`, `input_video`,
    `video_gaussians`, `counter_video_gaussians` and `prompt`.

    Other functions in this file (`init_mp4_files`, `save_diffusion_results`)
    read `sample`, `result_folder` and `result_title` without defining them, so
    the locals assigned here are presumably published as globals by
    `rp.globalize_locals` -- do not rename them.
    """
    # File name of the input video without its extension; used to label outputs.
    result_title = rp.get_file_name(input_video_path, include_file_extension=False)
    
    # One output folder per run. get_unique_copy_path presumably returns a path
    # that does not exist yet so earlier runs are not overwritten -- TODO confirm.
    result_folder = f'untracked/gaussblob_tests/{result_title}'
    result_folder = rp.get_unique_copy_path(result_folder)
    rp.make_directory(result_folder)
    
    ic(result_folder)
    
    # `x * 2 - 1` is undone in init_mp4_files with `/ 2 + 0.5`; presumably the
    # inputs are in [0, 1] and are rescaled to [-1, 1] here -- TODO confirm.
    # Note that `frames` and `counter_video_frames` are both built from the same
    # `input_video`.
    sample = rp.as_easydict(
        frames               = rp.as_torch_video(input_video) * 2 - 1, #This one doesn't matter
        counter_video_frames = rp.as_torch_video(input_video) * 2 - 1, 
        tracking_frames         = video_gaussians             * 2 - 1,
        counter_tracking_frames = counter_video_gaussians     * 2 - 1,
        prompt = prompt,
    )
    
    #SWAP
    # sample.frames         , sample.counter_video_frames    = sample.counter_video_frames   , sample.frames         
    # sample.tracking_frames, sample.counter_tracking_frames = sample.counter_tracking_frames, sample.tracking_frames

In [None]:
@rp.globalize_locals
def init_mp4_files():
    """
    Preview the four videos held in `sample` and write each one to an mp4 in `result_folder`.

    Reads the globals `sample` and `result_folder` (assigned in `init_sample`).
    The locals assigned here (`frames`, `tracking_frames`,
    `counter_tracking_frames`, `counter_video_frames`, the four `*_path`
    variables and `prompt`) are read by `do_diffusion` / `save_diffusion_results`
    without being defined there, so they are presumably published as globals by
    `rp.globalize_locals` -- do not rename them.
    """
    # `/ 2 + 0.5` reverses the `* 2 - 1` scaling applied in init_sample.
    rp.display_video(sample.frames                  / 2 + 0.5)
    rp.display_video(sample.tracking_frames         / 2 + 0.5)
    rp.display_video(sample.counter_tracking_frames / 2 + 0.5)
    rp.display_video(sample.counter_video_frames    / 2 + 0.5)
    
    frames                  = rp.as_numpy_images(sample.frames                  / 2 + 0.5)
    tracking_frames         = rp.as_numpy_images(sample.tracking_frames         / 2 + 0.5)
    counter_tracking_frames = rp.as_numpy_images(sample.counter_tracking_frames / 2 + 0.5)
    counter_video_frames    = rp.as_numpy_images(sample.counter_video_frames    / 2 + 0.5)
    
    # The file names are relative, so the working directory is switched to
    # result_folder for the duration of the writes.
    # NOTE(review): the return values are later handed to rp.get_absolute_path
    # from a different working directory (see do_diffusion) -- confirm that
    # save_video_mp4 returns absolute paths.
    with rp.SetCurrentDirectoryTemporarily(result_folder):
        frames_path                  = rp.save_video_mp4(frames                 , "frames.mp4",                  framerate=20, video_bitrate="max", show_progress=False)
        tracking_frames_path         = rp.save_video_mp4(tracking_frames        , "tracking_frames.mp4",         framerate=20, video_bitrate="max", show_progress=False)
        counter_tracking_frames_path = rp.save_video_mp4(counter_tracking_frames, "counter_tracking_frames.mp4", framerate=20, video_bitrate="max", show_progress=False)
        counter_video_frames_path    = rp.save_video_mp4(counter_video_frames   , "counter_video_frames.mp4",    framerate=20, video_bitrate="max", show_progress=False)
    
    prompt = sample.prompt
    
    # Debug print of everything the diffusion step will consume.
    ic(
        prompt                      ,
        frames_path                 ,
        tracking_frames_path        ,
        counter_tracking_frames_path,
        counter_video_frames_path   ,
    )

In [None]:
@rp.globalize_locals
def do_diffusion():
    """
    Load (once per kernel) the tracking-conditioned CogVideoX pipeline and run it on the current sample.

    Steps, in order:
      1. Imports and device selection.
      2. Definition of helpers (`update_to_latest_checkpoint`, `get_maps`,
         `load_video_first_frame`, `image_form`, `run_pipe`).
      3. Settings: choice of `checkpoint_root` and environment switches.
      4. Setup: checkpoint sync, pipeline construction if `pipe` is not yet a
         global, dtype/device placement and eval mode.
      5. `output_video = run_pipe()`.

    Reads names that are not defined in this function and must already exist
    as globals: `das_root`, `prompt`, `frames_path`, `tracking_frames_path`,
    `counter_tracking_frames_path`, `counter_video_frames_path`,
    `guidance_scale`, `num_inference_steps`, `latent_conditioning_dropout`.

    `save_diffusion_results` reads `video` and `checkpoint_root` without
    defining them, so the locals of this function (and of the nested,
    likewise-decorated `run_pipe`) are presumably published as globals by
    `rp.globalize_locals` -- do not rename them.
    """
    global pipe, device
    
    ##########################
    # IMPORTS
    ##########################
    
    import sys
    import os
    # NOTE(review): shlex and cached_property are not used anywhere in this
    # function body; they may only be here to end up in the global namespace.
    import shlex
    
    from functools import cached_property
    
    import models.cogvideox_tracking as cogtrack
    import rp
    import torch
    from icecream import ic
    
    import numpy as np
    
    # Makes `syncutil` importable. NOTE(review): this appends to sys.path on
    # every call, so repeated runs accumulate duplicate entries.
    sys.path += rp.get_absolute_paths(
        [
            "~/CleanCode/Management",
            # "~/CleanCode/Github/DiffusionAsShader",
            # "~/CleanCode/Datasets/Vids/Raw_Feb28",
            # "~/CleanCode/Github/CogvideX-Interpolation-Mar23:MotionPrompting",
            # "~/CleanCode/Github/CogvideX-Interpolation-Feb13:Inpainting",
        ]
    )
    
    import syncutil
    
    device = rp.select_torch_device(prefer_used=True, reserve=True)
    
    ##########################
    # FUNCTIONS
    ##########################
    
    # Scratch model folder that from_pretrained() loads from, and its
    # transformer subfolder which gets swapped for the chosen checkpoint.
    CKPT_folder = rp.path_join(das_root,'diffusion_shader_model_CKPT')
    CKPT_transformer_folder = rp.path_join(CKPT_folder, 'transformer')
    
    def update_to_latest_checkpoint():
        """
        Rebuild `CKPT_folder` from the base model and swap in the transformer of `checkpoint_root`.

        Deletes `CKPT_folder`, recreates it as a hard-link copy (`cp -al`) of
        the base `diffusion_shader_model` folder, then replaces its
        `transformer` subfolder with hard links to
        `<checkpoint_root>/transformer`.
        """
    
        # if not rp.folder_exists(CKPT_folder):
        rp.r._run_sys_command(f'rm -rf {CKPT_folder}')
        rp.r._run_sys_command(
            f'cp -al /home/jupyter/CleanCode/Github/DiffusionAsShader/diffusion_shader_model {CKPT_folder}'
            # f'cp -al /home/jupyter/CleanCode/Huggingface/CogVideoX-5b {CKPT_folder}'
        )
        
        # NOTE(review): this uses checkpoint_root directly, not the value
        # returned by syncutil.sync_checkpoint_folder() in the SETUP section.
        latest_transformer_checkpoint = checkpoint_root
        
        rp.fansi_print(f'Using checkpoint: {latest_transformer_checkpoint}','bold green undercurl')
    
        # NOTE(review): here the command is passed as separate arguments, while
        # the calls above pass one shell string -- confirm _run_sys_command
        # accepts both forms.
        rp.r._run_sys_command(
            "rm",
            "-rf",
            CKPT_transformer_folder,
        )
        rp.make_hardlink(
            rp.path_join(latest_transformer_checkpoint, "transformer"),
            CKPT_transformer_folder,
            recursive=True,
        )
        
    
    def get_maps(video_path):
        """
        Load a video file and encode it into VAE latents of shape [B, F, C, H, W].

        Frames are read with diffusers' `load_video`, scaled to [0, 1],
        moved to `device` as bfloat16, normalised to [-1, 1], encoded with
        `pipe.vae`, sampled from the resulting latent distribution and
        multiplied by the VAE's `scaling_factor`.
        """
        # Only load_video is used below; the other two names are unused here.
        from diffusers.utils import export_to_video, load_image, load_video
    
        video_path=rp.get_absolute_path(video_path)
    
        maps = load_video(video_path)
        # Convert list of PIL Images to tensor [T, C, H, W]
        maps = torch.stack(
            [
                torch.from_numpy(np.array(frame)).permute(2, 0, 1).float() / 255.0
                for frame in maps
            ]
        )
        maps = maps.to(device=device, dtype=torch.bfloat16)
    
        print(f"Encoding tracking maps from {video_path}")
        maps = maps.unsqueeze(0)  # [B, T, C, H, W]
        maps = maps.permute(0, 2, 1, 3, 4)  # [B, C, T, H, W]
        
        maps = maps * 2 - 1 #Normalize from [0,1] to [-1, 1]
        
        with torch.no_grad():
            latent_dist = pipe.vae.encode(maps).latent_dist
            # sample() draws from the latent distribution (not its mode/mean).
            maps = latent_dist.sample() * pipe.vae.config.scaling_factor
            maps = maps.permute(0, 2, 1, 3, 4)  # [B, F, C, H, W]
        
        return maps
    
    def load_video_first_frame(video_path):
        """Return the first frame of the video at `video_path`, converted by `image_form`."""
        return image_form(next(rp.load_video_stream(rp.get_absolute_path(video_path))))
    
    def image_form(image):
        """Convert `image` to RGB and then to a PIL image (via rp helpers)."""
        image=rp.as_rgb_image(image)
        return rp.as_pil_image(image)
    
    # The defaults below are evaluated when this `def` runs, i.e. on every
    # do_diffusion() call, so they capture the globals' values at that moment.
    @rp.globalize_locals
    def run_pipe(
        prompt                    = prompt                      ,
        video_path                = frames_path                 ,
        tracking_map_path         = tracking_frames_path        ,
        counter_tracking_map_path = counter_tracking_frames_path,
        counter_video_map_path    = counter_video_frames_path   ,
    ):
        """
        Run `pipe` once and return the generated video with a settings caption.

        Args:
            prompt: Text prompt passed to the pipeline.
            video_path: Video whose first frame is used as the "image" input.
            tracking_map_path: Video used for "tracking_image" (first frame)
                and "tracking_maps" (VAE latents of the whole video).
            counter_tracking_map_path: Same, for the "counter_tracking_*" inputs.
            counter_video_map_path: Same, for the "counter_video_*" inputs.

        Also reads `guidance_scale`, `num_inference_steps` and
        `latent_conditioning_dropout` from the enclosing/global scope.

        Returns:
            The labelled output frames, converted with `rp.as_numpy_array`.
        """
        ic(
            prompt,
            video_path,
            tracking_map_path,
            counter_tracking_map_path,
            counter_video_map_path,
        )
    
        # prompt = ''
        # fansi_print("LOOK MA NO PROMPT",'blue')
    
        pipeline_args = {
            "prompt"                 : prompt,
            "image"                  : load_video_first_frame(video_path),
            "tracking_image"         : load_video_first_frame(tracking_map_path),
            "counter_tracking_image" : load_video_first_frame(counter_tracking_map_path),
            "counter_video_image"    : load_video_first_frame(counter_video_map_path),
            "tracking_maps"          : get_maps(tracking_map_path),
            "counter_tracking_maps"  : get_maps(counter_tracking_map_path),
            "counter_video_maps"     : get_maps(counter_video_map_path),
            "negative_prompt"        : "The video is not of a high quality, it has a low resolution. Watermark present in each frame. The background is solid. Strange body and strange trajectory. Distortion.",
            "height"              : 480,
            "width"               : 720,
            "num_frames"          : 49,
            "use_dynamic_cfg"     : True,
            "guidance_scale"      : guidance_scale, #3 if rp.random_chance() else 6,
            "num_inference_steps" : num_inference_steps,
        }
    
        # Show only the scalar settings; the images/latents are left out.
        rp.display_dict(rp.gather(pipeline_args,'prompt negative_prompt height width num_frames use_dynamic_cfg guidance_scale num_inference_steps'.split(), as_dict=True))
    
        # dict `|=` needs Python 3.9+.
        pipeline_args |= dict(          
            use_image_conditioning=True,
            # latent_conditioning_dropout=[0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
            latent_conditioning_dropout=latent_conditioning_dropout,#[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], #Weird, not as good actually...
            # latent_conditioning_dropout=[1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1], #Sparse...25%
    
            # use_image_conditioning=False,
            # latent_conditioning_dropout=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], #Weird, not as good actually...
            # latent_conditioning_dropout=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
            # latent_conditioning_dropout=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            #latent_conditioning_dropout=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        )
    
        # easydict gives attribute access (pipeline_args.guidance_scale) below.
        pipeline_args = rp.as_easydict(pipeline_args)
    
        with torch.no_grad():
            results=pipe(**pipeline_args)
        
        # First (only) item of the batch.
        video=results.frames[0]
        video=rp.as_numpy_images(video)
        # Caption every frame with the prompt (first 50 chars) and key settings;
        # the dropout list is printed as a string of digits.
        video = rp.labeled_images(
            video,
            f"PROMPT={repr(prompt[:50])}\nCFG={pipeline_args.guidance_scale} DYN-CFG={pipeline_args.use_dynamic_cfg} STEPS={pipeline_args.num_inference_steps} {''.join(map(str,pipeline_args['latent_conditioning_dropout']))}",
            size=-25,
            background_color="translucent dark blue",
            size_by_lines=False,
        )
    
        video = rp.as_numpy_array(video)
        
        return video
    
    ##########################
    # SETTINGS
    ##########################
    
    # History of checkpoints tried. Several assignments below are still active;
    # they simply overwrite each other, so only the LAST uncommented
    # `checkpoint_root = ...` line takes effect.
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans2500100000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-4500'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-1100'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-6000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_WithDropout_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-3000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_WithDropout_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-9200'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_WithDropout_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-14700'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_WithDropout_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-29000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_WithDropout_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-29000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_RandomSpeed_WithDropout_2500_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-29000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_BetterAug_WithDropout_50kSamp_T2V_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-9000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DiffusionAsShader/ckpts/your_ckpt_path/CounterChans_BetterAug_WithDropout_50kSamp_T2V_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-9000'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterChans_FIXED_DATASET_BetterAug_WithDropout_50kSamp_T2V_from_scratch_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-3500'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterChans_FIXED_DATASET_BetterAug_WithDropout_50kSamp_T2V_from_scratch_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-4500'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterChans_FIXED_DATASET_BetterAug_WithDropout_50kSamp_T2V_from_scratch_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-13500'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterChans_FIXED_DATASET_BetterAug_WithDropout_50kSamp_T2V_from_scratch_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-23000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterBlobs_WithSingleframe_ManyColors_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-2500'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterBlobs_WithSingleframe_ManyColors_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-2500'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterBlobs_SingleFrameONLY_ManyColors_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-500'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/gauss_blobs_track2pointONLY10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-1500'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterBlobs_WithSingleframe_ManyColors_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-3500'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterChans_FIXED_DATASET_BetterAug_WithDropout_50kSamp_T2V_from_scratch_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-23000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterChans_FIXED_DATASET_BetterAug_WithDropout_50kSamp_T2V_from_scratch_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-27000'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterBlobs_WithSingleframe_ManyColors_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-4000'
    checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterChans_FIXED_DATASET_BetterAug_WithDropout_50kSamp_T2V_from_scratch_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-30000'
    # checkpoint_root = '/home/jupyter/CleanCode/Github/DaS_Trees/gauss_blobs/ckpts/your_ckpt_path/CounterBlobs_WithSingleframe_ManyColors_10000000__optimizer_adamw__lr-schedule_cosine_with_restarts__learning-rate_1e-4/checkpoint-8000'#Overfit?
    
    
    checkpoint_title = rp.get_folder_name(checkpoint_root)
    
    USE_T2V=True
    # USE_T2V=False
    
    # NOTE(review): these environment variables are presumably read by the
    # `models.cogvideox_tracking` code. They are set AFTER that module is
    # imported above -- confirm they are read at pipeline construction time
    # rather than at import time.
    if USE_T2V:
        os.environ['T2V_TRANSFORMER_CHECKPOINT'] = "/home/jupyter/CleanCode/Huggingface/CogVideoX-5b/transformer"
    
    NO_CONTROLNET=False
    if NO_CONTROLNET:
        os.environ['DISABLE_CONTROLNET'] = "True"
    
    
    ##########################
    # SETUP
    ##########################
        
    # NOTE(review): the return value is not used again inside this function
    # (update_to_latest_checkpoint assigns its own local of the same name).
    latest_transformer_checkpoint = syncutil.sync_checkpoint_folder(checkpoint_root)
    
    # rp.set_current_directory('/home/jupyter/CleanCode/Github/DiffusionAsShader')
    # if not rp.file_exists('source/datasets/youtube/DaS/Vanilla/prompt.txt'):
    #     rp.r._run_sys_command('python source/datasets/youtube/DaS/Vanilla/make_columns.py')
    # if not rp.folder_exists('diffusion_shader_model_CKPT'):
    #     rp.make_hardlink('diffusion_shader_model','diffusion_shader_model_CKPT',recursive=True)
    
    # The pipeline is built only when no global `pipe` exists yet. Changing
    # checkpoint_root afterwards has no effect until `pipe` is deleted from the
    # globals (e.g. `del pipe`) or the kernel is restarted.
    if "pipe" not in globals():
        print("INITIALIZING PIPE")
        update_to_latest_checkpoint()
        pipe = cogtrack.CogVideoXImageToVideoPipelineTracking.from_pretrained(
            CKPT_folder,
        )
    
    pipe.to(dtype=torch.bfloat16)
    pipe.to(device)
    #pipe.enable_sequential_cpu_offload(device=device)
    pipe.vae.enable_slicing()
    pipe.vae.enable_tiling()
    # Inference only: put every sub-model in eval mode.
    pipe.transformer.eval()
    pipe.text_encoder.eval()
    pipe.vae.eval()
    
    ##########################
    # MAIN
    ##########################
    
    output_video = run_pipe()

In [None]:
def overlay_tracks(frames, track_frames):
    """
    Alpha-composite `track_frames` over `frames`.

    The opacity of each track pixel is its largest channel value, so black
    track pixels leave the underlying frame untouched and brighter pixels
    progressively replace it.

    Args:
        frames: Background video, validated as a numpy array of shape T H W 3.
        track_frames: Foreground video, validated as a numpy array of shape T H W 3.

    Returns:
        The blended video, `alpha * track_frames + (1 - alpha) * frames`.
    """
    # The spec strings are keyed by argument name; keep the names in sync with
    # the parameters of this function.
    rp.validate_tensor_shapes(
        frames="numpy: T H W 3",
        track_frames="numpy: T H W 3",
    )

    # keepdims leaves a trailing singleton axis so the opacity broadcasts over
    # the three colour channels.
    opacity = track_frames.max(axis=-1, keepdims=True)
    foreground = opacity * track_frames
    background = (1 - opacity) * frames
    return foreground + background


def text_symbol(x):
    """Render the text `x` as a large white-on-black image via `rp.pil_text_to_image`."""
    render_options = dict(
        font="DejaVuSerif",
        size=200,
        color="white",
        background_color="black",
    )
    return rp.pil_text_to_image(x, **render_options)

@rp.globalize_locals
def save_diffusion_results():
    """
    Build a side-by-side "input -> output" preview video, display it and save it as an mp4.

    Reads the globals `video`, `counter_video_frames`,
    `counter_tracking_frames`, `tracking_frames`, `checkpoint_root`, `prompt`,
    `TITLE` and `result_title`, and rebinds the global `video` to an
    RGB-only numpy array.

    The preview is written under 'untracked/inferblobs_outputs/' with a name
    made of the checkpoint folder name and `result_title`.
    """
    global video
    
    # NOTE(review): only arrow_image is used by the active code below;
    # approx_image appears only in the commented-out ground-truth panel and
    # plus_image is not referenced at all in this function.
    arrow_image = text_symbol("→")
    plus_image = text_symbol("+")
    approx_image = text_symbol("≈")
    
    # Keep the first three channels only (drops an alpha channel if present).
    video = rp.as_numpy_array(video)[:, :, :, :3]
    
    # Left panel: input video with its tracks; right panel: generated video
    # with the edited tracks; a single-frame arrow image sits between them.
    preview_video = rp.horizontally_concatenated_videos(
        rp.labeled_images(
            overlay_tracks(counter_video_frames, counter_tracking_frames),
            "Counterfactual Input",
            font='Arial',
            size=20,
        ),
        [arrow_image],
        rp.labeled_images(overlay_tracks(video, tracking_frames), "Diffusion Output", size=20,         font='Arial'),
    
        ##THESE DONT HAVE GROUND TRUTH
        # [approx_image],
        # rp.labeled_images(overlay_tracks(frames, tracking_frames), "Ground Truth"),
        
        origin="center",
    )
    
    # Caption with the checkpoint path and prompt, then the run title on top.
    preview_video = rp.labeled_images(preview_video, rp.line_join(checkpoint_root,prompt), size_by_lines=True)
    preview_video = rp.labeled_images(preview_video, TITLE, size=30,position='top',text_color='green yellow', font='Arial')
    
    rp.display_video(preview_video)
    
    # get_unique_copy_path presumably picks a name that does not exist yet so
    # earlier previews are kept -- TODO confirm.
    preview_video_path = 'untracked/inferblobs_outputs/'+rp.get_folder_name(checkpoint_root)+'__'+result_title+'.mp4'
    preview_video_path = rp.get_unique_copy_path(preview_video_path)
    
    
    
    rp.make_parent_directory(preview_video_path)
    # Saved at 30 fps (the intermediate mp4s in init_mp4_files use 20 fps).
    rp.save_video_mp4(preview_video, preview_video_path, framerate=30, show_progress=False)
    
    
    ic(preview_video_path);

## EDIT VIDEOS

In [None]:
#WORKER
# NOTE: keep '#WORKER' as the very first line of this cell -- the pre-run hook
# defined at the top of this file tests `code_string.startswith('#WORKER')`.
#
# Experiment "Move the car faster forward": track points on MoveTheCar.mp4,
# edit the tracks of the three car points, then run the diffusion pipeline on
# the edited tracks. Every name assigned here is a notebook global that the
# init_* / do_diffusion / save_diffusion_results helpers read.

# Time-derived seed (0..9999); it is embedded in TITLE so a run can be reproduced.
seed = rp.millis()%10000 ; rp.seed_all(seed)

input_video_path = "MoveTheCar.mp4"
prompt = 'A minivan with a bunch of colorful baloons is driving through a dusty desert highway with power pylons in the top right of the screen'

TITLE = f"[Seed {seed}] Move the car faster forward"

init_input_video()

# Sampling settings read by run_pipe inside do_diffusion.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

##############################

# Seed points for the tracker, one [T, X, Y] triple per track. The trailing
# comments give the track index, display colour and what the point sits on.
#In TXY form
init_points = [
    [0, 580, 300], #0 white ::  Car
    [0, 680, 260], #1 red ::  Balloons
    [0, 650, 365], #2 green ::  License Plate
    [32, 600, 60], #3 blue ::  Road 1
    # [0, 343, 185], #3 blue ::  Road 1
    [27, 100, 200], #4 cyan ::  Left of road
    [24, 600, 200], #5 magenta ::  Right dirt
    [48, 400, 250], #6 yellow ::  
    # [0, 100, 100], #7 gray ::  
    # [0, 150, 150], #7 dark blue ::  
    # [0, 200, 200], #7 dark green ::  
    # [0, 250, 250], #7 dark red ::  
    # [0, 300, 300], #7 dark cyan ::  
    # [0, 350, 350], #7 dark magenta ::  
    # [0, 400, 400], #7 dark yellow ::  
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

# Presumably defines `tracks` and `visibles` from init_points -- they are read
# below without being assigned in this cell.
init_tracks()

##############################

# `+ 0` makes copies so the edits below leave `tracks` / `visibles` intact;
# the originals are still needed for draw_arrows further down.
new_tracks = tracks + 0
new_visibles = visibles + 0
# new_tracks = add_drift(new_tracks,  1, 3, dx=30, dy=0, t_origin=30, do_before=False)
# new_tracks = add_drift(new_tracks,  2, 5, 6, dx=0, dy=-5, t_origin=25)
# new_tracks = add_drift(new_tracks, 5 , dx=-7, dy=20, t_origin=19, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=-5, dy=0, t_origin=11, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=5, dy=0, t_origin=38, do_before=False, do_after=True)
# new_tracks = add_drift(new_tracks, 2 , dx=0, dy=5, t_origin=38, do_before=False, do_after=True)
# From index 10 on along the first axis, tracks 0-2 (the car points) hold
# their index-10 value (assumes axis 0 is time -- TODO confirm).
new_tracks[10:,:3] = new_tracks[10:11,:3] #Freeze pos
new_tracks = add_drift(new_tracks, 0,1,2 , dx=-5, dy=-4, t_origin=0, do_before=False, do_after=True)
# Force the three car tracks to be visible everywhere.
new_visibles[:,:3]=1
new_tracks = zoom_tracks(new_tracks, 0, 1, 2, d_scale = .99, t_origin = 10)

# for track_num in len(init_points):
    

# new_tracks = horz_mirror_tracks(new_tracks, 0, 1, 2, x_origin=450)

# new_visibles[35:45,1]=0
# new_tracks, new_visibles = reverse_tracks(new_tracks, new_visibles, 0,4)

display_tracks_diff()

##############################

# Preview: edited tracks drawn on the input video, with arrows built from the
# original and the edited tracks.
rp.display_video(gridded_video(draw_arrows(draw_tracks(input_video, new_tracks, new_visibles), tracks, new_tracks, visibles, new_visibles)))

init_blob_videos()

##############################

# Pipeline: bundle inputs -> write mp4s -> run the model -> save the preview.
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# NOTE: keep '#WORKER' as the very first line of this cell -- the pre-run hook
# defined at the top of this file tests `code_string.startswith('#WORKER')`.
#
# Experiment: a single car track is frozen, drifted, zoomed and mirrored,
# then the diffusion pipeline is run on the edited tracks. Every name assigned
# here is a notebook global read by the helper functions.
TITLE = "Move the car to the other lane and make it go faster"

input_video_path = "MoveTheCar.mp4"
prompt = 'A minivan with a bunch of colorful baloons is driving through a dusty desert highway with power pylons in the top right of the screen'

# Sampling settings read by run_pipe inside do_diffusion.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=10

# Time-derived seed (0..9999), prefixed to TITLE so a run can be reproduced.
seed = rp.millis()%10000 ; rp.seed_all(seed)
TITLE = f"[Seed {seed}] {TITLE}"

init_input_video()

##############################

# Seed points for the tracker, one [T, X, Y] triple per track.
# NOTE: the "#N colour" labels were written for the 7-point list. With two
# entries commented out, the active entries are tracks 0..4 in list order
# (Car=0, Road 1=1, Left of road=2, Right dirt=3, last point=4).
#In TXY form
init_points = [
    [0, 580, 300], #0 white ::  Car
    # [0, 680, 260], #1 red ::  Balloons
    # [0, 650, 365], #2 green ::  License Plate
    [32, 600, 60], #3 blue ::  Road 1
    # [0, 343, 185], #3 blue ::  Road 1
    [27, 100, 200], #4 cyan ::  Left of road
    [24, 600, 200], #5 magenta ::  Right dirt
    [48, 400, 250], #6 yellow ::  
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

# Presumably defines `tracks` and `visibles` from init_points -- they are read
# below without being assigned in this cell.
init_tracks()

##############################

# `+ 0` makes copies so the edits below leave `tracks` / `visibles` intact;
# the originals are still needed for draw_arrows further down.
new_tracks = tracks + 0
new_visibles = visibles + 0
# new_tracks = add_drift(new_tracks,  1, 3, dx=30, dy=0, t_origin=30, do_before=False)
# new_tracks = add_drift(new_tracks,  2, 5, 6, dx=0, dy=-5, t_origin=25)
# new_tracks = add_drift(new_tracks, 5 , dx=-7, dy=20, t_origin=19, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=-5, dy=0, t_origin=11, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=5, dy=0, t_origin=38, do_before=False, do_after=True)
# new_tracks = add_drift(new_tracks, 2 , dx=0, dy=5, t_origin=38, do_before=False, do_after=True)
# From index 10 on along the first axis, track 0 (the car) holds its index-10
# value (assumes axis 0 is time -- TODO confirm).
new_tracks[10:,:1] = new_tracks[10:11,:1] #Freeze pos
new_tracks = add_drift(new_tracks, 0 , dx=-5, dy=-4, t_origin=0, do_before=False, do_after=True)
# Force the car track to be visible everywhere.
new_visibles[:,:1]=1
new_tracks = zoom_tracks(new_tracks, 0, d_scale = .985, t_origin = 10)

new_tracks = add_drift(new_tracks, 0 , dx=3.4, dy=0, t_origin=0, do_before=False, do_after=True)
# NOTE(review): this cell calls `horz_mirror`, another cell in this file calls
# `horz_mirror_origins`, and a commented-out line elsewhere uses
# `horz_mirror_tracks` -- confirm which of these helpers actually exist.
new_tracks = horz_mirror(new_tracks, 0, x_origin=450)
# Track 1 is the "Road 1" point in this cell's shortened list.
new_visibles[:,1]=1
# new_visibles[35:45,1]=0
# new_tracks, new_visibles = reverse_tracks(new_tracks, new_visibles, 0,4)

display_tracks_diff()

##############################

# Preview: edited tracks drawn on the input video, with arrows built from the
# original and the edited tracks.
rp.display_video(gridded_video(draw_arrows(draw_tracks(input_video, new_tracks, new_visibles), tracks, new_tracks, visibles, new_visibles)))

init_blob_videos()

##############################

# Pipeline: bundle inputs -> write mp4s -> run the model -> save the preview.
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# NOTE: keep '#WORKER' as the very first line of this cell -- the pre-run hook
# defined at the top of this file tests `code_string.startswith('#WORKER')`.
#
# Experiment: the three car tracks (0, 1, 2) are frozen, drifted, zoomed and
# mirrored, then the diffusion pipeline is run on the edited tracks. Every name
# assigned here is a notebook global read by the helper functions.
TITLE = "Mirror the car to the other lane and make it go faster"

input_video_path = "MoveTheCar.mp4"
prompt = 'A minivan with a bunch of colorful baloons is driving through a dusty desert highway with power pylons in the top right of the screen'

# Sampling settings read by run_pipe inside do_diffusion.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=50

# Time-derived seed (0..9999), prefixed to TITLE so a run can be reproduced.
seed = rp.millis()%10000 ; rp.seed_all(seed)
TITLE = f"[Seed {seed}] {TITLE}"

init_input_video()

##############################

# Seed points for the tracker, one [T, X, Y] triple per track. The trailing
# comments give the track index, display colour and what the point sits on.
#In TXY form
init_points = [
    [0, 580, 300], #0 white ::  Car
    [0, 680, 260], #1 red ::  Balloons
    [0, 650, 365], #2 green ::  License Plate
    [32, 600, 60], #3 blue ::  Road 1
    # [0, 343, 185], #3 blue ::  Road 1
    [27, 100, 200], #4 cyan ::  Left of road
    [24, 600, 200], #5 magenta ::  Right dirt
    [48, 400, 250], #6 yellow ::  
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

# Presumably defines `tracks` and `visibles` from init_points -- they are read
# below without being assigned in this cell.
init_tracks()

##############################

# `+ 0` makes copies so the edits below leave `tracks` / `visibles` intact;
# the originals are still needed for draw_arrows further down.
new_tracks = tracks + 0
new_visibles = visibles + 0
# new_tracks = add_drift(new_tracks,  1, 3, dx=30, dy=0, t_origin=30, do_before=False)
# new_tracks = add_drift(new_tracks,  2, 5, 6, dx=0, dy=-5, t_origin=25)
# new_tracks = add_drift(new_tracks, 5 , dx=-7, dy=20, t_origin=19, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=-5, dy=0, t_origin=11, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=5, dy=0, t_origin=38, do_before=False, do_after=True)
# new_tracks = add_drift(new_tracks, 2 , dx=0, dy=5, t_origin=38, do_before=False, do_after=True)
# From index 10 on along the first axis, tracks 0-2 (the car points) hold
# their index-10 value (assumes axis 0 is time -- TODO confirm).
new_tracks[10:,:3] = new_tracks[10:11,:3] #Freeze pos
new_tracks = add_drift(new_tracks, 0,1,2 , dx=-5, dy=-4, t_origin=0, do_before=False, do_after=True)
# Force the three car tracks to be visible everywhere.
new_visibles[:,:3]=1
new_tracks = zoom_tracks(new_tracks, 0, 1, 2, d_scale = .985, t_origin = 10)

new_tracks = add_drift(new_tracks, 0,1,2 , dx=3.4, dy=0, t_origin=0, do_before=False, do_after=True)
# NOTE(review): this cell calls `horz_mirror_origins`, another cell in this
# file calls `horz_mirror`, and a commented-out line elsewhere uses
# `horz_mirror_tracks` -- confirm which of these helpers actually exist.
new_tracks = horz_mirror_origins(new_tracks, 0, 1, 2, x_origin=450)
# Track 3 is the "Road 1" point.
new_visibles[:,3]=1
# new_visibles[35:45,1]=0
# new_tracks, new_visibles = reverse_tracks(new_tracks, new_visibles, 0,4)

display_tracks_diff()

##############################

# Preview: edited tracks drawn on the input video, with arrows built from the
# original and the edited tracks.
rp.display_video(gridded_video(draw_arrows(draw_tracks(input_video, new_tracks, new_visibles), tracks, new_tracks, visibles, new_visibles)))

init_blob_videos()

##############################

# Pipeline: bundle inputs -> write mp4s -> run the model -> save the preview.
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# Experiment cell: configure the run, pick query points, edit the point tracks,
# then run the diffusion pipeline. Keep '#WORKER' as the very first line of the
# cell: the pre-run hook recognises worker cells with startswith('#WORKER').
TITLE = "Hot Air Baloons: Swap all three and make them rise"

input_video_path = "MakeTheBaloonsMove.mp4"
prompt = 'Several hot air baloons rise through a beautiful grassy serene valley'

# Sampling settings. Presumably read as notebook globals by the pipeline
# helpers called below - TODO confirm against their definitions.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=50

init_input_video()

##############################

# Query points, one per track, in TXY form: [frame index, x, y].
# The trailing comment gives the track index, its display colour and what it sits on.
init_points = [
    [0, 230, 130], #0 white ::  Top balloon
    [0, 550, 400], #1 red ::  Blue balloon
    [36, 100, 350], #2 green ::  Small left balloon
    [8, 600, 230], #3 blue ::  Road 1
    [46, 100, 260], #4 cyan ::  Left of road
    [22, 500, 280], #5 magenta ::  Right dirt
    [0, 115, 360], #6 yellow ::  (unlabelled)
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

# Time-based seed; it is written into TITLE so the seed used for this run is recorded.
seed = rp.millis()%10000 ; rp.seed_all(seed)
TITLE = f"[Seed {seed}] {TITLE}"

init_tracks()

##############################

# '+ 0' produces new objects, so the edits below do not touch tracks / visibles.
new_tracks = tracks + 0
new_visibles = visibles + 0
# Earlier experiments, kept disabled for reference:
# new_tracks = add_drift(new_tracks,  1, 3, dx=30, dy=0, t_origin=30, do_before=False)
# new_tracks = add_drift(new_tracks,  2, 5, 6, dx=0, dy=-5, t_origin=25)
# new_tracks = add_drift(new_tracks, 5 , dx=-7, dy=20, t_origin=19, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=-5, dy=0, t_origin=11, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=5, dy=0, t_origin=38, do_before=False, do_after=True)
# new_tracks = add_drift(new_tracks, 2 , dx=0, dy=5, t_origin=38, do_before=False, do_after=True)
# new_tracks[10:,:3] = new_tracks[10:11,:3] #Freeze pos
# Drift the three balloon tracks (0, 1, 2) with dy=-2 starting at frame 0.
# NOTE(review): presumably dy<0 is "up" in image coordinates and the drift is per frame - confirm in add_drift.
new_tracks = add_drift(new_tracks, 0,1,2 , dx=0, dy=-2, t_origin=0, do_before=False, do_after=True)
# new_tracks = speed_tracks(new_tracks, 3,4,5,6, factor=.5)
# Cyclic swap of the balloon tracks: track 0 takes track 1's path, 1 takes 2's
# and 2 takes 0's. List indexing on the right-hand side yields a copy, so the
# assignment does not read values it has already overwritten.
new_tracks[:,[0,1,2]]=new_tracks[:,[1,2,0]]
# new_visibles[:,:3]=1
# new_tracks = zoom_tracks(new_tracks, 0, 1, 2, d_scale = .985, t_origin = 10)

# new_tracks = add_drift(new_tracks, 0,1,2 , dx=3.4, dy=0, t_origin=0, do_before=False, do_after=True)
# new_tracks = horz_mirror(new_tracks, 0, 1, 2, x_origin=450)
# new_visibles[:,3]=1
# new_visibles[35:45,1]=0
# new_tracks, new_visibles = reverse_tracks(new_tracks, new_visibles, 0,4)

display_tracks_diff()

##############################

# Preview: edited tracks drawn on the input video, with arrows built from the original and edited tracks.
rp.display_video(gridded_video(draw_arrows(draw_tracks(input_video, new_tracks, new_visibles), tracks, new_tracks, visibles, new_visibles)))

init_blob_videos()

##############################

# Pipeline stages, run in this order.
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# Experiment cell (same layout as the other balloon cell): configure, pick
# query points, edit tracks, run the pipeline. '#WORKER' must stay the first
# line because the pre-run hook matches it with startswith('#WORKER').
TITLE = "Hot Air Baloons: Slow camera, make baloons rise"

input_video_path = "MakeTheBaloonsMove.mp4"
prompt = 'Several hot air baloons rise through a beautiful grassy serene valley'

# Sampling settings; presumably consumed as globals by the pipeline helpers - TODO confirm.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=50

init_input_video()

##############################

# Query points, one per track, in TXY form: [frame index, x, y].
init_points = [
    [0, 230, 130], #0 white ::  Top balloon
    [0, 550, 400], #1 red ::  Blue balloon
    [36, 100, 350], #2 green ::  Small left balloon
    [8, 600, 230], #3 blue ::  Road 1
    [46, 100, 260], #4 cyan ::  Left of road
    [22, 500, 280], #5 magenta ::  Right dirt
    [0, 115, 360], #6 yellow ::  (unlabelled)
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

# Time-based seed, recorded in TITLE.
seed = rp.millis()%10000 ; rp.seed_all(seed)
TITLE = f"[Seed {seed}] {TITLE}"

init_tracks()

##############################

# Copies of the tracker output; the originals are still passed to draw_arrows below.
new_tracks = tracks + 0
new_visibles = visibles + 0
# Earlier experiments, kept disabled for reference:
# new_tracks = add_drift(new_tracks,  1, 3, dx=30, dy=0, t_origin=30, do_before=False)
# new_tracks = add_drift(new_tracks,  2, 5, 6, dx=0, dy=-5, t_origin=25)
# new_tracks = add_drift(new_tracks, 5 , dx=-7, dy=20, t_origin=19, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=-5, dy=0, t_origin=11, do_before=False)
# new_tracks = add_drift(new_tracks, 2 , dx=5, dy=0, t_origin=38, do_before=False, do_after=True)
# new_tracks = add_drift(new_tracks, 2 , dx=0, dy=5, t_origin=38, do_before=False, do_after=True)
# new_tracks[10:,:3] = new_tracks[10:11,:3] #Freeze pos
# Balloon tracks (0, 1, 2): drift with dy=-4 from frame 0.
new_tracks = add_drift(new_tracks, 0,1,2 , dx=0, dy=-4, t_origin=0, do_before=False, do_after=True)
# Background tracks (3..6): retimed with factor=.5, which also returns updated visibility.
# NOTE(review): presumably factor<1 means slower motion ("slow camera") - confirm in speed_tracks.
new_tracks, new_visibles = speed_tracks(new_tracks,new_visibles, 3,4,5,6, factor=.5)
# new_visibles[:,:3]=1
# new_tracks = zoom_tracks(new_tracks, 0, 1, 2, d_scale = .985, t_origin = 10)

# new_tracks = add_drift(new_tracks, 0,1,2 , dx=3.4, dy=0, t_origin=0, do_before=False, do_after=True)
# new_tracks = horz_mirror(new_tracks, 0, 1, 2, x_origin=450)
# new_visibles[:,3]=1
# new_visibles[35:45,1]=0
# new_tracks, new_visibles = reverse_tracks(new_tracks, new_visibles, 0,4)

display_tracks_diff()

##############################

# Preview of the edited tracks, with arrows built from the original and edited tracks.
rp.display_video(gridded_video(draw_arrows(draw_tracks(input_video, new_tracks, new_visibles), tracks, new_tracks, visibles, new_visibles)))

init_blob_videos()

##############################

# Pipeline stages, run in this order.
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# Experiment cell: bend the ball's tracks so it goes through the hoop.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
TITLE = "Sora Basketball: The ball goes into the hoop"

input_video_path = "MakeItNotBounce.mp4"
# NOTE: this short prompt is overwritten by the long prompt on the next statement.
prompt = 'A basketball gets thrown into a basketball hoop with a nice swish. The basketball goes through the hoop.'
prompt = '''A sleek basketball arcs gracefully through the air, its orange surface gleaming under the gymnasium lights, as it approaches the hoop with precision. The ball makes a perfect swish, slicing through the net with a satisfying sound, the net fluttering gently in its wake. The moment is captured in slow motion, highlighting the ball's trajectory and the seamless integration of its path into the hoop, emphasizing the skill and finesse of the throw. The scene shows a vibrant outdoor playground under a clear blue sky. At the center of the image is a basketball hoop viewed from below, giving a dynamic perspective. In the background, there's a playground structure with an orange slide and climbing frame, all enclosed in a fenced area. '''

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

# The time-based seed is immediately replaced by a pinned value; comment out
# the second assignment to go back to a fresh seed per run.
SEED = rp.millis() % 9999
SEED = 6303
rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

# Query points, one per track, in TXY form: [frame index, x, y].
init_points = [
    [0, 100, 100], #0 white ::  Tree
    [0, 420, 290], #1 red ::  Left of rim
    # [0, 420, 290], #1 red ::  Left of rim
    [0, 600, 200], #2 green ::  Backstop
    [39, 400, 100], #3 blue ::  Ball 2
    [43, 700, 380], #4 cyan ::  Hidden Behind Backstop
    [43, 360, 90], #5 magenta ::  Basketball 2
    [22, 400, 400], #6 yellow ::  Playground area
    # [0, 100, 400], #6 yellow ::  Playground area
]
rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

# Copies of the tracker output; edits go to the new_* variables unless noted.
new_tracks = tracks + 0
new_visibles = visibles + 0
# new_tracks = add_drift(new_tracks,  1, 3, dx=30, dy=0, t_origin=30, do_before=False)
# new_tracks = add_drift(new_tracks,  2, 5, 6, dx=0, dy=-5, t_origin=25)
# Stack of drifts on the two ball tracks (3 and 5). Each call feeds the result
# of the previous one, so the order of these lines matters.
new_tracks = add_drift(new_tracks, 3,5 , dx=-4, dy=10, t_origin=19, do_before=False)
new_tracks = add_drift(new_tracks, 3,5 , dx=0, dy=20, t_origin=19, do_before=False)
new_tracks = add_drift(new_tracks, 3,5 , dx=0, dy=-13, t_origin=23, do_before=False)
new_tracks = add_drift(new_tracks, 3,5 , dx=0, dy=-9, t_origin=33, do_before=False)
# new_tracks = add_drift(new_tracks, 0,1,2,3,4,5,6 , dx=0, dy=-5, t_origin=15, do_before=False)
new_tracks = add_drift(new_tracks, 3,5 , dx=2, dy=0, t_origin=0, do_before=False)
new_tracks = add_drift(new_tracks, 3,5 , dx=-2, dy=0, t_origin=22, do_before=False)
# Hide track 1 (left of rim) on frames 18..31 and track 6 (playground) from frame 23 on.
new_visibles[18:32,1]=0 
new_visibles[23:,6]=0 
# Force the ball tracks visible on every frame. The first two lines edit the
# ORIGINAL visibles in place (not the copy), so anything that reads visibles
# after this point sees the change as well.
visibles[:,3]=1
visibles[:,5]=1
new_visibles[:,3]=1
new_visibles[:,5]=1
# new_tracks, new_visibles = reverse_tracks(new_tracks, new_visibles, 0,4)
rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Pipeline stages, run in this order, followed by a preview of the result.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
rp.display_video(output_video, framerate=15)

In [None]:
#WORKER
# Experiment cell: make the motorcycle chase the red car.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
TITLE = "Motorcycle Chase: The motorcycle chases the car"

input_video_path = "MakeMotorcycleChaseCar.mp4"
# NOTE: the two identical one-line prompts below are dead assignments; only the
# long triple-quoted prompt that follows them is in effect.
prompt = 'A morotcycle chases a red car on a busy highway, with the camera panning backwards with other cars surrounding it'
prompt = 'A morotcycle chases a red car on a busy highway, with the camera panning backwards with other cars surrounding it'
prompt = '''A sleek black motorcycle, engine roaring, weaves through dense traffic on a bustling highway, hot on the heels of a vibrant red sports car. The camera, mounted on a trailing vehicle, pans backward, capturing the intense chase. Surrounding cars, a mix of sedans and trucks in various colors, blur past, emphasizing the high speed. The rider, clad in a dark leather jacket and helmet, expertly navigates the tight gaps, while the red car ahead zips through the congestion. The scene is a thrilling blend of motion and urgency, with the highway's median and guardrails flashing by, underscoring the perilous pursuit.'''

# Sampling settings; presumably consumed as globals by the pipeline helpers - TODO confirm.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

# Fresh time-based seed for each run, recorded in TITLE.
SEED = rp.millis() % 9999
rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

# Query points, one per track, in TXY form: [frame index, x, y].
init_points = [
    [0, 320, 300], #0 white ::  Red Car
    [16, 120, 260], #1 red ::  Motorcycle
    [48, 100, 60], #2 green ::  Hidden Building
    [48, 380, 160], #3 blue ::  Visible Building
    [38, 600, 260], #4 cyan ::  Background Car Right Lane
    [22, 60, 260], #5 magenta ::  White Van on Side of Road
    [20, 500, 150], #6 yellow ::  A signpost


    # Extra candidate points, currently disabled:
    # [9, 280.0, 137.0],
    # [19, 630.0, 380.0],
    # [19, 612.0, 234.0],
    # [36, 688.0, 199.0],
    # [36, 37.0, 93.0],
    # [36, 58.0, 28.0],
    # [36, 51.0, 224.5],
    # [36, 159.5, 197.0],
    # [36, 272.0, 227.0],
    # [36, 310.0, 227.0],
    # [36, 363.0, 224.0],
    # [36, 506.0, 227.0],
    # [36, 448.0, 138.0],
    # [7, 356.0, 143.0],
    # [7, 27.0, 184.5],
]
rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

# Edit copies so the tracker's original output (tracks / visibles) stays intact.
new_tracks = tracks + 0
new_visibles = visibles + 0

# Drift schedules. Every entry is (dx, dy, t_origin) for one add_drift call
# with do_before=False. The calls are cumulative (each takes the previous
# result), so the entries are applied strictly in the order listed.

#Red car acceleration
red_car_drifts = [
    (0, 2, 0),
    (0, 2, 10),
    (0, 2, 20),
]

#Motorcycle Chasing
motorcycle_drifts = [
    (0, 2, 10),
    (2, 0, 25),
    (2, .5, 30),
    (2, 1, 35),
    (1, 1, 37),
    (1, 2, 40),
    (0, 2, 42),
    (0, 2, 44),
    (0, 2, 46),
]

# Track 0 is the red car and track 1 the motorcycle (see init_points).
for track_index, drift_schedule in ((0, red_car_drifts), (1, motorcycle_drifts)):
    for drift_dx, drift_dy, drift_start in drift_schedule:
        new_tracks = add_drift(
            new_tracks,
            track_index,
            dx=drift_dx,
            dy=drift_dy,
            t_origin=drift_start,
            do_before=False,
        )

# Disabled experiments, kept for reference:
# new_tracks = add_drift(new_tracks, 0 , dx=0, dy=2, t_origin=30, do_before=False)
# new_visibles[23:,6]=0
# visibles[:,5]=1
# new_tracks, new_visibles = reverse_tracks(new_tracks, new_visibles, 0,4)

# Preview the edited tracks on top of the input video, then show the diff view.
preview_frames = draw_tracks(input_video, new_tracks, new_visibles)
rp.display_video(gridded_video(preview_frames))
display_tracks_diff()

##############################

# Pipeline stages, run in this order.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# Experiment cell: condition on isolated points (a few frames each) instead of full tracks.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
TITLE = "Sora Basketball: Single Point Test"

input_video_path = "MakeItNotBounce.mp4"
# NOTE: this short prompt is overwritten by the long prompt on the next statement.
prompt = 'A basketball gets thrown into a basketball hoop with a nice swish. The basketball goes through the hoop.'
prompt = '''A sleek basketball arcs gracefully through the air, its orange surface gleaming under the gymnasium lights, as it approaches the hoop with precision. The ball makes a perfect swish, slicing through the net with a satisfying sound, the net fluttering gently in its wake. The moment is captured in slow motion, highlighting the ball's trajectory and the seamless integration of its path into the hoop, emphasizing the skill and finesse of the throw. The scene shows a vibrant outdoor playground under a clear blue sky. At the center of the image is a basketball hoop viewed from below, giving a dynamic perspective. In the background, there's a playground structure with an orange slide and climbing frame, all enclosed in a fenced area. '''

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

# Seed first: the random anchor points generated below depend on it.
SEED = rp.millis() % 9999
rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

# Each entry is [from_point, to_point], both in TXY form.
#  - the first two entries move a point to a new location,
#  - the '[x]*2' entries are anchors whose source and target are the same point,
#  - the last six are anchors at random positions.
# NOTE(review): T, W and H are not defined in this cell; presumably they are the
# frame count, width and height set by init_input_video() - confirm.
point_pairs = [
    [ [9, 400, 100], [9, 300, 100] ],
    [ [38, 350, 100], [38, 350, 300] ],
    *[ [x]*2 for x in [
            [0, 100, 100], 
            [18, 400, 400],
            [18, 400, 100],
            [44, 500, 200],
        ]
    ],

    * [ [[rp.random_int(0,T-1), rp.random_int(0,W-1), rp.random_int(0,H-1)]] * 2 for _ in range(6)],
]

# The tracker is queried at the source point of every pair.
init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

new_tracks = tracks + 0
new_visibles = visibles + 0

# Start with everything hidden, in both the source and the edited visibility;
# the loop below re-enables only a short window around each point.
visibles[:]=0
new_visibles[:]=0

# A point is stamped on frames t-temporal_radius .. t+temporal_radius (clipped to the video).
temporal_radius=2

for blob_index, (from_point, to_point) in enumerate(point_pairs):
    b=blob_index
    
    # Source side: writes into the ORIGINAL tracks / visibles in place.
    t,x,y = from_point
    for time_delta in range(-temporal_radius, temporal_radius+1):
        if 0<=t+time_delta<T:
            # Prints the centre frame t (not t+time_delta) once per stamped frame.
            print(t,x,y,b)
            tracks[t+time_delta, b, 0] = x
            tracks[t+time_delta, b, 1] = y
            visibles[t+time_delta, b] = 1
        
    # Target side: same stamping, into the edited copies.
    t,x,y = to_point
    for time_delta in range(-temporal_radius, temporal_radius+1):
        if 0<=t+time_delta<T:
            print(t,x,y,b)
            new_tracks[t+time_delta, b, 0] = x
            new_tracks[t+time_delta, b, 1] = y
            new_visibles[t+time_delta, b] = 1

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Pipeline stages, run in this order.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# Experiment cell: move the boat using isolated target points.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
# del pipe

TITLE = "Boat: Single Point Test"

#Adjust this based on the loaded checkpoint
# 'track2point': the source keeps its full tracks, only the target is reduced to points.
# 'point2point': the source is reduced to single points as well (see the loop below).
# POINT_MODE = 'point2point'
POINT_MODE = 'track2point'

input_video_path = "MoveTheBoatLeft.mp4"
prompt = 'A ferry boat glides gracefully in the water as the camera pans up'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

# Fresh time-based seed; mode and seed are both recorded in TITLE.
SEED = rp.millis() % 9999
rp.seed_all(SEED)
TITLE = f'[{POINT_MODE}: Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

# Each entry is [from_point, to_point, to_point, ...] in TXY form: the first
# element is the source point, every following element is a target point for
# the same track. Entries whose points are all equal act as static anchors.
# The commented groups are earlier point sets kept for reference.
point_pairs = [
    # [[26, 369, 255], [26, 530, 202]],
    # [[14, 358, 265], [14, 438, 253]],
    # [[46, 405, 324], [46, 625, 261]],
    # [[46, 244.0, 102.0], [46, 244.0, 102.0]],
    # [[46, 90.5, 341.0], [46, 90.5, 341.0]],
    # [[46, 83.0, 150.0], [46, 83.0, 150.0]],
    # [[46, 625.5, 449.0], [46, 625.5, 449.0]],
    # [[46, 614.0, 108.0], [46, 614.0, 108.0]],
    # [[18, 563.0, 112.0], [18, 563.0, 112.0]],
    # [[18, 135.0, 189.0], [18, 135.0, 189.0]],
    # [[18, 413.0, 400.0], [18, 413.0, 400.0]],
    # [[7, 157.0, 107.0], [7, 157.0, 107.0]],
    # [[7, 146.0, 340.0], [7, 146.0, 340.0]],
    # [[7, 556.0, 354.0], [7, 556.0, 354.0]],
    # [[0, 54.0, 27.0], [0, 54.0, 27.0]],
    # [[0, 129.0, 421.0], [0, 129.0, 421.0]],
    # [[0, 640.0, 452.0], [0, 640.0, 452.0]],
    # [[0, 656.0, 30.0], [0, 656.0, 30.0]]

    # [[19, 374, 249], [19, 551, 259]],
    # [[28, 385, 262], [28, 670, 262]],
    # [[9, 372, 255], [9, 515, 259]],
    # [[4, 341, 249], [4, 432, 245]],
    # # [[1, 337.0, 251.0], [1, 337.0, 251.0]],
    # # [[3, 143.0, 100.0], [3, 143.0, 100.0]],
    # [[3, 653.0, 233.0], [3, 653.0, 233.0]],
    # # [[8, 181.0, 382.0], [8, 181.0, 382.0]],
    # # [[8, 213.5, 91.5], [8, 213.5, 91.5]],
    # # [[8, 555.0, 48.0], [8, 555.0, 48.0]],
    # # [[14, 572.0, 437.0], [14, 572.0, 437.0]],
    # # [[14, 26.0, 18.0], [14, 26.0, 18.0]],
    # [[32, 664.0, 39.0], [32, 664.0, 39.0]],
    # # [[32, 172.0, 45.0], [32, 172.0, 45.0]],
    # # [[32, 128.0, 420.0], [32, 128.0, 420.0]],
    # # [[46, 200.0, 61.0], [46, 200.0, 61.0]],
    # [[46, 666.0, 117.0], [46, 666.0, 117.0]],
    # # [[46, 127.0, 144.0], [46, 127.0, 144.0]]

    # [[0, 302.0, 366.0], [0, 302.0, 366.0]],
    # [[35, 377, 267], [35, 531, 266]],
    # [[45, 672.0, 82.0], [45, 672.0, 82.0]],
    # [[45, 64.0, 143.0], [45, 64.0, 143.0]],
    # [[9, 88.0, 203.0], [9, 88.0, 203.0]],
    # [[4, 414.0, 452.0], [4, 414.0, 452.0]],
    # [[26, 92.0, 124.0], [26, 92.0, 124.0]]



    # Active set. The first entry has three targets (frames 0, 15 and 30) that
    # move progressively to the right; the rest are static anchors.
    [[0, 302.0, 319.0], [0, 302.0, 319.0], [15, 514, 289], [30, 622.0, 273.0]],
    [[30, 213.0, 54.0], [30, 213.0, 54.0]],
    [[17, 108.0, 283.0], [17, 108.0, 283.0]],
    [[17, 645.0, 47.0], [17, 645.0, 47.0]],
    [[43, 110.0, 54.0], [43, 110.0, 54.0]],
    [[43, 323.0, 145.0], [43, 323.0, 145.0]],
    [[4, 182.0, 96.0], [4, 182.0, 96.0]],
    # [[4, 39.0, 426.0], [4, 39.0, 426.0]],
    # [[4, 610.0, 343.0], [4, 610.0, 343.0]],
    # [[4, 597.0, 80.0], [4, 597.0, 80.0]]

]

# point_pairs = [[x[0]]*len(x) for x in point_pairs]#USE ONLY INPUT POINTS - A SANITY CHECK. SHOULD RETURN SAME VIDEO.

# The tracker is queried at the source point of every entry.
init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

assert POINT_MODE in ['track2point', 'point2point']

new_tracks = tracks + 0
new_visibles = visibles + 0

# A point is stamped on frames t-temporal_radius .. t+temporal_radius (clipped to the video).
temporal_radius=2

# NOTE: the first assignment is dead; it is replaced by the full index range on the next line.
frame2frame_indices = [0,1,2,3]
frame2frame_indices = range(len(init_points)) #Make all of them single points

for blob_index in frame2frame_indices:

    # Only in point2point mode is the ORIGINAL visibility cleared (in place).
    if POINT_MODE=='point2point':
        visibles[:,blob_index]=0

    # 'if 1:' is an always-on toggle: the edited visibility is always cleared
    # before the target points are stamped.
    if 1:
        new_visibles[:,blob_index]=0
    
    from_point = point_pairs[blob_index][0]
    to_points = point_pairs[blob_index][1:]
    b=blob_index

    # Source side (point2point only): stamp the source point into the original tracks.
    if POINT_MODE=='point2point':
        t,x,y = from_point
        for time_delta in range(-temporal_radius, temporal_radius+1):
            if 0<=t+time_delta<T:
                # Prints the centre frame t (not t+time_delta) once per stamped frame.
                print(t,x,y,b)
                tracks[t+time_delta, b, 0] = x
                tracks[t+time_delta, b, 1] = y
                visibles[t+time_delta, b] = 1

    # Target side (always on): stamp every target point into the edited copies.
    # Where the windows of two targets overlap, the later target overwrites the earlier one.
    if 1:
        for to_point in to_points:
            t,x,y = to_point
            for time_delta in range(-temporal_radius, temporal_radius+1):
                if 0<=t+time_delta<T:
                    print(t,x,y,b)
                    new_tracks[t+time_delta, b, 0] = x
                    new_tracks[t+time_delta, b, 1] = y
                    new_visibles[t+time_delta, b] = 1

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Pipeline stages, run in this order.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# Experiment cell: move the boat while the background is replayed in reverse.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
TITLE = "Boat: Move Test"

input_video_path = "MoveTheBoatLeft.mp4"
prompt = 'A ferry boat glides gracefully in the water as the camera pans up'

# Sampling settings; presumably consumed as globals by the pipeline helpers - TODO confirm.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

# Fresh time-based seed for each run, recorded in TITLE.
SEED = rp.millis() % 9999
rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

# Each entry is [from_point, to_point]; here both are identical, and only the
# first element is used (as the tracker query point, see init_points).
# Tracks 0-1 sit on the boat, tracks 2-6 on the background.
point_pairs = [
    [[0, 310.0, 336.0], [0, 310.0, 336.0]],#On boat
    [[0, 334.0, 170.0], [0, 334.0, 170.0]],#On boat
    [[0, 93.0, 84.0], [0, 93.0, 84.0]],#In background
    [[0, 601.0, 91.0], [0, 601.0, 91.0]],#In background
    [[17, 44.0, 15.0], [17, 44.0, 15.0]],#In background
    [[33, 680.0, 41.0], [33, 680.0, 41.0]],#In background
    [[44, 326.0, 45.0], [44, 326.0, 45.0]]#In background
]

init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

# Edit copies so the tracker's original output (tracks / visibles) stays intact.
# (The original cell repeated this pair of assignments twice; once is enough.)
new_tracks = tracks + 0
new_visibles = visibles + 0

# Boat tracks are 0 and 1 (see point_pairs). The drifts are cumulative, so the
# order of the calls matters.
new_tracks = add_drift(new_tracks, 0,1 , dx=-3, dy=-3, t_origin=25, do_before=True)
new_tracks = add_drift(new_tracks, 1 , dx=-3, dy=0, t_origin=25, do_before=True)
new_tracks = add_drift(new_tracks, 0 , dx=1, dy=0, t_origin=25, do_before=False)

# new_tracks = add_drift(new_tracks, 0,1 , dx=3, dy=4, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 1 , dx=0, dy=4, t_origin=25, do_before=True)

# Keep the result of zoom_tracks. Every other call site in this notebook
# assigns it back (new_tracks = zoom_tracks(...)); discarding it loses the zoom
# if the helper returns a new tensor, and re-binding is harmless if it works in place.
new_tracks = zoom_tracks(new_tracks, 0, 1, d_scale = .985, t_origin = 25)
# Shift both boat tracks 100 px to the left on every frame.
new_tracks[:,:2,0]-=100

# Play the background tracks (index 2 and up) backwards in time, positions and
# visibility alike. flip(0) returns a new tensor, so the slice assignment does
# not read values it has already overwritten.
new_tracks[:,2:]=new_tracks[:,2:].flip(0)
new_visibles[:,2:]=new_visibles[:,2:].flip(0)


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Pipeline stages, run in this order.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
# Replay the most recent result at 15 fps. output_video is not defined in this
# cell, so a cell that produces it must have been run first.
rp.display_video(output_video,framerate=15)

In [None]:
#WORKER
# Experiment cell: re-route the judge's motion.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
TITLE = "Judge: Walk Out"

input_video_path = "GetUpAndWalkOut.mp4"
prompt = 'A black judge woman in black robes walks into the room from the right and sits on a table, crossing her arms'

# Sampling settings; presumably consumed as globals by the pipeline helpers - TODO confirm.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=8
num_inference_steps=40

# The time-based seed is immediately replaced by a pinned value; comment out
# the second assignment to go back to a fresh seed per run.
SEED = rp.millis() % 9999
SEED = 5176

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

# Each entry is [from_point, to_point]; both are identical here and only the
# first element is used (as the tracker query point, see init_points).
# The list position is the track index used by the editing code further down.
point_pairs = [
    [[18, 237.0, 103.0], [18, 237.0, 103.0]],
    [[2, 336.0, 343.0], [2, 336.0, 343.0]],
    [[44, 128.0, 206.0], [44, 128.0, 206.0]],
    [[44, 639.0, 196.0], [44, 639.0, 196.0]],
    [[44, 402.0, 218.0], [44, 402.0, 218.0]],
    [[44, 406.0, 336.0], [44, 406.0, 336.0]],
    [[18, 156.0, 339.0], [18, 156.0, 339.0]],    
    [[32, 375.0, 371.0], [32, 375.0, 371.0]],#Hands
    [[32, 396.0, 310.0], [32, 396.0, 310.0]],#Hands
    [[32, 427.0, 90.0], [32, 427.0, 90.0]],#Face
    [[32, 400.0, 134.0], [32, 400.0, 134.0]],#Face
]


init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

# Edit copies; the untouched tracks are needed below as the blend target.
new_tracks = tracks + 0
new_visibles = visibles + 0

# Tracks that sit on the judge (indices into point_pairs / init_points).
judge_indices = [0,4,5,6,7,8,9,10]

# Mirror the judge's x coordinates across the frame width (W - x - 1) and shift
# the result 80 px to the right.
new_tracks[:, judge_indices, 0] = W - new_tracks[:, judge_indices,  0] - 1 + 80 #MIRROR ALL POINTS
# Swap tracks 0 and 9. List indexing on the right-hand side yields a copy, so
# this is a true swap. It is undone by the identical statement after the blends.
new_tracks[:,[9,0]] = new_tracks[:,[0,9]]
# Per-frame blend weight: 0 for roughly the first half of the clip, then a ramp.
# NOTE(review): rp.full_range presumably rescales the clipped ramp to span [0, 1] - confirm.
# The hard-coded 49 has to equal the number of frames, because alpha is
# broadcast along the first axis of the tracks below.
alpha = torch.tensor(rp.full_range(np.clip(np.linspace(0,1,49)-30/49+.1, 0, 1))).to(dtype=tracks.dtype, device=tracks.device)
rp.line_graph(rp.as_numpy_array(alpha), title='alpha')
# Blend the mirrored judge tracks with the original ones using alpha.
# NOTE(review): presumably rp.blend(a, b, alpha) moves from a to b as alpha goes 0 -> 1 - confirm.
new_tracks[:,judge_indices]=rp.blend(new_tracks[:,judge_indices], tracks[:,judge_indices], alpha[:,None,None])
# Tracks 9 and 0 are blended three more times, each pass starting from the
# previous result, so alpha is applied repeatedly to these two tracks.
new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
# Undo the swap of tracks 0 and 9.
new_tracks[:,[9,0]] = new_tracks[:,[0,9]]



# Exchange the visibility of tracks 2 and 3 (positions are left alone).
new_visibles[:,[2,3]] = new_visibles[:,[3,2]]


# Disabled experiments, kept for reference:
# new_tracks = add_drift(new_tracks, 0,1 , dx=-3, dy=-3, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 1 , dx=-3, dy=0, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 0 , dx=1, dy=0, t_origin=25, do_before=False)

# new_tracks = add_drift(new_tracks, 0,1 , dx=3, dy=4, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 1 , dx=0, dy=4, t_origin=25, do_before=True)

# zoom_tracks(new_tracks, 0, 1, d_scale = .985, t_origin = 25)
# new_tracks[:,:2,0]-=100

# new_tracks[:,2:]=new_tracks[:,2:].flip(0)
# new_visibles[:,2:]=new_visibles[:,2:].flip(0)


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Pipeline stages, run in this order, followed by a preview of the result.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
# Call display_video through the rp module, like every other cell does. The
# notebook imports rp as a module, so the bare name is not known to be in scope.
rp.display_video(output_video)

In [None]:
#WORKER
# Experiment cell: freeze the background and zoom it towards the judge.
# NOTE: despite the title, the "walk in from right" edit further down is
# currently commented out; only the zoom is active.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
TITLE = "Judge: Walk In From Right + Zoom"

input_video_path = "GetUpAndWalkOut.mp4"
prompt = 'A black judge woman in black robes walks into the room from the right and sits on a table, crossing her arms'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=8
num_inference_steps=50

# The time-based seed is immediately replaced by a pinned value.
SEED = rp.millis() % 9999
SEED = 5176

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

# Each entry is [from_point, to_point]; both are identical here and only the
# first element is used (as the tracker query point). The list position is the
# track index; commented-out entries do not consume an index.
point_pairs = [
    [[18, 237.0, 103.0], [18, 237.0, 103.0]],
    [[2, 336.0, 343.0], [2, 336.0, 343.0]],
    [[44, 128.0, 206.0], [44, 128.0, 206.0]],
    [[44, 639.0, 196.0], [44, 639.0, 196.0]],
    [[44, 402.0, 218.0], [44, 402.0, 218.0]],
    [[44, 406.0, 336.0], [44, 406.0, 336.0]],
    [[18, 156.0, 339.0], [18, 156.0, 339.0]],    
    [[32, 375.0, 371.0], [32, 375.0, 371.0]],#Hands
    [[32, 396.0, 310.0], [32, 396.0, 310.0]],#Hands
    [[32, 427.0, 90.0], [32, 427.0, 90.0]],#Face
    [[32, 400.0, 134.0], [32, 400.0, 134.0]],#Face

    [[14, 578.0, 230.0], [14, 578.0, 230.0]],
    [[14, 658.0, 340.0], [14, 658.0, 340.0]],
    [[14, 648.0, 185.0], [14, 648.0, 185.0]],
    # [[14, 568.0, 387.0], [14, 568.0, 387.0]],
    [[14, 285.0, 87.0], [14, 285.0, 87.0]],
    # [[14, 285.0, 87.0], [14, 285.0, 87.0]],
    [[14, 302.0, 273.0], [14, 302.0, 273.0]],
    # [[14, 411.0, 177.0], [14, 411.0, 177.0]],
    [[14, 188.0, 200.0], [14, 188.0, 200.0]],
    # [[14, 364.0, 395.0], [14, 364.0, 395.0]],
    [[24, 122.0, 299.0], [24, 122.0, 299.0]],
    [[24, 107.0, 404.0], [24, 107.0, 404.0]],
    # [[24, 389.0, 260.0], [24, 389.0, 260.0]],
    [[24, 319.0, 342.0], [24, 319.0, 342.0]],
    [[24, 329.0, 118.0], [24, 329.0, 118.0]],
    [[24, 224.0, 292.0], [24, 224.0, 292.0]],
    # [[24, 387.0, 71.0], [24, 387.0, 71.0]],
    [[24, 38.0, 80.0], [24, 38.0, 80.0]],
    [[24, 74.0, 148.0], [24, 74.0, 148.0]],
    [[24, 153.0, 87.0], [24, 153.0, 87.0]]
]


init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

new_tracks = tracks + 0
new_visibles = visibles + 0

# Tracks on the judge; every other track index counts as background.
judge_indices = [0,4,5,6,7,8,9,10   , 21, 20, 19]
non_judge_indices = sorted(set(range(len(init_points)))-set(judge_indices))

#Make her walk in from right
# new_tracks[:, judge_indices, 0] = W - new_tracks[:, judge_indices,  0] - 1 + 80 #MIRROR ALL POINTS
# new_tracks[:,[9,0]] = new_tracks[:,[0,9]]
# alpha = torch.tensor(rp.full_range(np.clip(np.linspace(0,1,49)-30/49+.1, 0, 1))).to(dtype=tracks.dtype, device=tracks.device)
# rp.line_graph(rp.as_numpy_array(alpha), title='alpha')
# new_tracks[:,judge_indices]=rp.blend(new_tracks[:,judge_indices], tracks[:,judge_indices], alpha[:,None,None])
# new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
# new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
# new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
# new_tracks[:,[9,0]] = new_tracks[:,[0,9]]
# new_visibles[:,[2,3]] = new_visibles[:,[3,2]]

#Zoom into center
# Every background track is pinned to its frame-0 position (broadcast over all frames).
new_tracks[:,non_judge_indices] = new_tracks[:1,non_judge_indices] #Make the background not move
# deltas becomes each point's offset from the zoom centre (taken after the freeze above).
deltas = new_tracks + 0 
# NOTE: the frame-centre values are dead; they are replaced right away by the
# mean last-frame position of tracks 9 and 0.
center_x = W//2
center_y = H//2
center_x = new_tracks[-1:,[9,0],0].mean(1,keepdim=True)
center_y = new_tracks[-1:,[9,0],1].mean(1,keepdim=True)
deltas[:,:,0] -= center_x
deltas[:,:,1] -= center_y
# Linear ramp 0 -> 1 over 49 frames; 49 has to equal the number of frames for
# the broadcast along the first axis to work.
alpha = torch.tensor(np.linspace(0,1,49)).to(dtype=tracks.dtype, device=tracks.device)
# Push background points away from the centre by alpha/2 of their offset:
# nothing on the first frame, 1.5x the original distance on the last one.
new_tracks[:,non_judge_indices] += deltas[:,non_judge_indices] * alpha[:,None,None] / 2



# Disabled experiments, kept for reference:
# new_tracks = add_drift(new_tracks, 0,1 , dx=-3, dy=-3, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 1 , dx=-3, dy=0, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 0 , dx=1, dy=0, t_origin=25, do_before=False)

# new_tracks = add_drift(new_tracks, 0,1 , dx=3, dy=4, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 1 , dx=0, dy=4, t_origin=25, do_before=True)

# zoom_tracks(new_tracks, 0, 1, d_scale = .985, t_origin = 25)
# new_tracks[:,:2,0]-=100

# new_tracks[:,2:]=new_tracks[:,2:].flip(0)
# new_visibles[:,2:]=new_visibles[:,2:].flip(0)


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Pipeline stages, run in this order.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()

In [None]:
#WORKER
# Experiment cell: retime the taxi and the truck tracks independently.
# '#WORKER' must stay the first line (matched by the pre-run hook with startswith).
TITLE = "Truck Before Cab [Old]"

input_video_path = "truck_before_cab.mp4"
prompt = 'A yellow taxi cab SUV drives in front of a green truck and a hotdog stand move around in times square, NYC'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

# Fresh time-based seed for each run (the pinned value is disabled).
SEED = rp.millis() % 9999
# SEED = 5176

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

# Only the first element of each entry is used (as the tracker query point),
# which is why the last three entries can hold a single point.
# The trailing '#n' is the track index referenced by the editing code below.
point_pairs = [
    [[13, 438.0, 170.0], [13, 438.0, 170.0]],#0
    [[13, 333.0, 28.0], [13, 333.0, 28.0]],#1
    [[31, 416.0, 102.0], [31, 416.0, 102.0]],#2
    [[37, 663.0, 26.0], [37, 663.0, 26.0]],#3
    [[20, 437.0, 103.0], [20, 437.0, 103.0]],#4
    [[20, 89.0, 382.0], [20, 89.0, 382.0]],#5
    [[48, 143.0, 38.0], [48, 143.0, 38.0]],#6
    [[16, 500, 400]],#Concrete below taxi #7
    [[0, 300, 200]],#Taxi Cab Mirror #8
    [[10, 400, 200]],#Taxi Cab Mirror #9
]


init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

new_tracks = tracks + 0
new_visibles = visibles + 0

# Leftovers from the judge experiments, kept disabled for reference:
# judge_indices = [0,4,5,6,7,8,9,10   , 21, 20, 19]
# non_judge_indices = sorted(set(range(len(init_points)))-set(judge_indices))

#Make her walk in from right
# new_tracks[:, judge_indices, 0] = W - new_tracks[:, judge_indices,  0] - 1 + 80 #MIRROR ALL POINTS
# new_tracks[:,[9,0]] = new_tracks[:,[0,9]]
# alpha = torch.tensor(rp.full_range(np.clip(np.linspace(0,1,49)-30/49+.1, 0, 1))).to(dtype=tracks.dtype, device=tracks.device)
# rp.line_graph(rp.as_numpy_array(alpha), title='alpha')
# new_tracks[:,judge_indices]=rp.blend(new_tracks[:,judge_indices], tracks[:,judge_indices], alpha[:,None,None])
# new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
# new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
# new_tracks[:,[9,0]]=rp.blend(new_tracks[:,[9,0]], tracks[:,[9,0]], alpha[:,None,None])
# new_tracks[:,[9,0]] = new_tracks[:,[0,9]]
# new_visibles[:,[2,3]] = new_visibles[:,[3,2]]

# #Zoom into center
# new_tracks[:,non_judge_indices] = new_tracks[:1,non_judge_indices] #Make the background not move
# deltas = new_tracks + 0 
# center_x = W//2
# center_y = H//2
# center_x = new_tracks[-1:,[9,0],0].mean(1,keepdim=True)
# center_y = new_tracks[-1:,[9,0],1].mean(1,keepdim=True)
# deltas[:,:,0] -= center_x
# deltas[:,:,1] -= center_y
# alpha = torch.tensor(np.linspace(0,1,49)).to(dtype=tracks.dtype, device=tracks.device)
# new_tracks[:,non_judge_indices] += deltas[:,non_judge_indices] * alpha[:,None,None] / 2

# Track groups (indices into point_pairs).
car = [0, 1, 8, 9]
truck = [2, 3]

# def retime(tracks, visibles, *i, old_start=0, old_end=-1, new_start=0, new_end=-1):

        
    
# Retiming: each group is resized along time to 2*T entries and one half is kept.
# The car keeps the first T entries, the truck the last T.
# NOTE(review): presumably resize_list_linterp interpolates positions linearly
# and rp.resize_list resamples visibility without interpolation, so the car
# plays the first half of its motion at half speed and the truck the second
# half - confirm against both helpers.
new_tracks  [:,car]=resize_list_linterp(new_tracks  [:,car], T*2)[:T]
new_visibles[:,car]=rp.resize_list     (new_visibles[:,car], T*2)[:T]

new_tracks  [:,truck]=resize_list_linterp(new_tracks  [:,truck], T*2)[T:]
new_visibles[:,truck]=rp.resize_list     (new_visibles[:,truck], T*2)[T:]

# Hide truck track 2 on frames 22..34.
new_visibles[22:35,[2]] = 0

#Hot dog
# Track 4 is hidden on every frame.
new_visibles[:,[4]] = 0

#Concrete below cab
# Track 7 is visible only on frames 0..9 and from frame 30 on.
new_visibles[:,[7]] = 0
new_visibles[:10,[7]] = 1
new_visibles[30:,[7]] = 1


# Disabled experiments, kept for reference:
# new_tracks = add_drift(new_tracks, 0,1 , dx=-3, dy=-3, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 1 , dx=-3, dy=0, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 0 , dx=1, dy=0, t_origin=25, do_before=False)

# new_tracks = add_drift(new_tracks, 0,1 , dx=3, dy=4, t_origin=25, do_before=True)
# new_tracks = add_drift(new_tracks, 1 , dx=0, dy=4, t_origin=25, do_before=True)

# zoom_tracks(new_tracks, 0, 1, d_scale = .985, t_origin = 25)
# new_tracks[:,:2,0]-=100

# new_tracks[:,2:]=new_tracks[:,2:].flip(0)
# new_visibles[:,2:]=new_visibles[:,2:].flip(0)


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Pipeline stages, run in this order, then inspect the result.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
# Diff view on the first three channels of the output only.
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Truck Before Cab"

input_video_path = "truck_before_cab.mp4"
prompt = 'A yellow SUV taxi cab SUV drives in front of a green truck and a hotdog stand move around in times square, NYC'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=13
num_inference_steps=50

SEED = rp.millis() % 9999
SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[13, 438.0, 170.0], [13, 438.0, 170.0]],#0
    [[13, 333.0, 28.0], [13, 333.0, 28.0]],#1
    [[31, 416.0, 102.0], [31, 416.0, 102.0]],#2
    [[37, 663.0, 26.0], [37, 663.0, 26.0]],#3
    [[20, 437.0, 103.0], [20, 437.0, 103.0]],#4
    [[20, 89.0, 382.0], [20, 89.0, 382.0]],#5
    [[48, 143.0, 38.0], [48, 143.0, 38.0]],#6
    [[16, 500, 400]],#Concrete below taxi #7
    [[0, 300, 200]],#Taxi Cab Mirror #8
    [[10, 400, 200]],#Taxi Cab Mirror #9
    [[16, 600, 400]],#Ground 10
    # [[36, 120, 300]],#Ground 11 #NOT NEEDED
]


init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

# Work on copies so the edits below do not modify `tracks` / `visibles`.
new_tracks = tracks + 0
new_visibles = visibles + 0


car = [0, 1, 8, 9]
truck = [2, 3]

# def retime(tracks, visibles, *i, old_start=0, old_end=-1, new_start=0, new_end=-1):


# Delay the car: its whole original track is squeezed into the frames from
# car_start to the end of the clip, and it is hidden before car_start.
# The frame count comes from T instead of a literal 49 so the cell keeps
# working if the clip length changes (the assignment only ever fit when T was 49).
# NOTE(review): assumes resize_list_linterp / rp.resize_list resample along the
# first (time) axis -- confirm against their definitions.
car_start = 30
car_frames = T - car_start
new_tracks  [car_start:, car] = resize_list_linterp(new_tracks  [:, car], car_frames)
new_visibles[car_start:, car] = rp.resize_list     (new_visibles[:, car], car_frames)
new_visibles[:car_start, car] = 0

# Truck: resample to 2*T samples and keep the last T, i.e. the second half of
# its original motion stretched over the whole clip.
new_tracks  [:, truck] = resize_list_linterp(new_tracks  [:, truck], T * 2)[T:]
new_visibles[:, truck] = rp.resize_list     (new_visibles[:, truck], T * 2)[T:]

# new_visibles[22:35,[2]] = 0


#HOT DOG CART
# Point 4 is visible only on frames 0..8.
new_visibles[:, [4]] = 0
new_visibles[:9, [4]] = 1

def tween(tracks, i, txy0, txy1):
    """Return a copy of ``tracks`` with point ``i`` interpolated between two keyframes.

    Args:
        tracks: Track data indexed as ``tracks[t, i, 0]`` (x) and ``tracks[t, i, 1]`` (y).
        i: Index of the point to overwrite.
        txy0: Start keyframe, either ``[t, x, y]`` or an int frame index, in which
            case the point's existing position at that frame is used.
        txy1: End keyframe, same forms as ``txy0``.

    Returns:
        A new tracks object; the input is not modified. Frames from the start
        keyframe's ``t`` through the end keyframe's ``t`` (inclusive) are rewritten
        for point ``i``.
    """
    result = tracks + 0  # copy, so the caller's data stays untouched

    # Expand bare frame indices into [t, x, y] using the current positions.
    keyframes = []
    for key in (txy0, txy1):
        if isinstance(key, int):
            key = [key, *result[key, i]]
        keyframes.append(key)
    (t_first, x_first, y_first), (t_last, x_last, y_last) = keyframes

    frame = t_first
    while frame <= t_last:
        # presumably iblend maps frame -> 0..1 across [t_first, t_last] and
        # blend mixes the two endpoints by that weight -- see rp for details
        weight = rp.iblend(frame, t_first, t_last)
        new_x = rp.blend(x_first, x_last, weight)
        new_y = rp.blend(y_first, y_last, weight)
        result[frame, i, 0] = new_x
        result[frame, i, 1] = new_y
        frame += 1

    return result

# Point 4 (the hot dog cart): interpolate from (x=560, y=140) at frame 0 to its
# existing position at frame 14.
new_tracks = tween(new_tracks, 4, [0, 560, 140], 14)
# Hide point 2 on frames 32..36.
new_visibles[32:37,2]=0



#Concrete below cab
# Points 7 and 10 start fully hidden and are then re-enabled outside the
# occluded span: point 7 is visible on frames 0..27 and 35+, point 10 on
# frames 0..26 and 34+.
new_visibles[:,[7,10]] = 0
new_visibles[:28,[7]] = 1
new_visibles[35:,[7]] = 1
new_visibles[34:,[10]] = 1
new_visibles[0:27,[10]] = 1
new_tracks[:,[7,10]]=cotracker_tracks[:,[7,10]] #Cotracker is better under occlusions
# new_visibles[36:,[11]] = 0


# Preview the edited tracks once (the original cell issued this pair of calls
# twice in a row, rendering the same video and diff two times).
rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Reverse Windmills"

input_video_path = "reverse_windmill.mp4"
prompt = 'two spinning windmills. A serene peaceful landscape with spinning windmills'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=13
num_inference_steps=30

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[0, 214.0, 179.0], [0, 214.0, 179.0]],#Windmill 1 Spoke 0
    [[0, 257.0, 222.0], [0, 257.0, 222.0]],#Windmill 1 Spoke 1
    [[0, 163.0, 222.0], [0, 163.0, 222.0]],#Windmill 1 Spoke 2
    [[5, 161.0, 237.0], [5, 161.0, 237.0]],#Windmill 1 Spoke 3 
    [[5, 211.0, 232.0], [5, 211.0, 232.0]],#Windmill 1 Center 4
    [[5, 413.0, 173.0], [5, 413.0, 173.0]],#Windmill 2 Spoke 5
    [[5, 499.0, 135.0], [5, 499.0, 135.0]],#Windmill 2 Spoke 6
    [[18, 422.0, 263.0], [18, 422.0, 263.0]],#Windmill 2 Spoke 7
    [[18, 428.0, 147.0], [18, 428.0, 147.0]],#Windmill 2 Spoke 8 
    [[18, 489.0, 208.0], [18, 489.0, 208.0]],#Windmill 2 Center 9
    [[0, 166.0, 374.0], [0, 166.0, 374.0]],#Background 1 10
    [[0, 554.0, 379.0], [0, 554.0, 379.0]],#Background 2 11
    [[48, 502.0, 129.0], [48, 502.0, 129.0]],#W2 12
    [[48, 415.0, 177.0], [48, 415.0, 177.0]],#W2 13
    [[48, 453.0, 275.0], [48, 453.0, 275.0]],#W2 14
    [[48, 259.0, 209.0], [48, 259.0, 209.0]],#W1 15
    [[48, 200.0, 180.0], [48, 200.0, 180.0]],#W1 16
    [[48, 171.0, 246.0], [48, 171.0, 246.0]],#W1 17

]


init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#BAD TRACKS
# visibles[:,[16,13,14]]=0

# q=3  ; visibles[:,q] = 0 ; visibles[:21, q ] = 1
# q=5  ; visibles[:,q] = 0 ; visibles[:23, q ] = 1
# # q=6  ; visibles[:,q] = 0 ; visibles[:13, q ] = 1
# q=8  ; visibles[:,q] = 0 ; visibles[7:34, q ] = 1
# # q=7  ; visibles[:,q] = 0 ; visibles[15:42, q ] = 1
# q=12 ; visibles[:,q] = 0 ; visibles[28:, q] = 1
# # q=15 ; visibles[:,q] = 0 ; visibles[:22, q] = 1
# q=17 ; visibles[:,q] = 0 ; visibles[21:, q] = 1
# q=13 ; visibles[:,q] = 0 ; visibles[:0 , q] = 1
# q=14 ; visibles[:,q] = 0 ; visibles[:0 , q] = 1
# q=16 ; visibles[:,q] = 0 ; visibles[:0 , q] = 1

# print("SJA",tracks.shape, visibles.shape)
# tracks = list(tracks.permute(1,0,2))
# visibles = list(visibles.permute(1,0))
# to_delete = [13, 14, 16]
# to_delete = sorted(to_delete, reverse=True)
# for i in to_delete:
#     print("DEL",i)
#     del tracks[i]
#     del visibles[i]
# tracks = torch.stack(tracks).permute(1,0,2)
# visibles = torch.stack(visibles).permute(1,0)

# tracks = cotracker_tracks
# visibles = cotracker_visibles

# Work on copies so `tracks` / `visibles` keep the unedited tracker output.
new_tracks = tracks + 0
# new_tracks = cotracker_tracks + 0
new_visibles = visibles + 0

# Point indices per windmill (see the labels in point_pairs above).
# wind1 is defined for reference; only wind2 is edited below.
wind1=[0,1,2,3,4, 15, 16, 17]
wind2=[5,6,7,8,9, 12, 13, 14]


#Reverse windmill #2
# flip(0) reverses the first axis, which is indexed by frame elsewhere in this
# file, so windmill 2's positions and visibility are played backwards in time.
new_tracks  [:,wind2]=new_tracks  [:,wind2].flip(0)
new_visibles[:,wind2]=new_visibles[:,wind2].flip(0)





# car = [0, 1, 8, 9]
# truck = [2, 3]

# def retime(tracks, visibles, *i, old_start=0, old_end=-1, new_start=0, new_end=-1):


# car_start=30
# new_tracks  [car_start:,car]=resize_list_linterp(new_tracks  [:,car], (49-car_start))[:T]
# new_visibles[car_start:,car]=rp.resize_list     (new_visibles[:,car], (49-car_start))[:T]
# new_visibles[:car_start,car] = 0

# new_tracks  [:,truck]=resize_list_linterp(new_tracks  [:,truck], T*2)[T:]
# new_visibles[:,truck]=rp.resize_list     (new_visibles[:,truck], T*2)[T:]


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:

#### WORKER
TITLE = "[Failure] Stop Sign Lady"

input_video_path = "stop_sign_lady.mp4"
prompt = 'a woman holds a sign saying "STOP!" and holds up a megaphone'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=13
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[27, 388.0, 15.0], [27, 388.0, 15.0]],
    [[27, 412.0, 7.0], [27, 412.0, 7.0]],
    [[48, 421.0, 157.0], [48, 421.0, 157.0]],
    [[48, 397.0, 248.0], [48, 397.0, 248.0]],
    [[48, 329.0, 178.0], [48, 329.0, 178.0]],
    [[48, 315.0, 277.0], [48, 315.0, 277.0]],
    [[48, 330.0, 423.0], [48, 330.0, 423.0]],
    [[6, 298.0, 305.0], [6, 298.0, 305.0]],
    # [[6, 227.0, 466.0], [6, 227.0, 466.0]],

]


init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



new_tracks = tracks + 0
# new_tracks = cotracker_tracks + 0
new_visibles = visibles + 0


# car = [0, 1, 8, 9]
# truck = [2, 3]

# def retime(tracks, visibles, *i, old_start=0, old_end=-1, new_start=0, new_end=-1):

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks

# Over frames 0..14: points 0 and 1 are drifted by a total of (200, 240),
# point 2 by (100, 90). The call for point 3 uses dx=dy=0.
new_tracks = partial_drift(new_tracks, 0, 1, dx = 200, dy=300-60, t_start = 0, t_end=14)
new_tracks = partial_drift(new_tracks, 2, dx = 100, dy=150-60, t_start = 0, t_end=14)
new_tracks = partial_drift(new_tracks, 3, dx = 0, dy=0, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, 0, 3, dx = 5, dy=5, t_start = 0, t_end=14)
# Shift points 0 and 1 by -30 in the last-axis index 1 (y) on every frame.
new_tracks[:,[0,1],1]-=30

# car_start=30
# new_tracks  [car_start:,car]=resize_list_linterp(new_tracks  [:,car], (49-car_start))[:T]
# new_visibles[car_start:,car]=rp.resize_list     (new_visibles[:,car], (49-car_start))[:T]
# new_visibles[:car_start,car] = 0

# new_tracks  [:,truck]=resize_list_linterp(new_tracks  [:,truck], T*2)[T:]
# new_visibles[:,truck]=rp.resize_list     (new_visibles[:,truck], T*2)[T:]


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:

#### WORKER
TITLE = "[Failure] Stop Sign Lady"

input_video_path = "stop_sign_lady.mp4"
prompt = 'a woman holds a megaphone in one hand to the left a megaphone a sign saying "STOP!" and holds up a megaphone'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=13
num_inference_steps=40

SEED = rp.millis() % 9999
SEED = 4370

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[27, 388.0, 15.0], [27, 388.0, 15.0]],
    [[27, 412.0, 7.0], [27, 412.0, 7.0]],
    [[48, 421.0, 157.0], [48, 421.0, 157.0]],
    [[48, 397.0, 248.0], [48, 397.0, 248.0]],
    [[48, 329.0, 178.0], [48, 329.0, 178.0]],
    [[48, 315.0, 277.0], [48, 315.0, 277.0]],
    [[48, 330.0, 423.0], [48, 330.0, 423.0]],
    [[6, 298.0, 305.0], [6, 298.0, 305.0]],
    # [[6, 227.0, 466.0], [6, 227.0, 466.0]],

]


init_points = [
    x[0] for x in point_pairs
]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion -- confirm.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks


# Edit copies: positions come from cotracker_tracks, visibility from `visibles`.
# (partial_drift is defined above so this cell no longer depends on an earlier
# cell having defined it before the call below.)
new_tracks = cotracker_tracks + 0
new_visibles = visibles + 0

# new_visibles[:,[6,7]]=1
# new_visibles[:,[0,1,2]]=1 #Arm Sign
# new_tracks = partial_drift(new_tracks, 5, dx = -200, dy=0, t_start = 0, t_end=30)
# Drift point 5 by a total of -150 in x over frames 0..49.
new_tracks = partial_drift(new_tracks, 5, dx = -150, dy=0, t_start = 0, t_end=49)

# new_tracks = partial_drift(new_tracks, 0, 1, dx = 200, dy=300-60, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, 2, dx = 100, dy=150-60, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, 3, dx = 0, dy=0, t_start = 0, t_end=14)
# new_tracks[:,[0,1],1]-=30


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Penguins Walk Together"

input_video_path = "penguins_walk_together.mp4"
prompt = 'two penguins walk along a rocky beach shore'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=13
num_inference_steps=50

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[0, 109.0, 249.0], [0, 109.0, 249.0]],
    [[0, 379.0, 377.0], [0, 379.0, 377.0]],
    [[0, 690.0, 285.0], [0, 690.0, 285.0]],
    [[12, 326.0, 140.0], [12, 326.0, 140.0]],
    [[12, 355.0, 239.0], [12, 355.0, 239.0]],
    [[29, 278.0, 197.0], [29, 278.0, 197.0]],
    [[29, 297.0, 270.0], [29, 297.0, 270.0]],#Pengui
   # [[7, 569.0, 168.5], [7, 569.0, 168.5]],
   #  [[7, 612.0, 163.0], [7, 612.0, 163.0]],
   #  [[7, 597.0, 197.0], [7, 597.0, 197.0]],
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



new_tracks = tracks + 0
# new_tracks = cotracker_tracks + 0
new_visibles = visibles + 0

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks

# Use point 1 as a stand-in for camera motion: camera_deltas is the mean
# position of the camera points per frame, relative to the first frame.
camera_points = [1]
camera_deltas = tracks[:,camera_points].mean(1, keepdim=True)
camera_deltas = camera_deltas - camera_deltas[:1]

# Point groups edited below (presumably one group per penguin).
p1=[3,4,]#7,8,9]
p2=[5,6]

# Retime group p1: remove the camera offset, take frames 6..17 only, stretch
# that span to T samples, then add the camera offset back.
# NOTE(review): assumes resize_list_linterp / rp.resize_list resample along the
# first (time) axis -- confirm against their definitions.
new_tracks  [:,p1] = resize_list_linterp((new_tracks-camera_deltas)[6:18, p1], T) + camera_deltas
new_visibles[:,p1] = rp.resize_list     (new_visibles[6:18, p1], T)
# Hide group p2 on frames 26..29.
new_visibles[26:30,p2] = 0

# new_tracks[:,p1]=new_tracks[14:15,p1]
# new_visibles[:,p1]=1
# new_tracks = partial_drift(new_tracks, *p1, dx = 10, dy=50, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, *p1, dx = 100, dy=160, t_start = 14, t_end=49)
# new_tracks = partial_drift(new_tracks, 4, dx = -20, dy=30, t_start = 14, t_end=49) #Bottom of penguin
# new_visibles[27:31,[5,6]] = 0 #Blocked by other penguin

# new_tracks = partial_drift(new_tracks, 2, dx = 100, dy=150-60, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, 3, dx = 0, dy=0, t_start = 0, t_end=14)
# # new_tracks = partial_drift(new_tracks, 0, 3, dx = 5, dy=5, t_start = 0, t_end=14)
# new_tracks[:,[0,1],1]-=30


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Knight Chases Windmill"

input_video_path = "chasing_windmills.mp4"
prompt = 'a knight chases a windmill'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[0, 234.0, 171.0], [0, 234.0, 171.0]], #Windmill Center
    [[32, 245.0, 206.0], [32, 245.0, 206.0]],
    [[32, 257.0, 123.0], [32, 257.0, 123.0]],
    [[32, 145.0, 193.0], [32, 145.0, 193.0]],
    [[32, 170.0, 101.0], [32, 170.0, 101.0]],
    [[32, 333.0, 158.0], [32, 333.0, 158.0]],
    [[32, 140.0, 323.0], [32, 140.0, 323.0]],
    [[32, 443.0, 260.0], [32, 443.0, 260.0]],
    [[32, 340.0, 274.0], [32, 340.0, 274.0]],
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



new_tracks = tracks + 0
# new_tracks = cotracker_tracks + 0
new_visibles = visibles + 0

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks

# Use point 0 (labelled "Windmill Center" in point_pairs) as a stand-in for
# camera motion: camera_deltas is its per-frame position relative to frame 0.
camera_points = [0]
camera_deltas = tracks[:,camera_points].mean(1, keepdim=True)
camera_deltas = camera_deltas - camera_deltas[:1]

# Reverse points 1..4 along the first (frame) axis with the camera offset
# removed, then add the un-reversed camera offset back on.
w=[1,2,3,4]
new_tracks  [:,w] = (new_tracks[:,w] - camera_deltas).flip(0) + camera_deltas

# new_tracks[:,p1]=new_tracks[14:15,p1]
# new_visibles[:,p1]=1
# new_tracks = partial_drift(new_tracks, *p1, dx = 10, dy=50, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, *p1, dx = 100, dy=160, t_start = 14, t_end=49)
# new_tracks = partial_drift(new_tracks, 4, dx = -20, dy=30, t_start = 14, t_end=49) #Bottom of penguin
# new_visibles[27:31,[5,6]] = 0 #Blocked by other penguin

# new_tracks = partial_drift(new_tracks, 2, dx = 100, dy=150-60, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, 3, dx = 0, dy=0, t_start = 0, t_end=14)
# # new_tracks = partial_drift(new_tracks, 0, 3, dx = 5, dy=5, t_start = 0, t_end=14)
# new_tracks[:,[0,1],1]-=30


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Knight Chases Windmill [Slower]"

input_video_path = "chasing_windmills_slower.mp4"
prompt = 'a knight chases a windmill'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[0, 229.0, 169.0], [0, 229.0, 169.0]],

    [[29, 254.0, 144.0], [29, 254.0, 144.0]],
    [[29, 186.0, 111.0], [29, 186.0, 111.0]],
    [[29, 147.0, 180.0], [29, 147.0, 180.0]],
    [[20, 267.0, 186.0], [20, 267.0, 186.0]],
    
    [[0, 338.0, 140.0], [0, 338.0, 140.0]],
    [[20, 171.0, 296.0], [20, 171.0, 296.0]],
    [[20, 540.0, 229.0], [20, 540.0, 229.0]]
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



new_tracks = tracks + 0
# new_tracks = cotracker_tracks + 0
new_visibles = visibles + 0

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks

# Use point 0 as a stand-in for camera motion: camera_deltas is its per-frame
# position relative to frame 0.
camera_points = [0]
camera_deltas = tracks[:,camera_points].mean(1, keepdim=True)
camera_deltas = camera_deltas - camera_deltas[:1]

# Reverse points 1..4 along the first (frame) axis with the camera offset
# removed, then add the un-reversed camera offset back on.
w=[1,2,3,4]
new_tracks  [:,w] = (new_tracks[:,w] - camera_deltas).flip(0) + camera_deltas

# new_tracks[:,p1]=new_tracks[14:15,p1]
# new_visibles[:,p1]=1
# new_tracks = partial_drift(new_tracks, *p1, dx = 10, dy=50, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, *p1, dx = 100, dy=160, t_start = 14, t_end=49)
# new_tracks = partial_drift(new_tracks, 4, dx = -20, dy=30, t_start = 14, t_end=49) #Bottom of penguin
# new_visibles[27:31,[5,6]] = 0 #Blocked by other penguin

# new_tracks = partial_drift(new_tracks, 2, dx = 100, dy=150-60, t_start = 0, t_end=14)
# new_tracks = partial_drift(new_tracks, 3, dx = 0, dy=0, t_start = 0, t_end=14)
# # new_tracks = partial_drift(new_tracks, 0, 3, dx = 5, dy=5, t_start = 0, t_end=14)
# new_tracks[:,[0,1],1]-=30


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Shakycam"

input_video_path = "shakycam.mp4"
prompt = 'a muddy puddle river next to a bunch of trees'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=50

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[0, 162.0, 78.0], [0, 162.0, 78.0]],
    [[0, 619.0, 342.0], [0, 619.0, 342.0]],
    [[42, 633.0, 61.0], [42, 633.0, 61.0]],
    [[42, 207.0, 100.0], [42, 207.0, 100.0]],
    [[42, 478.0, 371.0], [42, 478.0, 371.0]],
    [[48, 660.0, 135.0], [48, 660.0, 135.0]],
    [[5, 328.0, 117.0], [5, 328.0, 117.0]],
    [[48, 342.0, 114.0], [48, 342.0, 114.0]],
    [[48, 480.0, 350.0], [48, 480.0, 350.0]],
    [[48, 143.0, 291.0], [48, 143.0, 291.0]],
    [[48, 469.0, 132.0], [48, 469.0, 132.0]],
    [[15, 501.0, 55.0], [15, 501.0, 55.0]],
    [[15, 565.0, 226.0], [15, 565.0, 226.0]],
    [[15, 441.0, 298.0], [15, 441.0, 298.0]],
    [[15, 460.0, 414.0], [15, 460.0, 414.0]],
    [[15, 158.0, 151.0], [15, 158.0, 151.0]],
    [[15, 288.0, 76.5], [15, 288.0, 76.5]],
    [[15, 232.0, 306.0], [15, 232.0, 306.0]],
    [[15, 77.0, 332.0], [15, 77.0, 332.0]],
    [[3, 135.0, 118.0], [3, 135.0, 118.0]],
    [[3, 104.0, 324.0], [3, 104.0, 324.0]],
    [[3, 351.0, 405.0], [3, 351.0, 405.0]],
    [[3, 277.0, 73.0], [3, 277.0, 73.0]],
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



# Edit copies: positions come from cotracker_tracks, visibility from `visibles`.
new_tracks = cotracker_tracks + 0
new_visibles = visibles + 0

# Smooth the tracks with a moving average over `window` consecutive frames.
# The original summed six shifted slices but divided by 5, which scaled every
# coordinate by 1.2 instead of averaging; dividing by the window size gives the
# actual mean. The result is `window - 1` frames shorter, so it is resampled
# back to 49 frames afterwards.
window = 6
smoothed_len = new_tracks.shape[0] - window + 1
new_tracks = sum(new_tracks[k:k + smoothed_len] for k in range(window)) / window
new_tracks = resize_list_linterp(new_tracks.contiguous(), 49)



rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Arms Up Spinny"

input_video_path = "ArmsUp.mp4"
prompt = 'a woman in a grape field spins around and raises her arms up'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[21, 469.5, 458.0], [21, 469.5, 458.0]],
    [[21, 431.0, 126.0], [21, 431.0, 126.0]],
    [[21, 547.0, 205.0], [21, 547.0, 205.0]],
    [[21, 69.0, 67.0], [21, 69.0, 67.0]],
    [[33, 432.0, 112.0], [33, 432.0, 112.0]],
    [[33, 396.0, 336.0], [33, 396.0, 336.0]],
    [[33, 262.0, 366.0], [33, 262.0, 366.0]],
    [[33, 132.0, 358.0], [33, 132.0, 358.0]],
    
    [[20, 600, 420], [33, 132.0, 358.0]]
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



new_tracks = tracks + 0
new_visibles = visibles + 0



def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks

# Mark point 0 visible on every frame in `visibles`.
# NOTE(review): new_visibles was copied from visibles earlier in this cell, so
# this write (and the one for point 8 below) does not reach new_visibles --
# confirm that editing only the source visibility is intended.
visibles[:,0]=1
# Drift point 0 by a total of -100 in y over frames 0..25.
new_tracks = partial_drift(new_tracks, 0, dy=-100, t_start = 0, t_end=25)

# Point 8 takes its positions from cotracker_tracks (copied via `+ 0`).
new_tracks[:,[8]] = cotracker_tracks[:,[8]] + 0
visibles[:,[8]]=1



# new_tracks = (new_tracks[:-5] + new_tracks[1:-4] + new_tracks[2:-3]  + new_tracks[3:-2] + new_tracks[4:-1] +  new_tracks[5:]) / 5
# new_tracks = resize_list_linterp(new_tracks.contiguous(), 49)



rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Arms Up Spinny"

input_video_path = "ArmsUp.mp4"
prompt = 'a woman in a grape field spins around and raises her arms up'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[21, 469.5, 458.0], [21, 469.5, 458.0]],
    [[21, 431.0, 126.0], [21, 431.0, 126.0]],
    [[21, 547.0, 205.0], [21, 547.0, 205.0]],
    [[21, 69.0, 67.0], [21, 69.0, 67.0]],
    [[33, 432.0, 112.0], [33, 432.0, 112.0]],
    [[33, 396.0, 336.0], [33, 396.0, 336.0]],
    [[33, 262.0, 366.0], [33, 262.0, 366.0]],
    [[33, 132.0, 358.0], [33, 132.0, 358.0]],
    
    [[20, 600, 420], [33, 132.0, 358.0]]
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################



new_tracks = tracks + 0
new_visibles = visibles + 0



def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks

# Mark point 0 visible on every frame in `visibles`.
# NOTE(review): new_visibles was copied from visibles earlier in this cell, so
# this write does not reach new_visibles -- confirm that is intended.
visibles[:,0]=1
# Drift points 0..7 together by a total of -300 in y over frames 0..49
# (point 8 is left out).
new_tracks = partial_drift(new_tracks, 0,1,2,3,4,5,6,7 , dy=-300, t_start = 0, t_end=49)

# new_tracks[:,[8]] = cotracker_tracks[:,[8]] + 0
# visibles[:,[8]]=1



# new_tracks = (new_tracks[:-5] + new_tracks[1:-4] + new_tracks[2:-3]  + new_tracks[3:-2] + new_tracks[4:-1] +  new_tracks[5:]) / 5
# new_tracks = resize_list_linterp(new_tracks.contiguous(), 49)



rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Candle Grab"

input_video_path = "candle_grab.MOV"
prompt = 'an arm reaches out and grabs a jar yankee candle jar on a couch with two candles on it in jars'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[28, 549.0, 189.0], [28, 549.0, 189.0]],
    [[28, 261.0, 258.0], [28, 261.0, 258.0]],
    [[28, 411.0, 346.0], [28, 411.0, 346.0]],
    [[32, 607.0, 363.0], [32, 607.0, 363.0]],
    [[32, 396.0, 205.0], [32, 396.0, 205.0]],
    [[32, 213.0, 242.0], [32, 213.0, 242.0]],
    [[32, 178.0, 419.0], [32, 178.0, 419.0]],
    [[32, 124.0, 68.0], [32, 124.0, 68.0]],
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

visibles[:]=1


new_tracks = tracks + 0
new_visibles = visibles + 0



def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks
# Replace point 3 in `tracks` with the cotracker_tracks version.
# NOTE(review): new_tracks / new_visibles were copied earlier in this cell, so
# the writes to `tracks` and `visibles` here do not reach them -- confirm that
# only the source tracks/visibility are meant to change.
tracks[:,3]=cotracker_tracks[:,3]

# Point 0 visible on all frames (already the case after `visibles[:]=1` at the
# top of this cell); point 3 hidden on frames 0..25.
visibles[:,0]=1
visibles[:26,3]=0
# Drift points 0..7 together by a total of (-100, +100) over frames 0..49.
new_tracks = partial_drift(new_tracks, 0,1,2,3,4,5,6,7 , dy=100, dx=-100, t_start = 0, t_end=49)
# new_tracks = partial_drift(new_tracks, 2,3,4,5 , dx=320, dy=-100, t_start = 0, t_end=36)
# new_tracks = partial_drift(new_tracks, 2,3,4,5 , dx=320, dy=-100, t_start = 0, t_end=36)


# new_tracks[:,[8]] = cotracker_tracks[:,[8]] + 0
# visibles[:,[8]]=1



# new_tracks = (new_tracks[:-5] + new_tracks[1:-4] + new_tracks[2:-3]  + new_tracks[3:-2] + new_tracks[4:-1] +  new_tracks[5:]) / 5
# new_tracks = resize_list_linterp(new_tracks.contiguous(), 49)



rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Candle Grab StopCam"

input_video_path = "candle_grab.MOV"
prompt = 'an arm reaches out and grabs a jar yankee candle jar on a couch with two candles on it in jars'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[28, 549.0, 189.0], [28, 549.0, 189.0]],
    [[28, 261.0, 258.0], [28, 261.0, 258.0]],
    [[28, 411.0, 346.0], [28, 411.0, 346.0]],
    [[32, 607.0, 363.0], [32, 607.0, 363.0]],
    [[32, 396.0, 205.0], [32, 396.0, 205.0]],
    [[32, 213.0, 242.0], [32, 213.0, 242.0]],
    [[32, 178.0, 419.0], [32, 178.0, 419.0]],
    [[32, 124.0, 68.0], [32, 124.0, 68.0]],
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

visibles[:]=1


new_tracks = tracks + 0
new_visibles = visibles + 0



def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Drift the selected points by a total of (dx, dy) between two frames.

    The total offset is divided by ``t_end - t_start`` and passed to
    ``add_drift`` as a rate starting at ``t_start``; a second ``add_drift``
    call with the negated rate starts at ``t_end``.
    NOTE(review): presumably ``add_drift`` accumulates the rate per frame after
    ``t_origin``, so the second call cancels further motion and the points
    hold their final offset -- confirm against ``add_drift``.

    Args:
        tracks: Track data, passed through to ``add_drift``.
        *i: Indices of the points to move.
        dx, dy: Total displacement to apply over the interval.
        t_start: Frame at which the drift begins.
        t_end: Frame at which the drift ends; must differ from ``t_start``.

    Returns:
        The result of the second ``add_drift`` call.

    Raises:
        ValueError: If ``t_end == t_start`` (the rate would be undefined).
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Previously this surfaced as a bare ZeroDivisionError from dx/t_delta.
        raise ValueError(
            f"partial_drift needs t_end != t_start (both are {t_start})"
        )
    rate_x = dx / t_delta
    rate_y = dy / t_delta
    tracks = add_drift(tracks, *i, dx=rate_x, dy=rate_y, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-rate_x, dy=-rate_y, t_origin=t_end, do_before=False)
    return tracks
# Replace point 3 in `tracks` with the cotracker_tracks version.
# NOTE(review): new_tracks / new_visibles were copied earlier in this cell, so
# the writes to `tracks` and `visibles` here do not reach them -- confirm that
# only the source tracks/visibility are meant to change.
tracks[:,3]=cotracker_tracks[:,3]

# Point 0 visible on all frames (already the case after `visibles[:]=1` at the
# top of this cell); point 3 hidden on frames 0..25.
visibles[:,0]=1
visibles[:26,3]=0
# Points passed to uncamera below as its reference tracks.
ref_points = [0,7,6,1]


# new_tracks = partial_drift(new_tracks, 0,1,2,3,4,5,6,7 , dy=100, dx=-100, t_start = 0, t_end=49)
# new_tracks = partial_drift(new_tracks, 2,3,4,5 , dx=320, dy=-100, t_start = 0, t_end=36)
# presumably removes the camera motion estimated from the reference points,
# relative to frame 25 -- see uncamera's definition to confirm
new_tracks = uncamera(new_tracks, new_tracks[:,ref_points], origin_frame=25)

# new_tracks[:,[8]] = cotracker_tracks[:,[8]] + 0
# visibles[:,[8]]=1



# new_tracks = (new_tracks[:-5] + new_tracks[1:-4] + new_tracks[2:-3]  + new_tracks[3:-2] + new_tracks[4:-1] +  new_tracks[5:]) / 5
# new_tracks = resize_list_linterp(new_tracks.contiguous(), 49)



rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Cheerleader"

input_video_path = "Cheerleader2.mp4"
prompt = 'a cheerleader raises up a red pompom happily with energy in a gym'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[40, 150.0, 218.0], [40, 150.0, 218.0]],
    [[40, 243.0, 360.0], [40, 243.0, 360.0]],
    [[40, 346.0, 417.0], [40, 346.0, 417.0]],
    [[40, 439.0, 223.0], [40, 439.0, 223.0]],
    [[40, 447.0, 345.0], [40, 447.0, 345.0]],
    [[40, 342.0, 119.0], [40, 342.0, 119.0]],
    [[40, 470.0, 461.0], [40, 470.0, 461.0]]
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker
# Plain assignment binds the same objects as the cotracker outputs (no copy),
# so the in-place write to visibles below also changes cotracker_visibles.
visibles = cotracker_visibles
tracks = cotracker_tracks

# Force column 1 to 1 (visible) in every row.
visibles[:,1]=1

# Working copies for the edits below; `+ 0` produces new objects (for
# tensors/arrays).
new_tracks = tracks + 0
new_visibles = visibles + 0

# Done after the copy exists. Running this before the assignment above wrote
# into whatever new_visibles was left over from an earlier cell, or raised
# NameError on a fresh kernel.
new_visibles[:,1]=1


def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

new_tracks=partial_drift(new_tracks,0,1,dy=-160,dx=-80,t_end=30)
new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Cheerleader Two"

input_video_path = "Cheerleader1.mp4"
prompt = 'a cheerleader raises up a red pompom happily with energy in a gym'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=20

SEED = rp.millis() % 9999
SEED = 4409

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[42, 269.0, 365.0], [42, 269.0, 365.0]],
    [[42, 461.0, 369.0], [42, 461.0, 369.0]],
    [[42, 366.0, 144.0], [42, 366.0, 144.0]],
    [[42, 370.0, 315.0], [42, 370.0, 315.0]],
    # [[42, 417.0, 215.0], [42, 417.0, 215.0]],
    # [[42, 319.0, 216.5], [42, 319.0, 216.5]],
    [[2, 225.0, 466.0], [2, 225.0, 466.0]],
    [[2, 663.0, 463.0], [2, 663.0, 463.0]],

    [[43, 668.0, 335.0], [43, 668.0, 335.0]],
    [[43, 19.0, 360.0], [43, 19.0, 360.0]]
]

init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker
visibles = cotracker_visibles
tracks = cotracker_tracks

new_tracks = tracks + 0
new_visibles = visibles + 0

new_visibles[:,1]=1
visibles[:,1]=1



def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

new_tracks=partial_drift(new_tracks,1,dy=-300,dx=30,t_start=20,t_end=41)
# new_tracks=partial_drift(new_tracks,0,1,dy=-300,dx=0,t_start=20,t_end=41)
# new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)
new_tracks[:,[0,1,2,3,6],0]+=5

def restore_tracks(new_tracks, tracks,  *i, t_start, t_end):
    """
    Steer the chosen points of new_tracks back onto tracks at frame t_end.

    For each index, the gap between tracks and new_tracks at row t_end is
    measured and handed to partial_drift as the displacement to apply over
    the t_start..t_end window.

    Args:
        new_tracks: Edited tracks to correct.
        tracks: Reference tracks that supply the target position at t_end.
        *i: Point indices to correct; with none given, new_tracks is returned
            as-is.
        t_start: Keyword-only; frame where the correction begins.
        t_end: Keyword-only; frame whose position is matched.

    Returns:
        new_tracks after one partial_drift call per index.
    """
    for point in i:
        # The difference unpacks into its x and y components.
        gap_x, gap_y = tracks[t_end, point] - new_tracks[t_end, point]
        new_tracks = partial_drift(
            new_tracks,
            point,
            dx=gap_x,
            dy=gap_y,
            t_start=t_start,
            t_end=t_end,
        )
    return new_tracks

new_tracks = restore_tracks(new_tracks, tracks, 2,4,5,6,7, t_start=40, t_end=48)

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
#### WORKER
TITLE = "Kids Racing"

input_video_path = "kids_racing.mp4"
prompt = 'kids racing and running on a road with trees on either side and the girl in the red shirt pulls ahead and wins the race'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=12
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 5072

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs = [
    [[21, 267.0, 275.0], [21, 267.0, 275.0]],
    [[21, 521.0, 181.0], [21, 521.0, 181.0]],
    [[21, 397.0, 226.0], [21, 397.0, 226.0]],
    [[21, 549.0, 71.0], [21, 549.0, 71.0]],
    [[21, 143.0, 120.0], [21, 143.0, 120.0]],
    [[21, 288.0, 386.0], [21, 288.0, 386.0]],
    [[21, 516.0, 243.0], [21, 516.0, 243.0]],
    # [[21, 435.0, 200.0], [21, 435.0, 200.0]],
    # [[21, 368.0, 184.0], [21, 368.0, 184.0]],
    # [[21, 322.0, 189.0], [21, 322.0, 189.0]],
    # [[21, 272.0, 189.0], [21, 272.0, 189.0]]
]


init_points = [
    x[0] for x in point_pairs
]


rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker
# Plain assignment binds the same objects as the cotracker outputs (no copy),
# so the in-place write to visibles below also changes cotracker_visibles.
visibles = cotracker_visibles
tracks = cotracker_tracks

# Force column 1 to 1 (visible) in every row.
visibles[:,1]=1

# Working copies for the edits below; `+ 0` produces new objects (for
# tensors/arrays).
new_tracks = tracks + 0
new_visibles = visibles + 0

# Done after the copy exists. Running this before the assignment above wrote
# into whatever new_visibles was left over from an earlier cell, or raised
# NameError on a fresh kernel.
new_visibles[:,1]=1


def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

# new_tracks=partial_drift(new_tracks,0,1,dy=-160,dx=-80,t_end=30)
# new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

# new_tracks  [:,[0,5]] = resize_list_linterp(new_tracks  [:,[0,5]], 100)[:T]
# new_visibles[:,[0,5]] = rp.resize_list     (new_visibles[:,[0,5]], 100)[:T]

# Columns whose timing is remapped below.
FASTKID=[1,6]
# Resize the selected tracks / visibility to 100 entries, then keep the first T.
# NOTE(review): presumably both resize helpers resample along the first
# (frame) axis, so the kept slice covers only the start of the original motion
# spread over the clip -- confirm against the helpers.
# NOTE(review): [FASTKID] wraps the list a second time, so the index is
# [[1, 6]]; the same index is used to read and to write, so the shapes agree.
new_tracks  [:,[FASTKID]] = resize_list_linterp(new_tracks  [:,[FASTKID]], 100)[:T]
new_visibles[:,[FASTKID]] = rp.resize_list     (new_visibles[:,[FASTKID]], 100)[:T]


rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# # del pipe

# #### WORKER
# TITLE = "Splash"

# input_video_path = "spash_slow.mp4"
# prompt = 'kids jumping in a pool and the dad reaches out to catch him as he splashes'

# latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# guidance_scale=12
# num_inference_steps=39

# SEED = rp.millis() % 9999
# # SEED = 5072

# rp.seed_all(SEED)
# TITLE = f'[Seed {SEED}] {TITLE}'
# init_input_video()

# ##############################

# #In TXY form

# point_pairs =[
#     [[0, 135.0, 156.0], [0, 135.0, 156.0]],
#     # [[0, 135.0, 234.0], [0, 135.0, 234.0]],
#     [[0, 324.0, 189.0], [0, 324.0, 189.0]],
#     # [[0, 336.0, 273.0], [0, 336.0, 273.0]],
#     [[0, 444.5, 173.0+40], [0, 444.5, 173.0]],
#     [[0, 447.0, 277.0], [0, 447.0, 277.0]],
#     [[30, 312.0, 381.0], [30, 312.0, 381.0]],
#     # [[30, 149.0, 324.0], [30, 149.0, 324.0]],
#     [[30, 616.0, 110.0], [30, 616.0, 110.0]],
#     # [[30, 100.0, 114.0-40], [30, 109.0, 114.0]]
# ]


# init_points = [
#     x[0] for x in point_pairs
# ]

# K1=[0]
# K2=[1]#3
# K3=[2,3,4]#5
# CAM=[5,6]

# rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

# init_tracks()

# ##############################

# #Made without the good tracker
# visibles = cotracker_visibles
# tracks = cotracker_tracks

# new_tracks = tracks + 0
# new_visibles = visibles + 0

# new_visibles[:,1]=1
# visibles[:,1]=1



# def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
#     t_delta = t_end-t_start
#     tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False) 
#     tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False) 
#     return tracks

# # new_tracks=partial_drift(new_tracks,0,1,dy=-160,dx=-80,t_end=30)
# # new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

# # new_tracks  [:,[0,5]] = resize_list_linterp(new_tracks  [:,[0,5]], 100)[:T]
# # new_visibles[:,[0,5]] = rp.resize_list     (new_visibles[:,[0,5]], 100)[:T]


# new_tracks = uncamera(new_tracks, tracks[:,CAM])

# # new_tracks  [:,K3] = resize_list_linterp(new_tracks  [:,K3], 100)[:T]
# # new_visibles[:,K3] = rp.resize_list     (new_visibles[:,K3], 100)[:T]

# new_tracks  [:,K3+K1] = new_tracks  [:1,K3+K1]
# new_visibles[:,K3+K1] = new_visibles[:1,K3+K1]

# new_tracks  [:,K2] = resize_list_linterp(new_tracks  [20:,K2], T)
# new_visibles[:,K2] = rp.resize_list     (new_visibles[20:,K2], T)

# new_tracks = recamera(new_tracks, tracks[:,CAM])

# rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
# display_tracks_diff()

# ##############################

# init_blob_videos()
# init_sample()
# init_mp4_files()
# do_diffusion()
# save_diffusion_results()
# display_tracks_diff(output_video[:,:,:,:3])
# rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "Kittycat Fish"

input_video_path = "KittyFish.mp4"
prompt = 'the cat turns around and walks away. a cat watches a goldfish in a bowl as the goldfish swims around. the cat then gets up and walks away. The cat walks away. the cat gets up and walks away from the fish.'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=16
num_inference_steps=30

SEED = rp.millis() % 9999
SEED = 2

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs =[
    [[0, 441.0, 176.0], [0, 441.0, 176.0]],
    [[0, 95.0, 308.0], [0, 95.0, 308.0]],
    [[0, 58.0, 400.5], [0, 58.0, 400.5]],
    [[0, 522.0, 294.0], [0, 522.0, 294.0]],
    [[6, 219.0, 275.0], [6, 219.0, 275.0]],
    [[22, 254.0, 210.0], [22, 254.0, 210.0]],
    [[39, 315.0, 251.0], [39, 315.0, 251.0]],
    [[17, 136.0, 205.0], [17, 136.0, 205.0]]
]


init_points = [
    x[0] for x in point_pairs
]

# K1=[0]
# K2=[1]#3
# K3=[2,3,4]#5
# CAM=[5,6]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
visibles = cotracker_visibles
tracks = cotracker_tracks

new_tracks = tracks + 0
new_visibles = visibles + 0

new_visibles[:,1]=1
visibles[:,1]=1



def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

# new_tracks=partial_drift(new_tracks,0,1,dy=-160,dx=-80,t_end=30)
# new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

new_tracks = partial_drift(new_tracks, 0, dx=100, t_start=0, t_end=T)
new_tracks = partial_drift(new_tracks, 0,3, dx=200, t_start=20, t_end=T)

# new_tracks  [:,[0,5]] = resize_list_linterp(new_tracks  [:,[0,5]], 100)[:T]
# new_visibles[:,[0,5]] = rp.resize_list     (new_visibles[:,[0,5]], 100)[:T]


# new_tracks = uncamera(new_tracks, tracks[:,CAM])

# new_tracks  [:,K3] = resize_list_linterp(new_tracks  [:,K3], 100)[:T]
# new_visibles[:,K3] = rp.resize_list     (new_visibles[:,K3], 100)[:T]

# new_tracks  [:,K3+K1] = new_tracks  [:1,K3+K1]
# new_visibles[:,K3+K1] = new_visibles[:1,K3+K1]

# new_tracks  [:,K2] = resize_list_linterp(new_tracks  [20:,K2], T)
# new_visibles[:,K2] = rp.resize_list     (new_visibles[20:,K2], T)

# new_tracks = recamera(new_tracks, tracks[:,CAM])

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "Bichon + Corgi : Bichon Stays Behind"

input_video_path = "Borgi.mp4"
prompt = 'A bichon frise stays behind, while a corgi runs on the sidewalk, chasing a bone.'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=16
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 2

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs =[
    [[0, 215.0, 171.0], [0, 215.0, 171.0]],
    [[0, 430.0, 302.0], [0, 430.0, 302.0]],
    [[48, 180.0, 152.0], [48, 180.0, 152.0]],
    [[48, 375.0, 432.0], [48, 375.0, 432.0]],
    [[48, 340.0, 180.0], [48, 340.0, 180.0]],
    [[14, 612.0, 160.0], [14, 612.0, 160.0]],
    [[14, 650.0, 292.0], [14, 650.0, 292.0]]
]


init_points = [
    x[0] for x in point_pairs
]

# K1=[0]
# K2=[1]#3
# K3=[2,3,4]#5
# CAM=[5,6]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
# Plain assignment binds the same objects as the cotracker outputs (no copy),
# so the in-place write to visibles below also changes cotracker_visibles.
visibles = cotracker_visibles
tracks = cotracker_tracks



# Working copies for the edits below; `+ 0` produces new objects (for
# tensors/arrays).
new_tracks = tracks + 0
new_visibles = visibles + 0

# Jitter every coordinate with standard-normal noise scaled by 2
# (presumably in pixels -- confirm the track units).
new_tracks += torch.randn_like(new_tracks) * 2

# Force column 1 to 1 (visible) in both the copy and the original.
new_visibles[:,1]=1
visibles[:,1]=1

# Zero column 2 of the copy from row 25 onward.
new_visibles[25:,[2]]=0


def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

# new_tracks=partial_drift(new_tracks,0,1,dy=-160,dx=-80,t_end=30)
# new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

# new_tracks = partial_drift(new_tracks, 0, dx=100, t_start=0, t_end=T)
# new_tracks = partial_drift(new_tracks, 0,3, dx=200, t_start=20, t_end=T)
new_tracks = partial_drift(new_tracks, 0, dx=-200, dy=-200, t_start=25, t_end=T)

# new_tracks  [:,[0,5]] = resize_list_linterp(new_tracks  [:,[0,5]], 100)[:T]
# new_visibles[:,[0,5]] = rp.resize_list     (new_visibles[:,[0,5]], 100)[:T]


# new_tracks = uncamera(new_tracks, tracks[:,CAM])

# new_tracks  [:,K3] = resize_list_linterp(new_tracks  [:,K3], 100)[:T]
# new_visibles[:,K3] = rp.resize_list     (new_visibles[:,K3], 100)[:T]

# new_tracks  [:,K3+K1] = new_tracks  [:1,K3+K1]
# new_visibles[:,K3+K1] = new_visibles[:1,K3+K1]

# new_tracks  [:,K2] = resize_list_linterp(new_tracks  [20:,K2], T)
# new_visibles[:,K2] = rp.resize_list     (new_visibles[20:,K2], T)

# new_tracks = recamera(new_tracks, tracks[:,CAM])

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "Bichon + Corgi : Bichon Stay Behind"

input_video_path = "Borgi.mp4"
prompt = 'A bichon frise and a corgi run on the sidewalk, chasing a bone. the bichon breaks to the left.'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=16
num_inference_steps=20

SEED = rp.millis() % 9999
SEED = 7945

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs =[
    [[0, 215.0, 171.0], [0, 215.0, 171.0]],
    [[0, 430.0, 302.0], [0, 430.0, 302.0]],
    [[48, 180.0, 152.0], [48, 180.0, 152.0]],
    [[48, 375.0, 432.0], [48, 375.0, 432.0]],
    [[48, 340.0, 180.0], [48, 340.0, 180.0]],
    [[14, 612.0, 160.0], [14, 612.0, 160.0]],
    [[14, 650.0, 292.0], [14, 650.0, 292.0]],

    # [[41, 363.0, 450.0], [41, 363.0, 450.0]],
    # [[34, 383.0, 447.0], [34, 383.0, 447.0]],
]


init_points = [
    x[0] for x in point_pairs
]

# K1=[0]
# K2=[1]#3
# K3=[2,3,4]#5
# CAM=[5,6]

STAY = [0]
CAM=[2,5,6]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
visibles = cotracker_visibles
tracks = cotracker_tracks



new_tracks = tracks + 0
new_visibles = visibles + 0

new_tracks += torch.randn_like(new_tracks) * 3.5

new_visibles[:,1]=1
visibles[:,1]=1

new_visibles[25:,[2]]=0


def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

# new_tracks=partial_drift(new_tracks,0,1,dy=-160,dx=-80,t_end=30)
# new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

# new_tracks = partial_drift(new_tracks, 0, dx=100, t_start=0, t_end=T)
# new_tracks = partial_drift(new_tracks, 0,3, dx=200, t_start=20, t_end=T)
# new_tracks = partial_drift(new_tracks, 0, dx=-200, dy=-200, t_start=25, t_end=T)
# 
# new_tracks  [:,[0,5]] = resize_list_linterp(new_tracks  [:,[0,5]], 100)[:T]
# new_visibles[:,[0,5]] = rp.resize_list     (new_visibles[:,[0,5]], 100)[:T]


# Hold the STAY columns at their first-row position, in three steps.
# NOTE(review): presumably uncamera removes the motion of the CAM reference
# columns and recamera puts it back -- confirm against those helpers.
new_tracks[:,STAY] = uncamera(new_tracks[:,STAY], tracks[:,CAM])
# NOTE(review): presumably a blend weight of 0 returns the first argument, so
# row 0 is broadcast over every row -- confirm against rp.blend.
new_tracks[:,STAY] = rp.blend(new_tracks[:1,STAY], new_tracks[:,STAY], 0)
new_tracks[:,STAY] = recamera(new_tracks[:,STAY], tracks[:,CAM])

# new_tracks  [:,K3] = resize_list_linterp(new_tracks  [:,K3], 100)[:T]
# new_visibles[:,K3] = rp.resize_list     (new_visibles[:,K3], 100)[:T]

# new_tracks  [:,K3+K1] = new_tracks  [:1,K3+K1]
# new_visibles[:,K3+K1] = new_visibles[:1,K3+K1]

# new_tracks  [:,K2] = resize_list_linterp(new_tracks  [20:,K2], T)
# new_visibles[:,K2] = rp.resize_list     (new_visibles[20:,K2], T)

# new_tracks = recamera(new_tracks, tracks[:,CAM])

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "Bichon + Corgi : Corgi Stay Behind"

input_video_path = "Borgi.mp4"
prompt = 'A bichon frise and a corgi run on the sidewalk, chasing a bone. the bichon breaks to the left.'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=16
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 7945
SEED = 9995

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs =[
    [[0, 215.0, 171.0], [0, 215.0, 171.0]],
    [[0, 430.0, 302.0], [0, 430.0, 302.0]],
    [[48, 180.0, 152.0], [48, 180.0, 152.0]],
    [[48, 375.0, 432.0], [48, 375.0, 432.0]],
    [[48, 340.0, 180.0], [48, 340.0, 180.0]],
    [[14, 612.0, 160.0], [14, 612.0, 160.0]],
    [[14, 650.0, 292.0], [14, 650.0, 292.0]],

#     [[41, 363.0, 450.0], [41, 363.0, 450.0]],
#     [[34, 383.0, 447.0], [34, 383.0, 447.0]],
]


init_points = [
    x[0] for x in point_pairs
]

# K1=[0]
# K2=[1]#3
# K3=[2,3,4]#5
# CAM=[5,6]

STAY = [1]
CAM=[2,5,6]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
visibles = cotracker_visibles
tracks = cotracker_tracks



new_tracks = tracks + 0
new_visibles = visibles + 0

new_tracks += torch.randn_like(new_tracks) * 1

new_visibles[:,1]=1
visibles[:,1]=1

new_visibles[25:,[2]]=0


def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

# new_tracks=partial_drift(new_tracks,0,1,dy=-160,dx=-80,t_end=30)
# new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

# new_tracks = partial_drift(new_tracks, 0, dx=100, t_start=0, t_end=T)
# new_tracks = partial_drift(new_tracks, 0,3, dx=200, t_start=20, t_end=T)
# new_tracks = partial_drift(new_tracks, 0, dx=-200, dy=-200, t_start=25, t_end=T)
# 
# new_tracks  [:,[0,5]] = resize_list_linterp(new_tracks  [:,[0,5]], 100)[:T]
# new_visibles[:,[0,5]] = rp.resize_list     (new_visibles[:,[0,5]], 100)[:T]


# Hold the STAY columns at their first-row position, in three steps.
# NOTE(review): presumably uncamera removes the motion of the CAM reference
# columns and recamera puts it back -- confirm against those helpers.
new_tracks[:,STAY] = uncamera(new_tracks[:,STAY], tracks[:,CAM])
# NOTE(review): presumably a blend weight of 0 returns the first argument, so
# row 0 is broadcast over every row -- confirm against rp.blend.
new_tracks[:,STAY] = rp.blend(new_tracks[:1,STAY], new_tracks[:,STAY], 0)
new_tracks[:,STAY] = recamera(new_tracks[:,STAY], tracks[:,CAM])

# new_tracks  [:,K3] = resize_list_linterp(new_tracks  [:,K3], 100)[:T]
# new_visibles[:,K3] = rp.resize_list     (new_visibles[:,K3], 100)[:T]

# new_tracks  [:,K3+K1] = new_tracks  [:1,K3+K1]
# new_visibles[:,K3+K1] = new_visibles[:1,K3+K1]

# new_tracks  [:,K2] = resize_list_linterp(new_tracks  [20:,K2], T)
# new_visibles[:,K2] = rp.resize_list     (new_visibles[20:,K2], T)

# new_tracks = recamera(new_tracks, tracks[:,CAM])

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "Blacks Swan Go Faster"

input_video_path = "blackswan.mp4"
prompt = ''

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=6
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 7945
SEED = 9995

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs =[
    [[0, 180.0, 305.0], [0, 180.0, 305.0]],
    [[0, 416.0, 316.0], [0, 416.0, 316.0]],
    [[0, 364.0, 98.0], [0, 364.0, 98.0]],
    [[0, 541.0, 18.0], [0, 541.0, 18.0]],
    [[25, 361.0, 44.0], [25, 361.0, 44.0]],
    [[42, 423.0, 37.0], [42, 423.0, 37.0]],
    [[48, 612.0, 39.0], [48, 612.0, 39.0]]
]


init_points = [
    x[0] for x in point_pairs
]

# K1=[0]
# K2=[1]#3
# K3=[2,3,4]#5
# CAM=[5,6]

STAY = [1]
CAM=[3]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
visibles = cotracker_visibles
tracks = cotracker_tracks



new_tracks = tracks + 0
new_visibles = visibles + 0

new_tracks += torch.randn_like(new_tracks) * 1

new_visibles[:,1]=1
visibles[:,1]=1

# new_visibles[25:,[2]]=0


def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

new_tracks=partial_drift(new_tracks,0,1,2,dy=-60,dx=300,t_end=T)
# new_tracks=partial_drift(new_tracks,0,dy=60,dx=160,t_end=49,t_start=30)

# new_tracks = partial_drift(new_tracks, 0, dx=100, t_start=0, t_end=T)
# new_tracks = partial_drift(new_tracks, 0,3, dx=200, t_start=20, t_end=T)
# new_tracks = partial_drift(new_tracks, 0, dx=-200, dy=-200, t_start=25, t_end=T)
# 
# new_tracks  [:,[0,5]] = resize_list_linterp(new_tracks  [:,[0,5]], 100)[:T]
# new_visibles[:,[0,5]] = rp.resize_list     (new_visibles[:,[0,5]], 100)[:T]


# new_tracks[:,STAY] = uncamera(new_tracks[:,STAY], tracks[:,CAM])
# new_tracks[:,STAY] = rp.blend(new_tracks[:1,STAY], new_tracks[:,STAY], 0)
# new_tracks[:,STAY] = recamera(new_tracks[:,STAY], tracks[:,CAM])

# new_tracks  [:,K3] = resize_list_linterp(new_tracks  [:,K3], 100)[:T]
# new_visibles[:,K3] = rp.resize_list     (new_visibles[:,K3], 100)[:T]

# new_tracks  [:,K3+K1] = new_tracks  [:1,K3+K1]
# new_visibles[:,K3+K1] = new_visibles[:1,K3+K1]

# new_tracks  [:,K2] = resize_list_linterp(new_tracks  [20:,K2], T)
# new_visibles[:,K2] = rp.resize_list     (new_visibles[20:,K2], T)

# new_tracks = recamera(new_tracks, tracks[:,CAM])

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "Blacks Freeze Camera"

input_video_path = "blackswan.mp4"
prompt = 'A black swan swims through a river'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=3
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 7945
# SEED = 9995

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs =[
    [[0, 180.0, 305.0], [0, 180.0, 305.0]],
    [[0, 416.0, 316.0], [0, 416.0, 316.0]],
    [[0, 364.0, 98.0], [0, 364.0, 98.0]],
    [[0, 541.0, 18.0], [0, 541.0, 18.0]],
    [[25, 361.0, 44.0], [25, 361.0, 44.0]],
    [[42, 423.0, 37.0], [42, 423.0, 37.0]],
    [[48, 612.0, 39.0], [48, 612.0, 39.0]],
    [[9, 650.5, 268.5], [9, 650.5, 268.5]],
    [[19, 631.0, 124.0], [19, 631.0, 124.0]],
    [[30, 297.0, 163.0], [30, 297.0, 163.0]],
    [[30, 64.0, 194.0], [30, 64.0, 194.0]],
    [[30, 646.0, 112.0], [30, 646.0, 112.0]],
    [[40, 641.0, 213.0], [40, 641.0, 213.0]],
    [[40, 273.0, 450.0], [40, 273.0, 450.0]],
    [[40, 165.0, 252.0], [40, 165.0, 252.0]],
    [[40, 44.0, 352.0], [40, 44.0, 352.0]],
    [[40, 495.0, 310.0], [40, 495.0, 310.0]],
    [[16, 511.0, 335.0], [16, 511.0, 335.0]],
    [[16, 603.0, 421.0], [16, 603.0, 421.0]],
    [[16, 78.0, 449.0], [16, 78.0, 449.0]],
    [[16, 307.0, 326.0], [16, 307.0, 326.0]],
    [[16, 219.0, 369.0], [16, 219.0, 369.0]],
    [[16, 371.0, 350.0], [16, 371.0, 350.0]],
    [[16, 448.5, 301.5], [16, 448.5, 301.5]],
    [[16, 417.0, 224.0], [16, 417.0, 224.0]],
    [[16, 399.0, 142.0], [16, 399.0, 142.0]],
]


init_points = [
    x[0] for x in point_pairs
]

SWAN = [0,1,2,20,21,22,23,24,25]
BG=[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]
CAM=[3]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
visibles = cotracker_visibles
tracks = cotracker_tracks

new_tracks = tracks + 0
new_visibles = visibles + 0


new_visibles[:,1]=1
visibles[:,1]=1

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

# NOTE(review): presumably uncamera removes the motion of the CAM reference
# column, with T-1 (the last frame) as the origin frame -- confirm against
# uncamera.
new_tracks[:,:] = uncamera(new_tracks[:,:], tracks[:,CAM], T-1)

# Pin every BG column to its last-row position: the single-row slice is
# broadcast across all rows.
new_tracks[:,BG] = new_tracks[-1:,BG]
# new_visibles[:,BG]=1

# new_tracks += torch.randn_like(new_tracks) * 1

# new_tracks[:,SWAN] = rp.blend(new_tracks[:1,SWAN], new_tracks[:,STAY], 0)
# new_tracks[:,SWAN] = recamera(new_tracks[:,SWAN], tracks[:,CAM])

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "Blacks Freeze Camera"

input_video_path = "blackswan.mp4"
prompt = 'A black swan swims through a river'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=0
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 7945
# SEED = 9995
SEED = 1515

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form

point_pairs =[
    [[0, 180.0, 305.0], [0, 180.0, 305.0]],
    [[0, 416.0, 316.0], [0, 416.0, 316.0]],
    [[0, 364.0, 98.0], [0, 364.0, 98.0]],
    [[0, 541.0, 18.0], [0, 541.0, 18.0]],
    [[25, 361.0, 44.0], [25, 361.0, 44.0]],
    [[42, 423.0, 37.0], [42, 423.0, 37.0]],
    [[48, 612.0, 39.0], [48, 612.0, 39.0]],
    [[9, 650.5, 268.5], [9, 650.5, 268.5]],
    [[19, 631.0, 124.0], [19, 631.0, 124.0]],
    [[30, 297.0, 163.0], [30, 297.0, 163.0]],
    [[30, 64.0, 194.0], [30, 64.0, 194.0]],
    [[30, 646.0, 112.0], [30, 646.0, 112.0]],
    [[40, 641.0, 213.0], [40, 641.0, 213.0]],
    [[40, 273.0, 450.0], [40, 273.0, 450.0]],
    [[40, 165.0, 252.0], [40, 165.0, 252.0]],
    [[40, 44.0, 352.0], [40, 44.0, 352.0]],
    [[40, 495.0, 310.0], [40, 495.0, 310.0]],
    [[16, 511.0, 335.0], [16, 511.0, 335.0]],
    [[16, 603.0, 421.0], [16, 603.0, 421.0]],
    [[16, 78.0, 449.0], [16, 78.0, 449.0]],
    [[16, 307.0, 326.0], [16, 307.0, 326.0]],
    [[16, 219.0, 369.0], [16, 219.0, 369.0]],
    [[16, 371.0, 350.0], [16, 371.0, 350.0]],
    [[16, 448.5, 301.5], [16, 448.5, 301.5]],
    [[16, 417.0, 224.0], [16, 417.0, 224.0]],
    [[16, 399.0, 142.0], [16, 399.0, 142.0]],
]


init_points = [
    x[0] for x in point_pairs
]

SWAN = [0,1,2,20,21,22,23,24,25]
BG=[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]
CAM=[3]

rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
visibles = cotracker_visibles
tracks = cotracker_tracks

new_tracks = tracks + 0
new_visibles = visibles + 0


new_visibles[:,1]=1
visibles[:,1]=1

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """
    Drift the selected points by a total of (dx, dy) between t_start and t_end.

    Issues two add_drift calls: one from t_start at a per-frame rate of
    (dx, dy) / (t_end - t_start), and an opposite one from t_end.
    NOTE(review): presumably add_drift keeps drifting after t_origin when
    do_before=False, so the second call cancels the first from t_end on and
    the points keep their (dx, dy) offset -- confirm against add_drift.

    Args:
        tracks: Track data handed to add_drift.
        *i: Point indices, forwarded to add_drift unchanged.
        dx: Total x displacement over the window.
        dy: Total y displacement over the window.
        t_start: Frame where the drift begins.
        t_end: Frame where the drift stops; must differ from t_start.

    Returns:
        The result of the second add_drift call.

    Raises:
        ZeroDivisionError: If t_end == t_start, which includes calling with
            both defaults.
    """
    t_delta = t_end - t_start
    if t_delta == 0:
        # Checked up front because a tensor-valued dx/dy (e.g. a torch scalar)
        # divides by zero to inf/nan without raising, which would silently
        # corrupt the tracks instead of failing.
        raise ZeroDivisionError(
            f"partial_drift needs t_end != t_start (got t_start={t_start}, t_end={t_end})"
        )
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks

# NOTE(review): presumably uncamera removes the motion of the CAM reference
# column, with T-1 (the last frame) as the origin frame -- confirm against
# uncamera.
new_tracks[:,:] = uncamera(new_tracks[:,:], tracks[:,CAM], T-1)

# Pin every BG column to its last-row position: the single-row slice is
# broadcast across all rows.
new_tracks[:,BG] = new_tracks[-1:,BG]

#ZOOM OUT
# Scale every track position by 0.65 about the fixed point (mx, my):
# subtract the centre, shrink, add the centre back. Channel 0 is x, channel 1
# is y.
mx=600
my=250
# `+ 0` copies, so the arithmetic below does not touch new_tracks until the
# final rebind.
deltas = new_tracks+0
deltas[:,:,0]-=mx
deltas[:,:,1]-=my
deltas*=.65
deltas[:,:,0]+=mx
deltas[:,:,1]+=my
new_tracks=deltas

#End on visible
# For each background point and frame: if the point is currently hidden and
# its ORIGINAL position (tracks, not the zoomed-out new_tracks) lies outside
# the W x H frame, mark it visible in new_visibles.
for bg in BG:
    for i in reversed(range(T)):
        v=new_visibles[i,bg]
        x,y=tracks[i,bg]
        # The bounds test is parenthesised so `not v` guards all four
        # comparisons. Without the parentheses `and` binds tighter than `or`,
        # so the guard applied to `x<0` only. For 0/1 visibility the outcome
        # is the same; the write is simply skipped when already visible.
        if not v and (x<0 or y<0 or x>=W or y>=H):
            #MAKE OUT OF BOUNDS VISIBLE
            new_visibles[i,bg]=1

        
# new_visibles[:,BG]=1

# new_tracks += torch.randn_like(new_tracks) * 1

# new_tracks[:,SWAN] = rp.blend(new_tracks[:1,SWAN], new_tracks[:,STAY], 0)
# new_tracks[:,SWAN] = recamera(new_tracks[:,SWAN], tracks[:,CAM])

rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

#### WORKER
TITLE = "City Biker"

input_video_path = "CityBiker.mp4"
prompt = 'An aerial view of a bicyclist in a blue shirt a rides the bike viewed from above. '#It starts with the biker on the left, with three cars on the right. As the video goes on, the cars drive out of view of the camera and the biker is still in frame'

latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=20
num_inference_steps=20

SEED = rp.millis() % 9999
# SEED = 7945
# SEED = 9995
# SEED = 1515

rp.seed_all(SEED)
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form
# Each entry is a pair of [t, x, y] points. In this cell both halves of every
# pair are identical, and only the first half is used (see init_points below).
# Later code addresses these points by list position (BIKER=[0,1,2,3,4]
# presumably refers to the first five), so do not reorder or remove entries
# without updating those indices.

point_pairs =[
    [[0, 17.0, 368.0], [0, 17.0, 368.0]],
    [[35, 124.0, 398.0], [35, 124.0, 398.0]],

    [[48, 267.0, 384.0], [48, 267.0, 384.0]],
    [[48, 270.0, 433.0], [48, 270.0, 433.0]],
    [[29, 24.0, 433.0], [29, 24.0, 433.0]],

    
    [[11, 355.0, 206.0], [11, 355.0, 206.0]],
    [[11, 462.0, 106.0], [11, 462.0, 106.0]],
    [[48, 74.0, 255.0], [48, 74.0, 255.0]],
    [[48, 265.0, 259.0], [48, 265.0, 259.0]],
    [[48, 625.0, 325.0], [48, 625.0, 325.0]],
    [[5, 458.0, 212.0], [5, 458.0, 212.0]],

    #EXTRAS
    [[5, 525.0, 102.0], [5, 525.0, 102.0]],
    [[5, 313.0, 355.0], [5, 313.0, 355.0]],
    [[5, 112.0, 118.0], [5, 112.0, 118.0]],
    # [[5, 91.0, 236.0], [5, 91.0, 236.0]],
    [[26, 169.5, 164.0], [26, 169.5, 164.0]],
    [[26, 107.5, 297.0], [26, 107.5, 297.0]],
    [[26, 657.0, 322.0], [26, 657.0, 322.0]],
    # [[26, 627.0, 57.0], [26, 627.0, 57.0]],
    [[26, 500.0, 68.0], [26, 500.0, 68.0]],
    [[26, 23.0, 101.0], [26, 23.0, 101.0]],
    [[26, 153.0, 63.0], [26, 153.0, 63.0]],
    [[26, 422.0, 54.0], [26, 422.0, 54.0]],
    [[26, 497.0, 218.0], [26, 497.0, 218.0]],
    # [[14, 537.0, 265.0], [14, 537.0, 265.0]],
    [[10, 593.0, 360.0], [10, 593.0, 360.0]],
    [[10, 485.0, 243.0], [10, 485.0, 243.0]],
    [[13, 446.0, 222.0], [13, 446.0, 222.0]],
    # [[30, 561.5, 240.0], [30, 561.5, 240.0]],
    # [[14, 454.0, 216.0], [14, 454.0, 216.0]],
    # [[33, 237.0, 83.0], [33, 237.0, 83.0]],
    # [[48, 105.0, 287.0], [48, 105.0, 287.0]],
    # [[48, 343.0, 341.0], [48, 343.0, 341.0]]
]


# Keep only the first point of every pair.
init_points = [
    x[0] for x in point_pairs
]

# CAM=[3]

# Show the settings that define this sample, then run the tracker setup
# (init_tracks is defined elsewhere; it presumably consumes init_points and
# produces the cotracker_* globals used further down -- confirm).
rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
# Use the cotracker_* results (globals defined elsewhere) as the source data.
visibles = cotracker_visibles
tracks = cotracker_tracks

# "+0" yields copies, so the edits below go into new_tracks/new_visibles and
# leave tracks/visibles as they are.
new_tracks = tracks + 0
new_visibles = visibles + 0


# new_visibles[:,1]=1
# visibles[:,1]=1

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Apply a drift to the selected tracks that is confined to a time window.

    Two ``add_drift`` calls are issued: one anchored at ``t_start`` with a
    per-frame rate of ``(dx/t_delta, dy/t_delta)`` and an opposite one
    anchored at ``t_end``, where ``t_delta = t_end - t_start``.
    Presumably the net effect is that the tracks move by ``(dx, dy)`` in
    total between ``t_start`` and ``t_end`` and keep that offset afterwards
    -- confirm against ``add_drift``, which is defined elsewhere.

    Args:
        tracks: Track data, forwarded to ``add_drift``.
        *i: Track indices, forwarded to ``add_drift`` unchanged.
        dx: Total x offset spread across the window.
        dy: Total y offset spread across the window.
        t_start: Frame passed as ``t_origin`` of the first drift.
        t_end: Frame passed as ``t_origin`` of the cancelling drift.

    Returns:
        Whatever the second ``add_drift`` call returns.

    Raises:
        ZeroDivisionError: If ``t_end == t_start`` (which includes the
            defaults), because the per-frame rate is undefined.
    """
    t_delta = t_end-t_start
    if t_delta == 0:
        # Fail loudly with a useful message. Plain numbers would raise a bare
        # "division by zero" here; array scalars would silently yield inf/nan
        # rates and corrupt the tracks.
        raise ZeroDivisionError(
            "partial_drift needs t_end != t_start "
            "(got t_start=%r, t_end=%r)" % (t_start, t_end)
        )
    # Start drifting at t_start ...
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    # ... and add the opposite drift from t_end onwards.
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks


# Track indices treated as the biker (presumably the first five entries of
# init_points -- confirm the tracker keeps that order).
BIKER=[0,1,2,3,4]


# Force the biker tracks to be visible in every frame.
new_visibles[:,BIKER]=1

# The edits below are cumulative: each one operates on the result of the
# previous one, so their order matters.
new_tracks[:,BIKER,0] += 100 #Shift biker to the right
new_tracks = partial_drift(new_tracks, *BIKER, dx=100, dy=0, t_start=0, t_end=43)
#Make them less spaced out
# new_tracks[:33,BIKER] = (new_tracks[33:34,BIKER] - new_tracks[33:34,BIKER].mean(1,keepdim=True)) + new_tracks[:33,BIKER].mean(1,keepdim=True)
# Only track 0 gets this extra x drift.
new_tracks = partial_drift(new_tracks, 0, dx=-30, dy=0, t_start=0, t_end=43) #Parallax
new_tracks = partial_drift(new_tracks, *BIKER, dx=-60, dy=0, t_start=28, t_end=T) #Dont go too far
# Vertical drift of -100 in y spread over frames 0..T.
new_tracks = partial_drift(new_tracks, *BIKER, dx=0, dy=-100, t_start=0, t_end=T)

new_tracks[:,BIKER,0] += 150 #Shift biker to the right
new_tracks = partial_drift(new_tracks, *BIKER, dx=-150, dy=0, t_start=21, t_end=39)

# new_tracks += torch.randn_like(new_tracks) * .125

# new_tracks[:,:] = uncamera(new_tracks[:,:], tracks[:,CAM], T-1)

# new_tracks[:,BG] = new_tracks[-1:,BG]

# #ZOOM OUT
# mx=600
# my=250
# deltas = new_tracks+0
# deltas[:,:,0]-=mx
# deltas[:,:,1]-=my
# deltas*=.65
# deltas[:,:,0]+=mx
# deltas[:,:,1]+=my
# new_tracks=deltas

# NOTE(review): if the disabled loop below is re-enabled, mind the precedence
# in its condition: `not v` only applies to `x<0` because `and` binds tighter
# than `or`.
# #End on visible
# for bg in BG:
#     for i in reversed(range(T)):
#         v=new_visibles[i,bg]
#         x,y=tracks[i,bg]
#         if not v and x<0 or y<0 or x>=W or y>=H:
#             #MAKE OUT OF BOUNDS VISIBLE
#             new_visibles[i,bg]=1

# Preview the edited tracks drawn over the input video, then show the
# before/after comparison (both via notebook helpers defined elsewhere).
rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Generation stage. These helpers are defined elsewhere in the notebook and
# take no arguments, so they presumably read the globals configured above
# (prompt, new_tracks, new_visibles, ...) -- confirm before reordering.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
# output_video is not assigned in this cell; presumably one of the calls above
# sets it. Only the first three entries of the last axis are passed to the
# diff view (presumably RGB -- confirm).
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)

In [None]:
# del pipe

# Per-sample configuration for the "Spinning Ballerina" example (titled as a
# failure case).
# NOTE(review): the pre-run hook at the top of the notebook only skips cells
# whose stripped text starts with '#WORKER'; this cell starts with '# del pipe'
# and the marker below is '#### WORKER', so that prefix test does not match
# here -- confirm that is intended.
#### WORKER
TITLE = "[FAILURE] Spinning Ballerina"

input_video_path = "MakeNoSpin.mp4"
# NOTE(review): the trailing comment on the prompt line is commented-out prompt
# text carried over from the City Biker cell; it does not describe this video.
prompt = 'A spinning ballerina '#It starts with the biker on the left, with three cars on the right. As the video goes on, the cars drive out of view of the camera and the biker is still in frame'

# Sampling settings; these are not used in this cell, so they are presumably
# read as globals by the generation helpers -- confirm.
latent_conditioning_dropout = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
guidance_scale=5
num_inference_steps=20

# Pick a seed in 0..9998 from rp.millis(); uncomment one of the fixed values
# below to pin the seed instead.
SEED = rp.millis() % 9999
# SEED = 7945
# SEED = 9995
# SEED = 1515

rp.seed_all(SEED)
# Prefix the title with the seed that was used.
TITLE = f'[Seed {SEED}] {TITLE}'
init_input_video()

##############################

#In TXY form
# Each entry is a pair of [t, x, y] points. In this cell both halves of every
# pair are identical, and only the first half is used (see init_points below).

point_pairs =[
    [[0, 103.5, 292.0], [0, 103.5, 292.0]],
    [[0, 453.0, 448.0], [0, 453.0, 448.0]],
    [[0, 469.0, 207.0], [0, 469.0, 207.0]],
    [[29, 164.0, 406.0], [29, 164.0, 406.0]],
    [[29, 223.0, 38.0], [29, 223.0, 38.0]],
    [[29, 354.0, 117.0], [29, 354.0, 117.0]],
    [[29, 541.0, 247.0], [29, 541.0, 247.0]],
    [[37, 383.0, 20.0], [37, 383.0, 20.0]],
    [[37, 414.0, 106.0], [37, 414.0, 106.0]],
    [[37, 448.0, 257.0], [37, 448.0, 257.0]],
    [[44, 528.0, 41.0], [44, 528.0, 41.0]],
    [[44, 478.0, 225.0], [44, 478.0, 225.0]],

   # NOTE(review): the next entry duplicates the one just above. Removing it
   # would shift every later list position, including position 13 used by CAM
   # below, so update CAM if this is ever cleaned up.
   [[44, 478.0, 225.0], [44, 478.0, 225.0]],
    [[9, 375.0, 441.0], [9, 375.0, 441.0]],
    [[19, 103.0, 438.0], [19, 103.0, 438.0]],
    [[30, 106.5, 438.0], [30, 106.5, 438.0]],
    [[41, 162.0, 240.0], [41, 162.0, 240.0]],
    [[41, 669.0, 215.0], [41, 669.0, 215.0]],
    [[44, 101.0, 285.0], [44, 101.0, 285.0]],
]


# Keep only the first point of every pair.
init_points = [
    x[0] for x in point_pairs
]

# Track indices passed to uncamera further down as tracks[:,CAM]. Presumably
# these follow the order of init_points, in which case 13 is the point
# [9, 375.0, 441.0] and 1 is [0, 453.0, 448.0] -- confirm.
CAM=[13, 1]

# Show the settings that define this sample, then run the tracker setup
# (init_tracks is defined elsewhere; it presumably consumes init_points and
# produces the cotracker_* globals used further down -- confirm).
rp.display_dict(rp.gather_vars('input_video_path prompt init_points'))

init_tracks()

##############################

#Made without the good tracker - keep things repeatable
# Use the cotracker_* results (globals defined elsewhere) as the source data.
visibles = cotracker_visibles
tracks = cotracker_tracks

# "+0" yields copies, so the edits below go into new_tracks/new_visibles and
# leave tracks/visibles as they are.
new_tracks = tracks + 0
new_visibles = visibles + 0


# new_visibles[:,1]=1
# visibles[:,1]=1

def partial_drift(tracks, *i, dx=0, dy=0, t_start=0, t_end=0):
    """Apply a drift to the selected tracks that is confined to a time window.

    Two ``add_drift`` calls are issued: one anchored at ``t_start`` with a
    per-frame rate of ``(dx/t_delta, dy/t_delta)`` and an opposite one
    anchored at ``t_end``, where ``t_delta = t_end - t_start``.
    Presumably the net effect is that the tracks move by ``(dx, dy)`` in
    total between ``t_start`` and ``t_end`` and keep that offset afterwards
    -- confirm against ``add_drift``, which is defined elsewhere.

    Args:
        tracks: Track data, forwarded to ``add_drift``.
        *i: Track indices, forwarded to ``add_drift`` unchanged.
        dx: Total x offset spread across the window.
        dy: Total y offset spread across the window.
        t_start: Frame passed as ``t_origin`` of the first drift.
        t_end: Frame passed as ``t_origin`` of the cancelling drift.

    Returns:
        Whatever the second ``add_drift`` call returns.

    Raises:
        ZeroDivisionError: If ``t_end == t_start`` (which includes the
            defaults), because the per-frame rate is undefined.
    """
    t_delta = t_end-t_start
    if t_delta == 0:
        # Fail loudly with a useful message. Plain numbers would raise a bare
        # "division by zero" here; array scalars would silently yield inf/nan
        # rates and corrupt the tracks.
        raise ZeroDivisionError(
            "partial_drift needs t_end != t_start "
            "(got t_start=%r, t_end=%r)" % (t_start, t_end)
        )
    # Start drifting at t_start ...
    tracks = add_drift(tracks, *i, dx=dx/t_delta, dy=dy/t_delta, t_origin=t_start, do_before=False)
    # ... and add the opposite drift from t_end onwards.
    tracks = add_drift(tracks, *i, dx=-dx/t_delta, dy=-dy/t_delta, t_origin=t_end, do_before=False)
    return tracks


# Disabled leftovers from the City Biker cell.
# BIKER=[0,1,2,3,4]


# new_visibles[:]=1

# partial_drift(new_tracks, *BIKER, dx=-150, dy=0, t_start=21, t_end=39)

# new_tracks += torch.randn_like(new_tracks) * .125

# The only active edit in this cell: re-express all tracks relative to the CAM
# tracks, passing frame 40 as the reference (presumably this cancels camera
# motion -- confirm against uncamera, which is defined elsewhere).
new_tracks[:,:] = uncamera(new_tracks[:,:], tracks[:,CAM], 40)

# new_tracks[:,BG] = new_tracks[-1:,BG]

# #ZOOM OUT
# mx=600
# my=250
# deltas = new_tracks+0
# deltas[:,:,0]-=mx
# deltas[:,:,1]-=my
# deltas*=.65
# deltas[:,:,0]+=mx
# deltas[:,:,1]+=my
# new_tracks=deltas

# NOTE(review): if the disabled loop below is re-enabled, mind the precedence
# in its condition: `not v` only applies to `x<0` because `and` binds tighter
# than `or`.
# #End on visible
# for bg in BG:
#     for i in reversed(range(T)):
#         v=new_visibles[i,bg]
#         x,y=tracks[i,bg]
#         if not v and x<0 or y<0 or x>=W or y>=H:
#             #MAKE OUT OF BOUNDS VISIBLE
#             new_visibles[i,bg]=1

# Preview the edited tracks drawn over the input video, then show the
# before/after comparison (both via notebook helpers defined elsewhere).
rp.display_video(gridded_video(draw_tracks(input_video, new_tracks, new_visibles)))
display_tracks_diff()

##############################

# Generation stage. These helpers are defined elsewhere in the notebook and
# take no arguments, so they presumably read the globals configured above
# (prompt, new_tracks, new_visibles, ...) -- confirm before reordering.
init_blob_videos()
init_sample()
init_mp4_files()
do_diffusion()
save_diffusion_results()
# output_video is not assigned in this cell; presumably one of the calls above
# sets it. Only the first three entries of the last axis are passed to the
# diff view (presumably RGB -- confirm).
display_tracks_diff(output_video[:,:,:,:3])
rp.display_video(output_video,framerate=15)