In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt 
from tqdm import tqdm
import time 

# Single source of truth for the seed so the torch and NumPy generators
# cannot silently drift apart when one of the two calls is edited.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Prefer the GPU when one is visible to torch; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")



Using device: cpu


In [4]:
# Read the synthetic tracking archive and pull out the two arrays used below.
print("Loading dataset...")

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code. Only load archives from a trusted source, and
# drop the flag if the stored arrays turn out to be plain numeric arrays.
# `data` is intentionally left open in case other cells read further keys.
archive_path = '../data/synthetic_tracking_dataset.npz'
data = np.load(archive_path, allow_pickle=True)
videos, positions = (data[key] for key in ('videos', 'positions'))

# One print call, three lines: video count, full array shape, frames per video.
print(
    f"✓ Loaded {len(videos)} videos",
    f"✓ Video shape: {videos.shape}",
    f"✓ Each video has {videos.shape[1]} frames",
    sep="\n",
)

Loading dataset...
✓ Loaded 1000 videos
✓ Video shape: (1000, 50, 32, 32)
✓ Each video has 50 frames


In [11]:
class TrackingDataset(Dataset):
    """
    Frame-pair dataset for the tracking task.

    Each sample is a pair of consecutive frames taken from one video; the
    target is the ``positions`` entry of the second frame of that pair. A
    video with ``T`` frames contributes ``T - 1`` samples, and samples are
    ordered video by video (all pairs of video 0, then video 1, ...).

    Args:
        videos: Array indexed as ``videos[video, frame]``. Must expose
            ``.shape`` with the number of frames at ``shape[1]``.
        positions: Per-video, per-frame targets indexed as
            ``positions[video][frame]``.

    Raises:
        ValueError: If ``videos`` and ``positions`` hold a different number
            of videos.
    """
    def __init__(self, videos, positions):
        # Fail early on mismatched inputs instead of returning wrong targets
        # or raising deep inside a DataLoader worker.
        if len(videos) != len(positions):
            raise ValueError(
                f"videos and positions must have the same length, "
                f"got {len(videos)} and {len(positions)}"
            )
        self.videos = videos
        self.positions = positions

    def _pairs_per_video(self):
        """Number of consecutive-frame pairs in one video (never negative)."""
        return max(self.videos.shape[1] - 1, 0)

    def __len__(self):
        """Total number of frame pairs across all videos."""
        return len(self.videos) * self._pairs_per_video()

    def __getitem__(self, idx):
        """
        Return ``(input_frames, target_pos)`` for flat sample index ``idx``.

        ``input_frames`` is a float32 tensor stacking frame ``t`` and frame
        ``t + 1`` along a new leading axis; ``target_pos`` is a float32
        tensor built from ``positions[video][t + 1]``.

        Raises:
            IndexError: If the dataset contains no frame pairs, or if ``idx``
                maps to a video outside the array (raised by the indexing).
        """
        pairs_per_video = self._pairs_per_video()
        if pairs_per_video == 0:
            raise IndexError("TrackingDataset is empty: videos have fewer than 2 frames")

        # Flat index -> (video, first frame of the pair). Floor semantics of
        # divmod keep negative indices consistent with Python sequences.
        video_idx, frame_idx = divmod(idx, pairs_per_video)

        # A contiguous slice of two frames equals stacking them on axis 0.
        # np.array(...) makes an explicit float32 copy so the returned tensor
        # never aliases the underlying dataset storage.
        frame_pair = np.array(
            self.videos[video_idx, frame_idx:frame_idx + 2], dtype=np.float32
        )
        target_pos = np.array(
            self.positions[video_idx][frame_idx + 1], dtype=np.float32
        )

        return torch.from_numpy(frame_pair), torch.from_numpy(target_pos)

# Build the frame-pair dataset, split it 80/20 and wrap both parts in loaders.
TRAIN_FRACTION = 0.8
BATCH_SIZE = 32
SPLIT_SEED = 42

print("Creating dataset...")
full_dataset = TrackingDataset(videos, positions)

train_size = int(TRAIN_FRACTION * len(full_dataset))
val_size = len(full_dataset) - train_size

# A dedicated generator makes the split independent of how many times (or in
# which order) earlier cells consumed the global torch RNG.
# NOTE(review): the split is over individual frame pairs, so pairs from the
# same video can end up in both subsets; consider splitting by video if the
# validation score must reflect unseen videos.
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

print(f"✓ Training samples: {len(train_dataset)}")
print(f"✓ Validation samples: {len(val_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Smoke test: pull one batch to confirm the tensor shapes the model will see.
test_input, test_target = next(iter(train_loader))
print(f"\n✓ Batch input shape: {test_input.shape}")
print(f"✓ Batch target shape: {test_target.shape}")
        

    

Creating dataset...
✓ Training samples: 39200
✓ Validation samples: 9800
n✓ Batch input shape: torch.Size([32, 2, 32, 32])
✓ Batch target shape: torch.Size([32, 2])
