In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data, Batch
from tqdm import tqdm
import torch.nn.functional as F
import numpy as np
import torch
from torch.utils.data import random_split

from torch.utils.data import Dataset

In [63]:
if torch.backends.mps.is_available():
    device = torch.device('mps')
    print("Apple GPU")
elif torch.cuda.is_available():
    device = torch.device('cuda')
    print("CUDA GPU")
else:
    device = torch.device('cpu')

Apple GPU


In [64]:
def getData(path):
    train_file = np.load(path+"/train.npz")
    train_data = train_file['data']
    test_file = np.load(path+"/test_input.npz")
    test_data = test_file['data']
    print(f"Training Data's shape is {train_data.shape} and Test Data's is {test_data.shape}")
    return train_data, test_data
trainData, testData = getData("./data/")

Training Data's shape is (10000, 50, 110, 6) and Test Data's is (2100, 50, 50, 6)


In [76]:
class WindowedNormalizedDataset(Dataset):
    def __init__(self, data, scale=10.0):
        self.data = data
        self.scale = scale
        self.dt = 0.1  # Assuming 0.1s timesteps

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        scene = self.data[idx].copy()  # (50 agents, 110 timesteps, 6 features)
        presence = (scene[..., 0] != 0) | (scene[..., 1] != 0)  # (50, 110)

        origin = scene[0, 49].copy()
        tx, ty, _, _, theta, _ = origin

        cos_theta = np.cos(-theta)
        sin_theta = np.sin(-theta)

        # Create feature tensor with 14 features
        normalized_scene = np.zeros((50, 110, 14), dtype=np.float32)

        # --- Existing normalization (features 0-8) ---
        # Positions
        x = scene[..., 0] - tx
        y = scene[..., 1] - ty
        x_n = x * cos_theta - y * sin_theta
        y_n = x * sin_theta + y * cos_theta
        normalized_scene[..., 0] = x_n / self.scale
        normalized_scene[..., 1] = y_n / self.scale
        
        # Velocities
        vx = scene[..., 2]
        vy = scene[..., 3]
        vx_n = vx * cos_theta - vy * sin_theta
        vy_n = vx * sin_theta + vy * cos_theta
        normalized_scene[..., 2] = vx_n / self.scale
        normalized_scene[..., 3] = vy_n / self.scale
        
        # Heading
        heading = scene[..., 4]
        normalized_heading = heading - theta
        normalized_heading = (normalized_heading + np.pi) % (2 * np.pi) - np.pi
        normalized_scene[..., 4] = normalized_heading
        
        # Agent type and presence
        normalized_scene[..., 5] = scene[..., 5]  # agent_type
        normalized_scene[..., 6] = presence.astype(np.float32)  # presence
        
        # Speed
        speed = np.sqrt(vx ** 2 + vy ** 2)
        normalized_scene[..., 7] = speed / self.scale
        
        # Distance to ego
        ego_pos = scene[0, :, :2]  # (110, 2)
        dist_to_ego = np.linalg.norm(scene[..., :2] - ego_pos[None, :, :], axis=-1)
        normalized_scene[..., 8] = dist_to_ego / self.scale

        # === New Feature 1: Minimum Distance to Any Agent ===
        min_dists = np.full((50, 110), 1000.0)  # Initialize with large distance
        positions = scene[..., :2]  # Original positions (50, 110, 2)
        
        for t in range(110):
            # Only consider present agents
            present_agents = np.where(presence[:, t])[0]
            if len(present_agents) < 2:
                continue  # Skip if less than 2 agents present
                
            # Get present agents' positions
            pos_t = positions[present_agents, t, :]
            
            # Compute pairwise distances
            diff = pos_t[:, None, :] - pos_t[None, :, :]
            dist_matrix = np.linalg.norm(diff, axis=-1)
            
            # Ignore self-distance
            np.fill_diagonal(dist_matrix, np.inf)
            
            # Find minimum distances
            agent_min_dists = np.min(dist_matrix, axis=1)
            
            # Update only present agents
            min_dists[present_agents, t] = agent_min_dists
        
        normalized_scene[..., 9] = min_dists / self.scale  # Feature index 9

        # === New Feature 2: Acceleration Magnitude ===
        acceleration = np.zeros_like(speed)
        # Forward difference for acceleration
        acceleration[:, 1:] = (speed[:, 1:] - speed[:, :-1]) / self.dt
        # Handle first timestep
        acceleration[:, 0] = acceleration[:, 1]  
        normalized_scene[..., 10] = acceleration / self.scale  # Feature index 10

        # === New Feature 3: Time-to-Collision (TTC) with Ego ===
        # Relative velocity magnitude
        rel_vel = np.sqrt((vx - vx[0])**2 + (vy - vy[0])**2)
        # Avoid division by zero
        ttc = np.full((50, 110), 10.0)  # Default to max TTC (10s)
        valid_mask = (dist_to_ego > 0.1) & (rel_vel > 0.1)
        ttc[valid_mask] = dist_to_ego[valid_mask] / rel_vel[valid_mask]
        # Clip to meaningful range (0-10s)
        ttc = np.clip(ttc, 0, 10)  
        normalized_scene[..., 11] = ttc / 10.0  # Feature index 11 (scaled 0-1)

        # === Optional: Ego Agent Flag ===
        ego_mask = (np.arange(50) == 0).astype(np.float32)[:, None]
        normalized_scene[..., 12] = ego_mask  # Feature index 12

        # === Optional: Future Velocity Angle ===
        future_vel_angle = np.zeros((50, 110))
        future_vel_angle[:, :-1] = np.arctan2(vy_n[:, 1:], vx_n[:, 1:])
        normalized_scene[..., 13] = future_vel_angle  # Feature index 13

        # Mask out invalid timesteps
        missing_mask = np.expand_dims(~presence, -1)
        normalized_scene = np.where(missing_mask, 0, normalized_scene)

        # Inputs: first 50 timesteps
        X = normalized_scene[:, :50, :]  # (50 agents, 50 timesteps, 14 features)

        # Target: ego agent's future positions and presence
        ego_future = normalized_scene[0, 50:]
        Y = np.zeros((60, 3), dtype=np.float32)
        Y[:, :2] = ego_future[:, :2]  # Normalized positions
        Y[:, 2] = ego_future[:, 6]    # Presence

        return (
            torch.tensor(X, dtype=torch.float32),
            torch.tensor(Y, dtype=torch.float32),
            torch.tensor(origin, dtype=torch.float32)
        )

In [77]:
class WindowedNormalizedTestDataset(Dataset):
    def __init__(self, data, scale=10.0):
        self.data = data
        self.scale = scale
        self.dt = 0.1  # Assuming 0.1s timesteps

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        scene = self.data[idx].copy()  # (50 agents, 110 timesteps, 6 features)
        presence = (scene[..., 0] != 0) | (scene[..., 1] != 0)  # (50, 110)

        origin = scene[0, 49].copy()
        tx, ty, _, _, theta, _ = origin

        cos_theta = np.cos(-theta)
        sin_theta = np.sin(-theta)

        # Create feature tensor with 14 features
        normalized_scene = np.zeros((50, 50, 14), dtype=np.float32)

        # --- Existing normalization (features 0-8) ---
        # Positions
        x = scene[..., 0] - tx
        y = scene[..., 1] - ty
        x_n = x * cos_theta - y * sin_theta
        y_n = x * sin_theta + y * cos_theta
        normalized_scene[..., 0] = x_n / self.scale
        normalized_scene[..., 1] = y_n / self.scale
        
        # Velocities
        vx = scene[..., 2]
        vy = scene[..., 3]
        vx_n = vx * cos_theta - vy * sin_theta
        vy_n = vx * sin_theta + vy * cos_theta
        normalized_scene[..., 2] = vx_n / self.scale
        normalized_scene[..., 3] = vy_n / self.scale
        
        # Heading
        heading = scene[..., 4]
        normalized_heading = heading - theta
        normalized_heading = (normalized_heading + np.pi) % (2 * np.pi) - np.pi
        normalized_scene[..., 4] = normalized_heading
        
        # Agent type and presence
        normalized_scene[..., 5] = scene[..., 5]  # agent_type
        normalized_scene[..., 6] = presence.astype(np.float32)  # presence
        
        # Speed
        speed = np.sqrt(vx ** 2 + vy ** 2)
        normalized_scene[..., 7] = speed / self.scale
        
        # Distance to ego
        ego_pos = scene[0, :, :2]  # (110, 2)
        dist_to_ego = np.linalg.norm(scene[..., :2] - ego_pos[None, :, :], axis=-1)
        normalized_scene[..., 8] = dist_to_ego / self.scale

        # === New Feature 1: Minimum Distance to Any Agent ===
        min_dists = np.full((50, 50), 1000.0)  # Initialize with large distance
        positions = scene[..., :2]  # Original positions (50, 110, 2)
        
        for t in range(50):
            # Only consider present agents
            present_agents = np.where(presence[:, t])[0]
            if len(present_agents) < 2:
                continue  # Skip if less than 2 agents present
                
            # Get present agents' positions
            pos_t = positions[present_agents, t, :]
            
            # Compute pairwise distances
            diff = pos_t[:, None, :] - pos_t[None, :, :]
            dist_matrix = np.linalg.norm(diff, axis=-1)
            
            # Ignore self-distance
            np.fill_diagonal(dist_matrix, np.inf)
            
            # Find minimum distances
            agent_min_dists = np.min(dist_matrix, axis=1)
            
            # Update only present agents
            min_dists[present_agents, t] = agent_min_dists
        
        normalized_scene[..., 9] = min_dists / self.scale  # Feature index 9

        # === New Feature 2: Acceleration Magnitude ===
        acceleration = np.zeros_like(speed)
        # Forward difference for acceleration
        acceleration[:, 1:] = (speed[:, 1:] - speed[:, :-1]) / self.dt
        # Handle first timestep
        acceleration[:, 0] = acceleration[:, 1]  
        normalized_scene[..., 10] = acceleration / self.scale  # Feature index 10

        # === New Feature 3: Time-to-Collision (TTC) with Ego ===
        # Relative velocity magnitude
        rel_vel = np.sqrt((vx - vx[0])**2 + (vy - vy[0])**2)
        # Avoid division by zero
        ttc = np.full((50, 50), 10.0)  # Default to max TTC (10s)
        valid_mask = (dist_to_ego > 0.1) & (rel_vel > 0.1)
        ttc[valid_mask] = dist_to_ego[valid_mask] / rel_vel[valid_mask]
        # Clip to meaningful range (0-10s)
        ttc = np.clip(ttc, 0, 10)  
        normalized_scene[..., 11] = ttc / 10.0  # Feature index 11 (scaled 0-1)

        # === Optional: Ego Agent Flag ===
        ego_mask = (np.arange(50) == 0).astype(np.float32)[:, None]
        normalized_scene[..., 12] = ego_mask  # Feature index 12

        # === Optional: Future Velocity Angle ===
        future_vel_angle = np.zeros((50, 50))
        future_vel_angle[:, :-1] = np.arctan2(vy_n[:, 1:], vx_n[:, 1:])
        normalized_scene[..., 13] = future_vel_angle  # Feature index 13

        # Mask out invalid timesteps
        missing_mask = np.expand_dims(~presence, -1)
        normalized_scene = np.where(missing_mask, 0, normalized_scene)

        # Inputs: first 50 timesteps
        X = normalized_scene[:, :50, :]  # (50 agents, 50 timesteps, 14 features)

        # Target: ego agent's future positions and presence
        # ego_future = normalized_scene[0, 50:]
        # Y = np.zeros((60, 3), dtype=np.float32)
        # Y[:, :2] = ego_future[:, :2]  # Normalized positions
        # Y[:, 2] = ego_future[:, 6]    # Presence

        return (
            torch.tensor(X, dtype=torch.float32),
            # torch.tensor(Y, dtype=torch.float32),
            torch.tensor(origin, dtype=torch.float32)
        )

In [78]:
trainData[1, 0, 49, :], trainData[1, 0, 50, :]

(array([ 3.16906469e+03,  1.68248551e+03,  5.46145515e+00, -5.85380650e+00,
        -8.22467566e-01,  0.00000000e+00]),
 array([ 3.16959927e+03,  1.68191109e+03,  5.35655550e+00, -5.75120145e+00,
        -8.22600550e-01,  0.00000000e+00]))

In [79]:
data = WindowedNormalizedDataset(trainData)
X, Y, origin = data.__getitem__(1)
X[0, 49, :], Y[0, :], origin.shape

(tensor([ 0.0000,  0.0000,  0.8006,  0.0019,  0.0000,  0.0000,  1.0000,  0.8006,
          0.0000,  0.8294, -0.0571,  1.0000,  1.0000,  0.0016]),
 tensor([7.8468e-02, 9.1270e-05, 1.0000e+00]),
 torch.Size([6]))

In [80]:
# x, y = denormalize_ego(Y[0, :2], origin)
# x, y

In [81]:
def denormalize_ego_batch(predicted, origin, scale=10.0):
    """
    Convert batch of normalized (and scaled) ego predictions back to global coordinates.

    predicted: (B, ..., 2) tensor of normalized [x, y] positions
    origin: (B, 6) tensor of ego's reference state at t=49
    Returns:
        (B, ..., 2) tensor of global [x, y] positions
    """
    tx = origin[:, 0]  # (B,)
    ty = origin[:, 1]  # (B,)
    theta = origin[:, 4]  # (B,)

    cos_theta = torch.cos(theta)
    sin_theta = torch.sin(theta)

    # Expand for broadcasting
    while len(cos_theta.shape) < len(predicted.shape) - 1:
        cos_theta = cos_theta.unsqueeze(1)
        sin_theta = sin_theta.unsqueeze(1)
        tx = tx.unsqueeze(1)
        ty = ty.unsqueeze(1)

    # Unscale before denormalizing
    x = predicted[..., 0] * scale
    y = predicted[..., 1] * scale

    # Rotate
    x_rot = x * cos_theta - y * sin_theta
    y_rot = x * sin_theta + y * cos_theta

    # Translate
    x_global = x_rot + tx
    y_global = y_rot + ty

    return torch.stack([x_global, y_global], dim=-1)


In [82]:
import torch
import torch.nn as nn

class TrajectoryTransformer(nn.Module):
    def __init__(self, input_dim=700, model_dim=256, num_heads=8, num_layers=6, dropout=0.1, pred_len=60, num_agents=50):
        super().__init__()
        self.model_dim = model_dim
        self.pred_len = pred_len
        self.num_agents = num_agents
        
        # Process each agent's full trajectory (50*7 = 350) into a single token
        self.trajectory_encoder = nn.Sequential(
            nn.Linear(input_dim, model_dim),
            nn.LayerNorm(model_dim),
            nn.ReLU(),
            nn.Linear(model_dim, model_dim),
            nn.LayerNorm(model_dim),
            nn.ReLU(),
            nn.Linear(model_dim, model_dim),
            nn.LayerNorm(model_dim),
            nn.ReLU()
        )
        
        # 2-layer transformer encoder to process agent tokens
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=model_dim, 
                nhead=num_heads, 
                dropout=dropout, 
                batch_first=True
            ),
            num_layers=num_layers
        )
        
        # Final linear layer to predict ego vehicle trajectory
        self.output_fcpre = nn.Linear(model_dim, model_dim)  # 60*2 = 120
        self.output_fc = nn.Linear(model_dim, pred_len * 2)  # 60*2 = 120
    
    def forward(self, x):
        B, N, T, Ft = x.shape
        
        x = x.view(B, N, T * Ft)  # (B, 50, 350)
        
        # Encode each agent's trajectory into a token
        agent_tokens = self.trajectory_encoder(x)  # (B, 50, model_dim)
        
        # Process all agent tokens through transformer
        encoded_tokens = self.transformer_encoder(agent_tokens)  # (B, 50, model_dim)
        
        # Extract ego vehicle token (assuming agent 0 is ego)
        ego_token = encoded_tokens[:, 0, :]  # (B, model_dim)
        
        # Predict ego trajectory
        output = F.relu(self.output_fcpre(ego_token))  # (B, pred_len*2)

        output = self.output_fc(output)  # (B, pred_len*2)
        
        # Reshape to (B, pred_len, 2)
        output = output.view(B, self.pred_len, 2)  # (B, 60, 2)
        
        return output

# Test run
model = TrajectoryTransformer()
x = torch.randn(1, 50, 50, 14)  
out = model(x)
print(f"Input shape: {x.shape}")
print(f"Output shape: {out.shape}")  # Expected: (1, 60, 2)

# Print model summary
print(f"\nModel parameters: {sum(p.numel() for p in model.parameters()):,}")

Input shape: torch.Size([1, 50, 50, 14])
Output shape: torch.Size([1, 60, 2])

Model parameters: 8,299,640


In [83]:
model = TrajectoryTransformer().to(device=device)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")

Total parameters: 8299640


In [84]:
np.random.seed(42)
num_samples = trainData.shape[0]
indices = np.random.permutation(num_samples)
split_index = int(0.9 * num_samples)
train_idx, val_idx = indices[:split_index], indices[split_index:]

# Split the data
train_data = trainData[train_idx]
val_data = trainData[val_idx]

print("Train shape:", train_data.shape)
print("Validation shape:", val_data.shape)

Train shape: (9000, 50, 110, 6)
Validation shape: (1000, 50, 110, 6)


In [85]:
trainTensor = WindowedNormalizedDataset(train_data)
testTensor = WindowedNormalizedDataset(val_data)
train_dataloader = DataLoader(trainTensor, batch_size=128, shuffle=True)
val_dataloader = DataLoader(testTensor, batch_size=128, shuffle=False)

In [87]:
 # train MSE 0.0022571232 | train val MSE 0.0125668047 | val MAE 2.0903749615 | val MSE 1.2566813445
torch.cuda.empty_cache()

best_model = torch.load("./models/final/best_model.pt",  map_location=torch.device('cpu'))
model.load_state_dict(best_model)

epochs = 1000
lossFn = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-5, weight_decay=1e-6)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.25)
best_val_loss = 0.0125668047 #float('inf')
best_train_loss = 0.0022571232 #float('inf')
position_scale = 1.0
velocity_scale = 1.0
all_losses = {
    'training_mse_loss':[],
    'validation_mse_loss':[],
    'true_mse':[],
    'true_mae':[]
}

for each_epoch in range(epochs):
    model.train()
    runningLoss = 0.0
    loop = tqdm(train_dataloader, desc=f"Epoch [{each_epoch+1}/{epochs}]")
    
    for batchX, batchY, origin in loop:
        batchX = batchX.to(device)
        batchY = batchY.to(device)
        origin = origin.to(device)

        
        pred = model(batchX)  # pred shape: (B, 60, 2)
        
        loss = lossFn(pred[..., :2], batchY[..., :2]).to(device)
        
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        runningLoss += loss.item()        
    
    model.eval()
    val_loss = 0
    val_mae = 0
    val_mse = 0
    
    with torch.no_grad():
        for batchX, batchY, origin in loop:
            batchX = batchX.to(device)
            batchY = batchY.to(device)
            origin = origin.to(device)

            
            pred = model(batchX)  # pred shape: (B, 60, 2)
            
            loss = lossFn(pred[..., :2], batchY[..., :2]).to(device)
            unnorm_pred = denormalize_ego_batch(pred, origin)
            unnorm_true = denormalize_ego_batch(batchY, origin)

            # print(pred[..., :2].shape, batchY[..., :2].shape, origin.shape, unnorm_pred.shape)
            
            # break
            
            # unnorm_pred = denormalize_ego(pred[..., :2], origin)
            # unnorm_true = denormalize_ego(batchY, origin)

            
            val_loss += loss.item()
            val_mae += nn.L1Loss()(unnorm_pred[..., :2], unnorm_true[..., :2]).item()
            val_mse += nn.MSELoss()(unnorm_pred[..., :2], unnorm_true[..., :2]).item()
    # break
    train_loss = runningLoss/len(train_dataloader)
    val_loss /= len(val_dataloader)
    val_mae /= len(val_dataloader)
    val_mse /= len(val_dataloader)
    
    all_losses["training_mse_loss"].append(train_loss)
    all_losses["validation_mse_loss"].append(val_loss)
    all_losses["true_mse"].append(val_mse)
    all_losses["true_mae"].append(val_mae)
    
    loop.write(f" train MSE {train_loss:.10f} | train val MSE {val_loss:.10f} | val MAE {val_mae:.10f} | val MSE {val_mse:.10f}")
    scheduler.step()
    
    if train_loss < best_train_loss and val_loss < best_val_loss :#- 1e-3
        best_val_loss = val_loss
        best_train_loss = train_loss
        no_improvement = 0
        torch.save(model.state_dict(), "./models/final/best_model.pt")
        loop.write(f" model Saved")
    torch.cuda.empty_cache()

#  train MSE 0.0019722332 | train val MSE 0.0106049001 | val MAE 1.9069033768 | val MSE 1.0604904490 - 7.8

 #  train MSE 0.0019108728 | train val MSE 0.0105765359 | val MAE 1.9288981240 | val MSE 1.0576545876 (Baseline)

Epoch [1/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0029425269 | train val MSE 0.0165168019 | val MAE 2.3176996000 | val MSE 1.6516812667


Epoch [2/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.00it/s]


 train MSE 0.0025348027 | train val MSE 0.0142798762 | val MAE 2.1727116723 | val MSE 1.4279882563


Epoch [3/1000]: 100%|██████████| 71/71 [00:34<00:00,  2.04it/s]


 train MSE 0.0023348625 | train val MSE 0.0129534931 | val MAE 2.0769047271 | val MSE 1.2953509865


Epoch [4/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.02it/s]


 train MSE 0.0022171857 | train val MSE 0.0137854397 | val MAE 2.1483566724 | val MSE 1.3785430230


Epoch [5/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0021941995 | train val MSE 0.0121889721 | val MAE 2.0432669837 | val MSE 1.2188989185
 model Saved


Epoch [6/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0020661576 | train val MSE 0.0112839255 | val MAE 1.9663441908 | val MSE 1.1283926629
 model Saved


Epoch [7/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0020865798 | train val MSE 0.0121758730 | val MAE 2.0375033021 | val MSE 1.2175879218


Epoch [8/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0020198265 | train val MSE 0.0117653533 | val MAE 2.0003164485 | val MSE 1.1765354788


Epoch [9/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0020361550 | train val MSE 0.0124446647 | val MAE 2.0664384011 | val MSE 1.2444661930


Epoch [10/1000]: 100%|██████████| 71/71 [00:35<00:00,  1.99it/s]


 train MSE 0.0019722332 | train val MSE 0.0106049001 | val MAE 1.9069033768 | val MSE 1.0604904490
 model Saved


Epoch [11/1000]: 100%|██████████| 71/71 [00:36<00:00,  1.95it/s]


 train MSE 0.0019806324 | train val MSE 0.0127116919 | val MAE 2.0958217010 | val MSE 1.2711694874


Epoch [12/1000]: 100%|██████████| 71/71 [00:35<00:00,  1.99it/s]


 train MSE 0.0019331989 | train val MSE 0.0140788117 | val MAE 2.2140480373 | val MSE 1.4078809433


Epoch [13/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.02it/s]


 train MSE 0.0019743287 | train val MSE 0.0136030282 | val MAE 2.1517007165 | val MSE 1.3603035975


Epoch [14/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.03it/s]


 train MSE 0.0019008753 | train val MSE 0.0106993354 | val MAE 1.9399659503 | val MSE 1.0699346634


Epoch [15/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.02it/s]


 train MSE 0.0018884297 | train val MSE 0.0105072333 | val MAE 1.9089235291 | val MSE 1.0507247029
 model Saved


Epoch [16/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0018814761 | train val MSE 0.0105204639 | val MAE 1.9100987911 | val MSE 1.0520462627


Epoch [17/1000]: 100%|██████████| 71/71 [00:35<00:00,  1.97it/s]


 train MSE 0.0018773851 | train val MSE 0.0112747150 | val MAE 1.9550358206 | val MSE 1.1274727648


Epoch [18/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.00it/s]


 train MSE 0.0018444205 | train val MSE 0.0119352292 | val MAE 2.0434247237 | val MSE 1.1935235569


Epoch [19/1000]: 100%|██████████| 71/71 [00:35<00:00,  1.98it/s]


 train MSE 0.0018555013 | train val MSE 0.0104798045 | val MAE 1.9142996836 | val MSE 1.0479805134
 model Saved


Epoch [20/1000]: 100%|██████████| 71/71 [00:36<00:00,  1.96it/s]


 train MSE 0.0018596046 | train val MSE 0.0119421695 | val MAE 2.0661371946 | val MSE 1.1942176698


Epoch [21/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.03it/s]


 train MSE 0.0017850912 | train val MSE 0.0112950785 | val MAE 1.9876394998 | val MSE 1.1295085745


Epoch [22/1000]: 100%|██████████| 71/71 [00:34<00:00,  2.04it/s]


 train MSE 0.0017304899 | train val MSE 0.0109483944 | val MAE 1.9615234621 | val MSE 1.0948403897


Epoch [23/1000]: 100%|██████████| 71/71 [00:34<00:00,  2.05it/s]


 train MSE 0.0017339914 | train val MSE 0.0099548031 | val MAE 1.8753447738 | val MSE 0.9954814240
 model Saved


Epoch [24/1000]: 100%|██████████| 71/71 [00:34<00:00,  2.05it/s]


 train MSE 0.0017512040 | train val MSE 0.0105982430 | val MAE 1.9143188745 | val MSE 1.0598251913


Epoch [25/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.02it/s]


 train MSE 0.0017266043 | train val MSE 0.0098207767 | val MAE 1.8569340166 | val MSE 0.9820784796
 model Saved


Epoch [26/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.02it/s]


 train MSE 0.0017262176 | train val MSE 0.0101037744 | val MAE 1.8828231711 | val MSE 1.0103782518


Epoch [27/1000]: 100%|██████████| 71/71 [00:34<00:00,  2.03it/s]


 train MSE 0.0017297758 | train val MSE 0.0101239692 | val MAE 1.8755007982 | val MSE 1.0123970760


Epoch [28/1000]: 100%|██████████| 71/71 [00:34<00:00,  2.04it/s]


 train MSE 0.0016893622 | train val MSE 0.0110162325 | val MAE 1.9608830623 | val MSE 1.1016234178


Epoch [29/1000]: 100%|██████████| 71/71 [00:36<00:00,  1.97it/s]


 train MSE 0.0017240362 | train val MSE 0.0106920988 | val MAE 1.9413995743 | val MSE 1.0692106374


Epoch [30/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.02it/s]


 train MSE 0.0017109432 | train val MSE 0.0101808386 | val MAE 1.8885744791 | val MSE 1.0180842094


Epoch [31/1000]: 100%|██████████| 71/71 [00:35<00:00,  2.01it/s]


 train MSE 0.0017154289 | train val MSE 0.0110509405 | val MAE 1.9703605622 | val MSE 1.1050948156


Epoch [32/1000]:   3%|▎         | 2/71 [00:01<00:42,  1.63it/s]


KeyboardInterrupt: 

In [45]:
test_dataset = WindowedNormalizedTestDataset(testData)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


best_model = torch.load("./models/final/best_model.pt")
model = model = TrajectoryTransformer().to(device=device)
# model = model = TrajectoryTransformerPlus().to(device=device)


model.load_state_dict(best_model)
model.eval()

pred_list = []
with torch.no_grad():
    for batchX, origin in test_loader:
        batchX = batchX.to(device)
        batchY = batchY.to(device)
        origin = origin.to(device)

        
        pred = model(batchX)  # pred shape: (B, 60, 2)
        
        unnorm_pred = denormalize_ego_batch(pred[..., :2], origin)
        # print(unnorm_pred.shape)
        pred_list.append(unnorm_pred.cpu().numpy())
        # print(len(pred))
        

pred_list = np.concatenate(pred_list, axis=0)  
pred_output = pred_list.reshape(-1, 2)  # (N*60, 2)
output_df = pd.DataFrame(pred_output, columns=['x', 'y'])
output_df.index.name = 'index'
output_df.to_csv('./models/modelI/testTransFormer.csv', index=True)

In [None]:
 # train MSE 0.0139515618 | train val MSE 0.0823020875 | val MAE 4.7103952616 | val MSE 8.2302096859 - test 9.08
