# Extract Trajectories (No normalization)

In [None]:
import pandas as pd

def format_data(file_path):
    # Import and parse data
    pedestrian_data = pd.read_csv(file_path, sep='\t', header=None)
    pedestrian_data.columns = ["Frame Number", "Pedestrian ID", "X-Coordinate", "Y-Coordinate"]

    # Group the data by 'Pedestrian ID' and extract trajectories as lists of (x, y) tuples
    trajectories = pedestrian_data.groupby("Pedestrian ID").apply(lambda group: (group.name, list(zip(group["X-Coordinate"], group["Y-Coordinate"]))))
    trajectories = trajectories.tolist()

    # Return list of trajectories
    # Each entry is (Pedestrian ID, Trajectory)
    return trajectories

for trajectory in format_data(r'src\datasets\eth\test\biwi_eth.txt'):
    print(trajectory)

# Extract Traectories (Z-scale normalization)

In [3]:
import pandas as pd

def format_data(file_path):
    # Import and parse data
    pedestrian_data = pd.read_csv(file_path, sep='\t', header=None)
    pedestrian_data.columns = ["Frame Number", "Pedestrian ID", "X-Coordinate", "Y-Coordinate"]

    # Calculate mean and standard deviation for normalization
    x_mean = pedestrian_data["X-Coordinate"].mean()
    x_std = pedestrian_data["X-Coordinate"].std()
    y_mean = pedestrian_data["Y-Coordinate"].mean()
    y_std = pedestrian_data["Y-Coordinate"].std()

    # Apply z-scale normalization
    pedestrian_data["X-Coordinate"] = (pedestrian_data["X-Coordinate"] - x_mean) / x_std
    pedestrian_data["Y-Coordinate"] = (pedestrian_data["Y-Coordinate"] - y_mean) / y_std

    # Group the data by 'Pedestrian ID' and extract normalized trajectories
    trajectories = pedestrian_data.groupby("Pedestrian ID").apply(lambda group: (group.name, list(zip(group["X-Coordinate"], group["Y-Coordinate"]))))
    trajectories = trajectories.tolist()

    # Return both the normalized trajectories and the values needed for inverse transformation
    return trajectories, x_mean, x_std, y_mean, y_std

def denormalize_trajectory(normalized_trajectory, x_mean, x_std, y_mean, y_std):
    # Convert each normalized (x, y) back to original coordinates
    denormalized_trajectory = [(x * x_std + x_mean, y * y_std + y_mean) for x, y in normalized_trajectory]
    return denormalized_trajectory

# Example usage
normalized_trajectories, x_mean, x_std, y_mean, y_std = format_data(r'src\datasets\eth\test\biwi_eth.txt')

# Denormalize each trajectory
for pedestrian_id, normalized_trajectory in normalized_trajectories:
    original_trajectory = denormalize_trajectory(normalized_trajectory, x_mean, x_std, y_mean, y_std)
    original_trajectory = normalized_trajectory
    print(f"Pedestrian ID: {pedestrian_id}, Original Trajectory: {original_trajectory}")


Pedestrian ID: 1.0, Original Trajectory: [(0.6401034649450357, -0.9088463800418342), (0.8623848524434465, -0.8034506365282145), (1.0826637049193493, -0.6980548930145949), (1.2949324173052192, -0.5241519162171225), (1.5112061997361057, -0.3713280881223741)]
Pedestrian ID: 2.0, Original Trajectory: [(1.6774166066042866, 0.25577658578366214), (1.3670236781155147, 0.22942764990525732), (1.2228411564949235, 0.25577658578366214), (1.010572444109054, 0.3453629677702387), (0.8623848524434465, 0.4876472215136254), (0.6941719105527572, 0.540345093270435), (0.5359716437746088, 0.6246616880813307), (0.3817764470414768, 0.6878991341895025), (0.24159899546590224, 0.7195178572435882), (0.11944435909290171, 0.7932948777031222), (-0.004712812302607148, 0.8776114725140179), (-0.07880660813541084, 0.9724676416762754), (-0.15089786894570636, 1.1937987030548765), (-0.21297645464346068, 1.046244662135809), (-0.2630398302061659, 1.2623059363387292), (-0.3591615112865598, 1.341352743973944), (-0.4893262877495

# Relative Coordinates

In [1]:
import os
import math
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data.sampler import Sampler
from torch.utils.data.dataloader import DataLoader

def pecnet_traj_collate_fn(data):
    obs_seq_list, pred_seq_list, non_linear_ped_list, loss_mask_list, _, _ = zip(*data)

    _len = [len(seq) for seq in obs_seq_list]
    cum_start_idx = [0] + np.cumsum(_len).tolist()
    seq_start_end = [[start, end] for start, end in zip(cum_start_idx, cum_start_idx[1:])]
    seq_start_end = torch.LongTensor(seq_start_end)
    scene_mask = torch.zeros(sum(_len), sum(_len), dtype=torch.bool)
    for idx, (start, end) in enumerate(seq_start_end):
        scene_mask[start:end, start:end] = 1

    out = [torch.cat(obs_seq_list, dim=0), torch.cat(pred_seq_list, dim=0),
           torch.cat(non_linear_ped_list, dim=0), torch.cat(loss_mask_list, dim=0), scene_mask, seq_start_end]
    return tuple(out)

def poly_fit(traj, traj_len, threshold):
    """
    Input:
    - traj: Numpy array of shape (2, traj_len)
    - traj_len: Len of trajectory
    - threshold: Minimum error to be considered for non linear traj
    Output:
    - int: 1 -> Non Linear 0-> Linear
    """
    t = np.linspace(0, traj_len - 1, traj_len)
    res_x = np.polyfit(t, traj[0, -traj_len:], 2, full=True)[1]
    res_y = np.polyfit(t, traj[1, -traj_len:], 2, full=True)[1]
    if res_x + res_y >= threshold:
        return 1.0
    else:
        return 0.0

def read_file(_path, delim='\t'):
    data = []
    if delim == 'tab':
        delim = '\t'
    elif delim == 'space':
        delim = ' '
    with open(_path, 'r') as f:
        for line in f:
            line = line.strip().split(delim)
            line = [float(i) for i in line]
            data.append(line)
    return np.asarray(data)

class PECNETTrajectoryDataset(Dataset):
    """Dataloder for the Trajectory datasets"""
    def __init__(self, data_dir, obs_len=8, pred_len=12, skip=1, threshold=0.002, min_ped=1, delim='\t'):
        """
        Args:
        - data_dir: Directory containing dataset files in the format
        <frame_id> <ped_id> <x> <y>
        - obs_len: Number of time-steps in input trajectories
        - pred_len: Number of time-steps in output trajectories
        - skip: Number of frames to skip while making the dataset
        - threshold: Minimum error to be considered for non linear traj
        when using a linear predictor
        - min_ped: Minimum number of pedestrians that should be in a seqeunce
        - delim: Delimiter in the dataset files
        """
        super(PECNETTrajectoryDataset, self).__init__()

        self.data_dir = data_dir
        self.obs_len = obs_len
        self.pred_len = pred_len
        self.skip = skip
        self.seq_len = self.obs_len + self.pred_len
        self.delim = delim

        all_files = os.listdir(self.data_dir)
        all_files = [os.path.join(self.data_dir, _path) for _path in all_files]
        num_peds_in_seq = []
        seq_list = []
        seq_list_rel = []
        loss_mask_list = []
        non_linear_ped = []
        for path in all_files:
            data = read_file(path, delim)
            frames = np.unique(data[:, 0]).tolist()
            frame_data = []
            for frame in frames:
                frame_data.append(data[frame == data[:, 0], :])
            num_sequences = int(math.ceil((len(frames) - self.seq_len + 1) / skip))

            for idx in range(0, num_sequences * self.skip + 1, skip):
                curr_seq_data = np.concatenate(frame_data[idx:idx + self.seq_len], axis=0)
                peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
                curr_seq_rel = np.zeros((len(peds_in_curr_seq), 2, self.seq_len))
                curr_seq = np.zeros((len(peds_in_curr_seq), 2, self.seq_len))
                curr_loss_mask = np.zeros((len(peds_in_curr_seq), self.seq_len))
                num_peds_considered = 0
                _non_linear_ped = []
                for _, ped_id in enumerate(peds_in_curr_seq):
                    curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == ped_id, :]
                    curr_ped_seq = np.around(curr_ped_seq, decimals=4)
                    pad_front = frames.index(curr_ped_seq[0, 0]) - idx
                    pad_end = frames.index(curr_ped_seq[-1, 0]) - idx + 1
                    if pad_end - pad_front != self.seq_len:
                        continue
                    curr_ped_seq = np.transpose(curr_ped_seq[:, 2:])
                    curr_ped_seq = curr_ped_seq
                    # Make coordinates relative
                    rel_curr_ped_seq = np.zeros(curr_ped_seq.shape)
                    rel_curr_ped_seq[:, 1:] = curr_ped_seq[:, 1:] - curr_ped_seq[:, :-1]
                    _idx = num_peds_considered
                    curr_seq[_idx, :, pad_front:pad_end] = curr_ped_seq
                    curr_seq_rel[_idx, :, pad_front:pad_end] = rel_curr_ped_seq
                    # Linear vs Non-Linear Trajectory
                    _non_linear_ped.append(poly_fit(curr_ped_seq, pred_len, threshold))
                    curr_loss_mask[_idx, pad_front:pad_end] = 1
                    num_peds_considered += 1

                if num_peds_considered > min_ped:
                    non_linear_ped += _non_linear_ped
                    num_peds_in_seq.append(num_peds_considered)
                    loss_mask_list.append(curr_loss_mask[:num_peds_considered])
                    seq_list.append(curr_seq[:num_peds_considered])
                    seq_list_rel.append(curr_seq_rel[:num_peds_considered])

        self.num_seq = len(seq_list)
        seq_list = np.concatenate(seq_list, axis=0)
        seq_list_rel = np.concatenate(seq_list_rel, axis=0)
        loss_mask_list = np.concatenate(loss_mask_list, axis=0)
        non_linear_ped = np.asarray(non_linear_ped)
        self.num_peds_in_seq = np.array(num_peds_in_seq)

        # Convert numpy -> Torch Tensor
        self.obs_traj = torch.from_numpy(seq_list[:, :, :self.obs_len]).type(torch.float)
        self.pred_traj = torch.from_numpy(seq_list[:, :, self.obs_len:]).type(torch.float)
        self.obs_traj_rel = torch.from_numpy(seq_list_rel[:, :, :self.obs_len]).type(torch.float)
        self.pred_traj_rel = torch.from_numpy(seq_list_rel[:, :, self.obs_len:]).type(torch.float)
        self.loss_mask = torch.from_numpy(loss_mask_list).type(torch.float)
        self.non_linear_ped = torch.from_numpy(non_linear_ped).type(torch.float)
        cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
        self.seq_start_end = [(start, end) for start, end in zip(cum_start_idx, cum_start_idx[1:])]

    def __len__(self):
        return self.num_seq

    def __getitem__(self, index):
        start, end = self.seq_start_end[index]
        out = [self.obs_traj[start:end, :], self.pred_traj[start:end, :],
               self.non_linear_ped[start:end], self.loss_mask[start:end, :], None, [[0, end - start]]]
        return out



def load_dataset(model_name,dataset_path,batch_size,obs_len=8,pred_len=12,skip=1,
                 delim='\t',loader_num_workers=20,):    
    if model_name == 'pecnet':
        dset_train = PECNETTrajectoryDataset(dataset_path, obs_len=obs_len, pred_len=pred_len)
        loader_phase = DataLoader(dset_train, batch_size,collate_fn=pecnet_traj_collate_fn, shuffle=False)
        return loader_phase
    else:
        raise NotImplementedError


dataset = load_dataset('pecnet', r'src\datasets\eth\test', batch_size=64)


: 