In [None]:
# Mount Google Drive inside the Colab VM at /content/drive; the later cells
# read and write the KITTI data under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Download Data

In [None]:
%cd /content/drive/MyDrive/IDL/kitti/
!mkdir tracking
%cd tracking

!wget "https://s3.eu-central-1.amazonaws.com/avg-kitti/data_tracking_image_2.zip"
!wget "https://s3.eu-central-1.amazonaws.com/avg-kitti/data_tracking_velodyne.zip"
!wget "https://s3.eu-central-1.amazonaws.com/avg-kitti/data_tracking_label_2.zip"

/content/drive/MyDrive/IDL/kitti
/content/drive/MyDrive/IDL/kitti/tracking
--2023-04-22 14:12:48--  https://s3.eu-central-1.amazonaws.com/avg-kitti/data_tracking_image_2.zip
Resolving s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)... 52.219.208.13
Connecting to s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)|52.219.208.13|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15813146295 (15G) [application/zip]
Saving to: ‘data_tracking_image_2.zip’


2023-04-22 14:24:31 (21.5 MB/s) - ‘data_tracking_image_2.zip’ saved [15813146295/15813146295]

--2023-04-22 14:24:32--  https://s3.eu-central-1.amazonaws.com/avg-kitti/data_tracking_velodyne.zip
Resolving s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)... 52.219.208.1
Connecting to s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)|52.219.208.1|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 36829852418 (34G) [application/zip]
Saving to: ‘da

In [None]:
# Work from the directory that holds the downloaded archives.
%cd /content/drive/MyDrive/IDL/kitti/tracking/

# Extraction commands for the three archives. They are commented out,
# presumably because the data was already extracted on a previous run;
# uncomment them the first time the notebook is used.
# (-q suppresses unzip's per-file listing for the two large archives.)
# !unzip data_tracking_label_2.zip
# !unzip -q data_tracking_image_2.zip
# !unzip -q data_tracking_velodyne.zip

## KITTI Preprocess

In [None]:
import pickle
import json
import os
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
%cd /content/drive/MyDrive/IDL/kitti/tracking/training/label_02

if not os.path.exists('sequence_identified_data.csv'):
    print("Creating Dataset")
    
    # Read all files into a single dataframe
    df = pd.DataFrame()
    for file in tqdm(sorted(os.listdir('.'))):
        with open(file) as f: data = [[file]+line.strip().split(' ') for line in f.readlines()]
        df = pd.concat([df, pd.DataFrame(data, columns='file frame track_id type truncated occluded alpha bbox_left bbox_top bbox_right bbox_bottom dim_height dim_width dim_length loc_x loc_y loc_z rotation_y'.split())])
    df.reset_index(drop=True, inplace=True)
    for col in 'frame track_id'.split():
        df[col] = df[col].astype(int)
    df = df[df['type']=='Car']

    # Tag sequences with same sequence number
    seq_count = -1
    df['seq'] = [-1 for _ in range(len(df))]
    for file in tqdm(sorted(df['file'].unique())):
        for track_id in sorted(df[df['file']==file]['track_id']):
            single_car_df = df[(df['file']==file) & (df['track_id']==track_id)]
            prev_index = None
            seq_count += 1
            for index, row in single_car_df.iterrows():
                if prev_index is None:
                    prev_index = index
                    df.loc[index, 'seq'] = seq_count
                else:
                    if df.loc[index, 'frame']-1 != df.loc[prev_index, 'frame']: seq_count += 1
                    df.loc[index, 'seq'] = seq_count
                    prev_index = index

    # Replace sequence numbers with unique increasing numbers
    replace_dict = {}
    for seq in sorted(df['seq'].unique()):
        replace_dict[seq] = len(replace_dict)
    df['seq'].replace(replace_dict, inplace=True)

    # Save processed dataset
    df.to_csv('sequence_identified_data.csv', index=False)
    display(df['file frame track_id seq'.split()].sort_values('file frame seq'.split()))
else: 
    print("File already exists")
    df = pd.read_csv('sequence_identified_data.csv')
    display(df)

/content/drive/MyDrive/IDL/kitti/tracking/training/label_02
File already exists


Unnamed: 0,file,frame,track_id,type,truncated,occluded,alpha,bbox_left,bbox_top,bbox_right,bbox_bottom,dim_height,dim_width,dim_length,loc_x,loc_y,loc_z,rotation_y,seq
0,0000.txt,109,5,Car,0,0,-1.214970,873.920950,187.130316,982.119251,244.218665,1.507812,1.687051,4.041130,9.645580,1.969339,21.814435,-0.804429,1
1,0000.txt,110,5,Car,0,0,-1.224466,863.383830,186.542931,971.945745,244.780494,1.507812,1.687051,4.041130,9.162753,1.943332,21.409208,-0.825836,1
2,0000.txt,111,5,Car,0,0,-1.233380,852.403904,185.936597,961.413222,245.363981,1.507812,1.687051,4.041130,8.679926,1.917325,21.003980,-0.847243,1
3,0000.txt,112,5,Car,0,0,-1.241673,840.951001,185.310324,950.503592,245.970495,1.507812,1.687051,4.041130,8.197099,1.891318,20.598752,-0.868650,1
4,0000.txt,113,4,Car,0,1,0.667215,0.000000,175.332718,251.622402,278.824334,1.523438,1.697113,4.518535,-8.507631,1.593847,11.655923,0.047361,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27295,0020.txt,834,123,Car,0,2,-1.594327,565.630998,181.832172,656.129408,259.693635,1.413044,1.659561,3.762694,-0.032819,1.353894,15.148199,-1.596901,573
27296,0020.txt,835,122,Car,0,0,-1.594804,548.118604,192.727745,735.384677,366.247299,1.388734,1.564079,3.448134,0.247743,1.507700,7.772067,-1.564615,572
27297,0020.txt,835,123,Car,0,2,-1.594290,565.603696,181.760988,656.100683,259.600719,1.413044,1.659561,3.762694,-0.033325,1.352205,15.148427,-1.596896,573
27298,0020.txt,836,122,Car,0,0,-1.594785,548.263634,192.700731,735.200760,365.899948,1.388734,1.564079,3.448134,0.248135,1.507455,7.782714,-1.564588,572


In [None]:
# Turn each split into fixed-length windows and cache them as .npy files.
#   data       : (n_windows, window_length, n_feature_columns) float array
#   frame_data : (n_windows, window_length, len(drop_cols)) object array holding
#                the bookkeeping columns (file, frame, track id, ...) of each row
axis = 0
window_length = 10
drop_cols = 'file frame track_id type seq truncated occluded'.split()

# True rebuilds the arrays on every run (the behaviour of the previous version,
# which deleted the cached file first). Set to False to load the saved arrays
# when both files of a split exist.
force_rebuild = True


def full_windows(arr):
    """Split `arr` along `axis` into consecutive chunks of `window_length` rows.

    Chunks do not overlap. A trailing chunk shorter than `window_length`
    (including a whole sequence that is too short) is discarded.
    """
    pieces = np.array_split(arr, range(window_length, arr.shape[axis], window_length), axis=axis)
    return [piece for piece in pieces if piece.shape[axis] == window_length]


# `split_df` (not `df`) so the loop does not overwrite the full label table.
for split_df, split_name in zip([train_df, val_df], ['train', 'val']):
    data_path = f'{split_name}_data_winlen_{window_length}.npy'
    frame_data_path = f'{split_name}_frame_data_winlen_{window_length}.npy'
    # The cache is usable only if both arrays of the split are present.
    cache_complete = os.path.exists(data_path) and os.path.exists(frame_data_path)

    if force_rebuild or not cache_complete:
        print("Creating numpy")
        windows = []
        frame_windows = []
        for seq, seq_df in split_df.groupby('seq'):
            seq_df = seq_df.sort_values('frame')
            frames = seq_df['frame'].to_list()
            # Sanity check: a sequence should cover a gap-free range of frames.
            if frames[0] + len(frames) - 1 != frames[-1]: print(frames)
            # dtype=float keeps the feature array numeric even if the label
            # columns were left as strings upstream.
            windows += full_windows(seq_df.drop(drop_cols, axis=1).to_numpy(dtype=float))
            frame_windows += full_windows(seq_df[drop_cols].to_numpy())
        if not windows:
            raise ValueError(f"No sequence in the {split_name} split has at least {window_length} frames")
        data = np.stack(windows, axis=0)
        frame_data = np.stack(frame_windows, axis=0)

        np.save(data_path, data)
        np.save(frame_data_path, frame_data)

    else:
        print("Loading numpy")
        data = np.load(data_path)
        # frame_data mixes strings and numbers, so it is stored as a pickled
        # object array; np.load refuses those unless allow_pickle=True.
        # Only load files written by this notebook (pickle can execute code).
        frame_data = np.load(frame_data_path, allow_pickle=True)

    print(data.shape, frame_data.shape)

Creating numpy
(1799, 10, 12) (1799, 10, 7)
Creating numpy
(675, 10, 12) (675, 10, 7)
