In [None]:
'''
######################################
Code of the ETH dataset preprocessing
Data preprocessing steps:
                        1) Adjusting the File data to the desired format [#FrameID, PedID, PosX, PosY] 
                        2) Interpolation
                        3) Taking a frame each 10 frames (rather than each 6 frames)
 by Habiba AMROUNE, June 2021
######################################
'''

In [9]:
import math 
import numpy as np
from numpy import savetxt
from scipy import interpolate
from operator import itemgetter

In [10]:
'''
Reading the annotations from the obsmat.txt file given by the Biwi dataset, 
obsmat format: [frame_number pedestrian_ID pos_x pos_z pos_y v_x v_z v_y ]
and extracting only [frame_number pedestrian_ID pos_x pos_y] 
'''
data_to_interpolate = np.genfromtxt('eth_hotel.txt',usecols = (0, 1, 2, 4))

#displaying floats with 2 decimal places
float_formatter = "{:.2f}".format
np.set_printoptions(formatter={'float_kind':float_formatter})  

savetxt('Data_formated_eth_hotel.csv', data_to_interpolate, delimiter=',')

In [11]:
#############################################################################################################
########################################### Interpolation ###################################################
#############################################################################################################
    
interpolated_data = np.empty((0,4), dtype=float)
t=0
for i in range ( 1, int(np.max(data_to_interpolate[:,1])+1)):
    mask = data_to_interpolate[:, 1] == i
    traj_of_ped_i = data_to_interpolate[mask, :]   #list of frames where the pedestrian 'i' appeard 
    if (traj_of_ped_i.size == 0):
        print('-----------------------------------------------------')
        print('this PedID does''t exist in the data :',i)
        x=x+1
        t=t+1
    else:
        x = int(traj_of_ped_i[0,0])                    #the first appearance of the pedestrian 'i'
        y = int(traj_of_ped_i[-1,0])                   #the last appearance of the pedestrian 'i'
        
        if ((x%10) != 0):                              #testing on the first frame
            if ((x%10)<5):
                x = int(traj_of_ped_i[0,0]) - (x%10)
            else:
                x = (int(traj_of_ped_i[0,0]) - (x%10))+10
                
        if ((y%10) != 0):                              #testing on the last frame
            if ((y%10)<5):
                y = int(traj_of_ped_i[-1,0]) - (y%10)
            else:
                y = (int(traj_of_ped_i[-1,0]) - (y%10))+10

        while x < y:                                  #until last appearance of the pedestrian 'i'
            for j in range(0, traj_of_ped_i.shape[0]): 
                z = np.where(traj_of_ped_i[:,0]== x)
                if np.squeeze(traj_of_ped_i[z,0]) == x:   # if the frame exist copy the values of the positions
                    exist_frame = traj_of_ped_i[z,:]
                    interpolated_data = np.append(interpolated_data, exist_frame[0,:,:], axis=0)
                    x=x+1
                else:                                # else: interpolate using scipy.interpolate.interp1d
                    f = interpolate.interp1d(traj_of_ped_i[:,0],[traj_of_ped_i[:,2],traj_of_ped_i[:,3]],fill_value="extrapolate",bounds_error=False)
                    inter = f(x)
                    interpolated_data = np.append(interpolated_data, np.array([[int(x), int(i), float(inter[0]), float(inter[1])]]), axis=0)
                    x=x+1
                if x == y+1:
                    break
    
    percentage = i/(int(np.max(data_to_interpolate[:,1])+1))*100
    percentage = "{:.2f}".format(percentage)
    print('interpolation percentage ', percentage,'%')
    print('-----------------------------------------------------')
        
print('Number of missing pedestrians is: ',t)        
print('-----------------------------------------------------')
print('File infos:')
print('\t Number of trajectories = ',np.max(data_to_interpolate[:,1])-t)
print('\t Number of observations = ',np.max(data_to_interpolate[:,0]))
print('-----------------------------------------------------')


interpolated_data = interpolated_data[np.argsort(interpolated_data[:, 0])]
savetxt('interpolated_data_eth_hotel.csv', interpolated_data, delimiter=',')

  if np.squeeze(traj_of_ped_i[z,0]) == x:   # if the frame exist copy the values of the positions


interpolation percentage  0.24 %
-----------------------------------------------------
interpolation percentage  0.48 %
-----------------------------------------------------
interpolation percentage  0.71 %
-----------------------------------------------------
interpolation percentage  0.95 %
-----------------------------------------------------
interpolation percentage  1.19 %
-----------------------------------------------------
interpolation percentage  1.43 %
-----------------------------------------------------
interpolation percentage  1.66 %
-----------------------------------------------------
interpolation percentage  1.90 %
-----------------------------------------------------
interpolation percentage  2.14 %
-----------------------------------------------------
interpolation percentage  2.38 %
-----------------------------------------------------
interpolation percentage  2.61 %
-----------------------------------------------------
interpolation percentage  2.85 %
----------

In [14]:
#############################################################################################################
################################### Taking a frame each 10 frames ###########################################
#############################################################################################################
def frames_10(inter_data):
    frames = inter_data[:,0]
    frames = sorted(frames)
    frames = np.unique(frames)
    final_data = []
    for i in range(0,len(frames)):
        id = frames[i]
        if (((id%10)==0) or(id==0)):
            frm = inter_data[inter_data[:,0] == id,:]
            final_data.extend(frm) 
    final_data = np.unique(final_data, axis=0)
    return final_data

Final_data = frames_10(interpolated_data)
savetxt('Final_data_eth_hotel.csv', Final_data, delimiter=',')