In [1]:
'''
######################################
Code of the UCY dataset preprocessing
Data preprocessing steps:
                        1) Adjusting the File data to the desired format [#FrameID, PedID, PosX, PosY] 
                        2) converting the positions from image coordinnates to  world coordinates
                        3) Interpolation
                        4) Taking a frame each 10 frames (25fps ==> 2.5fps)
 by Habiba AMROUNE, June 2021
######################################
'''

'\n######################################\nCode of the UCY dataset preprocessing\nData preprocessing steps:\n                        1) Adjusting the File data to the desired format [#FrameID, PedID, PosX, PosY] \n                        2) converting the positions from image coordinnates to  world coordinates\n                        3) Interpolation\n                        4) Taking a frame each 10 frames (25fps ==> 2.5fps)\n by Habiba AMROUNE, 2021\n######################################\n'

In [70]:
import math 
import numpy as np
from numpy import savetxt
from scipy import interpolate
from operator import itemgetter

In [71]:
#############################################################################################################
############## Getting the desired format [#FrameID, PedID, PosX, PosY] From Pixels to Meters ###############
#############################################################################################################

#Reading the vsp file from the UCY dataset
pixels = open("crowds_zara02.vsp","r") 

#Calculating the number of trajectories
spline = pixels.readline()
spline = spline.split("-", 1)
num_splines = int(spline[0])

#Calculating the number of observations
height = open("crowds_zara02.vsp","r")
height =(len(height.readlines( ))-num_splines)

print('File infos:')
print('\t Number of trajectories = ',num_splines)
print('\t Number of observations = ',height)
print('-----------------------------------------------------')

# Adjusting the File data to the desired format [#FrameID, PedID, PosX, PosY]
Formated_data = np.zeros((height, 4))         #(Number of lines,[frame,ped,y,x])
t_sp = 0
for sp in range(0, num_splines):
    line_sp = pixels.readline()
    line_sp = line_sp.split("-", 1)
    points = int(line_sp[0])
    for pt in range(0, points):
        # we only take PosX, PosY, FrameID
        line = pixels.readline()
        line = line.split(" ", 3)
        x = float(line[0])
        y = float(line[1])
        frame = float(line[2])
        
        #FrameID, PedID, PosX, PosY         
        Formated_data[t_sp,0] = frame
        Formated_data[t_sp,1] = sp + 1
        Formated_data[t_sp,3] = 288 - y
        Formated_data[t_sp,2] = x + 360
    
        t_sp = t_sp + 1

#Sorting the data by FrameID
Formated_data = sorted(Formated_data, key=itemgetter(0))
savetxt('Data_formated_crowds_zara02.csv', Formated_data, delimiter=',')

File infos:
	 Number of trajectories =  204
	 Number of observations =  2376
-----------------------------------------------------


In [72]:
#############################################################################################################
######################################  From Pixels to Meters ###############################################
#############################################################################################################

px = np.ones((len(Formated_data), 3))  
meters = np.zeros((len(Formated_data), 4))
H_mat = np.zeros((3, 3))

#Values of the homography matrix
value =  np.array([[0.02104651, 0, 0], [0, -0.02386598, 13.74680446], [0, 0, 1]]) ## np.array([[0.02104651, 0, 0], [0, 0.02386598 , 0], [0, 0, 1]])

print('Geting the homography matrix values')
for i in range(0, 3):
    H_mat[i,0] = value[i,0]
    H_mat[i,1] = value[i,1]
    H_mat[i,2] = value[i,2]    
print('Done!\n-----------------------------------------------------')

print('Getting PosX, PosY')
#Getting PosX, PosY    
for i in range(0, len(Formated_data)):
    px[i,0] = Formated_data[i][2]
    px[i,1] = Formated_data[i][3]
print('Done!\n-----------------------------------------------------')

print('Conversion of positions from pixels to meters')
for i in range(0,len(Formated_data)):
    meters[i,2] = H_mat[0][0] * px[i][0] + H_mat[0][1] * px[i][1] + H_mat[0][2]
    meters[i,3] = H_mat[1][0] * px[i][0] + H_mat[1][1] * px[i][1] + H_mat[1][2]
    meters[i,1] = Formated_data[i][1]
    meters[i,0] = Formated_data[i][0]
print('Done!\n-----------------------------------------------------')

savetxt('Data_in_meters_crowds_zara02.csv', meters, delimiter=',')

Geting the homography matrix values
Done!
-----------------------------------------------------
Getting PosX, PosY
Done!
-----------------------------------------------------
Conversion of positions from pixels to meters
Done!
-----------------------------------------------------


In [73]:
#############################################################################################################
########################################### Interpolation ###################################################
#############################################################################################################

data_to_interpolate = np.zeros((len(meters),4),dtype=float)
for i in range(0, len(meters)):
    data_to_interpolate[i,0] = meters[i,0]
    data_to_interpolate[i,1] = meters[i,1]
    data_to_interpolate[i,2] = meters[i,2]
    data_to_interpolate[i,3] = meters[i,3]
    
interpolated_data = np.empty((0,4), dtype=float)
t=0
for i in range ( 1, int(np.max(data_to_interpolate[:,1])+1)):
    mask = data_to_interpolate[:, 1] == i
    traj_of_ped_i = data_to_interpolate[mask, :]   #list of frames where the pedestrian 'i' appeard 
    if (traj_of_ped_i.size == 0):
        print('-----------------------------------------------------')
        print('this PedID does''t exist in the data',i)
        x=x+1
        t=t+1
    else:
        x = int(traj_of_ped_i[0,0])                    #the first appearance of the pedestrian 'i'
        y = int(traj_of_ped_i[-1,0])                   #the last appearance of the pedestrian 'i'
        
        if ((x%10) != 0):                              #testing on the first frame
            if ((x%10)<5):
                x = int(traj_of_ped_i[0,0]) - (x%10)
            else:
                x = (int(traj_of_ped_i[0,0]) - (x%10))+10
                
        if ((y%10) != 0):                              #testing on the last frame
            if ((y%10)<5):
                y = int(traj_of_ped_i[-1,0]) - (y%10)
            else:
                y = (int(traj_of_ped_i[-1,0]) - (y%10))+10

        while x < y:                                  #until last appearance of the pedestrian 'i'
            for j in range(0, traj_of_ped_i.shape[0]): 
                z = np.where(traj_of_ped_i[:,0]== x)
                if np.squeeze(traj_of_ped_i[z,0]) == x:   # if the frame exist copy the values of the positions
                    exist_frame = traj_of_ped_i[z,:]
                    interpolated_data = np.append(interpolated_data, exist_frame[0,:,:], axis=0)
                    x=x+1
                else:                                # else: interpolate using scipy.interpolate.interp1d
                    f = interpolate.interp1d(traj_of_ped_i[:,0],[traj_of_ped_i[:,2],traj_of_ped_i[:,3]],fill_value="extrapolate",bounds_error=False)
                    inter = f(x)
                    interpolated_data = np.append(interpolated_data, np.array([[int(x), int(i), float(inter[0]), float(inter[1])]]), axis=0)
                    x=x+1
                if x == y+1:
                    break
    
    percentage = i/(int(np.max(data_to_interpolate[:,1])+1))*100
    percentage = "{:.2f}".format(percentage)
    print('interpolation percentage ', percentage,'%')
    print('-----------------------------------------------------')
        
print('Number of missing pedestrians is: ',t)        
print('-----------------------------------------------------')


interpolated_data = interpolated_data[np.argsort(interpolated_data[:, 0])]
savetxt('interpolated_data_crowds_zara02.csv', interpolated_data, delimiter=',')

  if np.squeeze(traj_of_ped_i[z,0]) == x:   # if the frame exist copy the values of the positions


interpolation percentage  0.49 %
-----------------------------------------------------
interpolation percentage  0.98 %
-----------------------------------------------------
interpolation percentage  1.46 %
-----------------------------------------------------
interpolation percentage  1.95 %
-----------------------------------------------------
interpolation percentage  2.44 %
-----------------------------------------------------
interpolation percentage  2.93 %
-----------------------------------------------------
interpolation percentage  3.41 %
-----------------------------------------------------
interpolation percentage  3.90 %
-----------------------------------------------------
interpolation percentage  4.39 %
-----------------------------------------------------
interpolation percentage  4.88 %
-----------------------------------------------------
interpolation percentage  5.37 %
-----------------------------------------------------
interpolation percentage  5.85 %
----------

In [63]:
#############################################################################################################
################################### taking a frame each 10 frames ###########################################
#############################################################################################################
def frames_10(inter_data):
    frames = inter_data[:,0]
    frames = sorted(frames)
    frames = np.unique(frames)
    final_data = []
    for i in range(0,len(frames)):
        id = frames[i]
        if (((id%10)==0) or(id==0)):
            frm = inter_data[inter_data[:,0] == id,:]
            final_data.extend(frm) 
    final_data = np.unique(final_data, axis=0)
    return final_data

Final_data = frames_10(interpolated_data)
savetxt('Final_data_crowds_zara02.csv', Final_data, delimiter=',')