Mount the drive

In [None]:
# Mount Google Drive under /content/gdrive so the dataset files stored there
# can be read and written by the following cells.
# This cell uses the `google.colab` package, so it is meant to run on Google Colab.
from google.colab import drive
drive.mount('/content/gdrive')
# List the top level of the mounted drive as a quick check that the mount worked.
!ls -l "/content/gdrive/My Drive/"

Save some useful paths

In [None]:
# Folder on the mounted drive that holds the dataset info files; every other
# path in this cell is built from it.
dataset_dir = '/content/gdrive/My Drive/IVA/Datasets/info/'
# CSV with the landmark coordinates and CSV with the per-sequence information.
coord_df_path = dataset_dir + '2d_skeletal_data_unbc_coords.csv'
seq_df_path = dataset_dir + '2d_skeletal_data_unbc_sequence.csv'
# NOTE: despite its name, fig_dir is the full path of the histogram image file.
fig_dir = dataset_dir + 'histogram.png'

# 1) Information on dataset distribution

In [None]:
import pandas as pd
import statistics
import matplotlib.pyplot as plt

Print the mean, maximum and minimum sequence length, and save a histogram of the sequence lengths.

In [None]:
# Load the sequence table; its `num_frames` column holds the length of each sequence.
data = pd.read_csv(seq_df_path)

print(data)

num_frames = data["num_frames"]

print("Info su lunghezza sequenze del dataset:")
print("Medium length : " , num_frames.mean())
print("Max length : " ,num_frames.max())
print("Min length : " ,num_frames.min())

# Histogram of the sequence lengths, with a red vertical line at the mean length.
fig, ax = plt.subplots()
num_frames.plot(kind='hist', bins=200, ax=ax)
ax.axvline(num_frames.mean(), c='red')
ax.set_xlabel('Number of Frame')
ax.set_ylabel('Frequencies')
ax.set_title("sequence length distribution")
fig.savefig(fig_dir, dpi=200)

# Show the figure before closing it: closing first leaves nothing to display.
plt.show()
plt.close(fig)


# 2) Dataset Generation


In [None]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import operator
from sklearn.preprocessing import RobustScaler

Select only the indexes of the landmarks to be used


In [None]:
# Indexes of the landmarks whose velocities are kept (here: all 66 of them).
selected_lndks_idx = range(0, 66)
# Alternative: keep only a subset of the landmarks.
#selected_lndks_idx = [5, 11, 19, 24, 30, 37, 41, 44, 46, 50, 52, 56, 58]

# Sequence ids of the train / test split. range() excludes its stop value, so
# the training range has to stop at 180 to include sequence 179; this matches
# the split applied when the CSV files are written (train: < 180, test: >= 180).
train_video_idx = range(0, 180)
test_video_idx = range(180, 200)

## 2.1) Utility functions
Define some utility functions

In [None]:
# Get the velocities of all selected landmarks for each frame of each sequence

def get_velocities_frames():
  """Compute per-frame landmark displacements for every sequence.

  Reads the coordinates CSV (`coord_df_path`) and the sequence CSV
  (`seq_df_path`). For each sequence the landmark coordinates are centred on
  the per-frame centroid, and the Euclidean distance travelled by each
  landmark between consecutive frames is computed.

  Returns:
    A list with one numpy array per row of the sequence CSV. Each array stacks
    the displacement rows from index 1 onward, restricted to the landmarks
    listed in `selected_lndks_idx`.
  """
  coords_table = pd.read_csv(coord_df_path)
  seq_table = pd.read_csv(seq_df_path)
  n_points = 66
  per_sequence = []
  for seq_id in np.arange(seq_table.shape[0]):
      # Rows whose column '0' matches this sequence, without the first two
      # columns (presumably sequence id and frame index - TODO confirm).
      points = coords_table.loc[coords_table['0'] == seq_id].values[:, 2:]
      n_frames = seq_table['num_frames'][seq_id]

      # Per-frame centroid: the first n_points columns are treated as x
      # coordinates and the remaining columns as y coordinates.
      mean_x = np.array([np.sum(points[f, 0:n_points]) / n_points for f in range(n_frames)])
      mean_y = np.array([np.sum(points[f, n_points:]) / n_points for f in range(n_frames)])

      # Subtract the centroid from every landmark of the same frame.
      shift_x = np.repeat(mean_x.reshape(-1, 1), n_points, axis=1)
      shift_y = np.repeat(mean_y.reshape(-1, 1), n_points, axis=1)
      centered = points - np.hstack((shift_x, shift_y))

      # Landmark 30 is overwritten with the centroid itself, so its
      # displacement is the displacement of the centroid.
      centered[:, 30] = mean_x
      centered[:, 30 + n_points] = mean_y

      # Euclidean distance between each frame and the following one.
      prev_frames = centered[0:centered.shape[0] - 1]
      next_frames = centered[1:centered.shape[0]]
      diff_x = prev_frames[:, 0:n_points] - next_frames[:, 0:n_points]
      diff_y = prev_frames[:, n_points:] - next_frames[:, n_points:]
      speed = np.power(np.power(diff_x, 2) + np.power(diff_y, 2), 0.5)

      # NOTE(review): row 0 of `speed` is not included in the output - confirm
      # that skipping the first frame-to-frame displacement is intended.
      kept = [np.array(speed[row, selected_lndks_idx])
              for row in np.arange(1, speed.shape[0])]
      per_sequence.append(np.array(kept))
  return per_sequence

## 2.2) Dataset Generation

In [None]:
# Create two csv files, one for the training dataset and one for the test dataset

velocities = get_velocities_frames()
seq_df = pd.read_csv(seq_df_path)

# One row per (sequence, frame): [sequence id, frame id, velocities..., label].
# The rows are collected in `rows` (not `list`) so the builtin is not shadowed
# for the rest of the kernel session.
rows = []
for id_seq, sequenza in enumerate(velocities):
    # The label is the value in the second column of the sequence table.
    # A single .iloc[row, col] lookup avoids integer-key indexing on the row
    # Series, which pandas has deprecated for positional access.
    vas = seq_df.iloc[id_seq, 1]
    for id_frames, frame in enumerate(sequenza):
        rows.append([id_seq, id_frames, *frame, vas])

# Column names: sequence id, frame id, one 'Vel<i>' per selected landmark, label.
col = ['Sequenza', 'Frame']
col.extend('Vel' + str(i) for i in range(len(selected_lndks_idx)))
col.append('Label')

df = pd.DataFrame(rows, columns=col)

print(df)

# Sequences with an id below this value go to the training set, the rest to the test set.
first_test_seq = 180
train = df.loc[df['Sequenza'] < first_test_seq]
test = df.loc[df['Sequenza'] >= first_test_seq]

# The file names carry the number of selected landmarks.
name_csv_train = dataset_dir + 'train-velocity-' + str(len(selected_lndks_idx)) + '.csv'
name_csv_test = dataset_dir + 'test-velocity-' + str(len(selected_lndks_idx)) + '.csv'

train.to_csv(name_csv_train, index=False)
test.to_csv(name_csv_test, index=False)


    

       Sequenza  Frame      Vel0      Vel1  ...     Vel63     Vel64     Vel65  Label
0             0      0  0.070404  0.156512  ...  0.130682  0.264413  0.229226      0
1             0      1  0.125513  0.114344  ...  0.120285  0.138199  0.126977      0
2             0      2  0.253132  0.231857  ...  0.081278  0.111015  0.104563      0
3             0      3  0.187514  0.174368  ...  0.061480  0.069704  0.061332      0
4             0      4  0.229464  0.197635  ...  0.051159  0.047535  0.046308      0
...         ...    ...       ...       ...  ...       ...       ...       ...    ...
47993       199    128  0.278181  0.243939  ...  0.062450  0.090637  0.052599      0
47994       199    129  0.235192  0.215203  ...  0.007179  0.060353  0.065273      0
47995       199    130  0.214192  0.216479  ...  0.035070  0.074119  0.118038      0
47996       199    131  0.201538  0.189191  ...  0.035153  0.043057  0.090181      0
47997       199    132  0.337752  0.305015  ...  0.064156  0.1120