# 1) Organize imports

In this section we install and import the needed packages. Then we mount our GDrive.

In [None]:
import os
import pickle
import matplotlib
import operator
import statistics
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

Useful paths.

In [None]:
# Directory on the mounted Drive that holds the dataset CSVs and receives
# every file generated by this notebook. The trailing slash is required,
# because the other paths are built by plain string concatenation.
DATASET_DIR = '/content/gdrive/My Drive/IVA/Datasets/info/'

# Paths of the two CSV files that make up the dataset
COORD_DF_PATH = DATASET_DIR + '2d_skeletal_data_unbc_coords.csv'
SEQ_DF_PATH = DATASET_DIR + '2d_skeletal_data_unbc_sequence.csv'

# Path where the sequence-length histogram is saved
# (despite its name, FIG_DIR is the path of the image file, not a directory)
FIG_DIR = DATASET_DIR + 'histogram.png'

Mount the drive.

In [None]:
# Mount your drive to access the dataset.
# This only works inside Google Colab (the google.colab package is provided by
# the Colab runtime). The mount point must stay '/content/gdrive', because the
# dataset paths defined in this notebook all start with that prefix.
from google.colab import drive
drive.mount('/content/gdrive')

# 2) Information on dataset distribution

Save and show some information about the dataset, together with a histogram of the sequence lengths.

In [None]:
# Load the per-sequence table; its 'num_frames' column holds the length of
# each sequence.
data = pd.read_csv(SEQ_DF_PATH)
frames_per_seq = data['num_frames']

# Summary statistics of the sequence lengths. The names intentionally avoid
# `max` and `min`: rebinding those would shadow the Python built-ins for every
# later cell of the notebook.
mean_frames = frames_per_seq.mean()
max_frames = frames_per_seq.max()
min_frames = frames_per_seq.min()
print("Frames per sequence --> mean: " + str(mean_frames) +
      " - min: " + str(min_frames) + " - max: " + str(max_frames))

# Histogram of the sequence lengths, with a red vertical line at the mean.
frames_per_seq.plot(kind='hist', bins=200)
plt.axvline(mean_frames, c='red')
plt.xlabel('Number of Frame')
plt.ylabel('Frequencies')
plt.title("sequence length distribution")
plt.savefig(FIG_DIR, dpi=200)

# Show the figure BEFORE closing it: closing first leaves nothing for
# plt.show() to display.
plt.show()
plt.close()


# 3) Selection of Landmarks 

Possible landmarks selection:

*   Eyes *→* [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]
*   Eyebrows *→* [17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
*   Nose *→* [27, 28, 29, 30, 31, 32, 33, 34, 35]
*   Mouth *→* [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,64, 65]
*   Face contours *→* [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]




In [None]:
# Predefined landmark selections.
# Every group is a collection of landmark indices in the range 0..65; the
# base groups are contiguous index ranges and the combined groups are built
# from them.

# Face contours (the variable keeps its original spelling so that existing
# references to it keep working)
group_face_countours = list(range(0, 17))

# Eyebrows
group_eyebrows = list(range(17, 27))

# Nose
group_nose = list(range(27, 36))

# Eye
group_eye = list(range(36, 48))

# Mouth
group_mouth = list(range(48, 66))

# Eyes + Eyebrows (eyebrow indices first, then eye indices)
group_eyes_eyebrows = group_eyebrows + group_eye

# Nose + Mouth (nose indices first, then mouth indices)
group_nose_mouth = group_nose + group_mouth

# All
group_all = range(0, 66)


Select the group of the landmarks to be used.

In [None]:
# Landmark indices used by the rest of the notebook. Assign any of the
# group_* collections here; the plotting cell, get_velocities_frames() and
# the dataset-generation cell all read the selection through this name.
selected_lndks_idx = group_all

Show the selected landmarks for one example frame.

In [None]:
# Name of the sequence to display. It is compared verbatim with the
# 'sequence_name' column, so the brackets and quotes are part of the value.
seq_name = "['bn080t1aeunaff']"

coord_df = pd.read_csv(COORD_DF_PATH)
seq_df = pd.read_csv(SEQ_DF_PATH)

# Locate the sequence; its row index is the key used by column '0' of the
# coordinates table.
seq = seq_df.query('sequence_name== @seq_name')
if seq.empty:
    # Fail with a readable message instead of an opaque IndexError below.
    raise ValueError("Sequence " + seq_name + " not found in " + SEQ_DF_PATH)
seq_idx = seq.index.values[0]

VAS = seq['VAS'][seq_idx]
num_frames = seq['num_frames'][seq_idx]

print("Sequence: " + seq_name + " --> VAS: " + str(VAS) + " - Frame numbers: " + str(num_frames))

# All coordinate rows of this sequence, without the first two columns.
# After that, the first num_lndks columns are x values and the remaining
# columns are y values.
num_lndks = 66
lndks = coord_df.loc[coord_df['0'] == seq_idx].values
lndks = lndks[:, 2:]

lndks_x = lndks[:, :num_lndks]
lndks_y = lndks[:, num_lndks:]

# Keep only the selected landmarks of the first num_frames rows: one row per
# frame, one column per selected landmark.
selected_cols = list(selected_lndks_idx)
lndks_selected_x = lndks_x[:num_frames, selected_cols]
lndks_selected_y = lndks_y[:num_frames, selected_cols]

# Scatter plot of the selected landmarks of one frame. The y axis is inverted
# so that larger y values are drawn lower in the figure.
frame_idx = 0
plt.title("Landmarks Frame " + str(frame_idx))
plt.scatter(lndks_selected_x[frame_idx, :], lndks_selected_y[frame_idx, :], s=100, alpha=0.5)
plt.xlim(50,200)
plt.ylim(75,225)
plt.gca().invert_yaxis()

plt.show()

# 4) Dataset Generation


In this section we extract the data from the dataset.

## 4.1) Utility functions


Define some utility functions.

In [None]:
# Get the velocities of all selected landmarks for each frame of each sequence

def get_velocities_frames(selected_idx=None, coord_path=None, seq_path=None,
                          num_lndks=66, centroid_lndk=30):
    """Compute per-frame landmark velocities for every sequence.

    For each sequence the landmark coordinates are centred on the per-frame
    centroid of all landmarks. The landmark at ``centroid_lndk`` is then
    overwritten with the centroid itself, so its velocity is the displacement
    of the centroid. A velocity is the Euclidean distance between a
    landmark's (centred) positions in two consecutive frames.

    Args:
        selected_idx: Iterable of landmark indices to keep in the output.
            Defaults to the notebook-level ``selected_lndks_idx``.
        coord_path: CSV with one row per frame. Column ``'0'`` holds the
            sequence index; after the first two columns come ``num_lndks``
            x values followed by the y values. Defaults to ``COORD_DF_PATH``.
        seq_path: CSV with one row per sequence and a ``'num_frames'``
            column. Defaults to ``SEQ_DF_PATH``.
        num_lndks: Number of landmarks per frame.
        centroid_lndk: Index of the landmark replaced by the centroid.

    Returns:
        A list with one array per sequence, in sequence order. Each array has
        one row per kept velocity and one column per selected landmark, in
        the order given by ``selected_idx``.

    Raises:
        ValueError: If a sequence's number of coordinate rows differs from
            its ``num_frames`` value.
    """
    # Fall back to the notebook-level configuration when not given explicitly.
    if selected_idx is None:
        selected_idx = selected_lndks_idx
    if coord_path is None:
        coord_path = COORD_DF_PATH
    if seq_path is None:
        seq_path = SEQ_DF_PATH

    selected_cols = np.asarray(list(selected_idx), dtype=np.intp)

    coord_df = pd.read_csv(coord_path)
    seq_df = pd.read_csv(seq_path)

    # Extracted once instead of once per sequence.
    coord_values = coord_df.values
    coord_seq_ids = coord_df['0'].values

    velocities = []
    for seq_num in range(seq_df.shape[0]):
        num_frames = seq_df['num_frames'][seq_num]

        # Rows of this sequence, without the first two columns.
        lndks = coord_values[coord_seq_ids == seq_num][:, 2:]
        if lndks.shape[0] != num_frames:
            raise ValueError(
                "Sequence {}: found {} coordinate rows but num_frames is {}".format(
                    seq_num, lndks.shape[0], num_frames))

        # Per-frame centroid of all landmarks.
        centroid_x = lndks[:, :num_lndks].sum(axis=1) / num_lndks
        centroid_y = lndks[:, num_lndks:].sum(axis=1) / num_lndks

        # Centre every landmark on the centroid of its frame ...
        centered_x = lndks[:, :num_lndks] - centroid_x[:, np.newaxis]
        centered_y = lndks[:, num_lndks:] - centroid_y[:, np.newaxis]

        # ... and store the centroid itself in the slot of `centroid_lndk`.
        centered_x[:, centroid_lndk] = centroid_x
        centered_y[:, centroid_lndk] = centroid_y

        # Euclidean distance between consecutive frames. np.power is kept on
        # purpose so the values match previously generated files exactly.
        diff_x = centered_x[:-1] - centered_x[1:]
        diff_y = centered_y[:-1] - centered_y[1:]
        lndk_vel = np.power(np.power(diff_x, 2) + np.power(diff_y, 2), 0.5)

        # NOTE(review): the first velocity row (frame 0 -> frame 1) is
        # dropped, exactly as the original loop starting at k=1 did; confirm
        # whether this is intentional.
        velocities.append(lndk_vel[1:][:, selected_cols])

    return velocities

## 4.2) Dataset Generation

In [None]:
# Create two csv files, one for the training dataset and one for the test dataset

velocities = get_velocities_frames()
seq_df = pd.read_csv(SEQ_DF_PATH)

# Sequences whose index is below this value go to the training file, all the
# others go to the test file.
first_test_seq = 180

# Build one row per (sequence, frame):
#   sequence index, frame index, the selected landmark velocities, label.
lst = []
for id_seq, seq_velocities in enumerate(velocities):
    # Label of the sequence, read positionally from the second column of the
    # sequence table (presumably the 'VAS' column - TODO confirm). A single
    # .iloc[row, col] lookup is used because integer-position indexing on the
    # row Series (`.iloc[row][1]`) is deprecated in recent pandas versions.
    vas = seq_df.iloc[id_seq, 1]
    for id_frame, frame_velocities in enumerate(seq_velocities):
        lst.append([id_seq, id_frame] + list(frame_velocities) + [vas])

# Column names: the velocity columns are numbered by position within the
# selection, not by landmark index.
col = ['Sequenza', 'Frame']
col += ['Vel' + str(i) for i in range(len(selected_lndks_idx))]
col.append('Label')

df = pd.DataFrame(lst, columns=col)

# Split by sequence index so that all frames of a sequence end up in the
# same file.
is_train = df['Sequenza'] < first_test_seq
train = df.loc[is_train]
test = df.loc[~is_train]

# The number of selected landmarks is part of the file name.
num_selected = str(len(selected_lndks_idx))
name_csv_train = DATASET_DIR + 'train-velocity-' + num_selected + '.csv'
name_csv_test = DATASET_DIR + 'test-velocity-' + num_selected + '.csv'

train.to_csv(name_csv_train, index=False)
test.to_csv(name_csv_test, index=False)


    