<a href="https://colab.research.google.com/github/Mario-td/HandGestureClassification/blob/master/DataPreparation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Data preparation**

In [None]:
!pip install orderedset
import pandas as pd
import numpy as np
from orderedset import OrderedSet

In [None]:
# Allows access to Google Drive
from google.colab import drive
drive.mount('/content/drive')

In [106]:
# Path to the files
path = '/content/drive/My Drive/MastersThesis/Dataset/'
df_3D = pd.read_csv(path + '/HandGesture3D.csv')

In [107]:
# Gets the number of steps per sample
n_steps = max(df_3D['Sequence']) + 1

# **Find defective data**

In [108]:
# Finds out which sequences contain more than N frames 
# with all zeros in the columns
N = 5
def findDefaultSequence(ind, n_steps, N): # O(nlogn)
  # Sequence number
  n = 0
  count = 0
  # Samples with more than N rows with all zeros
  s = OrderedSet()

  for i in ind:
    if int (i/n_steps) != n:
      count = 1
      n = int (i/n_steps)
    else:
      count = count + 1
    if count >= N:
        s.add(int (i/n_steps))
  return s

len(findDefaultSequence(ind, n_steps, N))

120

In [109]:
# Drops default samples
for i in reversed(findDefaultSequence(ind, n_steps, N)):  
  df_3D = df_3D.drop(np.arange(i * n_steps , i * n_steps + n_steps, 1))

# **Data augmentation**

In [110]:
# Duplicates the samples by inverting the order of the sequence in time
df_3D_inv_t  = df_3D[::-1].reset_index()
del df_3D_inv_t ['index']
df_3D_inv_t ['Sequence'] = abs(df_3D_inv_t ['Sequence'] - n_steps + 1)
df_3D = pd.concat([df_3D, df_3D_inv_t]).reset_index()
del df_3D['index']

In [111]:
# Duplicates the samples by inverting the x coordinate with respect to the y axis
# "mirroring" the coordinates in the images
df_3D_inv_pos = df_3D

n_keypoints = 21
img_width = 640 # Image width
feat = []

for i in range(0, n_keypoints):
  feat.append('j%d_x' % i)

df_3D_inv_pos[feat] = abs(df_3D[feat] - img_width)
df_3D = pd.concat([df_3D, df_3D_inv_pos]).reset_index()
del df_3D ['index']
df_3D

Unnamed: 0,Label,Sequence,j0_x,j0_y,j0_z,j1_x,j1_y,j1_z,j2_x,j2_y,j2_z,j3_x,j3_y,j3_z,j4_x,j4_y,j4_z,j5_x,j5_y,j5_z,j6_x,j6_y,j6_z,j7_x,j7_y,j7_z,j8_x,j8_y,j8_z,j9_x,j9_y,j9_z,j10_x,j10_y,j10_z,j11_x,j11_y,j11_z,j12_x,j12_y,j12_z,j13_x,j13_y,j13_z,j14_x,j14_y,j14_z,j15_x,j15_y,j15_z,j16_x,j16_y,j16_z,j17_x,j17_y,j17_z,j18_x,j18_y,j18_z,j19_x,j19_y,j19_z,j20_x,j20_y,j20_z
0,0,0,366.343750,375.640625,0.0,327.953125,326.281250,0.0,295.046875,287.890625,257.0,278.59375,244.015625,257.0,251.171875,222.078125,514.0,349.890625,227.562500,257.0,349.890625,189.171875,0.0,349.890625,167.234375,257.0,349.890625,139.812500,1028.0,377.312500,227.56250,256.0,382.796875,194.656250,0.0,382.796875,161.750000,0.0,388.281250,128.84375,0.0,404.734375,238.531250,0.0,415.703125,211.109375,0.0,415.703125,178.203125,0.0,415.703125,150.781250,0.0,421.187500,271.43750,0.0,443.125000,249.500000,0.0,448.609375,227.56250,0.0,443.125000,200.140625,0.0
1,0,1,398.765625,374.203125,0.0,355.390625,319.984375,257.0,339.125000,265.765625,257.0,328.28125,227.812500,257.0,306.593750,206.125000,514.0,404.187500,216.968750,257.0,404.187500,184.437500,257.0,409.609375,162.750000,257.0,415.031250,135.640625,0.0,431.296875,227.81250,1.0,436.718750,189.859375,0.0,447.562500,157.328125,257.0,458.406250,130.21875,1542.0,452.984375,244.078125,257.0,469.250000,206.125000,0.0,480.093750,179.015625,0.0,480.093750,151.906250,0.0,463.828125,271.18750,0.0,501.781250,249.500000,0.0,507.203125,227.81250,0.0,507.203125,211.546875,0.0
2,0,2,406.531250,369.312500,0.0,373.718750,320.093750,257.0,351.843750,265.406250,257.0,340.90625,227.125000,257.0,324.500000,205.250000,514.0,417.468750,216.187500,257.0,422.937500,188.843750,257.0,428.406250,161.500000,257.0,439.343750,134.156250,0.0,444.812500,227.12500,257.0,450.281250,188.843750,257.0,466.687500,161.500000,257.0,477.625000,128.68750,1542.0,466.687500,243.531250,257.0,488.562500,210.718750,0.0,499.500000,183.375000,0.0,504.968750,156.031250,0.0,477.625000,270.87500,0.0,510.437500,254.468750,0.0,521.375000,232.59375,0.0,526.843750,210.718750,0.0
3,0,3,416.500000,358.781250,0.0,383.968750,315.406250,257.0,356.859375,266.609375,257.0,340.59375,228.656250,257.0,329.750000,196.125000,257.0,421.921875,223.234375,257.0,432.765625,185.281250,0.0,443.609375,158.171875,0.0,454.453125,136.484375,0.0,454.453125,228.65625,257.0,470.718750,190.703125,257.0,481.562500,158.171875,257.0,497.828125,131.06250,1542.0,476.140625,244.921875,257.0,497.828125,206.968750,0.0,508.671875,179.859375,0.0,514.093750,158.171875,0.0,486.984375,272.03125,0.0,519.515625,255.765625,0.0,530.359375,239.50000,0.0,535.781250,217.812500,0.0
4,0,4,421.000000,360.000000,0.0,388.000000,310.500000,257.0,366.000000,266.500000,257.0,360.50000,222.500000,257.0,349.500000,195.000000,514.0,437.500000,222.500000,257.0,448.500000,184.000000,257.0,459.500000,156.500000,0.0,470.500000,134.500000,0.0,465.000000,228.00000,257.0,481.500000,189.500000,257.0,498.000000,156.500000,257.0,514.500000,134.50000,0.0,487.000000,244.500000,257.0,509.000000,211.500000,257.0,525.500000,184.000000,0.0,536.500000,156.500000,0.0,498.000000,272.00000,0.0,525.500000,255.500000,0.0,542.000000,239.00000,0.0,553.000000,217.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435323,0,27,421.000000,360.000000,0.0,388.000000,310.500000,257.0,366.000000,266.500000,257.0,360.50000,222.500000,257.0,349.500000,195.000000,514.0,437.500000,222.500000,257.0,448.500000,184.000000,257.0,459.500000,156.500000,0.0,470.500000,134.500000,0.0,465.000000,228.00000,257.0,481.500000,189.500000,257.0,498.000000,156.500000,257.0,514.500000,134.50000,0.0,487.000000,244.500000,257.0,509.000000,211.500000,257.0,525.500000,184.000000,0.0,536.500000,156.500000,0.0,498.000000,272.00000,0.0,525.500000,255.500000,0.0,542.000000,239.00000,0.0,553.000000,217.000000,0.0
435324,0,28,416.500000,358.781250,0.0,383.968750,315.406250,257.0,356.859375,266.609375,257.0,340.59375,228.656250,257.0,329.750000,196.125000,257.0,421.921875,223.234375,257.0,432.765625,185.281250,0.0,443.609375,158.171875,0.0,454.453125,136.484375,0.0,454.453125,228.65625,257.0,470.718750,190.703125,257.0,481.562500,158.171875,257.0,497.828125,131.06250,1542.0,476.140625,244.921875,257.0,497.828125,206.968750,0.0,508.671875,179.859375,0.0,514.093750,158.171875,0.0,486.984375,272.03125,0.0,519.515625,255.765625,0.0,530.359375,239.50000,0.0,535.781250,217.812500,0.0
435325,0,29,406.531250,369.312500,0.0,373.718750,320.093750,257.0,351.843750,265.406250,257.0,340.90625,227.125000,257.0,324.500000,205.250000,514.0,417.468750,216.187500,257.0,422.937500,188.843750,257.0,428.406250,161.500000,257.0,439.343750,134.156250,0.0,444.812500,227.12500,257.0,450.281250,188.843750,257.0,466.687500,161.500000,257.0,477.625000,128.68750,1542.0,466.687500,243.531250,257.0,488.562500,210.718750,0.0,499.500000,183.375000,0.0,504.968750,156.031250,0.0,477.625000,270.87500,0.0,510.437500,254.468750,0.0,521.375000,232.59375,0.0,526.843750,210.718750,0.0
435326,0,30,398.765625,374.203125,0.0,355.390625,319.984375,257.0,339.125000,265.765625,257.0,328.28125,227.812500,257.0,306.593750,206.125000,514.0,404.187500,216.968750,257.0,404.187500,184.437500,257.0,409.609375,162.750000,257.0,415.031250,135.640625,0.0,431.296875,227.81250,1.0,436.718750,189.859375,0.0,447.562500,157.328125,257.0,458.406250,130.21875,1542.0,452.984375,244.078125,257.0,469.250000,206.125000,0.0,480.093750,179.015625,0.0,480.093750,151.906250,0.0,463.828125,271.18750,0.0,501.781250,249.500000,0.0,507.203125,227.81250,0.0,507.203125,211.546875,0.0


# **Data Normalization**

In [112]:
# Max depth
d16bits = 65538
# Image height
img_height = 480
for i in range(0, n_keypoints):
  df_3D['j%d_x' % i] = df_3D['j%d_x' % i] / img_width
  df_3D['j%d_y' % i] = df_3D['j%d_y' % i] / img_height
  df_3D['j%d_z' % i] = df_3D['j%d_z' % i] / d16bits
df_3D

Unnamed: 0,Label,Sequence,j0_x,j0_y,j0_z,j1_x,j1_y,j1_z,j2_x,j2_y,j2_z,j3_x,j3_y,j3_z,j4_x,j4_y,j4_z,j5_x,j5_y,j5_z,j6_x,j6_y,j6_z,j7_x,j7_y,j7_z,j8_x,j8_y,j8_z,j9_x,j9_y,j9_z,j10_x,j10_y,j10_z,j11_x,j11_y,j11_z,j12_x,j12_y,j12_z,j13_x,j13_y,j13_z,j14_x,j14_y,j14_z,j15_x,j15_y,j15_z,j16_x,j16_y,j16_z,j17_x,j17_y,j17_z,j18_x,j18_y,j18_z,j19_x,j19_y,j19_z,j20_x,j20_y,j20_z
0,0,0,0.572412,0.782585,0.0,0.512427,0.679753,0.000000,0.461011,0.599772,0.003921,0.435303,0.508366,0.003921,0.392456,0.462663,0.007843,0.546704,0.474089,0.003921,0.546704,0.394108,0.000000,0.546704,0.348405,0.003921,0.546704,0.291276,0.015686,0.589551,0.474089,0.003906,0.598120,0.405534,0.000000,0.598120,0.336979,0.000000,0.606689,0.268424,0.000000,0.632397,0.496940,0.000000,0.649536,0.439811,0.000000,0.649536,0.371257,0.0,0.649536,0.314128,0.0,0.658105,0.565495,0.0,0.692383,0.519792,0.0,0.700952,0.474089,0.0,0.692383,0.416960,0.0
1,0,1,0.623071,0.779590,0.0,0.555298,0.666634,0.003921,0.529883,0.553678,0.003921,0.512939,0.474609,0.003921,0.479053,0.429427,0.007843,0.631543,0.452018,0.003921,0.631543,0.384245,0.003921,0.640015,0.339062,0.003921,0.648486,0.282585,0.000000,0.673901,0.474609,0.000015,0.682373,0.395540,0.000000,0.699316,0.327767,0.003921,0.716260,0.271289,0.023528,0.707788,0.508496,0.003921,0.733203,0.429427,0.000000,0.750146,0.372949,0.0,0.750146,0.316471,0.0,0.724731,0.564974,0.0,0.784033,0.519792,0.0,0.792505,0.474609,0.0,0.792505,0.440723,0.0
2,0,2,0.635205,0.769401,0.0,0.583936,0.666862,0.003921,0.549756,0.552930,0.003921,0.532666,0.473177,0.003921,0.507031,0.427604,0.007843,0.652295,0.450391,0.003921,0.660840,0.393424,0.003921,0.669385,0.336458,0.003921,0.686475,0.279492,0.000000,0.695020,0.473177,0.003921,0.703564,0.393424,0.003921,0.729199,0.336458,0.003921,0.746289,0.268099,0.023528,0.729199,0.507357,0.003921,0.763379,0.438997,0.000000,0.780469,0.382031,0.0,0.789014,0.325065,0.0,0.746289,0.564323,0.0,0.797559,0.530143,0.0,0.814648,0.484570,0.0,0.823193,0.438997,0.0
3,0,3,0.650781,0.747461,0.0,0.599951,0.657096,0.003921,0.557593,0.555436,0.003921,0.532178,0.476367,0.003921,0.515234,0.408594,0.003921,0.659253,0.465072,0.003921,0.676196,0.386003,0.000000,0.693140,0.329525,0.000000,0.710083,0.284342,0.000000,0.710083,0.476367,0.003921,0.735498,0.397298,0.003921,0.752441,0.329525,0.003921,0.777856,0.273047,0.023528,0.743970,0.510254,0.003921,0.777856,0.431185,0.000000,0.794800,0.374707,0.0,0.803271,0.329525,0.0,0.760913,0.566732,0.0,0.811743,0.532845,0.0,0.828687,0.498958,0.0,0.837158,0.453776,0.0
4,0,4,0.657813,0.750000,0.0,0.606250,0.646875,0.003921,0.571875,0.555208,0.003921,0.563281,0.463542,0.003921,0.546094,0.406250,0.007843,0.683594,0.463542,0.003921,0.700781,0.383333,0.003921,0.717969,0.326042,0.000000,0.735156,0.280208,0.000000,0.726562,0.475000,0.003921,0.752344,0.394792,0.003921,0.778125,0.326042,0.003921,0.803906,0.280208,0.000000,0.760938,0.509375,0.003921,0.795312,0.440625,0.003921,0.821094,0.383333,0.0,0.838281,0.326042,0.0,0.778125,0.566667,0.0,0.821094,0.532292,0.0,0.846875,0.497917,0.0,0.864062,0.452083,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435323,0,27,0.657813,0.750000,0.0,0.606250,0.646875,0.003921,0.571875,0.555208,0.003921,0.563281,0.463542,0.003921,0.546094,0.406250,0.007843,0.683594,0.463542,0.003921,0.700781,0.383333,0.003921,0.717969,0.326042,0.000000,0.735156,0.280208,0.000000,0.726562,0.475000,0.003921,0.752344,0.394792,0.003921,0.778125,0.326042,0.003921,0.803906,0.280208,0.000000,0.760938,0.509375,0.003921,0.795312,0.440625,0.003921,0.821094,0.383333,0.0,0.838281,0.326042,0.0,0.778125,0.566667,0.0,0.821094,0.532292,0.0,0.846875,0.497917,0.0,0.864062,0.452083,0.0
435324,0,28,0.650781,0.747461,0.0,0.599951,0.657096,0.003921,0.557593,0.555436,0.003921,0.532178,0.476367,0.003921,0.515234,0.408594,0.003921,0.659253,0.465072,0.003921,0.676196,0.386003,0.000000,0.693140,0.329525,0.000000,0.710083,0.284342,0.000000,0.710083,0.476367,0.003921,0.735498,0.397298,0.003921,0.752441,0.329525,0.003921,0.777856,0.273047,0.023528,0.743970,0.510254,0.003921,0.777856,0.431185,0.000000,0.794800,0.374707,0.0,0.803271,0.329525,0.0,0.760913,0.566732,0.0,0.811743,0.532845,0.0,0.828687,0.498958,0.0,0.837158,0.453776,0.0
435325,0,29,0.635205,0.769401,0.0,0.583936,0.666862,0.003921,0.549756,0.552930,0.003921,0.532666,0.473177,0.003921,0.507031,0.427604,0.007843,0.652295,0.450391,0.003921,0.660840,0.393424,0.003921,0.669385,0.336458,0.003921,0.686475,0.279492,0.000000,0.695020,0.473177,0.003921,0.703564,0.393424,0.003921,0.729199,0.336458,0.003921,0.746289,0.268099,0.023528,0.729199,0.507357,0.003921,0.763379,0.438997,0.000000,0.780469,0.382031,0.0,0.789014,0.325065,0.0,0.746289,0.564323,0.0,0.797559,0.530143,0.0,0.814648,0.484570,0.0,0.823193,0.438997,0.0
435326,0,30,0.623071,0.779590,0.0,0.555298,0.666634,0.003921,0.529883,0.553678,0.003921,0.512939,0.474609,0.003921,0.479053,0.429427,0.007843,0.631543,0.452018,0.003921,0.631543,0.384245,0.003921,0.640015,0.339062,0.003921,0.648486,0.282585,0.000000,0.673901,0.474609,0.000015,0.682373,0.395540,0.000000,0.699316,0.327767,0.003921,0.716260,0.271289,0.023528,0.707788,0.508496,0.003921,0.733203,0.429427,0.000000,0.750146,0.372949,0.0,0.750146,0.316471,0.0,0.724731,0.564974,0.0,0.784033,0.519792,0.0,0.792505,0.474609,0.0,0.792505,0.440723,0.0


# **2D dataset**

In [113]:
df_2D = df_3D
for i in range(0, n_keypoints):
  del df_2D ['j%d_z' % i]
df_2D

Unnamed: 0,Label,Sequence,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,j5_x,j5_y,j6_x,j6_y,j7_x,j7_y,j8_x,j8_y,j9_x,j9_y,j10_x,j10_y,j11_x,j11_y,j12_x,j12_y,j13_x,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y,j18_x,j18_y,j19_x,j19_y,j20_x,j20_y
0,0,0,0.572412,0.782585,0.512427,0.679753,0.461011,0.599772,0.435303,0.508366,0.392456,0.462663,0.546704,0.474089,0.546704,0.394108,0.546704,0.348405,0.546704,0.291276,0.589551,0.474089,0.598120,0.405534,0.598120,0.336979,0.606689,0.268424,0.632397,0.496940,0.649536,0.439811,0.649536,0.371257,0.649536,0.314128,0.658105,0.565495,0.692383,0.519792,0.700952,0.474089,0.692383,0.416960
1,0,1,0.623071,0.779590,0.555298,0.666634,0.529883,0.553678,0.512939,0.474609,0.479053,0.429427,0.631543,0.452018,0.631543,0.384245,0.640015,0.339062,0.648486,0.282585,0.673901,0.474609,0.682373,0.395540,0.699316,0.327767,0.716260,0.271289,0.707788,0.508496,0.733203,0.429427,0.750146,0.372949,0.750146,0.316471,0.724731,0.564974,0.784033,0.519792,0.792505,0.474609,0.792505,0.440723
2,0,2,0.635205,0.769401,0.583936,0.666862,0.549756,0.552930,0.532666,0.473177,0.507031,0.427604,0.652295,0.450391,0.660840,0.393424,0.669385,0.336458,0.686475,0.279492,0.695020,0.473177,0.703564,0.393424,0.729199,0.336458,0.746289,0.268099,0.729199,0.507357,0.763379,0.438997,0.780469,0.382031,0.789014,0.325065,0.746289,0.564323,0.797559,0.530143,0.814648,0.484570,0.823193,0.438997
3,0,3,0.650781,0.747461,0.599951,0.657096,0.557593,0.555436,0.532178,0.476367,0.515234,0.408594,0.659253,0.465072,0.676196,0.386003,0.693140,0.329525,0.710083,0.284342,0.710083,0.476367,0.735498,0.397298,0.752441,0.329525,0.777856,0.273047,0.743970,0.510254,0.777856,0.431185,0.794800,0.374707,0.803271,0.329525,0.760913,0.566732,0.811743,0.532845,0.828687,0.498958,0.837158,0.453776
4,0,4,0.657813,0.750000,0.606250,0.646875,0.571875,0.555208,0.563281,0.463542,0.546094,0.406250,0.683594,0.463542,0.700781,0.383333,0.717969,0.326042,0.735156,0.280208,0.726562,0.475000,0.752344,0.394792,0.778125,0.326042,0.803906,0.280208,0.760938,0.509375,0.795312,0.440625,0.821094,0.383333,0.838281,0.326042,0.778125,0.566667,0.821094,0.532292,0.846875,0.497917,0.864062,0.452083
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435323,0,27,0.657813,0.750000,0.606250,0.646875,0.571875,0.555208,0.563281,0.463542,0.546094,0.406250,0.683594,0.463542,0.700781,0.383333,0.717969,0.326042,0.735156,0.280208,0.726562,0.475000,0.752344,0.394792,0.778125,0.326042,0.803906,0.280208,0.760938,0.509375,0.795312,0.440625,0.821094,0.383333,0.838281,0.326042,0.778125,0.566667,0.821094,0.532292,0.846875,0.497917,0.864062,0.452083
435324,0,28,0.650781,0.747461,0.599951,0.657096,0.557593,0.555436,0.532178,0.476367,0.515234,0.408594,0.659253,0.465072,0.676196,0.386003,0.693140,0.329525,0.710083,0.284342,0.710083,0.476367,0.735498,0.397298,0.752441,0.329525,0.777856,0.273047,0.743970,0.510254,0.777856,0.431185,0.794800,0.374707,0.803271,0.329525,0.760913,0.566732,0.811743,0.532845,0.828687,0.498958,0.837158,0.453776
435325,0,29,0.635205,0.769401,0.583936,0.666862,0.549756,0.552930,0.532666,0.473177,0.507031,0.427604,0.652295,0.450391,0.660840,0.393424,0.669385,0.336458,0.686475,0.279492,0.695020,0.473177,0.703564,0.393424,0.729199,0.336458,0.746289,0.268099,0.729199,0.507357,0.763379,0.438997,0.780469,0.382031,0.789014,0.325065,0.746289,0.564323,0.797559,0.530143,0.814648,0.484570,0.823193,0.438997
435326,0,30,0.623071,0.779590,0.555298,0.666634,0.529883,0.553678,0.512939,0.474609,0.479053,0.429427,0.631543,0.452018,0.631543,0.384245,0.640015,0.339062,0.648486,0.282585,0.673901,0.474609,0.682373,0.395540,0.699316,0.327767,0.716260,0.271289,0.707788,0.508496,0.733203,0.429427,0.750146,0.372949,0.750146,0.316471,0.724731,0.564974,0.784033,0.519792,0.792505,0.474609,0.792505,0.440723


In [114]:
# Saves the dataframes
df_2D.to_csv(path + '/HandGesture2D.csv', index=False)
df_3D.to_csv(path + '/HandGesture3D.csv', index=False)