In [2]:
import torch
from torch import nn as nn

import cv2 as cv
import mediapipe as mp

import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
from IPython.display import display, Image
from sklearn.preprocessing import MinMaxScaler, Normalizer

from tqdm import tqdm
import os
import sys

import json
from glob import glob
from collections import OrderedDict


In [3]:
# Load the landmark JSON and flip it so that each top-level JSON key becomes a row.
df = pd.read_json("data-real/landmarks/all_hand_landmarks.json").T
df.head()

Unnamed: 0,x,y,z,label
data-real/images/A/001.jpg,"{'0': 0.46490940451622004, '1': 0.555219709873...","{'0': 0.708309531211853, '1': 0.66132336854934...","{'0': -5.075436320112203e-07, '1': -0.03012177...",A
data-real/images/A/002.jpg,"{'0': 0.48861816525459206, '1': 0.633939564228...","{'0': 0.5617042779922481, '1': 0.5200358629226...","{'0': -9.525291488898802e-07, '1': -0.05132721...",A
data-real/images/A/003.jpg,"{'0': 0.37925067543983404, '1': 0.471243590116...","{'0': 0.7275454401969911, '1': 0.7026125192642...","{'0': -7.949893756631354e-07, '1': -0.04028412...",A
data-real/images/A/004.jpg,"{'0': 0.36332768201828003, '1': 0.460623979568...","{'0': 0.7279697656631471, '1': 0.6918165683746...","{'0': -5.866638161933224e-07, '1': -0.02518229...",A
data-real/images/A/005.jpg,"{'0': 0.44915258884429904, '1': 0.587392032146...","{'0': 0.633230507373809, '1': 0.59939664602279...","{'0': -9.324294296675362e-07, '1': -0.04653644...",A


In [21]:
def preprocess_column(series, scaler=None):
    """Expand a column of dicts into one scaled, prefixed column per key.

    Parameters
    ----------
    series : pandas.Series
        Each element is a mapping; its keys become the columns of the
        intermediate frame. ``series.name`` is used as the column prefix.
    scaler : object, optional
        Any object exposing ``fit_transform``. When omitted, a fresh
        ``MinMaxScaler`` is created for this call. (A scaler instance used as
        the default value would be built once at definition time and then
        shared, and re-fitted, by every call.)

    Returns
    -------
    pandas.DataFrame
        The scaler's output wrapped in a new frame with a positional index.
        Columns are named ``f"{series.name}{col}"``, where ``col`` is the
        column label of that new frame (positional integers when the scaler
        returns an array).
    """
    if scaler is None:
        scaler = MinMaxScaler()

    # Expand the per-row dicts into a wide frame (one column per key)
    expanded = DataFrame(series.tolist())

    # Fit the scaler on this column's data and transform it in one step
    scaled = DataFrame(scaler.fit_transform(expanded))

    # Prefix every column with the name of the source series
    scaled.columns = [f"{series.name}{col}" for col in scaled.columns]

    return scaled

In [28]:
def expand_df(df):
    """Flatten the dict-valued 'x', 'y' and 'z' columns into a wide frame.

    Every key found in a coordinate dict becomes its own column, named with
    the axis letter followed by the key (e.g. ``x0``). The 'label' column is
    appended last, with its index reset so it lines up positionally with the
    expanded coordinate columns.
    """
    pieces = []
    for axis in ("x", "y", "z"):
        # One row per sample, one column per key in that sample's dict
        wide = DataFrame(df[axis].tolist())
        wide.columns = [f"{axis}{key}" for key in wide.columns]
        pieces.append(wide)

    # The expanded frames carry a fresh positional index, so the label must too
    pieces.append(df["label"].reset_index(drop=True))

    return pd.concat(pieces, axis=1)


In [30]:
# Preview the expanded frame: one column per coordinate key (prefixed x/y/z) plus the label.
expand_df(df).head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,z12,z13,z14,z15,z16,z17,z18,z19,z20,label
0,0.464909,0.55522,0.611244,0.628575,0.657871,0.55937,0.580615,0.569546,0.557562,0.497237,...,-0.075991,-0.018208,-0.079061,-0.076559,-0.054041,-0.029359,-0.068963,-0.067767,-0.053324,A
1,0.488618,0.63394,0.752804,0.788872,0.839207,0.691999,0.717096,0.68747,0.665154,0.598479,...,-0.099306,-0.026427,-0.105693,-0.088758,-0.052505,-0.035092,-0.090098,-0.073405,-0.043022,A
2,0.379251,0.471244,0.547347,0.584386,0.597717,0.536127,0.538939,0.510218,0.489882,0.477515,...,-0.096355,-0.02868,-0.079185,-0.08184,-0.067349,-0.035101,-0.067516,-0.070111,-0.059815,A
3,0.363328,0.460624,0.525905,0.550559,0.587458,0.484731,0.498303,0.479902,0.467675,0.422905,...,-0.053812,-0.001071,-0.057173,-0.050915,-0.028034,-0.011431,-0.050666,-0.042651,-0.023892,A
4,0.449153,0.587392,0.698809,0.739597,0.790906,0.642463,0.676976,0.64251,0.613779,0.557109,...,-0.098641,-0.032327,-0.107583,-0.092093,-0.058714,-0.043752,-0.096343,-0.081553,-0.054045,A


In [31]:
def scale_normalize(df, scaler=None):
    """Return a copy of ``df`` with its int/float columns rescaled.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is NOT modified; all changes are made on a copy so
        that re-running a cell, or reusing the input elsewhere, is safe.
    scaler : object, optional
        Any object exposing ``fit_transform``. When omitted, a fresh
        ``MinMaxScaler`` is created for this call.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``df``. Columns
        selected by ``select_dtypes(include=['int', 'float'])`` hold the
        scaler's output; every other column is carried over unchanged.
    """
    # Identify numerical columns
    numerical_columns = df.select_dtypes(include=['int', 'float']).columns

    # Work on a copy so the caller's frame is left untouched
    result = df.copy()

    # Nothing to fit on: no numeric columns or no rows
    if len(numerical_columns) == 0 or len(df) == 0:
        return result

    if scaler is None:
        scaler = MinMaxScaler()

    # Fit on the numeric block and transform it in one step
    scaled_data = scaler.fit_transform(df[numerical_columns])

    # Re-attach the original labels so the assignment below aligns correctly
    scaled_df = pd.DataFrame(scaled_data, columns=numerical_columns, index=df.index)

    # Swap the scaled values in for the original numeric columns
    result[numerical_columns] = scaled_df

    return result

In [32]:
# Preview the expanded frame after scaling its numeric columns; the label column is left as-is.
scale_normalize(expand_df(df)).head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,z12,z13,z14,z15,z16,z17,z18,z19,z20,label
0,0.43366,0.506388,0.570567,0.611941,0.612966,0.589777,0.586113,0.568777,0.569886,0.53233,...,0.780083,0.838387,0.785467,0.798391,0.810096,0.750743,0.732881,0.749458,0.776498,A
1,0.45726,0.588564,0.729409,0.791667,0.797452,0.749245,0.735437,0.693492,0.678734,0.656852,...,0.754283,0.827091,0.755535,0.783356,0.812056,0.742939,0.708216,0.74263,0.789248,A
2,0.348396,0.418725,0.49887,0.562396,0.551767,0.56183,0.540516,0.506032,0.501416,0.508074,...,0.757548,0.823994,0.785328,0.791882,0.793114,0.742926,0.734569,0.746619,0.768464,A
3,0.332546,0.40764,0.47481,0.524469,0.54133,0.500034,0.496057,0.47397,0.478949,0.440907,...,0.804628,0.861939,0.810068,0.829996,0.843282,0.775148,0.754235,0.779874,0.812926,A
4,0.417976,0.539973,0.668822,0.73642,0.748312,0.689685,0.691542,0.645942,0.626759,0.60597,...,0.755018,0.818982,0.753411,0.779245,0.804132,0.73115,0.700927,0.732762,0.775605,A


In [27]:
# Scale each coordinate axis independently, then append the label column
# (index reset so it lines up with the positional index of the scaled frames).
preprocessed_df = pd.concat(
    [
        *(preprocess_column(df[axis]) for axis in ('x', 'y', 'z')),
        df['label'].reset_index(drop=True),
    ],
    axis=1,
)

preprocessed_df.head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,z12,z13,z14,z15,z16,z17,z18,z19,z20,label
0,0.43366,0.506388,0.570567,0.611941,0.612966,0.589777,0.586113,0.568777,0.569886,0.53233,...,0.780083,0.838387,0.785467,0.798391,0.810096,0.750743,0.732881,0.749458,0.776498,A
1,0.45726,0.588564,0.729409,0.791667,0.797452,0.749245,0.735437,0.693492,0.678734,0.656852,...,0.754283,0.827091,0.755535,0.783356,0.812056,0.742939,0.708216,0.74263,0.789248,A
2,0.348396,0.418725,0.49887,0.562396,0.551767,0.56183,0.540516,0.506032,0.501416,0.508074,...,0.757548,0.823994,0.785328,0.791882,0.793114,0.742926,0.734569,0.746619,0.768464,A
3,0.332546,0.40764,0.47481,0.524469,0.54133,0.500034,0.496057,0.47397,0.478949,0.440907,...,0.804628,0.861939,0.810068,0.829996,0.843282,0.775148,0.754235,0.779874,0.812926,A
4,0.417976,0.539973,0.668822,0.73642,0.748312,0.689685,0.691542,0.645942,0.626759,0.60597,...,0.755018,0.818982,0.753411,0.779245,0.804132,0.73115,0.700927,0.732762,0.775605,A


In [7]:
# Serialise the preprocessed frame to pretty-printed JSON in the working directory.
# NOTE(review): this cell's execution counter (7) is lower than that of the cell
# defining `preprocessed_df` (27), so the file on disk may come from an earlier
# definition — re-run top to bottom to confirm.
with open("processed_data.json", "w") as file:
    file.write(preprocessed_df.to_json(indent=4))