In [84]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.utils import to_undirected
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from tqdm import tqdm

In [85]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### Data Processing

In [86]:
df = pd.read_csv('signs_en.csv')
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Landmark 1 x,Landmark 1 y,Landmark 2 x,Landmark 2 y,Landmark 3 x,Landmark 3 y,Landmark 4 x,Landmark 4 y,...,Landmark 16 y,Landmark 17 x,Landmark 17 y,Landmark 18 x,Landmark 18 y,Landmark 19 x,Landmark 19 y,Landmark 20 x,Landmark 20 y,Label
0,0,0.0,0.447142,0.590708,0.469595,0.513098,0.471293,0.451205,0.45587,0.405068,...,0.630833,0.518164,0.645377,0.555401,0.660619,0.535724,0.671495,0.516766,0.669519,good
1,1,1.0,0.447354,0.59827,0.47089,0.518473,0.474211,0.456242,0.458783,0.4072,...,0.634632,0.518493,0.648955,0.55785,0.663069,0.536107,0.673253,0.514505,0.670299,good
2,2,2.0,0.447751,0.594389,0.471821,0.516508,0.474668,0.455287,0.459516,0.407649,...,0.634539,0.519102,0.647704,0.56107,0.663423,0.539275,0.672495,0.516909,0.668556,good
3,3,3.0,0.447579,0.594926,0.47134,0.515999,0.474191,0.453736,0.458233,0.405407,...,0.636172,0.52029,0.647888,0.561078,0.664179,0.539073,0.673748,0.516713,0.669505,good
4,4,4.0,0.448942,0.590661,0.472229,0.51142,0.474214,0.448855,0.457358,0.401411,...,0.629209,0.520169,0.637428,0.560247,0.658442,0.537632,0.668001,0.514961,0.66207,good


In [87]:
df = df.sample(frac=1, random_state=42)
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Landmark 1 x,Landmark 1 y,Landmark 2 x,Landmark 2 y,Landmark 3 x,Landmark 3 y,Landmark 4 x,Landmark 4 y,...,Landmark 16 y,Landmark 17 x,Landmark 17 y,Landmark 18 x,Landmark 18 y,Landmark 19 x,Landmark 19 y,Landmark 20 x,Landmark 20 y,Label
415,415,86.0,0.326569,0.674404,0.357834,0.703072,0.375689,0.741255,0.382552,0.776034,...,0.606782,0.336321,0.575292,0.347091,0.577,0.336864,0.592053,0.330558,0.594813,not good
2927,588,,0.456985,0.661305,0.467504,0.639797,0.480497,0.630084,0.492144,0.625483,...,0.629676,0.45743,0.617761,0.481457,0.615641,0.491817,0.621171,0.497802,0.628334,c
3194,855,,0.431389,0.722176,0.459923,0.701749,0.483064,0.689345,0.492661,0.675986,...,0.582822,0.423682,0.622943,0.434341,0.581182,0.455356,0.578791,0.47081,0.590627,c
298,298,298.0,0.378981,0.597129,0.39701,0.543736,0.398701,0.502422,0.384802,0.47308,...,0.610331,0.440011,0.626854,0.469402,0.634171,0.454586,0.64144,0.438621,0.639632,good
1874,634,,0.443326,0.806144,0.465649,0.758831,0.444961,0.711449,0.419999,0.689627,...,0.541464,0.397769,0.688573,0.404561,0.640295,0.408814,0.610057,0.412891,0.581599,b


In [88]:
features = df.drop(columns=['Label', 'Unnamed: 0.1', 'Unnamed: 0'])
features.shape

(3644, 40)

In [89]:
labels = df['Label']
labels.unique()

array(['not good', 'c', 'good', 'b', 'okay', 'a'], dtype=object)

In [90]:
le = LabelEncoder()
labels = le.fit_transform(labels)
labels.shape

(3644,)

![](hand_landmarks.png)

Edge index of hand landmarks are constant. Edge index is a 2D matrix with 2 rows. First row represents origin and second row represents end. Per column, it lists all the connections the graph makes. Essentially it is one of the matrix representations of a graph. Undirected does both a to b and b to a.

In [91]:
# make edge index
edge_index = torch.tensor([
    [0, 0, 0, 1, 2, 3, 5, 5, 6, 7, 9, 9, 10, 11, 13, 13, 14, 15, 17, 18, 19],
    [1, 5, 17, 2, 3, 4, 6, 9, 7, 8, 10, 13, 11, 12, 17, 14, 15, 16, 18, 19, 20]
    ], dtype=torch.long).to(device)

# make it undirected
edge_index = to_undirected(edge_index)

In [92]:
features = features.to_numpy()
features = features.reshape(-1, 20, 2)

In [93]:
datas = []
for i in range(len(features)):
    data = Data(
        x=torch.tensor(features[i], dtype=torch.float).to(device), 
        edge_index=edge_index,
        y=torch.tensor(labels[i], dtype=torch.long).to(device)
    )
    datas.append(data)

In [94]:
split = int(len(datas) * 0.8)

train_dl = DataLoader(datas[:split], batch_size=32)
test_dl = DataLoader(datas[split:], batch_size=32)

In [95]:
sample = next(iter(train_dl))
sample

DataBatch(x=[640, 2], edge_index=[2, 1344], y=[32], batch=[640], ptr=[33])

### Model Creation