In [1]:
from torch.utils.data import Dataset
from torch import Tensor
import numpy as np
import pandas as pd

class LandmarkDataset(Dataset):
    def __init__(self, landmark_file, label_file, label_vec_file):
        super().__init__()
        self.landmarks = np.load(landmark_file)
        self.labels = pd.read_json(label_file)[["gloss"]]
        self.label_vec = np.load(label_vec_file)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        landmarks = Tensor(self.landmarks.get(str(index)))
        word = self.labels.loc[index].iloc[0]
        word_vec = Tensor(self.label_vec.get(word))
        return landmarks, word, word_vec
    
ds = LandmarkDataset("landmarks_V2.npz", "WLASL_parsed_data.json", "word_vec.npz")
val, word, word_vec = ds[2]
val.shape

torch.Size([68, 48, 3])

In [None]:
# Code for encoding the labels
# import numpy as np
# import pandas as pd
# import fasttext

# labels = pd.read_json("WLASL_parsed_data.json").loc[:, "gloss"]
# fasttext.util.download_model('en', if_exists='ignore')
# ft = fasttext.load_model('cc.en.300.bin')
# encoded_labels = {label: ft.get_word_vector(label) for label in labels}
# np.savez_compressed("word_vec.npz", **encoded_labels)

In [None]:
import torch
from sklearn.model_selection import train_test_split
import torch.nn.utils.rnn as rnn_utils
from torch.utils.data import Subset, DataLoader

TEST_SIZE = 0.2
BATCH_SIZE = 32
SEED = 10
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

train_i, test_i = train_test_split(
    range(len(ds)),
    stratify=ds.labels,
    test_size=TEST_SIZE,
    random_state=SEED
    )

torch.manual_seed(SEED)

train_split = Subset(ds, train_i)
test_split = Subset(ds, test_i)

def collate(batch):
    batch.sort(key=lambda x: x[0].shape[0], reverse=True)
    
    landmarks, words, word_vecs = zip(*batch)
    lengths = [len(lm) for lm in landmarks]

    padded_landmarks = rnn_utils.pad_sequence(landmarks).to(DEVICE)
    word_vecs = torch.stack(word_vecs).to(DEVICE)
    
    return \
        padded_landmarks, \
        lengths, \
        list(words), \
        word_vecs

train_batches = DataLoader(train_split, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate)
test_batches = DataLoader(test_split, batch_size=BATCH_SIZE, collate_fn=collate)

landmarks, lengths, labels, label_vecs = next(iter(train_batches))
landmarks.shape

torch.Size([119, 64, 48, 3])

In [3]:
from torch import nn

class TestModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Sequential(
            nn.LazyLinear(200),
            nn.LeakyReLU(),            
            nn.LazyLinear(300),
            nn.LeakyReLU(),
        )
        self.rnn = nn.GRU(300, 300)
        self.fc2 = nn.Sequential(
            nn.LazyLinear(300),
            nn.LeakyReLU(),
            nn.LazyLinear(300),
            nn.LeakyReLU(0.2),
            nn.LazyLinear(300),
        )

    def forward(self, landmarks, lengths):
        frames, batch, lm, pos = landmarks.shape
        x = landmarks.view(frames, batch, lm * pos)
        x = self.fc1(x)
        x = rnn_utils.pack_padded_sequence(x, lengths)
        x, h = self.rnn(x)
        x = self.fc2(h)
        x = h.squeeze(0)
        return x

model = TestModel().to(DEVICE)

landmarks, lengths, labels, label_vecs = next(iter(train_batches))

outp = model(landmarks, lengths)
outp



tensor([[-0.0061, -0.0030,  0.0518,  ...,  0.0022,  0.0323, -0.0005],
        [-0.0061, -0.0004,  0.0514,  ...,  0.0004,  0.0322,  0.0055],
        [-0.0065, -0.0021,  0.0505,  ...,  0.0030,  0.0336,  0.0027],
        ...,
        [-0.0080,  0.0218,  0.0731,  ...,  0.0183,  0.0470,  0.0044],
        [-0.0079, -0.0003,  0.0414,  ..., -0.0036,  0.0359, -0.0054],
        [-0.0217,  0.0347,  0.0975,  ...,  0.0294,  0.0603, -0.0128]],
       device='cuda:0', grad_fn=<SqueezeBackward1>)

In [4]:
import torch.nn.functional as F

class VecToWord:
    def __init__(self, ds, device):
        self.vectors = torch.tensor(np.array(list(ds.label_vec.values())), device=DEVICE)
        self.vectors.requires_grad_(False)

        self.dictionary = list(ds.label_vec.keys())
    
    def to_word(self, vec):
        classes = self.to_class(vec).tolist()
        return [self.dictionary[c] for c in classes]
        
    def get_acc(self, vec, target_words):
        return sum(pred == targ for pred, targ in zip(self.to_word(vec), target_words))


    def to_class(self, vec):
        res = F.cosine_similarity(
            vec.unsqueeze(1).repeat(1, 2000, 1), 
            self.vectors.repeat(vec.shape[0], 1, 1), 
            dim=2)
        return res.argmax(1)

v2w = VecToWord(ds, DEVICE)
v2w.get_acc(outp, labels)

0

In [5]:
from torch.optim import Adam
from torch.nn import MSELoss
from tqdm import tqdm

criterion = MSELoss()
optimizer = Adam(model.parameters(), lr=0.001, eps=1e-3) # lower eps cause of f16
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

num_epochs = 5

torch.autograd.set_detect_anomaly(True)

losses = []
accuracies = []

model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    for inputs, lengths, words, word_vec in tqdm(train_batches, smoothing=0.8):
        optimizer.zero_grad()
        outputs = model(inputs, lengths)

        loss = criterion(outputs, word_vec)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        correct += v2w.get_acc(outputs, words)

    loss = running_loss/ len(train_batches)
    acc = 100 * correct / len(train_batches)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss}, Accuracy: {acc:03f}%")
    losses.append(loss)
    accuracies.append(acc)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 150/150 [00:15<00:00,  9.70it/s]


Epoch 1/5, Loss: 0.005274246347447237, Accuracy: 5.333333%


100%|██████████| 150/150 [00:15<00:00,  9.57it/s]


Epoch 2/5, Loss: 0.0046615773734326165, Accuracy: 2.666667%


100%|██████████| 150/150 [00:15<00:00,  9.83it/s]


Epoch 3/5, Loss: 0.004621435410032669, Accuracy: 4.666667%


100%|██████████| 150/150 [00:15<00:00,  9.89it/s]


Epoch 4/5, Loss: 0.004611053070984781, Accuracy: 4.666667%


100%|██████████| 150/150 [00:14<00:00, 10.15it/s]

Epoch 5/5, Loss: 0.004603677638806402, Accuracy: 4.666667%





In [6]:
model.eval()
correct = 0
with torch.no_grad():
    for inputs, lengths, words, word_vec in tqdm(test_batches, smoothing=0.8):
        outputs = model(inputs, lengths)

        correct += v2w.get_acc(outputs, words)

print(f"Accuracy: {100 * correct / len(test_batches):03f}%")

100%|██████████| 38/38 [00:02<00:00, 16.03it/s]

Accuracy: 0.000000%



