In [1]:
import os
import re
import json
import pickle

import fasttext

import shutil

import time
from tqdm import tqdm
import math

import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

from transformers import AutoTokenizer, AutoModel

In [2]:
words = np.load('/home/sju/HyoJun/Creative_semester_system/words.npy')
words = words.tolist()
y_data1 = np.load('/home/sju/HyoJun/Creative_semester_system/y_data1.npy')

In [3]:
class TrainDataset(Dataset):
    def __init__(self, words, landmarks):
        self.words = words
        self.landmarks = torch.tensor(landmarks, dtype=torch.float32)  # (N, T, L, D)

    def __len__(self):
        return len(self.words)

    def __getitem__(self, idx):
        word = self.words[idx]                # str or list of str
        landmark_seq = self.landmarks[idx]    # (T, L, D)
        return word, landmark_seq

class TestDataset(Dataset):
    def __init__(self, words, landmarks):
        self.words = words
        self.landmarks = torch.tensor(landmarks, dtype=torch.float32)
        self.first_frame = self.landmarks[:, 0, :, :]  # (N, L, D)

    def __len__(self):
        return len(self.words)

    def __getitem__(self, idx):
        word = self.words[idx]
        first_frame = self.first_frame[idx]   # (L, D)
        return word, first_frame

In [4]:
class WordEmbeddingDecoderModel(nn.Module):
    def __init__(self, ft_model_path, hidden_size, num_frames, num_landmarks, dim):
        super(WordEmbeddingDecoderModel, self).__init__()
        self.num_frames = num_frames
        self.num_landmarks = num_landmarks
        self.dim = dim
        self.hidden_size = hidden_size

        self.ft = fasttext.load_model(ft_model_path)
        self.embedding_dim = self.ft.get_dimension()
        
        self.encoder_proj = nn.Linear(self.embedding_dim, hidden_size)

        self.decoder_input_proj = nn.Linear(num_landmarks * dim, hidden_size)

        self.decoder_layer = nn.TransformerDecoderLayer(
            d_model=hidden_size,
            nhead=8,
            dim_feedforward=hidden_size//2,
            dropout=0.3,
            activation='relu',
            batch_first=True
        )
        self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=8)

        self.fc = nn.Linear(hidden_size, num_landmarks * dim)

    def encode_text(self, batch_words, device='cuda'):
        
        batch_embeddings = []
        for words in batch_words:
            word_embeds = [self.ft.get_word_vector(w) for w in words]
            batch_embeddings.append(word_embeds)
        
        embeddings = torch.tensor(batch_embeddings).to(device)
        embeddings = self.encoder_proj(embeddings)
        
        return embeddings
        

    def forward(self, words, decoder_input: torch.Tensor, device: torch.device):
        
        memory = self.encode_text(words, device)

        # Prepare decoder input
        decoder_input = decoder_input.to(device) 
        
        batch_size = decoder_input.size(0)
        num_frames = decoder_input.size(1)
        
        decoder_input_flat = decoder_input.view(batch_size, num_frames, -1)
        decoder_input_proj = self.decoder_input_proj(decoder_input_flat)  # (B, T, hidden_size)

        # Create autoregressive mask
        tgt_mask = torch.triu(torch.ones(num_frames, num_frames), diagonal=1).bool().to(device)

        # Decode
        decoder_output = self.transformer_decoder(
            tgt=decoder_input_proj,
            memory=memory,
            tgt_mask=tgt_mask
        )  # (B, T, hidden_size)

        output = self.fc(decoder_output)  # (B, T, L*D)
        output = output.view(batch_size, num_frames, self.num_landmarks, self.dim)

        return output

In [5]:
#MODEL = BERT
HIDDEN_SIZE = 768
NUM_FRAMES = y_data1.shape[1]
NUM_LANDMARKS = y_data1.shape[2]
DIM = 2

EPOCHS = 200
BATCH_SIZE = 1
LR = 1e-6

In [6]:
train_dataset = TrainDataset(words, y_data1)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [7]:
model = WordEmbeddingDecoderModel(
    ft_model_path='/home/sju/HyoJun/Creative_semester_system/cc.ko.300.bin',
    hidden_size=HIDDEN_SIZE,
    num_frames=NUM_FRAMES,
    num_landmarks=NUM_LANDMARKS,
    dim=DIM
).cuda()

In [8]:
class BoneLengthLoss(nn.Module):
    def __init__(self, bone_pairs):
        super(BoneLengthLoss, self).__init__()
        self.bone_pairs = bone_pairs
    
    def forward(self, pred, target):
        assert pred.shape == target.shape
        
        B, T, N, D = pred.shape
        pred_bones = []
        target_bones = []
        for i, j in self.bone_pairs:
            pred_bone = torch.norm(pred[:, :, i, :] - pred[:, :, j, :], dim=-1)
            target_bone = torch.norm(target[:, :, i, :] - target[:, :, j, :], dim=-1)
            pred_bones.append(pred_bone)
            target_bones.append(target_bone)
        
        pred_bones = torch.stack(pred_bones, dim=-1)
        target_bones = torch.stack(target_bones, dim=-1)
        
        loss = torch.mean((pred_bones - target_bones) ** 2)
        return loss

In [9]:
hand_bone_pairs = [
    (0, 1), (1, 2), (2, 3), (3, 4),      # Thumb
    (0, 5), (5, 6), (6, 7), (7, 8),      # Index
    (0, 9), (9,10), (10,11), (11,12),    # Middle
    (0,13), (13,14), (14,15), (15,16),   # Ring
    (0,17), (17,18), (18,19), (19,20)    # Pinky
]

pose_bone_pairs = [
    (0, 1),   # nose to neck
    (1, 2), (2, 3), (3, 4), (4, 5),   # right arm
    (1, 6), (6, 7), (7, 8), (8, 9),   # left arm
    (1, 10), (10, 11), (11, 12),     # right leg
    (1, 13), (13, 14), (14, 15),     # left leg
    (0, 16), (0, 17),  # eyes
    (16, 18), (17, 19),  # ears
    (11, 22), (14, 19),  # ankles to toes
]

In [10]:
#criterion = nn.L1Loss()
criterion = nn.MSELoss()
#criterion_hand = BoneLengthLoss(hand_bone_pairs)
#criterion_pose = BoneLengthLoss(pose_bone_pairs)

optimizer = AdamW(model.parameters(), lr=LR)

cos_epoch = int(EPOCHS*0.75)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, (EPOCHS-cos_epoch)*len(train_dataloader))

steps_per_epoch = len(train_dataloader)
#val_steps = len(valid_dataloader)

In [11]:
%%time

for epoch in tqdm(range(EPOCHS)):
    
    model.train()
    train_loss = 0
    t = time.time()
    
    for step, batch in enumerate(train_dataloader):
        word, landmark = batch
        word = word  # word는 list여서 GPU로 이동하지 않음
        landmark = landmark.cuda()  # landmark는 Tensor이므로 GPU로 이동
        
        decoder_input = landmark

        optimizer.zero_grad()
        
        output = model(word, decoder_input, 'cuda')
        
        loss = criterion(output, landmark) 
        #criterion(output[..., 0:70, :], landmark[..., 0:70, :]) + criterion_pose(output[..., 70:95, :], landmark[..., 70:95, :]) + criterion_hand(output[..., 95:116, :], landmark[..., 95:116, :]) + criterion_hand(output[..., 116:137, :], landmark[..., 116:137, :])
        loss.backward()

        optimizer.step()
            
        train_loss += loss.item()
        print ("Step [{}/{}] Loss: {:.3f} Time: {:.1f}".format(step+1, steps_per_epoch, train_loss/(step+1), time.time()-t),end='\r',flush=True)
        
        if epoch > cos_epoch:
            scheduler.step()
            
    print('')
    train_loss /= (step+1)

  0%|                                                   | 0/200 [00:00<?, ?it/s]

Step [1/3000] Loss: 0.573 Time: 0.2

  embeddings = torch.tensor(batch_embeddings).to(device)


Step [3000/3000] Loss: 0.098 Time: 43.1

  0%|▏                                        | 1/200 [00:43<2:23:04, 43.14s/it]


Step [3000/3000] Loss: 0.035 Time: 42.4

  1%|▍                                        | 2/200 [01:25<2:20:50, 42.68s/it]


Step [3000/3000] Loss: 0.028 Time: 42.7

  2%|▌                                        | 3/200 [02:08<2:20:07, 42.68s/it]


Step [3000/3000] Loss: 0.023 Time: 42.9

  2%|▊                                        | 4/200 [02:51<2:19:41, 42.76s/it]


Step [3000/3000] Loss: 0.019 Time: 42.8

  2%|█                                        | 5/200 [03:33<2:18:59, 42.76s/it]


Step [3000/3000] Loss: 0.016 Time: 43.4

  3%|█▏                                       | 6/200 [04:17<2:18:58, 42.98s/it]


Step [3000/3000] Loss: 0.015 Time: 42.7

  4%|█▍                                       | 7/200 [04:59<2:17:56, 42.88s/it]


Step [3000/3000] Loss: 0.014 Time: 43.0

  4%|█▋                                       | 8/200 [05:42<2:17:18, 42.91s/it]


Step [3000/3000] Loss: 0.012 Time: 42.6

  4%|█▊                                       | 9/200 [06:25<2:16:15, 42.81s/it]


Step [3000/3000] Loss: 0.012 Time: 42.8


  5%|██                                      | 10/200 [07:08<2:15:34, 42.81s/it]

Step [3000/3000] Loss: 0.011 Time: 42.7

  6%|██▏                                     | 11/200 [07:50<2:14:43, 42.77s/it]


Step [3000/3000] Loss: 0.010 Time: 42.4

  6%|██▍                                     | 12/200 [08:33<2:13:39, 42.65s/it]


Step [3000/3000] Loss: 0.009 Time: 42.4

  6%|██▌                                     | 13/200 [09:15<2:12:41, 42.57s/it]


Step [3000/3000] Loss: 0.009 Time: 43.1

  7%|██▊                                     | 14/200 [09:58<2:12:26, 42.72s/it]


Step [3000/3000] Loss: 0.008 Time: 42.4

  8%|███                                     | 15/200 [10:41<2:11:26, 42.63s/it]


Step [3000/3000] Loss: 0.008 Time: 43.4

  8%|███▏                                    | 16/200 [11:24<2:11:25, 42.86s/it]


Step [3000/3000] Loss: 0.007 Time: 43.3

  8%|███▍                                    | 17/200 [12:07<2:11:05, 42.98s/it]


Step [3000/3000] Loss: 0.007 Time: 43.3

  9%|███▌                                    | 18/200 [12:51<2:10:37, 43.06s/it]


Step [3000/3000] Loss: 0.006 Time: 43.0

 10%|███▊                                    | 19/200 [13:34<2:09:49, 43.03s/it]


Step [3000/3000] Loss: 0.006 Time: 42.4

 10%|████                                    | 20/200 [14:16<2:08:32, 42.84s/it]


Step [3000/3000] Loss: 0.005 Time: 42.5

 10%|████▏                                   | 21/200 [14:58<2:07:30, 42.74s/it]


Step [3000/3000] Loss: 0.005 Time: 43.0

 11%|████▍                                   | 22/200 [15:42<2:07:03, 42.83s/it]


Step [3000/3000] Loss: 0.005 Time: 43.3

 12%|████▌                                   | 23/200 [16:25<2:06:44, 42.96s/it]


Step [3000/3000] Loss: 0.004 Time: 43.4

 12%|████▊                                   | 24/200 [17:08<2:06:27, 43.11s/it]


Step [3000/3000] Loss: 0.004 Time: 42.7

 12%|█████                                   | 25/200 [17:51<2:05:20, 42.97s/it]


Step [3000/3000] Loss: 0.004 Time: 42.8

 13%|█████▏                                  | 26/200 [18:34<2:04:29, 42.93s/it]


Step [3000/3000] Loss: 0.003 Time: 42.6

 14%|█████▍                                  | 27/200 [19:16<2:03:32, 42.85s/it]


Step [3000/3000] Loss: 0.003 Time: 42.1

 14%|█████▌                                  | 28/200 [19:59<2:02:13, 42.64s/it]


Step [3000/3000] Loss: 0.003 Time: 42.8

 14%|█████▊                                  | 29/200 [20:41<2:01:37, 42.68s/it]


Step [3000/3000] Loss: 0.002 Time: 43.0

 15%|██████                                  | 30/200 [21:24<2:01:11, 42.77s/it]


Step [3000/3000] Loss: 0.002 Time: 43.0

 16%|██████▏                                 | 31/200 [22:07<2:00:38, 42.83s/it]


Step [3000/3000] Loss: 0.002 Time: 43.4

 16%|██████▍                                 | 32/200 [22:51<2:00:23, 43.00s/it]


Step [3000/3000] Loss: 0.002 Time: 42.9

 16%|██████▌                                 | 33/200 [23:34<1:59:34, 42.96s/it]


Step [3000/3000] Loss: 0.002 Time: 43.4

 17%|██████▊                                 | 34/200 [24:17<1:59:13, 43.09s/it]


Step [3000/3000] Loss: 0.002 Time: 42.8

 18%|███████                                 | 35/200 [25:00<1:58:13, 42.99s/it]


Step [3000/3000] Loss: 0.001 Time: 43.1

 18%|███████▏                                | 36/200 [25:43<1:57:36, 43.03s/it]


Step [3000/3000] Loss: 0.001 Time: 42.8

 18%|███████▍                                | 37/200 [26:26<1:56:43, 42.96s/it]


Step [3000/3000] Loss: 0.001 Time: 43.2

 19%|███████▌                                | 38/200 [27:09<1:56:13, 43.05s/it]


Step [3000/3000] Loss: 0.001 Time: 42.9

 20%|███████▊                                | 39/200 [27:52<1:55:21, 42.99s/it]


Step [3000/3000] Loss: 0.001 Time: 42.8

 20%|████████                                | 40/200 [28:34<1:54:28, 42.93s/it]


Step [3000/3000] Loss: 0.001 Time: 43.0

 20%|████████▏                               | 41/200 [29:17<1:53:47, 42.94s/it]


Step [3000/3000] Loss: 0.001 Time: 42.8

 21%|████████▍                               | 42/200 [30:00<1:52:58, 42.90s/it]


Step [3000/3000] Loss: 0.001 Time: 43.1

 22%|████████▌                               | 43/200 [30:43<1:52:24, 42.96s/it]


Step [3000/3000] Loss: 0.001 Time: 42.4

 22%|████████▊                               | 44/200 [31:26<1:51:14, 42.78s/it]


Step [3000/3000] Loss: 0.001 Time: 43.0

 22%|█████████                               | 45/200 [32:09<1:50:43, 42.86s/it]


Step [3000/3000] Loss: 0.001 Time: 42.8

 23%|█████████▏                              | 46/200 [32:52<1:49:59, 42.86s/it]


Step [3000/3000] Loss: 0.001 Time: 42.4

 24%|█████████▍                              | 47/200 [33:34<1:48:55, 42.72s/it]


Step [3000/3000] Loss: 0.001 Time: 42.5

 24%|█████████▌                              | 48/200 [34:17<1:48:04, 42.66s/it]


Step [3000/3000] Loss: 0.001 Time: 42.9

 24%|█████████▊                              | 49/200 [34:59<1:47:30, 42.72s/it]


Step [3000/3000] Loss: 0.001 Time: 42.8

 25%|██████████                              | 50/200 [35:42<1:46:53, 42.76s/it]


Step [3000/3000] Loss: 0.001 Time: 43.4

 26%|██████████▏                             | 51/200 [36:26<1:46:38, 42.94s/it]


Step [3000/3000] Loss: 0.001 Time: 43.7

 26%|██████████▍                             | 52/200 [37:09<1:46:28, 43.16s/it]


Step [3000/3000] Loss: 0.001 Time: 43.0

 26%|██████████▌                             | 53/200 [37:52<1:45:35, 43.10s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 27%|██████████▊                             | 54/200 [38:36<1:45:03, 43.17s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 28%|███████████                             | 55/200 [39:18<1:43:57, 43.01s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 28%|███████████▏                            | 56/200 [40:01<1:43:06, 42.96s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 28%|███████████▍                            | 57/200 [40:44<1:42:21, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 29%|███████████▌                            | 58/200 [41:27<1:41:37, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 30%|███████████▊                            | 59/200 [42:10<1:40:49, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 30%|████████████                            | 60/200 [42:53<1:40:07, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 30%|████████████▏                           | 61/200 [43:36<1:39:38, 43.01s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 31%|████████████▍                           | 62/200 [44:19<1:38:53, 43.00s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 32%|████████████▌                           | 63/200 [45:02<1:38:25, 43.11s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 32%|████████████▊                           | 64/200 [45:45<1:37:39, 43.08s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 32%|█████████████                           | 65/200 [46:28<1:36:48, 43.02s/it]


Step [3000/3000] Loss: 0.000 Time: 42.7

 33%|█████████████▏                          | 66/200 [47:11<1:35:53, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 34%|█████████████▍                          | 67/200 [47:54<1:35:06, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 34%|█████████████▌                          | 68/200 [48:37<1:34:31, 42.97s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 34%|█████████████▊                          | 69/200 [49:19<1:33:34, 42.86s/it]


Step [3000/3000] Loss: 0.000 Time: 42.4

 35%|██████████████                          | 70/200 [50:02<1:32:33, 42.72s/it]


Step [3000/3000] Loss: 0.000 Time: 42.4

 36%|██████████████▏                         | 71/200 [50:44<1:31:40, 42.64s/it]


Step [3000/3000] Loss: 0.000 Time: 43.5

 36%|██████████████▍                         | 72/200 [51:28<1:31:29, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 36%|██████████████▌                         | 73/200 [52:11<1:30:49, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 37%|██████████████▊                         | 74/200 [52:53<1:29:51, 42.79s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 38%|███████████████                         | 75/200 [53:37<1:29:29, 42.96s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 38%|███████████████▏                        | 76/200 [54:19<1:28:40, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 38%|███████████████▍                        | 77/200 [55:02<1:28:01, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 43.5

 39%|███████████████▌                        | 78/200 [55:46<1:27:40, 43.12s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 40%|███████████████▊                        | 79/200 [56:29<1:26:55, 43.10s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 40%|████████████████                        | 80/200 [57:12<1:25:54, 42.95s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 40%|████████████████▏                       | 81/200 [57:55<1:25:11, 42.95s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 41%|████████████████▍                       | 82/200 [58:38<1:24:32, 42.99s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 42%|████████████████▌                       | 83/200 [59:20<1:23:35, 42.87s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 42%|███████████████▉                      | 84/200 [1:00:03<1:22:39, 42.75s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 42%|████████████████▏                     | 85/200 [1:00:45<1:21:58, 42.77s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 43%|████████████████▎                     | 86/200 [1:01:28<1:21:19, 42.80s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 44%|████████████████▌                     | 87/200 [1:02:11<1:20:40, 42.84s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 44%|████████████████▋                     | 88/200 [1:02:54<1:20:10, 42.95s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 44%|████████████████▉                     | 89/200 [1:03:37<1:19:25, 42.93s/it]


Step [3000/3000] Loss: 0.000 Time: 41.9

 45%|█████████████████                     | 90/200 [1:04:19<1:18:07, 42.62s/it]


Step [3000/3000] Loss: 0.000 Time: 42.7

 46%|█████████████████▎                    | 91/200 [1:05:02<1:17:29, 42.66s/it]


Step [3000/3000] Loss: 0.000 Time: 42.4

 46%|█████████████████▍                    | 92/200 [1:05:44<1:16:39, 42.59s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 46%|█████████████████▋                    | 93/200 [1:06:27<1:16:10, 42.71s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 47%|█████████████████▊                    | 94/200 [1:07:10<1:15:31, 42.75s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 48%|██████████████████                    | 95/200 [1:07:53<1:15:01, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 48%|██████████████████▏                   | 96/200 [1:08:36<1:14:11, 42.81s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 48%|██████████████████▍                   | 97/200 [1:09:19<1:13:45, 42.96s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 49%|██████████████████▌                   | 98/200 [1:10:03<1:13:09, 43.04s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 50%|██████████████████▊                   | 99/200 [1:10:46<1:12:30, 43.07s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 50%|██████████████████▌                  | 100/200 [1:11:29<1:11:51, 43.11s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 50%|██████████████████▋                  | 101/200 [1:12:12<1:11:11, 43.14s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 51%|██████████████████▊                  | 102/200 [1:12:56<1:10:35, 43.22s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 52%|███████████████████                  | 103/200 [1:13:39<1:09:57, 43.27s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 52%|███████████████████▏                 | 104/200 [1:14:22<1:08:53, 43.06s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 52%|███████████████████▍                 | 105/200 [1:15:05<1:08:15, 43.11s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 53%|███████████████████▌                 | 106/200 [1:15:48<1:07:22, 43.00s/it]


Step [3000/3000] Loss: 0.000 Time: 42.7

 54%|███████████████████▊                 | 107/200 [1:16:30<1:06:29, 42.90s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 54%|███████████████████▉                 | 108/200 [1:17:14<1:05:59, 43.04s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 55%|████████████████████▏                | 109/200 [1:17:56<1:05:01, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 55%|████████████████████▎                | 110/200 [1:18:39<1:04:22, 42.92s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 56%|████████████████████▌                | 111/200 [1:19:22<1:03:41, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 56%|████████████████████▋                | 112/200 [1:20:05<1:02:57, 42.92s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 56%|████████████████████▉                | 113/200 [1:20:48<1:02:10, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 42.2

 57%|█████████████████████                | 114/200 [1:21:30<1:01:09, 42.66s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 57%|█████████████████████▎               | 115/200 [1:22:13<1:00:42, 42.85s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 58%|█████████████████████▍               | 116/200 [1:22:56<1:00:01, 42.87s/it]


Step [3000/3000] Loss: 0.000 Time: 42.7

 58%|██████████████████████▊                | 117/200 [1:23:39<59:14, 42.82s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 59%|███████████████████████                | 118/200 [1:24:22<58:37, 42.90s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 60%|███████████████████████▏               | 119/200 [1:25:05<57:58, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 60%|███████████████████████▍               | 120/200 [1:25:48<57:10, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 43.6

 60%|███████████████████████▌               | 121/200 [1:26:31<56:45, 43.10s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 61%|███████████████████████▊               | 122/200 [1:27:14<56:01, 43.09s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 62%|███████████████████████▉               | 123/200 [1:27:57<55:06, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 62%|████████████████████████▏              | 124/200 [1:28:40<54:30, 43.04s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 62%|████████████████████████▍              | 125/200 [1:29:24<53:55, 43.14s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 63%|████████████████████████▌              | 126/200 [1:30:07<53:09, 43.10s/it]


Step [3000/3000] Loss: 0.000 Time: 42.4

 64%|████████████████████████▊              | 127/200 [1:30:49<52:12, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 64%|████████████████████████▉              | 128/200 [1:31:32<51:39, 43.05s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 64%|█████████████████████████▏             | 129/200 [1:32:16<50:58, 43.07s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 65%|█████████████████████████▎             | 130/200 [1:32:58<50:10, 43.00s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 66%|█████████████████████████▌             | 131/200 [1:33:41<49:18, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 43.5

 66%|█████████████████████████▋             | 132/200 [1:34:24<48:48, 43.07s/it]


Step [3000/3000] Loss: 0.000 Time: 42.4

 66%|█████████████████████████▉             | 133/200 [1:35:07<47:52, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 67%|██████████████████████████▏            | 134/200 [1:35:50<47:07, 42.84s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 68%|██████████████████████████▎            | 135/200 [1:36:33<46:26, 42.87s/it]


Step [3000/3000] Loss: 0.000 Time: 43.2

 68%|██████████████████████████▌            | 136/200 [1:37:16<45:50, 42.98s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 68%|██████████████████████████▋            | 137/200 [1:37:59<45:07, 42.98s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 69%|██████████████████████████▉            | 138/200 [1:38:42<44:20, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 70%|███████████████████████████            | 139/200 [1:39:24<43:30, 42.79s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 70%|███████████████████████████▎           | 140/200 [1:40:07<42:51, 42.86s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 70%|███████████████████████████▍           | 141/200 [1:40:50<42:02, 42.75s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 71%|███████████████████████████▋           | 142/200 [1:41:33<41:22, 42.79s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 72%|███████████████████████████▉           | 143/200 [1:42:16<40:42, 42.86s/it]


Step [3000/3000] Loss: 0.000 Time: 43.5

 72%|████████████████████████████           | 144/200 [1:42:59<40:10, 43.05s/it]


Step [3000/3000] Loss: 0.000 Time: 42.0

 72%|████████████████████████████▎          | 145/200 [1:43:41<39:09, 42.72s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 73%|████████████████████████████▍          | 146/200 [1:44:24<38:36, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 74%|████████████████████████████▋          | 147/200 [1:45:07<37:51, 42.86s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 74%|████████████████████████████▊          | 148/200 [1:45:50<37:16, 43.01s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 74%|█████████████████████████████          | 149/200 [1:46:33<36:31, 42.98s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 75%|█████████████████████████████▎         | 150/200 [1:47:16<35:42, 42.86s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 76%|█████████████████████████████▍         | 151/200 [1:47:59<35:01, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 76%|█████████████████████████████▋         | 152/200 [1:48:42<34:21, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.3

 76%|█████████████████████████████▊         | 153/200 [1:49:24<33:29, 42.76s/it]


Step [3000/3000] Loss: 0.000 Time: 42.7

 77%|██████████████████████████████         | 154/200 [1:50:07<32:45, 42.73s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 78%|██████████████████████████████▏        | 155/200 [1:50:50<32:04, 42.78s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 78%|██████████████████████████████▍        | 156/200 [1:51:33<31:23, 42.80s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 78%|██████████████████████████████▌        | 157/200 [1:52:16<30:42, 42.84s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 79%|██████████████████████████████▊        | 158/200 [1:52:58<29:59, 42.86s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 80%|███████████████████████████████        | 159/200 [1:53:41<29:16, 42.84s/it]


Step [3000/3000] Loss: 0.000 Time: 43.4

 80%|███████████████████████████████▏       | 160/200 [1:54:25<28:40, 43.01s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 80%|███████████████████████████████▍       | 161/200 [1:55:08<27:56, 42.98s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 81%|███████████████████████████████▌       | 162/200 [1:55:50<27:07, 42.83s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 82%|███████████████████████████████▊       | 163/200 [1:56:33<26:20, 42.73s/it]


Step [3000/3000] Loss: 0.000 Time: 42.7

 82%|███████████████████████████████▉       | 164/200 [1:57:15<25:37, 42.72s/it]


Step [3000/3000] Loss: 0.000 Time: 43.6

 82%|████████████████████████████████▏      | 165/200 [1:57:59<25:04, 42.99s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 83%|████████████████████████████████▎      | 166/200 [1:58:42<24:22, 43.02s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 84%|████████████████████████████████▌      | 167/200 [1:59:25<23:38, 42.97s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 84%|████████████████████████████████▊      | 168/200 [2:00:08<22:55, 42.99s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 84%|████████████████████████████████▉      | 169/200 [2:00:51<22:12, 42.97s/it]


Step [3000/3000] Loss: 0.000 Time: 43.7

 85%|█████████████████████████████████▏     | 170/200 [2:01:35<21:35, 43.20s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 86%|█████████████████████████████████▎     | 171/200 [2:02:17<20:50, 43.13s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 86%|█████████████████████████████████▌     | 172/200 [2:03:00<20:02, 42.95s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 86%|█████████████████████████████████▋     | 173/200 [2:03:43<19:18, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 87%|█████████████████████████████████▉     | 174/200 [2:04:26<18:35, 42.92s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 88%|██████████████████████████████████▏    | 175/200 [2:05:09<17:55, 43.02s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 88%|██████████████████████████████████▎    | 176/200 [2:05:52<17:13, 43.06s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 88%|██████████████████████████████████▌    | 177/200 [2:06:35<16:28, 42.99s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 89%|██████████████████████████████████▋    | 178/200 [2:07:18<15:43, 42.87s/it]


Step [3000/3000] Loss: 0.000 Time: 42.1

 90%|██████████████████████████████████▉    | 179/200 [2:08:00<14:55, 42.65s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 90%|███████████████████████████████████    | 180/200 [2:08:43<14:15, 42.77s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 90%|███████████████████████████████████▎   | 181/200 [2:09:25<13:31, 42.71s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 91%|███████████████████████████████████▍   | 182/200 [2:10:08<12:51, 42.84s/it]


Step [3000/3000] Loss: 0.000 Time: 43.0

 92%|███████████████████████████████████▋   | 183/200 [2:10:51<12:08, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 92%|███████████████████████████████████▉   | 184/200 [2:11:35<11:27, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 92%|████████████████████████████████████   | 185/200 [2:12:17<10:43, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 93%|████████████████████████████████████▎  | 186/200 [2:13:01<10:02, 43.02s/it]


Step [3000/3000] Loss: 0.000 Time: 42.6

 94%|████████████████████████████████████▍  | 187/200 [2:13:43<09:17, 42.91s/it]


Step [3000/3000] Loss: 0.000 Time: 42.5

 94%|████████████████████████████████████▋  | 188/200 [2:14:26<08:33, 42.79s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 94%|████████████████████████████████████▊  | 189/200 [2:15:09<07:51, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

 95%|█████████████████████████████████████  | 190/200 [2:15:52<07:10, 43.02s/it]


Step [3000/3000] Loss: 0.000 Time: 43.1

 96%|█████████████████████████████████████▏ | 191/200 [2:16:35<06:27, 43.03s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 96%|█████████████████████████████████████▍ | 192/200 [2:17:18<05:43, 42.97s/it]


Step [3000/3000] Loss: 0.000 Time: 42.7

 96%|█████████████████████████████████████▋ | 193/200 [2:18:01<05:00, 42.88s/it]


Step [3000/3000] Loss: 0.000 Time: 42.4

 97%|█████████████████████████████████████▊ | 194/200 [2:18:43<04:16, 42.73s/it]


Step [3000/3000] Loss: 0.000 Time: 43.5

 98%|██████████████████████████████████████ | 195/200 [2:19:27<03:34, 42.95s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

 98%|██████████████████████████████████████▏| 196/200 [2:20:10<02:51, 42.94s/it]


Step [3000/3000] Loss: 0.000 Time: 42.0

 98%|██████████████████████████████████████▍| 197/200 [2:20:52<02:07, 42.65s/it]


Step [3000/3000] Loss: 0.000 Time: 42.8

 99%|██████████████████████████████████████▌| 198/200 [2:21:34<01:25, 42.70s/it]


Step [3000/3000] Loss: 0.000 Time: 43.3

100%|██████████████████████████████████████▊| 199/200 [2:22:18<00:42, 42.89s/it]


Step [3000/3000] Loss: 0.000 Time: 42.9

100%|███████████████████████████████████████| 200/200 [2:23:01<00:00, 42.91s/it]


CPU times: user 1d 2h 13min 5s, sys: 1min 49s, total: 1d 2h 14min 54s
Wall time: 2h 23min 1s





In [12]:
torch.save(model.state_dict(), f"/home/sju/HyoJun/Creative_semester_system/Model/text2sign1.pth")

In [13]:
words

[['고민'],
 ['뻔뻔'],
 ['수어'],
 ['남아'],
 ['눈'],
 ['독신'],
 ['음료수'],
 ['발가락'],
 ['슬프다'],
 ['자극'],
 ['안타깝다'],
 ['어색하다'],
 ['여아'],
 ['외국인'],
 ['영아'],
 ['신사'],
 ['뉴질랜드'],
 ['나사렛대학교'],
 ['알아서'],
 ['장애인'],
 ['열아홉번째'],
 ['침착'],
 ['성실'],
 ['학교연혁'],
 ['싫어하다'],
 ['급하다'],
 ['필기시험'],
 ['병문안'],
 ['검사'],
 ['결승전'],
 ['낚시터'],
 ['낚시대'],
 ['당뇨병'],
 ['독서'],
 ['매표소'],
 ['면역'],
 ['감기'],
 ['배드민턴'],
 ['변비'],
 ['병명'],
 ['보건소'],
 ['불면증'],
 ['불행'],
 ['붕대'],
 ['사위'],
 ['설사'],
 ['성병'],
 ['방충'],
 ['소화제'],
 ['손녀'],
 ['손자'],
 ['수면제'],
 ['수집가'],
 ['여행지'],
 ['예식장'],
 ['올림픽경기'],
 ['회복'],
 ['첫번째'],
 ['운동경기'],
 ['입원'],
 ['재혼'],
 ['진단서'],
 ['축구장'],
 ['치료'],
 ['치료법'],
 ['친아들'],
 ['퇴원'],
 ['한약'],
 ['한약방'],
 ['빈혈'],
 ['화상'],
 ['가래떡'],
 ['고깃국'],
 ['고추'],
 ['고추가루'],
 ['사골'],
 ['배추국'],
 ['꽈베기'],
 ['벌꿀'],
 ['꿀물'],
 ['냄비'],
 ['찬물'],
 ['다과'],
 ['지방경찰청장'],
 ['된장찌게'],
 ['돼지고기'],
 ['두부'],
 ['딸기'],
 ['떡국'],
 ['라면'],
 ['막걸리'],
 ['무'],
 ['밥그릇'],
 ['밥솥'],
 ['보신탕'],
 ['부엌'],
 ['소불고기'],
 ['비빔밥'],
 ['사과'],
 ['사이다'],
 ['달다'],
 ['소주'],
 ['술잔'],
 [

In [None]:
n = 0

word = [words[n]]
landmark = y_data1[n, ...]

: 

: 

In [None]:
print(word)

: 

: 

In [None]:
landmark.shape

: 

: 

In [24]:
%%time

test_dataset = TestDataset(word, landmark[None, ...])
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

model.eval()

preds_list = []
for batch in test_dataloader:
    word, first_frame = batch
    word = word
    first_frame = first_frame.cuda()
    
    current_dec_input = first_frame.unsqueeze(1)   #.float()
    
    for step in range(NUM_FRAMES):  

        with torch.no_grad():
            output = model(word, current_dec_input, 'cuda')
            last_frame = output[:, -1, :, :]  # (B, L, D)
            last_frame_unsq = last_frame.unsqueeze(1)  # (B, 1, L, D)
            current_dec_input = torch.cat([current_dec_input, last_frame_unsq], dim=1)

    final_pred = current_dec_input[:, 1:, :, :]  # (batch, NUM_FRAMES, L, D)

    preds_list.append(final_pred.cpu())

preds = torch.cat(preds_list, dim=0).numpy()
print(preds.shape)

(1, 204, 137, 2)
CPU times: user 22 s, sys: 5.29 ms, total: 22 s
Wall time: 973 ms


In [25]:
with open('/home/sju/HyoJun/Creative_semester_system/scalers.pkl', 'rb') as f:
    scalers = pickle.load(f)

In [26]:
y_data_restored = []

for arr_normalized, scaler in zip(preds, scalers):
    arr_restored = scaler.inverse_transform(arr_normalized.reshape(-1, arr_normalized.shape[-1])).reshape(arr_normalized.shape)
    y_data_restored.append(arr_restored)

y_data_restored = np.array(y_data_restored)
print("Restored y_data shape:", y_data_restored.shape)

Restored y_data shape: (1, 204, 137, 2)


In [27]:
preds_inv = y_data_restored
print(preds_inv.shape)
np.save(f"/home/sju/HyoJun/Creative_semester_system/preds/sign_preds{n+1}.npy", preds_inv)

(1, 204, 137, 2)
