In [73]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import json
from tqdm import tqdm

In [74]:
batch_size = 3

In [75]:
def load_data(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)

    frames = sorted(data.keys(), key=int)
    keypoints = []

    for frame in frames:
        joints = []
        for joint in range(133):
            x = data[frame]['keypoints_3d'][str(joint)]['x']
            y = data[frame]['keypoints_3d'][str(joint)]['y']
            z = data[frame]['keypoints_3d'][str(joint)]['z']
            joints.append([x, y, z])
        keypoints.append(joints)

    return np.array(keypoints)

In [76]:
#data_parts = [load_data(f'2Dto3D_train_part{i}.json') for i in range(1, 6)]
data_parts = load_data(f'2Dto3D_train_part1.json')
print(data_parts)

[[[ -272.73312378 -1591.20410156   214.12431335]
  [ -292.15545654 -1631.90344238   225.7348938 ]
  [ -277.43344116 -1605.56066895   158.23718262]
  ...
  [ -216.53497314 -1182.04492188   321.25167847]
  [ -228.36376953 -1181.07006836   329.82846069]
  [ -236.87924194 -1181.34899902   340.75708008]]

 [[  170.22555542 -1572.58642578   387.50262451]
  [  170.06193542 -1613.06005859   410.53143311]
  [  129.96195984 -1593.53295898   351.56286621]
  ...
  [  229.43826294 -1150.8001709    412.71957397]
  [  226.92501831 -1149.75952148   427.10821533]
  [  228.55450439 -1149.44030762   440.86569214]]

 [[  458.55859375 -1305.22167969  -859.23413086]
  [  482.79571533 -1334.77392578  -885.83679199]
  [  463.52896118 -1342.02697754  -814.82409668]
  ...
  [  358.38394165  -897.51580811  -783.16461182]
  [  370.22567749  -891.80187988  -789.61108398]
  [  378.97741699  -886.4911499   -798.95062256]]

 ...

 [[ -241.22601318 -1249.51818848    44.94993973]
  [ -235.17689514 -1287.59643555     4.

In [77]:

#train_data = np.concatenate(data_parts[:-1]) 
#test_data = data_parts[-1] 
train_size = int(len(data_parts) * 0.8)  # Use 80% for training
train_data = data_parts[:train_size]
test_data = data_parts[train_size:]
def prepare_data(data, num_intermediate_frames=30):
    x = []
    y = []

    for i in range(len(data) - 1):  
        first_frame = data[i]         # Frame 0
        last_frame = data[i + 1]      # Frame 31

        x.append(np.stack([first_frame, last_frame]))  

        intermediate_frames = np.linspace(first_frame, last_frame, num=num_intermediate_frames + 2)[1:-1]
        y.append(intermediate_frames)  

    return np.array(x), np.array(y)

In [78]:
x_train, y_train = prepare_data(train_data)
x_test, y_test = prepare_data(test_data)

x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [79]:
X, y = next(iter(train_loader))
pred = model(X)

In [80]:
pred[0, 0]

tensor([[   19.7540, -1395.9208,  -316.3627],
        [   20.1959, -1422.4930,  -321.9966],
        [   20.3829, -1421.3575,  -321.8153],
        [   20.8661, -1412.7003,  -320.1758],
        [   20.6675, -1413.3226,  -319.6774],
        [   19.4792, -1253.5916,  -285.7900],
        [   18.4536, -1253.3610,  -282.9143],
        [   17.6798, -1073.6816,  -245.6722],
        [   16.3959, -1081.3912,  -242.6867],
        [   16.9949, -1030.9094,  -236.1678],
        [   14.8895, -1048.4792,  -235.1904],
        [   13.7397,  -821.7813,  -189.1662],
        [   12.7979,  -823.1894,  -187.5557],
        [    8.9092,  -454.2154,  -108.1515],
        [    8.6559,  -460.6029,  -106.0592],
        [    5.6738,   -91.7944,   -27.0470],
        [    3.9451,   -97.2480,   -25.1494],
        [    4.2853,   -43.5341,   -17.2050],
        [    4.9573,   -41.2432,   -17.0306],
        [    5.8786,   -53.4165,   -19.3799],
        [    1.7256,   -47.4402,   -14.2899],
        [    2.1609,   -47.4650,  

In [81]:
# print("x_train shape:", x_train.shape)  # Should be (num_samples, 2, 133, 3)
# print("y_train shape:", y_train.shape)

    
class ASLFrameGenerator(nn.Module):
    def __init__(self, input_size=133*3, hidden_size=512, output_size=133*3, num_layers=2, num_frames=30):
        super(ASLFrameGenerator, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.num_frames = num_frames
    
    def forward(self, x):
        # Shape size of batch, 2, 133, 3
        f_batch_size = x.size(0) 
        x = x.view(f_batch_size, 2, -1)
        
        # print(f'1: {x.shape}')
        lstm_out, _ = self.lstm(x)
        # print(f'2: {lstm_out.shape}')
        outputs = []
        last_output = lstm_out[:, -1, :]  # Take the hidden state from the last time step
        for _ in range(30):
            last_output = last_output.squeeze(1)  # Reshape to (batch_size, 1, hidden_size)
            out = self.fc(last_output)  # Shape: (batch_size, 1, 133*3)
            out = out.unsqueeze(1)
            # print(f'3: {out.shape}')
            outputs.append(out)
            
            last_output, _ = self.lstm(out)  # Feed output back to LSTM for next frame
        
        # Concatenate all outputs to form the final output shape (batch_size, 30, 133*3)
        outputs = torch.cat(outputs, dim=1)
        
        # Reshape output to (batch_size, 30, 133, 3)
        outputs = outputs.view(f_batch_size, 30, 133, 3)
        # print(f'4: {outputs.shape}')
        return outputs

        

In [82]:
model = ASLFrameGenerator()
criterion = nn.MSELoss()
opt = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(10):
    model.train()
    loss = 0.0
    for inputs, targets in tqdm(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        opt.zero_grad()
        outputs = model(inputs)
        cur_loss = criterion(outputs, targets)
        cur_loss.backward()
        opt.step()
        loss += cur_loss.item()
        if (i + 1) % 10 == 0: 
            print(f"Epoch [{epoch + 1}/{10}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}")

    print(f"Epoch [{epoch + 1}/{10}], Avg Loss: {loss / len(train_loader)}")

  3%|▎         | 129/4267 [01:44<56:06,  1.23it/s] 


KeyboardInterrupt: 

In [57]:
def test_model(model, test_loader, criterion):
    model.eval()
    running_loss = 0.0
        
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            running_loss += loss.item()

    average_loss = running_loss / len(test_loader)
    print(f'Test Loss: {average_loss:.4f}')


In [58]:
test_loss = test_model(model, test_loader, criterion)

1: torch.Size([3, 2, 399])
2: torch.Size([3, 2, 512])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399])
4: torch.Size([3, 30, 133, 3])
1: torch.Size([3, 2, 399])
2: torch.Size([3, 2, 512])
3: torch.Size([3, 1, 399])
3: torch.Size([3, 1, 399

KeyboardInterrupt: 