In [2]:
from dragon import dragonV
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import csv
from dragon import dragonV

# define network

In [3]:
class LSTM(nn.Module):
    """LSTM encoder followed by a linear layer and a sigmoid.

    The LSTM is built with ``batch_first=True``, so ``forward`` expects input
    laid out as (batch, timesteps, input_size). Only the final hidden state of
    the last LSTM layer is passed to the linear head.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of sigmoid outputs per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.hidden_size = hidden_size

        # Attribute names and creation order are part of the checkpoint
        # contract (state_dict keys), so they are kept exactly as they were.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid scores of shape (batch, output_size) for a batch of sequences."""
        # nn.LSTM returns (per-step outputs, (h_n, c_n)); only h_n is needed.
        _, recurrent_state = self.lstm(x)
        final_hidden = recurrent_state[0]
        # final_hidden[-1] is the last layer's hidden state after the final timestep.
        scores = self.fc(final_hidden[-1])
        return self.sigmoid(scores)

# fetch data

In [11]:
# Input locations: the directory, the keypoint archive, and the rendered video.
# NOTE(review): root_path is an absolute, machine-specific Windows path, so the
# notebook will not run elsewhere without editing it — consider a configurable
# data directory.
root_path = 'C:/Users/admin/Desktop/Philadelphia/inputs/'
npz_file_name = '1_B.npz'
video_file_name = 'openpose_render_video.mp4'

In [5]:
# Open the .npz archive; its 'keypoints' entry is read in a later cell.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code — only use it on trusted files.
data = np.load(root_path + npz_file_name, encoding='latin1', allow_pickle=True)

In [6]:
# Pull the 'keypoints' entry out of the archive as a float32 array in one step.
keypoints = np.array(data['keypoints'], dtype=np.float32)

In [12]:
# Presumably the number of frames in the video file (dragonV is a project
# helper whose implementation is not shown here — confirm).
total_video_frame_len = dragonV.get_total_frame(root_path + video_file_name)

In [14]:
# Sanity check: compare the number of keypoint frames with the video's frame
# count (both are 316 in the recorded run).
print(f'keypoints_len : {len(keypoints)}')
print(f'video_total_frame_len : {total_video_frame_len}')

keypoints_len : 316
video_total_frame_len : 316


In [16]:
# Inspect the data layout: frame count, joints per frame, then the first frame.
print(len(keypoints), len(keypoints[0]), sep='\n')
print(keypoints[0])

316
25
[[  0.       0.    ]
 [812.943  171.217 ]
 [924.63   180.099 ]
 [933.513  353.696 ]
 [930.51   480.158 ]
 [715.726  153.646 ]
 [680.387  315.424 ]
 [692.06   441.948 ]
 [798.135  483.197 ]
 [859.91   489.105 ]
 [862.86   712.81  ]
 [851.082  910.06  ]
 [739.244  480.214 ]
 [742.288  712.78  ]
 [765.624  903.996 ]
 [  0.       0.    ]
 [  0.       0.    ]
 [895.244   62.2961]
 [789.377   56.4336]
 [754.016  889.394 ]
 [742.23   901.096 ]
 [765.841  924.655 ]
 [883.582  901.144 ]
 [886.457  907.047 ]
 [836.437  930.696 ]]


# joint_pose_2d_data 형태 변환

In [24]:
# Flatten each frame from [[x, y], [x, y], ...] into [x0, y0, x1, y1, ...].
keypoints_list = keypoints.tolist()

total_frame_data_list = [
    [coord for joint in frame for coord in (joint[0], joint[1])]
    for frame in keypoints_list
]

In [27]:
# Number of flattened frames (one coordinate list per frame).
len(total_frame_data_list)

316

# data 전처리과정 : total_frame_data_list

In [28]:
# OpenPose joint indices to keep (13 of the 25 joints per frame).
# NOTE(review): presumably the BODY_25 lower-body set (hips, knees, ankles,
# feet) — confirm against the OpenPose keypoint ordering. The name is
# misspelled ("seleted") but is referenced by a later cell, so it is left as is.
seleted_openpose_joint_idx_list = [8, 9, 10, 11, 12, 13, 14, 19, 20, 21, 22, 23, 24]

In [30]:
# Normalize the flattened coordinates with the project helper.
# NOTE(review): presumably scales values into [0, 1] (the recorded outputs lie
# in that range) — confirm in dragonV; the implementation is not shown here.
norm_total_frame_data_list = dragonV.nomalize_data(total_frame_data_list)

In [31]:
# Keep only the 13 selected joints out of the 25 per frame; in the recorded run
# each resulting frame holds 26 values (x and y for every kept joint).
norm_total_frame_lower_joint_list = dragonV.get_selected_joint_pos_frame_list(norm_total_frame_data_list, seleted_openpose_joint_idx_list)

# norm_lower_joint_list -> torch : convert dtype

In [35]:
'''
check data shape
'''
# Frame count after joint selection; the sliding-window cell further down uses
# it as its loop bound.
norm_data_len = len(norm_total_frame_lower_joint_list)
print(f'norm_dat_len : {norm_data_len}')

norm_dat_len : 316


In [36]:
# Display the first frame's feature vector (26 values in the recorded run).
np.array(norm_total_frame_lower_joint_list[0])

array([6.91364812e-02, 9.82784883e-01, 9.31170169e-02, 9.83661168e-01,
       2.36435170e-02, 1.00000000e+00, 6.92694090e-02, 1.00000000e+00,
       7.81096969e-02, 9.82798604e-01, 2.75956925e-02, 1.00000000e+00,
       1.48978422e-02, 9.99938293e-01, 0.00000000e+00, 9.74902543e-01,
       0.00000000e+00, 9.93781254e-01, 5.44943929e-04, 9.99862932e-01,
       3.21124959e-02, 9.99922067e-01, 6.14713637e-02, 1.00000000e+00,
       2.28733863e-02, 9.99995927e-01])

In [37]:
# Cut the frame sequence into overlapping windows of consecutive frames
# (stride 1). The window length was previously spread across two coupled
# literals (8 in the loop bound, 9 in the slice); a single named value keeps
# them in sync. With N frames this yields N - window_size + 1 windows
# (316 -> 308 in the recorded run).
window_size = 9
trimmed_data = [
    norm_total_frame_lower_joint_list[start:start + window_size]
    for start in range(norm_data_len - window_size + 1)
]

In [42]:
# Report the window layout as (samples, timesteps, features);
# the recorded run shows (308, 9, 26).
print(f'samples_num : {len(trimmed_data)}')
print(f'timesteps : {len(trimmed_data[0])}')
print(f'input_size : {len(trimmed_data[0][0])}')

samples_num : 308
timesteps : 9
input_size : 26


In [41]:
# Display the first window (9 consecutive frames in the recorded run).
np.array(trimmed_data[0])

array([[6.91364812e-02, 9.82784883e-01, 9.31170169e-02, 9.83661168e-01,
        2.36435170e-02, 1.00000000e+00, 6.92694090e-02, 1.00000000e+00,
        7.81096969e-02, 9.82798604e-01, 2.75956925e-02, 1.00000000e+00,
        1.48978422e-02, 9.99938293e-01, 0.00000000e+00, 9.74902543e-01,
        0.00000000e+00, 9.93781254e-01, 5.44943929e-04, 9.99862932e-01,
        3.21124959e-02, 9.99922067e-01, 6.14713637e-02, 1.00000000e+00,
        2.28733863e-02, 9.99995927e-01],
       [5.20340526e-02, 9.82664078e-01, 9.27148075e-02, 9.83505103e-01,
        2.33795294e-02, 9.99818318e-01, 6.92143949e-02, 9.99964417e-01,
        7.78182478e-02, 9.82645856e-01, 2.75442567e-02, 9.91309539e-01,
        1.48708373e-02, 9.99878919e-01, 1.44383791e-02, 9.81052810e-01,
        0.00000000e+00, 9.93749364e-01, 4.04417049e-04, 9.99807580e-01,
        3.17731532e-02, 9.99922067e-01, 6.12247075e-02, 9.99974148e-01,
        2.28502757e-02, 9.99989878e-01],
       [5.18756868e-02, 9.82756087e-01, 9.26040193e-02

In [43]:
# Stack all windows into one array, then copy it into a float32 tensor that
# the model consumes (layout follows the windows: samples, timesteps, features).
input_data_np = np.array(trimmed_data)
input_data_tensor = torch.tensor(input_data_np, dtype=torch.float32)

In [44]:
# Data validation: report whether any element of the input tensor is NaN.
nan_mask = torch.isnan(input_data_tensor)
has_nan = bool(nan_mask.any())
print("NaN 값이 있는지 여부:", has_nan)

NaN 값이 있는지 여부: False


# model load

In [46]:
# Architecture hyper-parameters. load_state_dict is strict by default, so these
# must match the shapes stored in the checkpoint.
input_size = 26   # features per frame (matches the window data printed earlier)
output_size = 4   # number of sigmoid outputs per window
hidden_size = 128

# NOTE(review): absolute, machine-specific path — consider a configurable
# models directory.
model_weights_path = 'C:/Users/admin/Desktop/Philadelphia/models/1000_epochs.pth'

model = LSTM(input_size, hidden_size, output_size)
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# machine without CUDA; the model is never moved off the CPU in this notebook.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load trusted checkpoints (newer PyTorch offers weights_only=True).
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'))

<All keys matched successfully>

In [47]:
# Switch the module to evaluation mode before running inference.
model.eval()

LSTM(
  (lstm): LSTM(26, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=4, bias=True)
  (sigmoid): Sigmoid()
)

In [48]:
# Inference only: disable autograd so no computation graph is recorded for the
# forward pass (less memory, and `outputs` carries no grad history).
with torch.no_grad():
    outputs = model(input_data_tensor)

In [53]:
# Show the four sigmoid scores produced for the first window.
print(outputs[0].tolist())

[0.9787519574165344, 0.150516539812088, 1.992872707134996e-11, 3.8055914046708494e-06]


# model outputs 해석 코드

In [80]:
# Decision threshold applied to each sigmoid score when binarizing predictions.
threshold = 0.5

In [81]:
# Binarize: 1 where a score is strictly above the threshold, otherwise 0 (int32).
predicted = torch.gt(outputs, threshold).int()

In [118]:
# One all-zero row with four entries, used as padding for the prediction array.
padding_np = np.array([0] * 4)

In [113]:
# Copy the binarized predictions into a NumPy array.
# NOTE(review): np.array on a tensor presumably relies on it living on the CPU
# and not requiring grad — confirm if the model is ever moved to a GPU.
predicted_np = np.array(predicted)

In [124]:
# Build a new array: four zero rows stacked on top of the per-window predictions
# (4 + 308 = 312 rows in the recorded run).
# NOTE(review): presumably the 4-row offset aligns each 9-frame window with its
# centre frame; the trailing frames get no padding — confirm this is intended.
padded_predicted_np = np.vstack([*(padding_np for _ in range(4)), predicted_np])

In [117]:
# Display the per-window binary predictions.
predicted_np

array([[1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       ...,
       [0, 1, 1, 1],
       [0, 1, 1, 1],
       [0, 1, 1, 1]])

In [114]:
# np.append without `axis` flattens both inputs into a single 1-D array. Run top
# to bottom, that overwrote the (rows, 4) array built with np.vstack in the
# earlier cell and contradicted the recorded outputs (length 312, rows of four
# labels). Stack row-wise with the same four padding rows so the result keeps
# one 4-label row per entry.
padded_predicted_np = np.vstack([padding_np] * 4 + [predicted_np])

In [126]:
# Row count after padding (312 in the recorded run).
len(padded_predicted_np)

312

In [127]:
# Convert the padded prediction array into nested Python lists.
padded_predicted_list = padded_predicted_np.tolist()

In [128]:
# Display the padded predictions.
# NOTE(review): this prints every row; consider showing a slice
# (e.g. the first few rows) to keep the notebook output readable.
padded_predicted_list

[[0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [1, 0, 0, 0],
 [1, 0, 0, 0],
 [1, 0, 0, 0],
 [1, 0, 0, 0],
 [0, 0, 0, 0],
 [1, 1, 0, 0],
 [0, 1, 0, 0],
 [0, 1, 0, 0],
 [1, 1, 0, 0],
 [1, 1, 0, 0],
 [1, 1, 0, 0],
 [1, 1, 0, 0],
 [1, 1, 0, 0],
 [0, 1, 0, 0],
 [0, 1, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 1],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 1],
 [0, 0, 0, 1],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 1, 1],
 [0, 0, 1, 1],
 [0, 0, 1, 1],
 [0, 0, 1, 1],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [1, 1, 0, 0],
 [0, 1, 0, 0],
 [1, 1, 0, 0],
 [1, 1, 0, 0],
 [0, 1, 0, 0],
 [0, 1, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 1,