In [44]:
import os
import torch
import numpy as np
import pandas as pd

from transformers import BertTokenizer, TFBertModel
from transformers import BertTokenizer, BertModel

In [28]:
# Load the label table and force the Korean text column to plain strings.
# NOTE(review): astype(str) turns missing values into the literal text 'nan' —
# confirm the CSV has no empty cells in this column.
label = pd.read_csv("C:/Users/NEULET/Desktop/tharm_ai/sample_data/sample_label.csv")
label = label.assign(**{'한국어': label['한국어'].astype(str)})

In [57]:
# NOTE(review): loading this checkpoint emits a warning that its tokenizer class
# is 'KoBertTokenizer' while 'BertTokenizer' is used here, which "may result in
# unexpected tokenization". Confirm the tokenization is what you expect before
# trusting the embeddings.
tokenizer = BertTokenizer.from_pretrained('monologg/kobert')

# The encoder gets its own name because a later cell rebinds `model` to the
# Keras regressor; get_embedding() must keep pointing at BERT after that.
bert_model = TFBertModel.from_pretrained('monologg/kobert')
model = bert_model  # backward-compatible alias for code that still reads `model`


def get_embedding(texts, batch_size=32):
    """Return the [CLS] embedding of every input text.

    Args:
        texts: A single string or an iterable of strings.
        batch_size: Number of texts encoded per forward pass. Batching keeps
            memory bounded instead of pushing the whole corpus through the
            encoder at once.

    Returns:
        A rank-2 tf.Tensor with one row per input text, taken from position 0
        of the encoder's last hidden state.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    import tensorflow as tf  # local import: this cell runs before the cell that imports tf

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    cls_batches = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]

        # Tokenize the chunk and convert it to TensorFlow tensors
        inputs = tokenizer(chunk, padding=True, truncation=True, return_tensors='tf')

        # Pass the tensors explicitly to the encoder
        outputs = bert_model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])

        # Keep only the vector at the [CLS] position (index 0 of the sequence axis)
        cls_batches.append(outputs.last_hidden_state[:, 0, :])

    if not cls_batches:
        # No input texts: return an empty matrix with the encoder's hidden width
        return tf.zeros((0, bert_model.config.hidden_size), dtype=tf.float32)

    return tf.concat(cls_batches, axis=0)


embeddings = get_embedding(label['한국어'].tolist())

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'KoBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
All PyTorch model weights were used when initializing TFBertModel.

All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [45]:
POSEDATAPATH = '../local'

# Load every pose keypoint array; the list later becomes the "pose" column.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the pairing with label rows deterministic across machines and runs.
# Assumes the rows of `label` follow the same filename order — TODO confirm.
# Non-.npy entries (e.g. checkpoints or OS metadata files) are skipped.
pose_datapack = [
    np.load(os.path.join(POSEDATAPATH, file_name))
    for file_name in sorted(os.listdir(POSEDATAPATH))
    if file_name.endswith('.npy')
]

In [46]:
# Attach the loaded keypoint arrays to the label table, one array per row.
label = label.assign(pose=pose_datapack)

In [47]:
# Collect the per-sample keypoint arrays; an example shape is (141, 33, 3).
poses = label['pose'].tolist()

# Longest sequence length (first axis) over all samples; 0 when there are none.
max_len = max((sample.shape[0] for sample in poses), default=0)

print("Max Length :", max_len)

# Zero-pad every shorter sequence at the end of its first axis up to max_len;
# sequences that are already max_len long are kept untouched.
padded_poses = [
    np.pad(
        sample,
        ((0, max_len - sample.shape[0]), (0, 0), (0, 0)),
        mode='constant',
        constant_values=0,
    )
    if sample.shape[0] < max_len
    else sample
    for sample in poses
]

# Stack the equally sized samples into a single array.
pose_array = np.array(padded_poses)
print(f"pose_array Shape : {pose_array.shape}")

Max Length : 210
pose_array Shape : (2000, 210, 33, 3)


In [89]:
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Reshape

embedding_dim = 768  # KoBERT embedding size
# Take the target shape from the padded training data rather than hard-coding
# it, so the network always matches whatever the padding cell produced.
keypoint_shape = tuple(pose_array.shape[1:])

# Small MLP that maps one sentence embedding to a full keypoint sequence.
model = tf.keras.Sequential([
    # Explicit Input layer: passing input_shape= to the first Dense layer
    # triggers a Keras warning recommending an Input object instead.
    tf.keras.Input(shape=(embedding_dim,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    # np.prod returns a numpy integer; cast it so `units` is a plain Python int
    tf.keras.layers.Dense(int(np.prod(keypoint_shape))),  # flattened output size
    tf.keras.layers.Reshape(keypoint_shape)  # restore the keypoint layout
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(embeddings, pose_array, epochs=100, batch_size=32)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.1408
Epoch 2/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0470
Epoch 3/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0462
Epoch 4/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0462
Epoch 5/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0457
Epoch 6/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0454
Epoch 7/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0442
Epoch 8/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0451
Epoch 9/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0445
Epoch 10/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0461
Epoch 11/10

<keras.src.callbacks.history.History at 0x2b6b8afba90>

In [79]:
# Take the first embedding and add a leading batch axis so it can be fed to predict().
new_embeddings = np.asarray(embeddings[0])[np.newaxis, ...]

In [88]:
predictions = model.predict(new_embeddings)
# Reuse the model's target shape instead of repeating the literal dimensions,
# so this cell stays correct if the padded sequence length changes.
predicted_keypoints = predictions.reshape(-1, *keypoint_shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


In [83]:
# Display the shape of the prediction as a sanity check.
np.shape(predicted_keypoints)

(1, 210, 33, 3)

In [85]:
# Persist the predicted keypoints next to the notebook (relative to the working directory).
np.save(file="predicted_keypoints.npy", arr=predicted_keypoints)

In [87]:
from scipy.spatial.distance import euclidean

# Load the prediction from the same relative location the save cell wrote to,
# and the reference pose from the same directory the training poses came from.
# Relative paths replace the user-specific absolute paths so the comparison
# cannot silently pick up a stale file and works on other machines.
predicted_keypoints = np.load("predicted_keypoints.npy")
data2 = np.load(os.path.join(POSEDATAPATH, "KETI_SL_0000000001_pose.npy"))

# The two arrays can differ in length. Lay the prediction out with the
# reference's per-frame shape, then truncate both to the shorter one
# (truncation, not padding). reshape() raises if the prediction cannot be
# split into such frames, instead of comparing misaligned values.
frame_shape = data2.shape[1:]
predicted_frames = predicted_keypoints.reshape(-1, *frame_shape)
n_frames = min(len(predicted_frames), len(data2))
predicted_keypoints = predicted_frames[:n_frames]
data2 = data2[:n_frames]

# Euclidean distance between the flattened keypoint sequences
distance = euclidean(predicted_keypoints.ravel(), data2.ravel())
print("Euclidean distance:", distance)

Euclidean distance: 19.279643171595705
