In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as gnn
from core.dataset import YogaDataset
from torch.utils.data import DataLoader
from torch_geometric.data import Data
import torch.optim as optim


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# ======================== MÔ HÌNH GCN ========================
import torch_geometric.nn as gnn
   
class YogaGCN(nn.Module):
    def __init__(self, in_channels=3, hidden_dim=128, num_classes=4):
        super(YogaGCN, self).__init__()
        self.conv1 = gnn.GCNConv(in_channels, hidden_dim)
        self.conv2 = gnn.GCNConv(hidden_dim, hidden_dim)
        self.conv3 = gnn.GCNConv(hidden_dim, hidden_dim)
        self.conv4 = gnn.GCNConv(hidden_dim, hidden_dim)  # Thêm một lớp nữa
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, edge_index, batch):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()
        x = self.conv4(x, edge_index).relu()
        x = gnn.global_mean_pool(x, batch)
        x = self.fc(x)
        return x


# ======================== EDGE INDEX (Mediapipe) ========================
def get_edge_index():
    """
    Trả về ma trận kề (edge_index) cho 33 keypoints của Mediapipe.
    """
    edges = [
        (0, 1), (1, 2), (2, 3), (3, 7),  # Tay trái
        (0, 4), (4, 5), (5, 6), (6, 8),  # Tay phải
        (9, 10), (11, 12),  # Hông
        (11, 13), (13, 15), (15, 17), (15, 19), (15, 21),  # Chân trái
        (12, 14), (14, 16), (16, 18), (16, 20), (16, 22),  # Chân phải
        (11, 23), (12, 24), (23, 24),  # Kết nối hông
        (23, 25), (25, 27), (27, 29), (29, 31),  # Chân trái
        (24, 26), (26, 28), (28, 30), (30, 32)   # Chân phải
    ]
    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()  # (2, num_edges)
    return edge_index

# ======================== HÀM TRAINING ========================
def train_gcn(model, train_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    edge_index = get_edge_index().to(device)  # Edge index không thay đổi

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()

        # Giữ đúng batch size
        batch_size, num_frames, num_keypoints, keypoint_dim = X_batch.shape
        X_batch = X_batch.view(batch_size * num_frames * num_keypoints, keypoint_dim)  # (13200, 3)

        # Tạo batch index đúng (1 batch cho mỗi video)
        batch = torch.arange(batch_size, device=device).repeat_interleave(num_frames * num_keypoints)

        # Đưa vào mô hình GCN
        outputs = model(X_batch, edge_index, batch)  # (batch_size, num_classes)

        # Kiểm tra outputs.shape
#         print(f"DEBUG - outputs shape: {outputs.shape}, y_batch shape: {y_batch.shape}")

        # Tính loss
        loss = criterion(outputs, y_batch.long())  # Đảm bảo y_batch có dtype phù hợp
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

    acc = 100 * correct / total
    return total_loss / len(train_loader), acc

In [4]:
# ======================== TRAINING ========================
# Dataset và DataLoader
json_folder = "data/keypoints/public_data"
dataset = YogaDataset(json_folder, max_frames=100)
batch_size = 4
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Khởi tạo mô hình và optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = YogaGCN(in_channels=3, hidden_dim=128, num_classes=4).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
# Train thử
num_epochs = 250
for epoch in range(num_epochs):
    loss, acc = train_gcn(model, dataloader, optimizer, criterion, device)
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {loss:.4f} - Accuracy: {acc:.2f}%")

Label map: {'Garland_Pose': 0, 'Happy_Baby_Pose': 1, 'Head_To_Knee_Pose': 2, 'Lunge_Pose': 3}
Epoch 1/250 - Loss: 1.3915 - Accuracy: 24.74%
Epoch 2/250 - Loss: 1.3641 - Accuracy: 32.99%
Epoch 3/250 - Loss: 1.3124 - Accuracy: 36.08%
Epoch 4/250 - Loss: 1.0792 - Accuracy: 58.76%
Epoch 5/250 - Loss: 0.8173 - Accuracy: 60.82%
Epoch 6/250 - Loss: 0.6910 - Accuracy: 68.04%
Epoch 7/250 - Loss: 0.6826 - Accuracy: 70.10%
Epoch 8/250 - Loss: 0.7178 - Accuracy: 72.16%
Epoch 9/250 - Loss: 0.6612 - Accuracy: 68.04%
Epoch 10/250 - Loss: 0.7021 - Accuracy: 70.10%
Epoch 11/250 - Loss: 0.7100 - Accuracy: 69.07%
Epoch 12/250 - Loss: 0.6887 - Accuracy: 70.10%
Epoch 13/250 - Loss: 0.5691 - Accuracy: 81.44%
Epoch 14/250 - Loss: 0.5770 - Accuracy: 81.44%
Epoch 15/250 - Loss: 0.5585 - Accuracy: 80.41%
Epoch 16/250 - Loss: 0.5856 - Accuracy: 76.29%
Epoch 17/250 - Loss: 0.5266 - Accuracy: 80.41%
Epoch 18/250 - Loss: 0.5357 - Accuracy: 79.38%
Epoch 19/250 - Loss: 0.5508 - Accuracy: 78.35%
Epoch 20/250 - Loss: 0

In [5]:
# Lưu trọng số mô hình sau khi train xong
torch.save(model.state_dict(), "yoga_gcn_4layer_128.pt")
print("✅ Trọng số đã được lưu vào 'yoga_gcn_weights.pt'")


✅ Trọng số đã được lưu vào 'yoga_gcn_weights.pt'


In [6]:
# # ======================== MÔ HÌNH GCN ========================
# import torch_geometric.nn as gnn

# class YogaGAT(nn.Module):
#     def __init__(self, in_channels=3, hidden_dim=128, num_classes=4):
#         super(YogaGAT, self).__init__()
#         self.conv1 = gnn.GATConv(in_channels, hidden_dim, heads=4, concat=True)
#         self.conv2 = gnn.GATConv(hidden_dim * 4, hidden_dim, heads=4, concat=True)
#         self.conv3 = gnn.GATConv(hidden_dim * 4, hidden_dim, heads=4, concat=True)
#         self.conv4 = gnn.GATConv(hidden_dim * 4, hidden_dim, heads=4, concat=True)
#         self.fc = nn.Linear(hidden_dim * 4, num_classes)

#     def forward(self, x, edge_index, batch):
#         x = self.conv1(x, edge_index).relu()
#         x = self.conv2(x, edge_index).relu()
#         x = self.conv3(x, edge_index).relu()
#         x = self.conv4(x, edge_index).relu()
#         x = gnn.global_mean_pool(x, batch)
#         x = self.fc(x)
#         return x

# # ======================== EDGE INDEX (Mediapipe) ========================
# def get_edge_index():
#     """
#     Trả về ma trận kề (edge_index) cho 33 keypoints của Mediapipe.
#     """
#     edges = [
#         (0, 1), (1, 2), (2, 3), (3, 7),  # Tay trái
#         (0, 4), (4, 5), (5, 6), (6, 8),  # Tay phải
#         (9, 10), (11, 12),  # Hông
#         (11, 13), (13, 15), (15, 17), (15, 19), (15, 21),  # Chân trái
#         (12, 14), (14, 16), (16, 18), (16, 20), (16, 22),  # Chân phải
#         (11, 23), (12, 24), (23, 24),  # Kết nối hông
#         (23, 25), (25, 27), (27, 29), (29, 31),  # Chân trái
#         (24, 26), (26, 28), (28, 30), (30, 32)   # Chân phải
#     ]
#     edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()  # (2, num_edges)
#     return edge_index

# # ======================== HÀM TRAINING ========================
# def train_gcn(model, train_loader, optimizer, criterion, device):
#     model.train()
#     total_loss = 0
#     correct = 0
#     total = 0

#     edge_index = get_edge_index().to(device)  # Edge index không thay đổi

#     for X_batch, y_batch in train_loader:
#         X_batch, y_batch = X_batch.to(device), y_batch.to(device)

#         optimizer.zero_grad()

#         # Giữ đúng batch size
#         batch_size, num_frames, num_keypoints, keypoint_dim = X_batch.shape
#         X_batch = X_batch.view(batch_size * num_frames * num_keypoints, keypoint_dim)  # (13200, 3)

#         # Tạo batch index đúng (1 batch cho mỗi video)
#         batch = torch.arange(batch_size, device=device).repeat_interleave(num_frames * num_keypoints)

#         # Đưa vào mô hình GCN
#         outputs = model(X_batch, edge_index, batch)  # (batch_size, num_classes)

#         # Kiểm tra outputs.shape
# #         print(f"DEBUG - outputs shape: {outputs.shape}, y_batch shape: {y_batch.shape}")

#         # Tính loss
#         loss = criterion(outputs, y_batch.long())  # Đảm bảo y_batch có dtype phù hợp
#         loss.backward()
#         optimizer.step()

#         total_loss += loss.item()
#         _, predicted = torch.max(outputs, 1)
#         correct += (predicted == y_batch).sum().item()
#         total += y_batch.size(0)

#     acc = 100 * correct / total
#     return total_loss / len(train_loader), acc

# # ======================== TRAINING ========================
# # Dataset và DataLoader
# json_folder = "data/keypoints/public_data"
# dataset = YogaDataset(json_folder, max_frames=100)
# batch_size = 4
# dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# # Khởi tạo mô hình và optimizer
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = YogaGCN(in_channels=3, hidden_dim=128, num_classes=4).to(device)
# optimizer = optim.Adam(model.parameters(), lr=0.001)
# criterion = nn.CrossEntropyLoss()

# # Train thử
# num_epochs = 250
# for epoch in range(num_epochs):
#     loss, acc = train_gcn(model, dataloader, optimizer, criterion, device)
#     print(f"Epoch {epoch+1}/{num_epochs} - Loss: {loss:.4f} - Accuracy: {acc:.2f}%")

In [7]:
# # Lưu trọng số mô hình sau khi train xong
# torch.save(model.state_dict(), "yoga_gat_4layer_128.pt")
# print("✅ Trọng số đã được lưu vào 'yoga_gcn_weights.pt'")

In [8]:
# import cv2
# import torch
# import numpy as np
# import mediapipe as mp
# from torch_geometric.data import Data
# import torch_geometric.nn as gnn

# # ======================== 1️⃣ Load Model & Edge Index ========================
# def load_gcn_model(model_path, num_classes=4):
#     """Load mô hình GCN đã huấn luyện"""
#     model = YogaGCN(in_channels=3, hidden_dim=128, num_classes=num_classes)
#     model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
#     model.eval()
#     return model

# def get_edge_index():
#     edges = [
#         (0, 1), (1, 2), (2, 3), (3, 7),  # Tay trái
#         (0, 4), (4, 5), (5, 6), (6, 8),  # Tay phải
#         (9, 10), (11, 12),  # Hông
#         (11, 13), (13, 15), (15, 17), (15, 19), (15, 21),  # Chân trái
#         (12, 14), (14, 16), (16, 18), (16, 20), (16, 22),  # Chân phải
#         (11, 23), (12, 24), (23, 24),  # Kết nối hông
#         (23, 25), (25, 27), (27, 29), (29, 31),  # Chân trái
#         (24, 26), (26, 28), (28, 30), (30, 32)   # Chân phải
#     ]
#     return torch.tensor(edges, dtype=torch.long).t().contiguous()

# # ======================== Trích xuất Skeleton từ Video ========================
# mp_pose = mp.solutions.pose
# pose = mp_pose.Pose()

# def extract_skeleton_from_video(video_path, num_frames=64):
#     """Trích xuất keypoints từ video"""
#     cap = cv2.VideoCapture(video_path)
#     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#     indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)

#     skeleton_data = []
#     for idx in indices:
#         cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
#         ret, frame = cap.read()
#         if not ret:
#             continue

#         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#         results = pose.process(frame_rgb)

#         keypoints = []
#         if results.pose_landmarks:
#             for lm in results.pose_landmarks.landmark:
#                 keypoints.append([lm.x, lm.y, lm.z])
#         else:
#             keypoints = [[0, 0, 0]] * 33  # Nếu không nhận diện được, điền 0

#         skeleton_data.append(keypoints)

#     cap.release()
#     skeleton_data = np.array(skeleton_data)  # Shape: (num_frames, 33, 3)
#     return skeleton_data

# # ======================== Hàm Chuẩn Hóa Skeleton ========================
# def normalize_skeleton(skeleton):
#     """Chuẩn hóa skeleton bằng cách dịch tọa độ về trung tâm"""
#     mean_pose = np.mean(skeleton[:, :, :2], axis=(0, 1))  # Trung bình trên trục x, y
#     skeleton[:, :, :2] -= mean_pose  # Dịch về trung tâm
#     return skeleton

# # ======================== Hàm Dự Đoán Động Tác ========================
# def predict_action(video_path, model, classes):
#     """
#     Nhận video đầu vào, trích xuất skeleton, và dự đoán động tác.
#     """
#     # Trích xuất skeleton từ video
#     skeleton = extract_skeleton_from_video(video_path)

#     # Chuẩn hóa tọa độ
#     skeleton = normalize_skeleton(skeleton)

#     # Chuyển đổi dữ liệu thành dạng Graph
#     num_frames, num_keypoints, keypoint_dim = skeleton.shape
#     X_batch = torch.tensor(skeleton, dtype=torch.float).view(num_frames * num_keypoints, keypoint_dim)

#     # Tạo batch index cho video
#     batch = torch.zeros(num_frames * num_keypoints, dtype=torch.long)

#     # Load edge index
#     edge_index = get_edge_index()

#     # Đưa vào model để dự đoán
#     with torch.no_grad():
#         output = model(X_batch, edge_index, batch)
#         predicted_class = torch.argmax(output, dim=1).item()

#     return classes[predicted_class]

# # ======================== Chạy thử với video của người dùng ========================
# if __name__ == "__main__":
#     # Load model
#     model_path = "yoga_gcn_4layer_128.pt"
#     model = load_gcn_model(model_path, num_classes=4)

#     # Danh sách lớp động tác yoga
#     classes = ["Garland_Pose", "Happy_Baby_Pose", "Head_To_Knee_Pose", "Lunge_Pose"]
#     # Nhận video đầu vào từ người dùng
#     video_path = input("Nhập đường dẫn video: ")

#     # Chạy dự đoán
#     predicted_action = predict_action(video_path, model, classes)
#     print(f"Động tác được dự đoán: {predicted_action}")

In [13]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch_geometric.nn as gnn
from torch.utils.data import DataLoader
import json

# ======================== ĐỊNH NGHĨA EDGE INDEX ========================
def get_edge_index():
    edges = [
        (0, 1), (1, 2), (2, 3), (3, 7),  # Tay trái
        (0, 4), (4, 5), (5, 6), (6, 8),  # Tay phải
        (9, 10), (11, 12),  # Hông
        (11, 13), (13, 15), (15, 17), (15, 19), (15, 21),  # Chân trái
        (12, 14), (14, 16), (16, 18), (16, 20), (16, 22),  # Chân phải
        (11, 23), (12, 24), (23, 24),  # Kết nối hông
        (23, 25), (25, 27), (27, 29), (29, 31),  # Chân trái
        (24, 26), (26, 28), (28, 30), (30, 32)   # Chân phải
    ]
    return torch.tensor(edges, dtype=torch.long).t().contiguous()

# ======================== KIỂM TRA & LOAD PRE-TRAINED MODEL ========================
def check_and_load_model(model_path, in_channels, hidden_dim, num_classes):
    if not os.path.exists(model_path):
        print("🚨 Không tìm thấy model, cần train lại.")
        return None  # Model chưa tồn tại, cần train lại

    state_dict = torch.load(model_path, map_location=torch.device("cpu"))
    
    # Kiểm tra kiến trúc của model
    try:
        detected_hidden_dim = state_dict["conv1.lin.weight"].shape[0]
        detected_in_channels = state_dict["conv1.lin.weight"].shape[1]
        detected_num_classes = state_dict["fc.weight"].shape[0]

        print(f"📊 Model Found - in_channels: {detected_in_channels}, hidden_dim: {detected_hidden_dim}, num_classes: {detected_num_classes}")

        if detected_in_channels != in_channels or detected_hidden_dim != hidden_dim or detected_num_classes != num_classes:
            print("⚠️ Hyperparameter không khớp! Cần train lại.")
            return None  # Model có kiến trúc khác, cần train lại
    except KeyError:
        print("🚨 Lỗi khi đọc weight, có thể file .pt bị lỗi. Cần train lại!")
        return None

    # Load mô hình
    model = YogaGCN(in_channels, hidden_dim, num_classes)
    model.load_state_dict(state_dict)
    model.eval()
    print("✅ Model loaded successfully!")
    return model

# ======================== TRAINING FUNCTION ========================
def train_gcn(model, train_loader, optimizer, criterion, device, num_epochs=50):
    model.train()
    edge_index = get_edge_index().to(device)

    for epoch in range(num_epochs):
        total_loss = 0
        correct = 0
        total = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()

            # 🚨 Debug: Kiểm tra nhãn trước khi train
            print(f"🔍 y_batch unique values: {y_batch.unique()}")  
            print(f"🔍 Max label: {y_batch.max().item()}, Expected < {model.fc.out_features}")

            if y_batch.max().item() >= model.fc.out_features:
                raise ValueError(f"❌ Lỗi: Nhãn {y_batch.max().item()} vượt quá số lớp {model.fc.out_features}")

            batch_size, num_frames, num_keypoints, keypoint_dim = X_batch.shape
            X_batch = X_batch.view(batch_size * num_frames * num_keypoints, keypoint_dim)

            batch = torch.arange(batch_size, device=device).repeat_interleave(num_frames * num_keypoints)

            outputs = model(X_batch, edge_index, batch)

            # 🚨 Debug: Kiểm tra shape output
            print(f"🔍 Outputs shape: {outputs.shape}, Expected: ({batch_size}, {model.fc.out_features})")

            loss = criterion(outputs, y_batch.long())

            # 🚨 Kiểm tra loss trước khi backward
            print(f"🔍 Loss value: {loss.item()}")

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)

        acc = 100 * correct / total
        print(f"✅ Epoch {epoch+1}/{num_epochs} - Loss: {total_loss:.4f} - Accuracy: {acc:.2f}%")

    return model


In [14]:
# ======================== CHẠY PRE-TRAINED MODEL ========================
if __name__ == "__main__":
    model_path = "yoga_gcn_4layer_128.pt"
    in_channels = 3
    hidden_dim = 64
    num_classes = 2
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = check_and_load_model(model_path, in_channels, hidden_dim, num_classes)

    if model is None:
        print("🔄 Training new model...")
        model = YogaGCN(in_channels, hidden_dim, num_classes).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()

        # Khởi tạo DataLoader
        json_folder = "data/keypoints/public_data"
        dataset = YogaDataset(json_folder, max_frames=100)
        batch_size = 4
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        # Train lại
        model = train_gcn(model, train_loader, optimizer, criterion, device)

        # Lưu lại model sau khi fine-tune
        torch.save(model.state_dict(), model_path)
        print(f"✅ Model saved to {model_path}")
    else:
        print("🚀 Using pre-trained model. No need to train.")

  state_dict = torch.load(model_path, map_location=torch.device("cpu"))


📊 Model Found - in_channels: 3, hidden_dim: 128, num_classes: 4
⚠️ Hyperparameter không khớp! Cần train lại.
🔄 Training new model...
Label map: {'Garland_Pose': 0, 'Happy_Baby_Pose': 1, 'Head_To_Knee_Pose': 2, 'Lunge_Pose': 3}
🔍 y_batch unique values: tensor([0, 1, 2], device='cuda:0')
🔍 Max label: 2, Expected < 2


ValueError: ❌ Lỗi: Nhãn 2 vượt quá số lớp 2

In [None]:
a = 1