### **Install all**

In [None]:
!pip install opencv-python
!pip install torch
!pip install requests

In [None]:
!pip install timm


In [None]:
!pip install torch-geometric


In [None]:
cd drive/MyDrive/OpenPose-Pose-Estimation

In [3]:
#import packages

import cv2
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib

from random import randint
from pathlib import Path
import json

from collections import defaultdict

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
from torchvision.models import resnet34
#device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
from custom_datasets import YogaPoseDataset

import model_utils
import plot_utils

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [5]:
# Paths to the OpenPose Caffe network definition and weights, followed by the keypoint / limb lookup tables.
protoFile = "/content/drive/MyDrive/OpenPose-Pose-Estimation/pose/pose_deploy_linevec.prototxt"
weightsFile = "/content/drive/MyDrive/OpenPose-Pose-Estimation/pose/pose_iter_440000.caffemodel"

# Number of keypoints; matches the 18 names in keypointsMapping below.
nPoints = 18
# COCO Output Format
keypointsMapping = ['Nose', 'Neck', 'R-Sho', 'R-Elb', 'R-Wr', 'L-Sho',
                    'L-Elb', 'L-Wr', 'R-Hip', 'R-Knee', 'R-Ank', 'L-Hip',
                    'L-Knee', 'L-Ank', 'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear']

# Pairs of indices into keypointsMapping (presumably the limbs joining two keypoints).
POSE_PAIRS = [[1,2], [1,5], [2,3], [3,4], [5,6], [6,7],
              [1,8], [8,9], [9,10], [1,11], [11,12], [12,13],
              [1,0], [0,14], [14,16], [0,15], [15,17],
              [2,17], [5,16] ]

# One index pair per POSE_PAIRS entry - presumably the network output channels
# holding that limb's affinity field; TODO confirm against the model.
mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44],
          [19,20], [21,22], [23,24], [25,26], [27,28], [29,30],
          [47,48], [49,50], [53,54], [51,52], [55,56],
          [37,38], [45,46]]

# NOTE(review): 18 colours are listed but POSE_PAIRS / mapIdx have 19 entries,
# so indexing colors by limb index would overflow on the last pair - confirm intended use.
colors = [ [0,100,255], [0,100,255], [0,255,255], [0,100,255], [0,255,255], [0,100,255],
         [0,255,0], [255,200,100], [255,0,255], [0,255,0], [255,200,100], [255,0,255],
         [0,0,255], [255,0,0], [200,200,0], [255,0,0], [200,200,0], [0,0,0]]

In [6]:
# Load the pose metadata; the context manager closes the file handle,
# which the bare Path(...).open() call left open.
with Path('pose-list-with-meta.json').open() as pose_list_file:
    pose_list = json.load(pose_list_file)

In [10]:
#Read classifier csv file
val_csv = pd.read_csv('/content/drive/MyDrive/OpenPose-Pose-Estimation/val_data.csv')

In [7]:
#Encode pose
pose_id_to_name = {0: 'Bharadvajasana I', 1: 'Padangusthasana', 2: 'Paripurna Navasana', 3: 'Baddha Konasana', 4: 'Dhanurasana', 5: 'Setu Bandha Sarvangasana', 6: 'Ustrasana', 7: 'Marjaryasana', 8: 'Chakravakasana', 9: 'Ashtanga Namaskara', 10: 'Utkatasana', 11: 'Balasana', 12: 'Bhujangasana', 13: 'Savasana', 14: 'Gomukhasana', 15: 'Bitilasana', 16: 'Bakasana', 17: 'Makara Adho Mukha Svanasana', 18: 'Ardha Pincha Mayurasana', 19: 'Adho Mukha Svanasana', 20: 'Garudasana', 21: 'Sukhasana', 22: 'Astavakrasana', 23: 'Utthita Hasta Padangustasana', 24: 'Uttana Shishosana', 25: 'Utthita Parsvakonasana', 26: 'Utthita Trikonasana', 27: 'Pincha Mayurasana', 28: 'Agnistambhasana', 29: 'Tittibhasana', 30: 'Matsyasana', 31: 'Chaturanga Dandasana', 32: 'Malasana', 33: 'Parighasana', 34: 'Ardha Bhekasana', 35: 'Ardha Matsyendrasana', 36: 'Supta Matsyendrasana', 37: 'Ardha Chandrasana', 38: 'Adho Mukha Vriksasana', 39: 'Ananda Balasana', 40: 'Janu Sirsasana', 41: 'Virasana', 42: 'Krounchasana', 43: 'Utthita Ashwa Sanchalanasana', 44: 'Parsvottanasana', 45: 'Viparita Karani', 46: 'Salabhasana', 47: 'Natarajasana', 48: 'Padmasana', 49: 'Anjaneyasana', 50: 'Marichyasana III', 51: 'Hanumanasana', 52: 'Tadasana', 53: 'Pasasana', 54: 'Eka Pada Rajakapotasana', 55: 'Eka Pada Rajakapotasana II', 56: 'Mayurasana', 57: 'Kapotasana', 58: 'Phalakasana', 59: 'Halasana', 60: 'Eka Pada Koundinyanasana I', 61: 'Eka Pada Koundinyanasana II', 62: 'Marichyasana I', 63: 'Supta Baddha Konasana', 64: 'Supta Padangusthasana', 65: 'Supta Virasana', 66: 'Parivrtta Janu Sirsasana', 67: 'Parivrtta Parsvakonasana', 68: 'Parivrtta Trikonasana', 69: 'Tolasana', 70: 'Paschimottanasana', 72: 'Parsva Bakasana', 73: 'Vasisthasana', 74: 'Anantasana', 75: 'Salamba Bhujangasana', 76: 'Dandasana', 77: 'Uttanasana', 78: 'Ardha Uttanasana', 79: 'Urdhva Prasarita Eka Padasana', 80: 'Salamba Sirsasana', 81: 'Salamba Sarvangasana', 82: 'Vriksasana', 83: 'Urdhva Dhanurasana', 84: 'Dwi Pada Viparita Dandasana', 85: 
'Purvottanasana', 86: 'Urdhva Hastasana', 87: 'Urdhva Mukha Svanasana', 88: 'Virabhadrasana I', 89: 'Virabhadrasana II', 90: 'Virabhadrasana III', 91: 'Upavistha Konasana', 92: 'Prasarita Padottanasana', 93: 'Camatkarasana', 94: 'Yoganidrasana', 95: 'Vrischikasana', 96: 'Vajrasana', 97: 'Tulasana', 98: 'Simhasana', 99: 'Makarasana', 100: 'Lolasana', 101: 'Kurmasana', 102: 'Garbha Pindasana', 103: 'Durvasasana', 71: 'Bhujapidasana', 104: 'Bhekasana', 105: 'Bhairavasana', 106: 'Ganda Bherundasana'}

In [8]:
#Decode pose
pose_name_to_id = {'bharadvajasana i': 0, 'padangusthasana': 1, 'paripurna navasana': 2, 'baddha konasana': 3, 'dhanurasana': 4, 'setu bandha sarvangasana': 5, 'ustrasana': 6, 'marjaryasana': 7, 'chakravakasana': 8, 'ashtanga namaskara': 9, 'utkatasana': 10, 'balasana': 11, 'bhujangasana': 12, 'savasana': 13, 'gomukhasana': 14, 'bitilasana': 15, 'bakasana': 16, 'makara adho mukha svanasana': 17, 'ardha pincha mayurasana': 18, 'adho mukha svanasana': 19, 'garudasana': 20, 'sukhasana': 21, 'astavakrasana': 22, 'utthita hasta padangustasana': 23, 'uttana shishosana': 24, 'utthita parsvakonasana': 25, 'utthita trikonasana': 26, 'pincha mayurasana': 27, 'agnistambhasana': 28, 'tittibhasana': 29, 'matsyasana': 30, 'chaturanga dandasana': 31, 'malasana': 32, 'parighasana': 33, 'ardha bhekasana': 34, 'ardha matsyendrasana': 35, 'supta matsyendrasana': 36, 'ardha chandrasana': 37, 'adho mukha vriksasana': 38, 'ananda balasana': 39, 'janu sirsasana': 40, 'virasana': 41, 'krounchasana': 42, 'utthita ashwa sanchalanasana': 43, 'parsvottanasana': 44, 'viparita karani': 45, 'salabhasana': 46, 'natarajasana': 47, 'padmasana': 48, 'anjaneyasana': 49, 'marichyasana iii': 50, 'hanumanasana': 51, 'tadasana': 52, 'pasasana': 53, 'eka pada rajakapotasana': 54, 'eka pada rajakapotasana ii': 55, 'mayurasana': 56, 'kapotasana': 57, 'phalakasana': 58, 'halasana': 59, 'eka pada koundinyanasana i': 60, 'eka pada koundinyanasana ii': 61, 'marichyasana i': 62, 'supta baddha konasana': 63, 'supta padangusthasana': 64, 'supta virasana': 65, 'parivrtta janu sirsasana': 66, 'parivrtta parsvakonasana': 67, 'parivrtta trikonasana': 68, 'tolasana': 69, 'paschimottanasana': 70, 'parsva bakasana': 72, 'vasisthasana': 73, 'anantasana': 74, 'salamba bhujangasana': 75, 'dandasana': 76, 'uttanasana': 77, 'ardha uttanasana': 78, 'urdhva prasarita eka padasana': 79, 'salamba sirsasana': 80, 'salamba sarvangasana': 81, 'vriksasana': 82, 'urdhva dhanurasana': 83, 'dwi pada viparita dandasana': 84, 
'purvottanasana': 85, 'urdhva hastasana': 86, 'urdhva mukha svanasana': 87, 'virabhadrasana i': 88, 'virabhadrasana ii': 89, 'virabhadrasana iii': 90, 'upavistha konasana': 91, 'prasarita padottanasana': 92, 'camatkarasana': 93, 'yoganidrasana': 94, 'vrischikasana': 95, 'vajrasana': 96, 'tulasana': 97, 'simhasana': 98, 'makarasana': 99, 'lolasana': 100, 'kurmasana': 101, 'garbha pindasana': 102, 'durvasasana': 103, 'bhujapidasana': 71, 'bhekasana': 104, 'bhairavasana': 105, 'ganda bherundasana': 106}

In [9]:
#Read classifier csv file
train_csv = pd.read_csv('/content/drive/MyDrive/OpenPose-Pose-Estimation/train_data.csv')

### **Openpose에서 keypoint 받아와서 vector 로 변환**

In [16]:

def getKeypoints(probMap, threshold=0.1):
    """Find one peak per above-threshold blob in a single confidence map.

    The map is blurred with a 3x3 Gaussian, thresholded, and split into
    contours; for every contour the location and value of the maximum of the
    blurred map inside that contour are reported.

    Parameters:
    probMap: 2D confidence map for one body part.
    threshold (float): minimum blurred value for a pixel to belong to a blob.

    Returns:
    list: one (x, y, confidence) tuple per contour, in contour order.
    """
    smoothed = cv2.GaussianBlur(probMap, (3,3), 0, 0)
    binary = np.uint8(smoothed > threshold)

    contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    def _peak_inside(contour):
        # Mask is 1 inside the contour and 0 elsewhere, so the product keeps only this blob.
        blob = cv2.fillConvexPoly(np.zeros(binary.shape), contour, 1)
        _, peak_value, _, peak_location = cv2.minMaxLoc(smoothed * blob)
        x, y = peak_location
        return (x, y, peak_value)

    return [_peak_inside(contour) for contour in contours]


In [17]:
import numpy as np
import numbers
import logging
logging.basicConfig(level=logging.DEBUG)
def keypoints_to_vector(detected_keypoints, nPoints=18):
    """
    Flatten per-joint keypoints into a fixed-length feature vector.

    Parameters:
    detected_keypoints (list): one entry per joint, in joint order. Each entry
                               is an (x, y, confidence) triple, or None / an
                               empty sequence when the joint was not detected.
    nPoints (int): number of joints the vector has room for (18 for COCO).
                   Entries beyond nPoints are ignored.

    Returns:
    numpy.ndarray: 1D float vector of length nPoints * 3 laid out as
                   [x0, y0, c0, x1, y1, c1, ...]. Slots of undetected or
                   missing joints stay at 0.
    """
    feature_vector = np.zeros(nPoints * 3)

    for i, keypoint in enumerate(detected_keypoints):
        if i >= nPoints:
            break  # no room left in the vector

        # Explicit emptiness test: `if keypoint:` raises for NumPy arrays
        # because their truth value is ambiguous.
        if keypoint is None or np.size(keypoint) == 0:
            continue

        x, y, confidence = keypoint
        start = i * 3
        feature_vector[start:start + 3] = (x, y, confidence)

    return feature_vector



In [None]:

# Sanity check
detected_keypoints = [
    (100, 200, 0.9),  # Nose
    None,             # Neck (not detected)
    (150, 250, 0.8),  # R-Sho
    # ... remaining joints
]

# Convert the keypoints to a feature vector
feature_vector = keypoints_to_vector(detected_keypoints)
print("특징 벡터:", feature_vector)


In [19]:
import cv2
import numpy as np
import os

def process_image_with_openpose(image, net, inHeight=368, nPoints=18):
    """Run the OpenPose network on one image and return its keypoint feature vector.

    For each of the first nPoints output channels, the confidence map is
    resized to the image size and searched for peaks. Exactly one entry per
    body part is kept - the highest-confidence peak, or None when the part is
    not detected - so slot i of the resulting vector always describes part i.
    (Appending every peak of every part to one flat list shifts the slots as
    soon as a part has zero or several peaks.)

    Parameters:
    image: image array as returned by cv2.imread (height is shape[0], width is shape[1]).
    net: network exposing setInput() / forward(); its output is indexed as
         output[0, part, :, :] - assumes one confidence map per part, TODO confirm.
    inHeight (int): network input height; width is scaled to keep the aspect ratio.
    nPoints (int): number of body parts to read from the network output.

    Returns:
    numpy.ndarray: vector of length nPoints * 3 from keypoints_to_vector.

    Raises:
    ValueError: if image is None (e.g. cv2.imread failed to read the file).
    """
    if image is None:
        raise ValueError("image is None; check that the image path exists and is readable")

    frameWidth = image.shape[1]
    frameHeight = image.shape[0]
    inWidth = int((inHeight / frameHeight) * frameWidth)

    inpBlob = cv2.dnn.blobFromImage(image, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()

    detected_keypoints = []
    threshold = 0.1
    logging.debug("Processing image for keypoints...")
    for part in range(nPoints):
        probMap = output[0, part, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        candidates = getKeypoints(probMap, threshold)
        logging.debug(f"Part {part}: Detected {len(candidates)} keypoints.")

        # Keep a single (x, y, confidence) per part so vector slots stay aligned with parts.
        best = max(candidates, key=lambda kp: kp[2]) if candidates else None
        logging.debug(f"Keypoint: {best}")
        detected_keypoints.append(best)

    # Flatten the per-part keypoints into the feature vector
    feature_vector = keypoints_to_vector(detected_keypoints, nPoints)
    logging.debug(f"Feature vector size: {len(feature_vector)}")
    logging.debug(f"Feature vector: {feature_vector}")
    return feature_vector



## **DATA PREPROCESS**

In [20]:
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image

class YogaPoseDataset(Dataset):
    """Dataset yielding (image, OpenPose feature vector, class index) per annotation row.

    The annotation CSV must provide `pose_name` and `pose_id` columns; its
    first column is used as the image path relative to `img_dir`.

    NOTE(review): this definition shadows the `YogaPoseDataset` imported from
    `custom_datasets` earlier in the notebook.

    Parameters:
    csv_file: path of the annotation CSV.
    img_dir: directory the image paths are relative to.
    net: OpenPose network passed to process_image_with_openpose.
    transform: optional callable applied to the PIL image.
    name_prefix (str or None): keep only rows whose pose_name starts with this
        prefix (case-insensitive). Defaults to 'a', the subset used so far;
        pass None or '' to use every row.
    pose_id_to_index (dict or None): mapping from raw pose_id to class index.
        When None it is built from the pose_ids present in this (filtered)
        CSV. Pass the training set's mapping to a validation set so both
        share the same class indices even if their pose_ids differ.
    """

    def __init__(self, csv_file, img_dir, net, transform=None, name_prefix='a', pose_id_to_index=None):
        annotations = pd.read_csv(csv_file)
        if name_prefix:
            pose_names = annotations['pose_name'].str.lower()
            # na=False: rows with a missing pose_name are dropped instead of breaking the mask
            annotations = annotations[pose_names.str.startswith(name_prefix.lower(), na=False)]
        self.annotations = annotations
        self.img_dir = img_dir
        self.net = net
        self.transform = transform

        if pose_id_to_index is None:
            unique_pose_ids = sorted(self.annotations['pose_id'].unique())
            pose_id_to_index = {pose_id: idx for idx, pose_id in enumerate(unique_pose_ids)}
        self.pose_id_to_index = pose_id_to_index

        # OpenPose inference is deterministic per image, so run it once per row
        # rather than once per row per epoch.
        self._feature_cache = {}
        print(f"Filtered dataset size: {len(self.annotations)}")

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Read the label by column name: the mapping above is keyed on the
        # 'pose_id' column, so a positional lookup could disagree with it.
        raw_pose_id = self.annotations['pose_id'].iloc[index]
        pose_id = torch.tensor(int(self.pose_id_to_index[raw_pose_id]), dtype=torch.long)

        # Extract (or reuse) the OpenPose feature vector
        if index not in self._feature_cache:
            bgr_image = cv2.imread(img_path)
            if bgr_image is None:
                raise ValueError(f"OpenCV could not read image: {img_path}")
            self._feature_cache[index] = process_image_with_openpose(bgr_image, self.net)

        # A fresh tensor per call keeps the cached array safe from in-place edits
        feature_vector = torch.tensor(self._feature_cache[index], dtype=torch.float)

        return image, feature_vector, pose_id




In [21]:
from torch.utils.data import DataLoader

from torchvision import transforms


# Image preprocessing applied to the PIL image inside YogaPoseDataset.__getitem__.
transform = transforms.Compose([
    #transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    #transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # normalization
])


# OpenPose network built from the Caffe files configured in protoFile / weightsFile.
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

train_dataset = YogaPoseDataset(
    csv_file='/content/drive/MyDrive/dataset/train_dataset.csv',
    img_dir='/content/drive/MyDrive/dataset',
    net=net,
    transform=transform
)


train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


Filtered dataset size: 171


### **1. Simple MLP Model** (INPUT - 관절 피쳐벡터 + pose_name)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

import torch
import torch.nn as nn
import torch.nn.functional as F

class PoseClassifier(nn.Module):
    """MLP that classifies a flat keypoint feature vector.

    Four hidden stages (widths 256, 512, 1024, 512), each applying
    Linear -> ReLU -> BatchNorm1d -> Dropout(0.5), followed by a final Linear
    layer producing `num_classes` logits. Submodules are registered as
    fc1/bn1/dropout1 ... fc4/bn4/dropout4 and fc5.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        widths = (input_size, 256, 512, 1024, 512)
        # Register the stages in fc/bn/dropout order so parameter names and
        # initialisation order stay fc1, bn1, dropout1, fc2, ...
        for stage in range(1, len(widths)):
            setattr(self, f"fc{stage}", nn.Linear(widths[stage - 1], widths[stage]))
            setattr(self, f"bn{stage}", nn.BatchNorm1d(widths[stage]))
            setattr(self, f"dropout{stage}", nn.Dropout(0.5))

        self.fc5 = nn.Linear(widths[-1], num_classes)

    def forward(self, x):
        """Return the class logits for a batch of feature vectors."""
        for stage in range(1, 5):
            linear = getattr(self, f"fc{stage}")
            norm = getattr(self, f"bn{stage}")
            drop = getattr(self, f"dropout{stage}")
            x = drop(norm(F.relu(linear(x))))

        return self.fc5(x)


# 18 joints * 3 values (x, y, confidence) = 54 inputs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PoseClassifier(input_size=54, num_classes=13).to(device)



In [None]:
import torch
import torch.optim as optim
def calculate_accuracy(outputs, targets):
    """Return the percentage of rows in `outputs` whose top-scoring class equals `targets`."""
    predicted_classes = torch.max(outputs, 1).indices
    hits = (predicted_classes == targets).sum().item()
    return 100 * hits / targets.size(0)

def validate(model, valid_loader, criterion):
    """Evaluate `model` on `valid_loader` using only the keypoint feature vectors.

    Parameters:
    model: module called as model(feature_vectors).
    valid_loader: iterable of (images, feature_vectors, pose_ids) batches.
    criterion: loss called as criterion(outputs, pose_ids).

    Returns:
    tuple: (mean of the per-batch losses, accuracy in percent over all samples).

    Raises:
    ValueError: if the loader yields no samples.
    """
    model.eval()

    # Use the device the model lives on rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = 0

    with torch.no_grad():
        for _images, feature_vectors, pose_ids in valid_loader:
            feature_vectors = feature_vectors.to(target_device)
            pose_ids = pose_ids.to(target_device)
            outputs = model(feature_vectors)
            loss_sum += criterion(outputs, pose_ids).item()
            # Count hits per sample: averaging per-batch percentages over-weights
            # a smaller final batch.
            correct += (outputs.argmax(dim=1) == pose_ids).sum().item()
            seen += pose_ids.size(0)
            num_batches += 1

    if seen == 0:
        raise ValueError("valid_loader produced no samples to validate on")

    return loss_sum / num_batches, 100 * correct / seen

def train(model, train_loader, valid_loader, criterion, optimizer, epochs=10, print_every=20):
    """Train `model` on keypoint feature vectors and report metrics after every epoch.

    Batches are (images, feature_vectors, pose_ids); only the feature vectors
    and labels are used, and both are moved to the notebook-level `device`.
    The running mean loss is printed every `print_every` batches, and the
    train / validation loss and accuracy at the end of each epoch.
    """
    for epoch_idx in range(epochs):
        epoch_no = epoch_idx + 1
        model.train()
        running_loss = 0
        n_correct = 0
        n_seen = 0

        for step, batch in enumerate(train_loader, start=1):
            _images, feature_vectors, pose_ids = batch
            feature_vectors = feature_vectors.to(device)
            pose_ids = pose_ids.to(device)

            optimizer.zero_grad()
            logits = model(feature_vectors)
            batch_loss = criterion(logits, pose_ids)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()
            predicted = torch.max(logits.data, 1)[1]
            n_seen += pose_ids.size(0)
            n_correct += (predicted == pose_ids).sum().item()

            if step % print_every == 0:
                print(f'Epoch {epoch_no}, Batch {step}, Loss: {running_loss / step:.4f}')

        train_accuracy = 100 * n_correct / n_seen
        valid_loss, valid_accuracy = validate(model, valid_loader, criterion)
        mean_train_loss = running_loss / len(train_loader)
        print(f'End of Epoch {epoch_no}, '
              f'Train Loss: {mean_train_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.2f}%, '
              f'Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.2f}%\n')



# Validation split, built with the same OpenPose net and image transform as the training split.
# NOTE(review): each YogaPoseDataset derives its own pose_id -> index mapping from its
# own CSV, so class indices only agree with the training set if both CSVs contain the
# same pose_ids - confirm.
valid_dataset = YogaPoseDataset(
    csv_file='/content/drive/MyDrive/dataset/valid_dataset.csv',
    img_dir='/content/drive/MyDrive/dataset',
    net=net,
    transform=transform
)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=True)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0008)

# Train the MLP classifier held in `model`.
train(model, train_loader, valid_loader, criterion, optimizer, epochs=20, print_every=20)



## **2. ResNetFeatures(image) + MLP CLASSIFIER (Input : 관절 피쳐벡터)**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

class ImageVectorCombinedModel(nn.Module):
    """Classifier that fuses ResNet-18 image features with an MLP over the keypoint vector.

    The ResNet's final fc layer is replaced by nn.Identity so the backbone
    returns its features unchanged; they are projected to 128 dimensions,
    concatenated with the 1024-dimensional output of the vector MLP, and
    classified by a three-layer head.
    """

    def __init__(self, vector_input_size, num_classes):
        super().__init__()
        self.resnet = models.resnet18(pretrained=True)
        self.resnet.fc = nn.Identity()  # expose the backbone features instead of ImageNet logits
        # MLP over the keypoint feature vector
        self.vector_processor = nn.Sequential(
            nn.Linear(vector_input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
        )

        # Shrink the image features (512 -> 128) while the vector features are expanded to 1024
        self.image_dim_reducer = nn.Linear(512, 128)
        self.combined_fc = nn.Sequential(
            nn.Linear(128 + 1024, 512),  # fused image + vector features
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, images, vectors):
        """Return class logits for a batch of images and their keypoint vectors."""
        image_out = self.resnet(images)
        image_out = self.image_dim_reducer(image_out)

        vector_out = self.vector_processor(vectors)

        combined_out = torch.cat((image_out, vector_out), dim=1)
        out = self.combined_fc(combined_out)
        return out


# The training loop moves every batch to `device`, so the model has to live there too;
# without this the run fails with a device mismatch whenever CUDA is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ImageVectorCombinedModel(vector_input_size=54, num_classes=13).to(device)


In [None]:
import torch
import torch.optim as optim


def calculate_accuracy(outputs, targets):
    """Return the percentage of rows in `outputs` whose top-scoring class equals `targets`."""
    predicted_classes = torch.max(outputs, 1).indices
    hits = (predicted_classes == targets).sum().item()
    return 100 * hits / targets.size(0)

def validate(model, valid_loader, criterion, device):
    """Evaluate an image + keypoint-vector model on `valid_loader`.

    Parameters:
    model: module called as model(images, vectors).
    valid_loader: iterable of (images, vectors, pose_ids) batches.
    criterion: loss called as criterion(outputs, pose_ids).
    device: device every batch is moved to before the forward pass.

    Returns:
    tuple: (mean of the per-batch losses, accuracy in percent over all samples).

    Raises:
    ValueError: if the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = 0

    with torch.no_grad():
        for images, vectors, pose_ids in valid_loader:
            images = images.to(device)
            vectors = vectors.to(device)
            pose_ids = pose_ids.to(device)

            outputs = model(images, vectors)
            loss_sum += criterion(outputs, pose_ids).item()
            # Count hits per sample: averaging per-batch percentages over-weights
            # a smaller final batch.
            correct += (outputs.argmax(dim=1) == pose_ids).sum().item()
            seen += pose_ids.size(0)
            num_batches += 1

    if seen == 0:
        raise ValueError("valid_loader produced no samples to validate on")

    return loss_sum / num_batches, 100 * correct / seen


import os

def train(model, train_loader, valid_loader, criterion, optimizer, device, epochs, print_every=20):
    """Train an image + keypoint-vector model and report metrics after every epoch.

    Each (images, vectors, pose_ids) batch is moved to `device`. The running
    mean loss is printed every `print_every` steps, and the train / validation
    loss and accuracy at the end of each epoch.
    """
    for epoch_idx in range(epochs):
        epoch_no = epoch_idx + 1
        model.train()
        running_loss = 0.0
        n_correct = 0
        n_seen = 0

        for step, batch in enumerate(train_loader, start=1):
            images, vectors, pose_ids = batch
            images = images.to(device)
            vectors = vectors.to(device)
            pose_ids = pose_ids.to(device)

            optimizer.zero_grad()
            logits = model(images, vectors)
            batch_loss = criterion(logits, pose_ids)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()
            predicted = torch.max(logits.data, 1)[1]
            n_seen += pose_ids.size(0)
            n_correct += (predicted == pose_ids).sum().item()

            if step % print_every == 0:
                print(f'Epoch {epoch_no}/{epochs}, Step {step}/{len(train_loader)}, Loss: {running_loss / step:.4f}')

        train_accuracy = 100 * n_correct / n_seen
        valid_loss, valid_accuracy = validate(model, valid_loader, criterion, device)
        mean_train_loss = running_loss / len(train_loader)
        print(f'End of Epoch {epoch_no}, '
              f'Train Loss: {mean_train_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.2f}%, '
              f'Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.2f}%\n')



# Validation split, built with the same OpenPose net and image transform as the training split.
valid_dataset = YogaPoseDataset(
    csv_file='/content/drive/MyDrive/dataset/valid_dataset.csv',
    img_dir='/content/drive/MyDrive/dataset',
    net=net,
    transform=transform
)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=True)



criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0008)


# Train the image + vector model held in `model`; batches are moved to `device` inside train().
train(model, train_loader, valid_loader, criterion, optimizer, device = device, epochs=20, print_every=20)


## **3. LSTM CLASSIFIER** (Input : 관절 피쳐벡터+pose name)





In [None]:
class PoseLSTMClassifier(nn.Module):
    """LSTM followed by a linear layer that classifies keypoint feature vectors.

    Accepts either a batch of sequences shaped (batch, seq_len, input_size) or
    a batch of single vectors shaped (batch, input_size); the latter is treated
    as sequences of length 1. The logits come from the last time step.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes)."""
        if x.dim() == 2:
            # nn.LSTM reads a 2D tensor as ONE unbatched sequence, which would chain
            # the samples of a batch together through the hidden state. Add an
            # explicit length-1 time axis so every sample is processed independently.
            x = x.unsqueeze(1)
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])



# input_size: dimension of the keypoint vector (18 joints * 3 values = 54)
# hidden_size: size of the LSTM hidden state
# num_layers: number of stacked LSTM layers
# num_classes: number of yoga poses
# NOTE(review): 107 matches the size of pose_id_to_name, while the other models in this
# notebook use 13 classes for the filtered subset - confirm which is intended.
# `device` is defined before its first use so this cell does not rely on an earlier cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LSTMmodel = PoseLSTMClassifier(input_size=54, hidden_size=128, num_layers=2, num_classes=107).to(device)



In [None]:
import torch
import torch.optim as optim
def calculate_accuracy(outputs, targets):
    """Return the percentage of rows in `outputs` whose top-scoring class equals `targets`."""
    predicted_classes = torch.max(outputs, 1).indices
    hits = (predicted_classes == targets).sum().item()
    return 100 * hits / targets.size(0)

def validate(model, valid_loader, criterion):
    """Evaluate `model` on `valid_loader` using only the keypoint feature vectors.

    Parameters:
    model: module called as model(feature_vectors).
    valid_loader: iterable of (images, feature_vectors, pose_ids) batches.
    criterion: loss called as criterion(outputs, pose_ids).

    Returns:
    tuple: (mean of the per-batch losses, accuracy in percent over all samples).

    Raises:
    ValueError: if the loader yields no samples.
    """
    model.eval()

    # Use the device the model lives on rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = 0

    with torch.no_grad():
        for _images, feature_vectors, pose_ids in valid_loader:
            feature_vectors = feature_vectors.to(target_device)
            pose_ids = pose_ids.to(target_device)
            outputs = model(feature_vectors)
            loss_sum += criterion(outputs, pose_ids).item()
            # Count hits per sample: averaging per-batch percentages over-weights
            # a smaller final batch.
            correct += (outputs.argmax(dim=1) == pose_ids).sum().item()
            seen += pose_ids.size(0)
            num_batches += 1

    if seen == 0:
        raise ValueError("valid_loader produced no samples to validate on")

    return loss_sum / num_batches, 100 * correct / seen

def train(model, train_loader, valid_loader, criterion, optimizer, epochs=10, print_every=20):
    """Train `model` on keypoint feature vectors and report metrics after every epoch.

    Batches are (images, feature_vectors, pose_ids); only the feature vectors
    and labels are used, and both are moved to the notebook-level `device`.
    The running mean loss is printed every `print_every` batches, and the
    train / validation loss and accuracy at the end of each epoch.
    """
    for epoch_idx in range(epochs):
        epoch_no = epoch_idx + 1
        model.train()
        running_loss = 0
        n_correct = 0
        n_seen = 0

        for step, batch in enumerate(train_loader, start=1):
            _images, feature_vectors, pose_ids = batch
            feature_vectors = feature_vectors.to(device)
            pose_ids = pose_ids.to(device)

            optimizer.zero_grad()
            logits = model(feature_vectors)
            batch_loss = criterion(logits, pose_ids)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()
            predicted = torch.max(logits.data, 1)[1]
            n_seen += pose_ids.size(0)
            n_correct += (predicted == pose_ids).sum().item()

            if step % print_every == 0:
                print(f'Epoch {epoch_no}, Batch {step}, Loss: {running_loss / step:.4f}')

        train_accuracy = 100 * n_correct / n_seen
        valid_loss, valid_accuracy = validate(model, valid_loader, criterion)
        mean_train_loss = running_loss / len(train_loader)
        print(f'End of Epoch {epoch_no}, '
              f'Train Loss: {mean_train_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.2f}%, '
              f'Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.2f}%\n')



# Validation split, built with the same OpenPose net and image transform as the training split.
valid_dataset = YogaPoseDataset(
    csv_file='/content/drive/MyDrive/dataset/valid_dataset.csv',
    img_dir='/content/drive/MyDrive/dataset',
    net=net,
    transform=transform
)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

criterion = nn.CrossEntropyLoss()
# The optimizer must own the parameters of the model being trained. Building it from
# `model` (the classifier of an earlier section) left the LSTM weights untouched.
optimizer = optim.Adam(LSTMmodel.parameters(), lr=0.001)

train(LSTMmodel, train_loader, valid_loader, criterion, optimizer, epochs=10, print_every=20)



## **4. ResNetFeatures(image) + LSTM CLASSIFIER (Input : 관절 피쳐벡터)**

In [None]:
import torchvision.models as models

class ResNetFeatures(nn.Module):
    """Pretrained ResNet-18 with its last child module removed, used as a feature extractor."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Keep every child except the final one.
        kept_layers = list(backbone.children())[:-1]
        self.features = nn.Sequential(*kept_layers)

    def forward(self, x):
        """Return the extracted features flattened to (batch, -1)."""
        feature_maps = self.features(x)
        return feature_maps.view(feature_maps.size(0), -1)

resnet_features = ResNetFeatures()
resnet_features = resnet_features.to(device)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ImageVectorCombinedModel(nn.Module):
    """Classifier that fuses ResNet-18 image features with an LSTM over the keypoint vector.

    The ResNet's final fc layer is replaced by nn.Identity so the backbone
    returns its features unchanged. `vectors` may be shaped
    (batch, vector_input_size) or (batch, seq_len, vector_input_size); the
    LSTM output at the last time step is concatenated with the image features
    and classified by two linear layers.
    """

    def __init__(self, vector_input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.resnet = models.resnet18(pretrained=True)
        self.resnet.fc = nn.Identity()

        self.lstm = nn.LSTM(vector_input_size, hidden_size, num_layers, batch_first=True)

        self.combined_fc1 = nn.Linear(512 + hidden_size, 128)
        self.combined_fc2 = nn.Linear(128, num_classes)

    def forward(self, images, vectors):
        """Return class logits of shape (batch, num_classes)."""
        image_out = self.resnet(images)

        if vectors.dim() == 2:
            # nn.LSTM reads a 2D tensor as ONE unbatched sequence, which would chain
            # the samples of a batch together through the hidden state. Add an
            # explicit length-1 time axis so every sample is processed independently.
            vectors = vectors.unsqueeze(1)
        lstm_out, _ = self.lstm(vectors)
        lstm_out = lstm_out[:, -1, :]  # last time step -> (batch, hidden_size)

        combined_out = torch.cat((image_out, lstm_out), dim=1)

        combined_out = F.relu(self.combined_fc1(combined_out))
        out = self.combined_fc2(combined_out)
        return out


In [None]:
import torch
import torch.optim as optim
def calculate_accuracy(outputs, targets):
    """Return the percentage of rows in `outputs` whose top-scoring class equals `targets`."""
    predicted_classes = torch.max(outputs, 1).indices
    hits = (predicted_classes == targets).sum().item()
    return 100 * hits / targets.size(0)

def validate(model, valid_loader, criterion, device):
    """Evaluate an image + keypoint-vector model on `valid_loader`.

    Parameters:
    model: module called as model(images, vectors).
    valid_loader: iterable of (images, vectors, pose_ids) batches.
    criterion: loss called as criterion(outputs, pose_ids).
    device: device every batch is moved to before the forward pass.

    Returns:
    tuple: (mean of the per-batch losses, accuracy in percent over all samples).

    Raises:
    ValueError: if the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = 0

    with torch.no_grad():
        for images, vectors, pose_ids in valid_loader:
            images = images.to(device)
            vectors = vectors.to(device)
            pose_ids = pose_ids.to(device)

            outputs = model(images, vectors)
            loss_sum += criterion(outputs, pose_ids).item()
            # Count hits per sample: averaging per-batch percentages over-weights
            # a smaller final batch.
            correct += (outputs.argmax(dim=1) == pose_ids).sum().item()
            seen += pose_ids.size(0)
            num_batches += 1

    if seen == 0:
        raise ValueError("valid_loader produced no samples to validate on")

    return loss_sum / num_batches, 100 * correct / seen


import os

def train(model, train_loader, valid_loader, criterion, optimizer, device, epochs, model_save_path, print_every=20):
    """Train an image + keypoint-vector model, validating and checkpointing after every epoch.

    Parameters:
    model: module called as model(images, vectors).
    train_loader / valid_loader: iterables of (images, vectors, pose_ids) batches.
    criterion: loss called as criterion(outputs, pose_ids).
    optimizer: optimizer stepping the model's parameters.
    device: device every batch is moved to.
    epochs (int): number of passes over train_loader.
    model_save_path: directory that receives model_epoch_<n>.pth after each epoch;
        it is created if it does not exist.
    print_every (int): print the running mean loss every this many steps.
    """
    # torch.save does not create missing directories; make sure the target exists up front
    # instead of failing after the first full epoch.
    os.makedirs(model_save_path, exist_ok=True)

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for i, (images, vectors, pose_ids) in enumerate(train_loader):
            images, vectors, pose_ids = images.to(device), vectors.to(device), pose_ids.to(device)

            optimizer.zero_grad()
            outputs = model(images, vectors)
            loss = criterion(outputs, pose_ids)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += pose_ids.size(0)
            correct += (predicted == pose_ids).sum().item()

            if (i + 1) % print_every == 0:
                print(f'Epoch {epoch+1}/{epochs}, Step {i+1}/{len(train_loader)}, Loss: {train_loss / (i+1):.4f}')
        train_accuracy = 100 * correct / total
        valid_loss, valid_accuracy = validate(model, valid_loader, criterion, device)
        print(f'End of Epoch {epoch+1}, '
              f'Train Loss: {train_loss / len(train_loader):.4f}, '
              f'Train Accuracy: {train_accuracy:.2f}%, '
              f'Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.2f}%\n')

        # One checkpoint per epoch, named by the 1-based epoch number
        save_path = os.path.join(model_save_path, f'model_epoch_{epoch+1}.pth')
        torch.save(model.state_dict(), save_path)
        print(f'Model saved to {save_path}')



# Validation split, built with the same OpenPose net and image transform as the training split.
valid_dataset = YogaPoseDataset(
    csv_file='/content/drive/MyDrive/dataset/valid_dataset.csv',
    img_dir='/content/drive/MyDrive/dataset',
    net=net,
    transform=transform
)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=True)


In [None]:

# NOTE(review): image_input_size is not referenced anywhere in this cell - confirm whether it is still needed.
image_input_size = 112 #changed 224 to 112
vector_input_size = 54
hidden_size = 128        # size of the LSTM hidden state
num_layers = 2           # number of stacked LSTM layers
num_classes = 13

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ImageVectorCombinedModel(vector_input_size, hidden_size, num_layers, num_classes)
model = model.to(device)


optimizer = optim.Adam(model.parameters(), lr=0.0005)
criterion = nn.CrossEntropyLoss()
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


# Directory passed to train(), which writes one model_epoch_<n>.pth file per epoch into it.
model_save_path = '/content/drive/MyDrive/OpenPose-Pose-Estimation'
train(model, train_loader, valid_loader, criterion, optimizer, device, epochs=10, model_save_path=model_save_path, print_every=20)

