In [None]:
# requirements: numpy, pandas, torch, torch_geometric, scikit-learn
import torch
from torch.nn import Linear, ReLU, Dropout
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from torch_geometric.nn import global_mean_pool

# Load the CSV and split it into a feature matrix and integer class labels.
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('data.csv')
X = df.iloc[:, :-1].values  # every column except the last one

# The last column holds the class; encode it as integer ids.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df.iloc[:, -1].values)

# Convert one flattened sample into a graph.
def create_graph_data(x, y):
    """Build a torch_geometric ``Data`` graph from one flattened sample.

    Args:
        x: Flat array of node features. It is reshaped to ``(num_nodes, 4)``,
            i.e. four values per node (x, y, z, visibility).
        y: Integer class label of the sample.

    Returns:
        ``Data`` holding float node features ``x``, a long ``edge_index`` of
        shape ``(2, num_edges)`` and a one-element long label tensor ``y``.

    Raises:
        ValueError: If the sample has fewer nodes than the skeleton edges
            reference (``reshape`` itself also raises ``ValueError`` when the
            input length is not a multiple of 4).
    """
    # Connections between landmarks.
    # NOTE(review): the group labels below are translated from the original
    # comments; they do not obviously match MediaPipe Pose landmark numbering
    # (e.g. indices 11-14) -- confirm the intended topology.
    edges = [
        # Face connections
        (0, 1), (1, 2), (2, 3), (3, 4),  # nose
        (5, 6), (6, 7), (7, 8), (8, 9),  # left eye
        (10, 11), (11, 12), (12, 13), (13, 14),  # right eye
        # Torso connections
        (11, 12), (11, 23), (12, 24),  # shoulders
        (23, 24), (23, 25), (24, 26),  # hips
        (25, 27), (26, 28),  # knees
        #(27, 29), (28, 30),  # ankles
        #(29, 31), (30, 32)   # feet
    ]

    # Add the reverse direction of every edge, then drop duplicates while
    # keeping first-seen order: (11, 12) is listed twice above, and a repeated
    # entry in edge_index would count that connection twice during message
    # passing.
    edges = list(dict.fromkeys(edges + [(j, i) for i, j in edges]))

    # Node features (x, y, z, visibility)
    node_features = x.reshape(-1, 4)

    # Fail early with a clear message instead of an obscure indexing error
    # inside the convolution layers.
    highest_node = max(max(edge) for edge in edges)
    if node_features.shape[0] <= highest_node:
        raise ValueError(
            f"sample has {node_features.shape[0]} nodes but the edge list "
            f"references node index {highest_node}"
        )

    # Build the edge index as a (2, num_edges) tensor
    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()

    return Data(x=torch.tensor(node_features, dtype=torch.float),
                edge_index=edge_index,
                y=torch.tensor([y], dtype=torch.long))

# Build the dataset: one graph per (feature row, label) pair.
dataset = [create_graph_data(features, label) for features, label in zip(X, y)]

# GCN model definition
class GCN(torch.nn.Module):
    """Two-layer GCN that classifies whole graphs.

    Node features pass through two ``GCNConv`` layers (ReLU and dropout after
    the first one only), are averaged per graph with ``global_mean_pool`` and
    mapped to class logits by a linear layer.
    """

    def __init__(self, in_channels, hidden_channels, num_classes):
        """Create the layers.

        Args:
            in_channels: Number of input features per node.
            hidden_channels: Width of both graph convolution layers.
            num_classes: Number of output logits.
        """
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)
        self.dropout = Dropout(p=0.3)

    def forward(self, x, edge_index, batch):
        """Return one row of class logits per graph in the batch.

        Args:
            x: Node feature tensor.
            edge_index: Graph connectivity passed to both convolutions.
            batch: Graph assignment of every node, used for pooling.
        """
        hidden = self.dropout(F.relu(self.conv1(x, edge_index)))
        node_embeddings = self.conv2(hidden, edge_index)
        graph_embeddings = global_mean_pool(node_embeddings, batch)
        logits = self.lin(graph_embeddings)
        return logits
    
class FocalLoss(torch.nn.Module):
    """Multi-class focal loss computed from raw logits.

    The per-sample loss is ``alpha * (1 - p_t) ** gamma * CE``, where ``CE``
    is the sample's cross-entropy and ``p_t = exp(-CE)`` is the probability
    the model assigns to the target class.

    Args:
        alpha: Scalar factor applied to every sample's loss.
        gamma: Focusing exponent; larger values down-weight samples the model
            already classifies confidently.
        reduction: ``'mean'``, ``'sum'`` or ``'none'`` (per-sample losses).

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
    """

    _REDUCTIONS = ('none', 'mean', 'sum')

    def __init__(self, alpha=1.0, gamma=2.0, reduction='mean'):
        super().__init__()
        # Reject typos up front; previously any unknown value silently behaved
        # like 'none' and only failed later when backward() got a non-scalar.
        if reduction not in self._REDUCTIONS:
            raise ValueError(
                f"reduction must be one of {self._REDUCTIONS}, got {reduction!r}"
            )
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        """Return the focal loss for ``logits`` against class-index ``targets``.

        Args:
            logits: Unnormalized class scores, one row per sample.
            targets: Target class index per sample.

        Returns:
            A scalar tensor for ``'mean'``/``'sum'``, otherwise one loss value
            per sample.
        """
        ce_loss = F.cross_entropy(logits, targets, reduction='none')  # [B]
        pt = torch.exp(-ce_loss)  # probability of the target class
        focal_loss = self.alpha * ((1 - pt) ** self.gamma) * ce_loss
        if self.reduction == 'mean':
            return focal_loss.mean()
        if self.reduction == 'sum':
            return focal_loss.sum()
        return focal_loss

# Training function
def train(model, loader, optimizer, criterion):
    """Run one training epoch and return the mean loss per batch.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; must also support ``len()``.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Callable ``criterion(logits, targets)`` returning a scalar
            loss tensor.

    Returns:
        Sum of the batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        # view(-1) keeps the targets 1-D even for a batch holding a single
        # graph; squeeze() would collapse them to a 0-dim tensor that no
        # longer matches the [1, num_classes] logits.
        loss = criterion(out, data.y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``, with a ``dataset`` attribute supporting ``len()``.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.

    Raises:
        ZeroDivisionError: If ``loader.dataset`` is empty.
    """
    model.eval()
    correct = 0
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for data in loader:
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)
            # view(-1) keeps labels 1-D, matching the handling in train().
            correct += int((pred == data.y.view(-1)).sum())
    return correct / len(loader.dataset)



# Split the data 80/20 into train and test sets, stratified by class label.
sample_indices = range(len(dataset))
train_idx, test_idx = train_test_split(sample_indices, test_size=0.2, stratify=y)
train_dataset = [dataset[idx] for idx in train_idx]
test_dataset = [dataset[idx] for idx in test_idx]
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Set up the model, optimizer and loss, then train.
num_classes = len(np.unique(y))
model = GCN(in_channels=4, hidden_channels=64, num_classes=num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
#criterion = torch.nn.CrossEntropyLoss()
criterion = FocalLoss(alpha=1.0, gamma=2.0)

num_epochs = 24
for epoch in range(1, num_epochs + 1):
    # One pass over the training data, then accuracy on the held-out split.
    loss = train(model, train_loader, optimizer, criterion)
    acc = evaluate(model, test_loader)
    print(f"Epoch {epoch:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}")



Epoch 001, Loss: 0.6553, Test Acc: 0.6953
Epoch 002, Loss: 0.4010, Test Acc: 0.6342
Epoch 003, Loss: 0.3109, Test Acc: 0.7401
Epoch 004, Loss: 0.2771, Test Acc: 0.7905
Epoch 005, Loss: 0.2508, Test Acc: 0.7678
Epoch 006, Loss: 0.2441, Test Acc: 0.8118
Epoch 007, Loss: 0.2348, Test Acc: 0.7983
Epoch 008, Loss: 0.2224, Test Acc: 0.8111
Epoch 009, Loss: 0.2239, Test Acc: 0.7905
Epoch 010, Loss: 0.2095, Test Acc: 0.8359
Epoch 011, Loss: 0.2096, Test Acc: 0.8594
Epoch 012, Loss: 0.1939, Test Acc: 0.8722
Epoch 013, Loss: 0.1955, Test Acc: 0.8551
Epoch 014, Loss: 0.1862, Test Acc: 0.8786
Epoch 015, Loss: 0.1786, Test Acc: 0.8828
Epoch 016, Loss: 0.1779, Test Acc: 0.8679
Epoch 017, Loss: 0.1689, Test Acc: 0.8821
Epoch 018, Loss: 0.1594, Test Acc: 0.8821
Epoch 019, Loss: 0.1671, Test Acc: 0.8587
Epoch 020, Loss: 0.1548, Test Acc: 0.9027
Epoch 021, Loss: 0.1542, Test Acc: 0.8700
Epoch 022, Loss: 0.1538, Test Acc: 0.8892
Epoch 023, Loss: 0.1524, Test Acc: 0.8970
Epoch 024, Loss: 0.1466, Test Acc:

In [5]:
# Persist only the learned parameters (state dict), not the whole module.
trained_weights = model.state_dict()
torch.save(trained_weights, 'gcn_model.pth')