In [1]:
import os
import json
from PIL import Image
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class RobotPoseDataset(Dataset):
    """Dataset pairing an RGB + depth image with the robot's joint angles.

    Samples are discovered from the ``*.json`` files found in ``angle_dir``.
    For each file the ``.json`` suffix is removed and the first five characters
    of the remaining stem are dropped (presumably an ``angle`` prefix -- confirm
    against the dataset layout); what is left is the sample key. The images for
    a key are read from ``<rgb_dir>/image<key>.jpg`` and
    ``<depth_dir>/depth<key>.jpg``.

    Args:
        rgb_dir: Directory containing the ``image<key>.jpg`` files.
        depth_dir: Directory containing the ``depth<key>.jpg`` files.
        angle_dir: Directory containing the per-sample JSON files. Each file's
            decoded content is stored as-is and later passed to ``torch.tensor``.
        transform: Optional callable applied to the combined ``(4, H, W)``
            float tensor (not to a PIL image).

    Attributes:
        samples: List of ``(key, angles)`` tuples, sorted by JSON file name so
            that an index always refers to the same sample.
    """

    # Number of leading characters dropped from a JSON stem to obtain the key.
    _STEM_PREFIX_LEN = 5
    _JSON_SUFFIX = '.json'

    def __init__(self, rgb_dir, depth_dir, angle_dir, transform=None):
        super().__init__()
        self.rgb_dir = rgb_dir
        self.depth_dir = depth_dir
        self.angle_dir = angle_dir
        self.transform = transform

        self.samples = []
        # os.listdir returns entries in arbitrary order; sort for reproducibility.
        json_names = sorted(
            name for name in os.listdir(angle_dir) if name.endswith(self._JSON_SUFFIX)
        )
        for json_name in json_names:
            with open(os.path.join(angle_dir, json_name), 'r') as f:
                angles = json.load(f)
            # Slice the suffix off. str.rstrip(".json") would strip any trailing
            # run of the characters '.', 'j', 's', 'o', 'n' and corrupt stems
            # that happen to end in one of them.
            stem = json_name[:-len(self._JSON_SUFFIX)]
            self.samples.append((stem[self._STEM_PREFIX_LEN:], angles))

    def __len__(self):
        """Return the number of discovered samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A tuple ``(combined_tensor, angles_tensor)`` where ``combined_tensor``
            is a float32 tensor of shape ``(4, H, W)`` (RGB channels followed by
            depth, each scaled by 1/255) before the optional transform, and
            ``angles_tensor`` is the float32 tensor built from the JSON content.
        """
        # 1) Look up (key, joint angles) for this index
        filename, angles = self.samples[idx]

        # 2) Resolve the image paths that belong to the key
        rgb_path = os.path.join(self.rgb_dir, f"image{filename}.jpg")
        depth_path = os.path.join(self.depth_dir, f"depth{filename}.jpg")

        # 3) Load the images with PIL; the context managers release the file
        #    handles as soon as the pixel data has been converted.
        with Image.open(rgb_path) as rgb_file:
            rgb_image = rgb_file.convert("RGB")  # (H, W, 3)
        with Image.open(depth_path) as depth_file:
            depth_image = depth_file.convert("L")  # (H, W) grayscale

        # 4) Convert to numpy arrays scaled to [0, 1]
        rgb_np = np.array(rgb_image, dtype=np.float32) / 255.0   # shape (H, W, 3)
        depth_np = np.array(depth_image, dtype=np.float32) / 255.0  # shape (H, W)

        # 5) Stack RGB + depth along the channel axis -> shape (H, W, 4)
        combined_np = np.dstack((rgb_np, depth_np))

        # 6) Reorder to PyTorch's (C, H, W) layout and wrap as a tensor
        combined_np = combined_np.transpose(2, 0, 1)  # (4, H, W)
        combined_tensor = torch.from_numpy(combined_np)  # float32 tensor

        # 7) Apply the optional transform (e.g. Resize, Normalize).
        #    It receives a tensor; a transform that needs a PIL image would
        #    require moving the PIL -> tensor conversion.
        if self.transform:
            combined_tensor = self.transform(combined_tensor)

        # 8) Joint angles (unit, radians or degrees, is whatever the JSON stores)
        angles_tensor = torch.tensor(angles, dtype=torch.float32)

        return combined_tensor, angles_tensor

In [2]:
class RobotJointRegressor(nn.Module):
    """CNN that regresses every joint angle from a single shared output layer.

    The convolutional trunk is four ``conv3x3 -> ReLU -> max-pool(2)`` stages
    widening the channels ``input_channels -> 8 -> 16 -> 32 -> 64``, followed by
    an adaptive average pool that fixes the spatial size at 4x4. The flattened
    ``64 * 4 * 4 = 1024`` features go through ``Linear(1024, 128) -> ReLU ->
    Linear(128, num_joints)``.

    Args:
        input_channels: Number of channels of the input image (default 4).
        num_joints: Number of regressed values per sample (default 6).
    """

    def __init__(self, input_channels=4, num_joints=6):
        super().__init__()

        # Channel width entering/leaving each stage; every stage halves H and W
        # (e.g. 720x1280 -> 360x640 -> 180x320 -> 90x160 -> 45x80).
        widths = (input_channels, 8, 16, 32, 64)
        trunk = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            trunk.extend([
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ])
        # Collapse whatever spatial size remains to a fixed (64, 4, 4) map.
        trunk.append(nn.AdaptiveAvgPool2d((4, 4)))
        self.conv = nn.Sequential(*trunk)

        # Flattened feature map: (B, 64, 4, 4) -> (B, 1024)
        flat_features = 64 * 4 * 4
        self.fc = nn.Sequential(
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Linear(128, num_joints),  # one regression output per joint
        )

    def forward(self, x):
        """Map a ``(B, input_channels, H, W)`` batch to ``(B, num_joints)``."""
        feature_map = self.conv(x)                              # (B, 64, 4, 4)
        flattened = feature_map.view(feature_map.size(0), -1)   # (B, 1024)
        return self.fc(flattened)                               # (B, num_joints)


def train_end_to_end(
    dataloader,
    num_epochs=50,
    lr=1e-3,
    device='cuda'
):
    """Train a fresh ``RobotJointRegressor`` with Adam and MSE loss.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches; both items
            must support ``.to(device)``.
        num_epochs: Number of passes over ``dataloader``.
        lr: Learning rate passed to ``optim.Adam``.
        device: Device the model and batches are moved to. When it equals the
            string ``'cuda'`` and more than one GPU is visible, the model is
            wrapped in ``nn.DataParallel``.

    Returns:
        The trained model (the ``nn.DataParallel`` wrapper when one was used).
        The mean batch loss of every epoch is printed; an epoch with no batches
        reports 0.
    """
    model = RobotJointRegressor(input_channels=4, num_joints=6)

    use_data_parallel = device == 'cuda' and torch.cuda.device_count() > 1
    if use_data_parallel:
        print(f"Using {torch.cuda.device_count()} GPUs via nn.DataParallel...")
        model = nn.DataParallel(model)

    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch_no in range(1, num_epochs + 1):
        batch_losses = []

        # Each batch is (images, joint angles)
        for inputs, targets in dataloader:
            inputs = inputs.to(device)    # (B, 4, H, W)
            targets = targets.to(device)  # (B, 6)

            # Clear stale gradients, then forward / backward / update
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        if batch_losses:
            epoch_loss = sum(batch_losses) / len(batch_losses)
        else:
            epoch_loss = 0
        print(f"[Epoch {epoch_no}/{num_epochs}] Loss: {epoch_loss:.4f}")

    return model

# -----------------------------
# 4) Example run
# -----------------------------
# Builds the dataset and dataloader, prints the shapes of a single batch as a
# sanity check, then trains RobotJointRegressor via train_end_to_end.
if __name__ == "__main__":
    from torch.utils.data import DataLoader
    
    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # NOTE(review): machine-specific absolute paths -- adjust before running elsewhere.
    rgb_dir = "/home/najo/NAS/DIP/datasets/FR5_model/image"
    depth_dir = "/home/najo/NAS/DIP/datasets/FR5_model/depth"
    angle_dir = "/home/najo/NAS/DIP/datasets/FR5_model/angle"

    dataset = RobotPoseDataset(rgb_dir, depth_dir, angle_dir)
    dataloader = DataLoader(dataset, batch_size=128, num_workers=16, shuffle=True, pin_memory=True)

    for batch_idx, (images, angles) in enumerate(dataloader):
        print("Batch:", batch_idx)
        print("images shape:", images.shape)  # (B, 4, H, W)
        print("angles shape:", angles.shape)  # (B, 6)   (assuming 6 joints)
        
        break  # demo only: print the first batch and stop
    
    trained_model = train_end_to_end(dataloader, num_epochs=50, lr=1e-3, device=device)
    print("Training Done!")

In [5]:
class RobotJointRegressorMultiHead(nn.Module):
    """
    Joint-angle regressor with a shared trunk and one output head per joint.

    - A shared CNN extracts features from the input.
    - A shared fully connected layer reduces them to 128 dimensions.
    - Every joint has its own independent regression head (scalar output).
    """
    def __init__(self, input_channels=4, num_joints=6):
        super().__init__()
        self.num_joints = num_joints

        def conv_stage(c_in, c_out):
            # One trunk stage: 3x3 conv (size-preserving), ReLU, then 2x downsample.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]

        # 1) Shared convolutional backbone
        #    e.g. 720x1280 -> 360x640 -> 180x320 -> 90x160 -> 45x80 -> 4x4
        self.conv = nn.Sequential(
            *conv_stage(input_channels, 8),
            *conv_stage(8, 16),
            *conv_stage(16, 32),
            *conv_stage(32, 64),
            nn.AdaptiveAvgPool2d((4, 4)),  # final map: (B, 64, 4, 4)
        )

        # 2) Shared fully connected layer (dimensionality reduction)
        flat_features = 64 * 4 * 4  # = 1024
        self.common_fc = nn.Sequential(
            nn.Linear(flat_features, 128),
            nn.ReLU(),
        )

        # 3) Independent head per joint, each emitting a single scalar
        joint_heads = []
        for _ in range(num_joints):
            joint_heads.append(nn.Linear(128, 1))
        self.heads = nn.ModuleList(joint_heads)

    def forward(self, x):
        """
        x shape: (B, input_channels, H, W)
        return: (B, num_joints)  # one regressed value per joint
        """
        pooled = self.conv(x)                      # (B, 64, 4, 4)
        flat = pooled.view(pooled.size(0), -1)     # (B, 1024)
        shared = self.common_fc(flat)              # (B, 128)

        # Each head yields (B, 1); concatenating on dim=1 gives (B, num_joints)
        per_joint = [head(shared) for head in self.heads]
        return torch.cat(per_joint, dim=1)

def train_multi_regression(
    model,
    dataloader,
    num_epochs=30,
    lr=1e-3,
    device='cuda'
):
    """Train ``model`` as a multi-output regressor with Adam and MSE loss.

    Args:
        model: ``nn.Module`` mapping an image batch to ``(B, num_outputs)``.
        dataloader: Iterable yielding ``(images, angles)`` batches; both items
            must support ``.to(device)``.
        num_epochs: Number of passes over ``dataloader``.
        lr: Learning rate passed to ``optim.Adam``.
        device: Device the model and batches are moved to. When it equals the
            string ``'cuda'`` and more than one GPU is visible, the model is
            wrapped in ``nn.DataParallel``.

    Returns:
        The trained model, left in train mode (the ``nn.DataParallel`` wrapper
        when one was used). The mean batch loss of every epoch is printed; an
        epoch with no batches reports 0.
    """
    use_data_parallel = device == 'cuda' and torch.cuda.device_count() > 1
    if use_data_parallel:
        print(f"Using {torch.cuda.device_count()} GPUs via nn.DataParallel...")
        model = nn.DataParallel(model)

    model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for epoch_no in range(1, num_epochs + 1):
        batch_losses = []

        for images, angles in dataloader:
            # images: (B, 4, H, W); angles: (B, 6) continuous regression targets
            images = images.to(device)
            angles = angles.to(device)

            optimizer.zero_grad()
            loss = criterion(model(images), angles)  # MSE over all outputs
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        if batch_losses:
            epoch_loss = sum(batch_losses) / len(batch_losses)
        else:
            epoch_loss = 0
        print(f"[Epoch {epoch_no}/{num_epochs}] Loss: {epoch_loss:.4f}")

    return model

# Example run for the multi-head variant: builds the same dataset/dataloader
# and trains RobotJointRegressorMultiHead via train_multi_regression.
if __name__ == "__main__":
    from torch.utils.data import DataLoader
    
    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # NOTE(review): machine-specific absolute paths -- adjust before running elsewhere.
    rgb_dir = "/home/najo/NAS/DIP/datasets/FR5_model/image"
    depth_dir = "/home/najo/NAS/DIP/datasets/FR5_model/depth"
    angle_dir = "/home/najo/NAS/DIP/datasets/FR5_model/angle"

    dataset = RobotPoseDataset(rgb_dir, depth_dir, angle_dir)
    dataloader = DataLoader(dataset, batch_size=128, num_workers=16, shuffle=True, pin_memory=True)

    # The model is moved to `device` here; train_multi_regression moves it again
    # (after optionally wrapping it in nn.DataParallel) and returns the result.
    model = RobotJointRegressorMultiHead(input_channels=4, num_joints=6).to(device)
    model = train_multi_regression(
        model=model,
        dataloader=dataloader,
        num_epochs=50,
        lr=1e-3,
        device=device
    )
    
    print("Training Done!")

Using 3 GPUs via nn.DataParallel...


[Epoch 1/50] Loss: 6013.4505
[Epoch 2/50] Loss: 5218.6428
[Epoch 3/50] Loss: 1760.5491
[Epoch 4/50] Loss: 871.2631
[Epoch 5/50] Loss: 783.6198
[Epoch 6/50] Loss: 689.6333
[Epoch 7/50] Loss: 665.4722
[Epoch 8/50] Loss: 628.8259
[Epoch 9/50] Loss: 620.0034
[Epoch 10/50] Loss: 620.6606
[Epoch 11/50] Loss: 619.1944


KeyboardInterrupt: 