# Faster R-CNN을 이용한 알약 군집 탐지 및 분류

In [1]:
## 필요한 라이브러리 임포트
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor  # 새로 추가된 줄
from torchvision.transforms import functional as F
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
## 데이터셋 클래스 정의
class PillDataset(torch.utils.data.Dataset):
    """Detection dataset pairing image files with box/label annotations.

    Each item is an ``(image, target)`` pair, where ``target`` is a dict
    holding ``"boxes"`` (float32 tensor of shape ``(N, 4)``) and ``"labels"``
    (int64 tensor).

    Args:
        imgs_path: Indexable collection of image file paths.
        annotations: Indexable collection parallel to ``imgs_path``; each
            entry is a mapping with ``'boxes'`` and ``'labels'`` keys.
            NOTE(review): boxes are presumably ``[x1, y1, x2, y2]`` pixel
            corners, as torchvision detection models expect -- confirm
            against the annotation source.
        transforms: Optional callable invoked as ``transforms(img, target)``
            that returns the transformed ``(img, target)`` pair. When it is
            ``None`` the image is returned as a PIL image.
    """

    def __init__(self, imgs_path, annotations, transforms=None):
        self.imgs_path = imgs_path
        self.annotations = annotations
        self.transforms = transforms

    def __getitem__(self, idx):
        """Load the image at ``idx`` and build its detection target."""
        # Open inside a context manager so the file handle is released as
        # soon as the pixel data has been decoded into the RGB copy.
        with Image.open(self.imgs_path[idx]) as raw:
            img = raw.convert("RGB")

        annotation = self.annotations[idx]

        # reshape(-1, 4) keeps the (N, 4) layout even when an image has no
        # boxes (an empty list would otherwise become shape (0,)) or when a
        # single box is stored as a flat 4-element list.
        boxes = torch.as_tensor(
            annotation['boxes'], dtype=torch.float32
        ).reshape(-1, 4)
        labels = torch.as_tensor(annotation['labels'], dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.imgs_path)


In [3]:
## 모델 정의 및 학습 함수
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    The detector is created with ``pretrained=True`` and its box predictor
    is then replaced by a new ``FastRCNNPredictor`` sized for
    ``num_classes`` outputs.

    Args:
        num_classes: Number of output classes for the new box predictor.

    Returns:
        The detection model with the replaced box predictor.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=True)

    # Reuse the input width of the existing classification layer so the new
    # head plugs into the same feature vector.
    existing_head = detector.roi_heads.box_predictor
    feature_dim = existing_head.cls_score.in_features

    detector.roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return detector

def train_model(model, data_loader, optimizer, num_epochs=10):
    """Train ``model`` for ``num_epochs`` passes over ``data_loader``.

    The model is moved to CUDA when available, otherwise CPU. For every
    batch the model is called as ``model(images, targets)`` and must return
    a dict of loss tensors; their sum is back-propagated and ``optimizer``
    takes one step. After each epoch the mean summed loss per batch is
    printed.

    Args:
        model: Module returning a dict of losses when called in train mode
            with ``(images, targets)``.
        data_loader: Iterable yielding ``(images, targets)`` batches, where
            ``images`` is an iterable of tensors and ``targets`` an iterable
            of dicts mapping names to tensors.
        optimizer: Optimizer over the model's trainable parameters.
        num_epochs: Number of passes over ``data_loader``.

    Returns:
        None. Progress is reported through ``print``.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        # Count batches while iterating instead of calling len(data_loader):
        # this works for loaders without __len__ and avoids dividing by zero
        # when the loader yields nothing.
        num_batches = 0

        for images, targets in data_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            total_loss += losses.item()
            num_batches += 1

        # An epoch with no batches has no meaningful average; report nan
        # rather than raising ZeroDivisionError.
        mean_loss = total_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}')


In [4]:
## 추론 및 시각화 함수
def visualize_prediction(model, image_path, threshold=0.5):
    """Run the detector on one image and display boxes scoring above ``threshold``.

    The model is moved to CUDA when available (otherwise CPU), switched to
    eval mode, and called on a single-image batch. Every predicted box whose
    score exceeds ``threshold`` is drawn as a red rectangle on a copy of the
    image, which is then shown with matplotlib.

    Args:
        model: Detection model whose eval-mode output is indexable, with
            element 0 being a dict containing ``'boxes'`` and ``'scores'``.
        image_path: Path of the image file to run inference on.
        threshold: Minimum score (exclusive) for a box to be drawn.

    Returns:
        None. The annotated image is displayed via ``plt.show()``.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    # Context manager releases the file handle once the RGB copy exists.
    with Image.open(image_path) as raw:
        img = raw.convert("RGB")

    # Use the functional API imported as ``F``: this file never imports a
    # ``transforms`` module, so ``transforms.ToTensor()`` raised NameError.
    img_tensor = F.to_tensor(img).unsqueeze(0).to(device)

    with torch.no_grad():
        prediction = model(img_tensor)

    # np.array makes a writable copy that cv2 can draw on in place.
    img_np = np.array(img)

    detections = prediction[0]
    for box, score in zip(detections['boxes'], detections['scores']):
        if score > threshold:
            x1, y1, x2, y2 = (int(v) for v in box.cpu().numpy())
            # The array is RGB (from PIL) and shown with matplotlib, so
            # (255, 0, 0) renders as red.
            cv2.rectangle(img_np, (x1, y1), (x2, y2), (255, 0, 0), 2)

    plt.figure(figsize=(12, 8))
    plt.imshow(img_np)
    plt.axis('off')
    plt.show()


In [5]:
## 메인 실행 코드
if __name__ == "__main__":
    # Class count handed to the detector: background + pill class.
    num_classes = 2

    # Build the detector.
    model = get_model(num_classes)

    # Optimise only the parameters that require gradients.
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = torch.optim.SGD(
        params,
        lr=0.005,
        momentum=0.9,
        weight_decay=0.0005,
    )

    # Dataset and data loader setup.
    # TODO: add the real dataset paths and annotations.

    # Train the model (enable once a data loader exists).
    # train_model(model, train_dataloader, optimizer)

    # Visualise the results.
    # visualize_prediction(model, "test_image.jpg")

