In [2]:
import torch

# GPU 사용 가능 여부 확인
print(f"CUDA is available: {torch.cuda.is_available()}")

# 사용 가능한 GPU 개수 확인
print(f"Number of available GPUs: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    # 현재 사용 중인 GPU의 이름 확인
    print(f"Current GPU: {torch.cuda.get_device_name(0)}")
    
    # 현재 GPU의 메모리 사용량 확인 (단위: bytes)
    print(f"GPU Memory Usage:")
    print(f"Allocated: {torch.cuda.memory_allocated(0)}")
    print(f"Cached: {torch.cuda.memory_reserved(0)}")
else:
    print("GPU is not available. CPU will be used instead.")

# 현재 PyTorch가 사용하도록 설정된 장치 확인
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"PyTorch is using: {device}")

CUDA is available: True
Number of available GPUs: 1
Current GPU: NVIDIA GeForce RTX 4090
GPU Memory Usage:
Allocated: 0
Cached: 0
PyTorch is using: cuda


In [3]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from ipywidgets import interact
from torch.utils.data import DataLoader
from torchvision import models, transforms
from torchvision.utils import make_grid
from util import CLASS_NAME_TO_ID, CLASS_ID_TO_NAME, visualize, save_model
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from collections import defaultdict
import time

In [4]:
data_dir = './DataSet/'
data_df = pd.read_csv(os.path.join(data_dir, 'df.csv'))


In [5]:
class Detection_dataset():
    def __init__(self, data_dir, phase, transformer=None):
        self.data_dir = data_dir
        self.phase = phase
        self.data_df = pd.read_csv(os.path.join(self.data_dir, 'df.csv'))
        self.image_files = [fn for fn in os.listdir(os.path.join(self.data_dir, phase)) if fn.endswith('jpg')]
        self.transformer = transformer

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        filename, image = self.get_image(index)
        bboxes, class_ids = self.get_label(filename)
        img_H, img_W, _ = image.shape
        if self.transformer:
            image = self.transformer(image)
            _, img_H, img_W = image.shape
        bboxes[:, [0, 2]] *= img_W
        bboxes[:, [1, 3]] *= img_H
        target = {}
        target['boxes'] = torch.Tensor(bboxes).float()
        target['labels'] = torch.Tensor(class_ids).long()
        return image, target, filename

    def get_image(self, index):
        filename = self.image_files[index]
        image_path = os.path.join(self.data_dir, self.phase, filename)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return filename, image

    def get_label(self, filename):
        image_id = filename.split('.')[0]
        meta_data = data_df[data_df['ImageID'] == image_id]
        cate_names = meta_data['LabelName'].values
        class_ids = [CLASS_NAME_TO_ID[cate_name] for cate_name in cate_names]
        bboxes = meta_data[['XMin', 'XMax', 'YMin', 'YMax']].values
        bboxes[:, [1, 2]] = bboxes[:, [2, 1]]
        return bboxes, class_ids

In [6]:
dataset = Detection_dataset(data_dir=data_dir, phase='train', transformer=None)

In [7]:
def collate_fn(batch):
    image_list = []
    target_list = []
    filename_list = []

    for img, target, filename in batch:
        image_list.append(img)
        target_list.append(target)
        filename_list.append(filename)
    
    return image_list, target_list, filename_list

In [8]:
# def build_dataloader(data_dir, batch_size=4, image_size=448):
#     transformer = transforms.Compose([
#         transforms.ToTensor(),
#         transforms.Resize(size=(image_size, image_size)),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ])
#     dataloaders = {}
#     train_dataset = Detection_dataset(data_dir=data_dir, phase='train', transformer=transformer)
#     dataloaders['train'] = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
#     val_dataset = Detection_dataset(data_dir=data_dir, phase='val', transformer=transformer)
#     dataloaders['val'] = DataLoader(train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)
#     return dataloaders
def build_dataloader(data_dir, batch_size=4, image_size=448):
    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(size=(image_size, image_size)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    dataloaders = {}
    train_dataset = Detection_dataset(data_dir=data_dir, phase='train', transformer=transformer)
    dataloaders['train'] = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_dataset = Detection_dataset(data_dir=data_dir, phase='val', transformer=transformer)
    dataloaders['val'] = DataLoader(train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)
    return dataloaders

In [9]:
BATCH_SIZE = 8
trainset = Detection_dataset(data_dir=data_dir, phase='train', transformer=transformer)
tranloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

NameError: name 'transformer' is not defined

In [10]:
# def build_model(num_classes):
#     model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
#     in_features = model.roi_heads.box_predictor.cls_score.in_features
#     model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
#     return model
def build_model(num_classes):
    model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [11]:
NUM_CLASSES = 2
model = build_model(num_classes=NUM_CLASSES)
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [12]:
# def train_one_epoch(dataloaders, model, optimizer, device):
#     train_loss = defaultdict(float)
#     val_loss = defaultdict(float)
#     model.train()

#     for phase in ['train', 'val']:
#         for index, batch in enumerate(dataloaders[phase]):
#             images = batch[0]
#             targets = batch[1]
#             filenames = batch[2]

#             images = list(image for image in images)
#             targets = [{k: v for k, v in t.items()} for t in targets]

#             with torch.set_grad_enabled(phase == 'train'):
#                 loss = model(images, targets)

#         total_loss = sum(each_loss for each_loss in loss.values())

#         if phase == 'train':
#             optimizer.zero_grad()
#             total_loss.backward()
#             optimizer.step()

#             if(index > 0) and (index % VERBOSE_FREQ == 0):
#                 text = f"{index}/{len(dataloaders[phase])} - "
#                 for k, v in loss.item():
#                     text += f"{k}: {v.item():.4f} "
#                 print(text)

#             for k, v, in loss.items():
#                 train_loss[k] += v.item()
#             train_loss['total_loss'] += total_loss.item()
#         else:
#             for k, v, in loss.items():
#                 val_loss[k] += v.item()
#             val_loss['total_loss'] += total_loss.item()
        
#     for k in train_loss.keys():
#         train_loss[k] /= len(dataloaders['train'])
#         val_loss[k] /= len(dataloaders['val'])
#     return train_loss, val_loss
def train_one_epoch(dataloaders, model, optimizer, device):
    train_loss = defaultdict(float)
    val_loss = defaultdict(float)
    model.train()

    for phase in ['train', 'val']:
        for index, batch in enumerate(dataloaders[phase]):
            images = batch[0]
            targets = batch[1]
            filenames = batch[2]

            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with torch.set_grad_enabled(phase == 'train'):
                loss_dict = model(images, targets)
                loss = sum(loss for loss in loss_dict.values())

            if phase == 'train':
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if(index > 0) and (index % VERBOSE_FREQ == 0):
                    text = f"{index}/{len(dataloaders[phase])} - "
                    for k, v in loss_dict.items():
                        text += f"{k}: {v.item():.4f} "
                    print(text)

            for k, v in loss_dict.items():
                if phase == 'train':
                    train_loss[k] += v.item()
                else:
                    val_loss[k] += v.item()
            
            if phase == 'train':
                train_loss['total_loss'] += loss.item()
            else:
                val_loss['total_loss'] += loss.item()
        
    for k in train_loss.keys():
        train_loss[k] /= len(dataloaders['train'])
        val_loss[k] /= len(dataloaders['val'])
    return train_loss, val_loss

In [13]:
# is_cuda = False

# NUM_CLASSES = 2
# IMAGE_SIZE = 448
# BATCH_SIZE = 8
# VERBOSE_FREQ = 100
# DEVICE = torch.device('cuda' if torch.cuda.is_available and is_cuda else 'cpu')

# dataloaders = build_dataloader(data_dir=data_dir, batch_size=BATCH_SIZE, image_size=IMAGE_SIZE)
# model = build_model(num_classes=NUM_CLASSES)
# model = model.to(DEVICE)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
is_cuda = True  # CUDA 사용 설정

NUM_CLASSES = 2
IMAGE_SIZE = 448
BATCH_SIZE = 8
VERBOSE_FREQ = 100
DEVICE = torch.device('cuda' if torch.cuda.is_available() and is_cuda else 'cpu')

dataloaders = build_dataloader(data_dir=data_dir, batch_size=BATCH_SIZE, image_size=IMAGE_SIZE)
model = build_model(num_classes=NUM_CLASSES)
model = model.to(DEVICE)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [17]:
from datetime import datetime
datetime.now()

datetime.datetime(2024, 7, 28, 12, 20, 23, 511292)

In [None]:
# num_epochs = 1
# train_losses = []
# val_losses = []

# for epoch in range(num_epochs):
#     train_loss, val_loss = train_one_epoch(dataloaders, model, optimizer, DEVICE)
#     train_losses.append(train_loss)
#     val_losses.append(val_loss)

#     print(f"epoch:{epoch+1}/{num_epochs} - Train Loss:{train_loss['total_loss']:.4f}, Val Loss:{val_loss['total_loss']:.4f}")
#     if(epoch+1) % 10 == 0:
#         save_model(model.stat_dict(), f"model_{epoch+1}.pth")
timelist = []

num_epochs = 50
train_losses = []
val_losses = []
fstartTime = time.time()
for epoch in range(num_epochs):
    startTime = time.time()
    train_loss, val_loss = train_one_epoch(dataloaders, model, optimizer, DEVICE)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    endTime = time.time()
    print(f"epoch:{epoch+1}/{num_epochs} - Train Loss:{train_loss['total_loss']:.4f}, Val Loss:{val_loss['total_loss']:.4f}")
    print('='*10, endTime - startTime, '='*10)
    timelist.append(endTime - startTime)
    if(epoch+1) % 10 == 0:
        save_model(model.state_dict(), f"model_{epoch+1}.pth")
        print('='*10, endTime - fstartTime, '='*10)
        

100/1713 - loss_classifier: 0.1031 loss_box_reg: 0.1061 loss_objectness: 0.0639 loss_rpn_box_reg: 0.0308 
200/1713 - loss_classifier: 0.0718 loss_box_reg: 0.0996 loss_objectness: 0.0080 loss_rpn_box_reg: 0.0041 
300/1713 - loss_classifier: 0.0548 loss_box_reg: 0.0559 loss_objectness: 0.0144 loss_rpn_box_reg: 0.0050 
400/1713 - loss_classifier: 0.0333 loss_box_reg: 0.0314 loss_objectness: 0.0066 loss_rpn_box_reg: 0.0061 
500/1713 - loss_classifier: 0.0620 loss_box_reg: 0.0666 loss_objectness: 0.0117 loss_rpn_box_reg: 0.0052 
600/1713 - loss_classifier: 0.0395 loss_box_reg: 0.0531 loss_objectness: 0.0860 loss_rpn_box_reg: 0.0216 
700/1713 - loss_classifier: 0.0396 loss_box_reg: 0.0165 loss_objectness: 0.0056 loss_rpn_box_reg: 0.0033 
800/1713 - loss_classifier: 0.0858 loss_box_reg: 0.0665 loss_objectness: 0.0606 loss_rpn_box_reg: 0.0123 
900/1713 - loss_classifier: 0.0359 loss_box_reg: 0.0236 loss_objectness: 0.0103 loss_rpn_box_reg: 0.0029 
1000/1713 - loss_classifier: 0.0345 loss_box_r

In [None]:
tr_loss_classifier = []
tr_loss_box_reg = []
tr_loss_objectness = []
tr_loss_rpn_box_reg = []
tr_loss_total = []

for tr_loss in train_losses:
    tr_loss_classifier.append(tr_loss['loss_classifier'])
    tr_loss_box_reg.append(tr_loss['loss_box_reg'])
    tr_loss_objectness.append(tr_loss['loss_objectness'])
    tr_loss_rpn_box_reg.append(tr_loss['loss_rpn_box_reg'])
    tr_loss_total.append(tr_loss['total_loss'])

val_loss_classifier = []
val_loss_box_reg = []
val_loss_objectness = []
val_loss_rpn_box_reg = []
val_loss_total = []

for vl_loss in val_losses:
    val_loss_classifier.append(vl_loss['loss_classifier'])
    val_loss_box_reg.append(vl_loss['loss_box_reg'])
    val_loss_objectness.append(vl_loss['loss_objectness'])
    val_loss_rpn_box_reg.append(vl_loss['loss_rpn_box_reg'])
    val_loss_total.append(vl_loss['total_loss'])

In [None]:
plt.figure(figsize=(8, 4))
plt.plot(tr_loss_total, label="train_total_loss")
plt.plot(tr_loss_classifier, label="train_loss_classifier")
plt.plot(tr_loss_box_reg,  label="train_loss_box_reg")
plt.plot(tr_loss_objectness, label="train_loss_objectness")
plt.plot(tr_loss_rpn_box_reg,  label="train_loss_rpn_box_reg")

plt.plot(val_loss_total, label="val_total_loss")
plt.plot(val_loss_classifier, label="val_loss_classifier")
plt.plot(val_loss_box_reg,  label="val_loss_box_reg")
plt.plot(val_loss_objectness, label="val_loss_objectness")
plt.plot(val_loss_rpn_box_reg,  label="val_loss_rpn_box_reg")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.grid("on")
plt.legend(loc='upper right')
plt.tight_layout()