In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets
from torchvision.transforms import Resize, Compose, ToTensor
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None, subset='train'):
        """
        root_dir: 데이터셋의 최상위 디렉터리 경로
        transform: 이미지에 적용할 전처리
        """
        self.root_dir = root_dir
        self.transform = transform
        self.subset = subset
        self.filenames = os.listdir(os.path.join(root_dir, "original"))  # 원본 이미지 폴더를 기준으로 파일 이름 리스트 생성

        if subset == 'train':
            self.filenames = self.filenames[:5000]
        elif subset == 'test':
            self.filenames = self.filenames[5000:5100]

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_name = self.filenames[idx]
        original_path = os.path.join(self.root_dir, "original", img_name)  # 원본 이미지 경로
        masked_path = os.path.join(self.root_dir, "overlay", img_name)      # 마스킹된 이미지 경로
        mask_path = os.path.join(self.root_dir, "mask", img_name)          # 마스크 이미지 경로

        original_img = Image.open(original_path).convert('RGB')
        masked_img = Image.open(masked_path).convert('RGB')
        mask_img = Image.open(mask_path).convert('L')

        resize = Resize((256, 256))
        mask_img = resize(mask_img)
        
        mask_img = np.array(mask_img)
        mask_img = (mask_img > 128).astype(np.float32)  # 흰색은 1, 검은색은 0
        mask_img = torch.from_numpy(mask_img).unsqueeze(0)

        if self.transform:
            original_img = self.transform(original_img)
            masked_img = self.transform(masked_img)

        return masked_img, mask_img, original_img


transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

root_dir = 'C:/Users/Serin Kim/workspace/AISYS/data'
train_dataset = CustomDataset(root_dir=root_dir, transform=transform, subset='train')
test_dataset = CustomDataset(root_dir=root_dir, transform=transform, subset='test')

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [3]:
def check_dimensions(dataloader):
    # 데이터 로더에서 첫 번째 배치를 가져옵니다.
    masked_imgs, masks, _ = next(iter(dataloader))
    
    # 마스킹된 이미지와 마스크의 차원을 출력합니다.
    print("Masked images dimensions:", masked_imgs.shape)
    print("Masks dimensions:", masks.shape)

# train_dataloader를 사용하여 차원을 확인합니다.
check_dimensions(train_dataloader)

Masked images dimensions: torch.Size([32, 3, 256, 256])
Masks dimensions: torch.Size([32, 1, 256, 256])


Model

In [None]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            # 추가적인 레이어
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 3, kernel_size=3, stride=1, padding=1),
            nn.Tanh(),
            # 추가적인 레이어
        )

    def forward(self, masked_img, mask):
        x = torch.cat([masked_img, mask], dim=1)  # 마스킹된 이미지와 마스크를 채널 차원에서 결합
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# 판별기 정의
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Flatten(),
            nn.Linear(64 * 128 * 128, 1),
            nn.Sigmoid()
        )

    def forward(self, img):
        return self.main(img)


Train

In [None]:
def train(generator, discriminator, criterion, g_optimizer, d_optimizer, dataloader, device, num_epochs, interval):
    generator.train()
    discriminator.train()
    saved_images = []

    for epoch in range(num_epochs):
        for i, (masked_imgs, masks, original_imgs) in enumerate(dataloader):
            # 장치 할당
            masked_imgs = masked_imgs.to(device)
            masks = masks.to(device)
            original_imgs = original_imgs.to(device)
            real_labels = torch.ones(original_imgs.size(0), 1).to(device)
            fake_labels = torch.zeros(original_imgs.size(0), 1).to(device)

            # 진짜 이미지로 디스크리미네이터 학습
            outputs = discriminator(original_imgs)
            d_loss_real = criterion(outputs, real_labels)
            real_score = outputs

            # 가짜 이미지 생성 및 디스크리미네이터 학습
            fake_images = generator(masked_imgs, masks)
            outputs = discriminator(fake_images.detach())
            d_loss_fake = criterion(outputs, fake_labels)
            fake_score = outputs

            # 디스크리미네이터 손실 계산 및 역전파
            d_loss = d_loss_real + d_loss_fake
            discriminator.zero_grad()
            d_loss.backward()
            d_optimizer.step()

            # 생성기 학습
            outputs = discriminator(fake_images)
            g_loss = criterion(outputs, real_labels)
            
            generator.zero_grad()
            g_loss.backward()
            g_optimizer.step()
            
            if (i + 1) % 100 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(dataloader)}], '
                      f'D Loss: {d_loss.item()}, G Loss: {g_loss.item()}, '
                      f'Real Score: {real_score.mean().item()}, Fake Score: {fake_score.mean().item()}')

        if (epoch + 1) % interval == 0:
            with torch.no_grad():
                saved_images.append(generator(masked_imgs, masks).detach().cpu())
    
    fig, axes = plt.subplots(nrows=1, ncols=len(saved_images), figsize=(15, 5))
    for img, ax in zip(saved_images, axes):
        ax.imshow(img[0].permute(1, 2, 0))  # 첫 번째 이미지의 [C, H, W]를 [H, W, C]로 변경
        ax.axis('off')
    plt.show()

def test(generator, dataloader, device):
    generator.eval()
    with torch.no_grad():
        for i, (masked_imgs, masks, _) in enumerate(dataloader):
            masked_imgs = masked_imgs.to(device)
            masks = masks.to(device)
            fake_images = generator(masked_imgs, masks)
            # 여기서 가짜 이미지를 평가하거나 저장할 수 있습니다.
            if i == 0:  # 예시로 한 배치의 결과만 저장
                torchvision.utils.save_image(fake_images, 'test_samples.png', normalize=True)


In [None]:
# 모델, 옵티마이저, 손실 함수 초기화
generator = Generator().to(device)
discriminator = Discriminator().to(device)
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
criterion = nn.BCELoss()

# 학습 및 테스트 실행
train(generator, discriminator, criterion, g_optimizer, d_optimizer, train_dataloader, device, num_epochs=50, interval = 5)
test(generator, test_dataloader, device)



DMFN Implementation

In [1]:
#DMFN
from models import *
import argparse
import os
from utils import get_config, prepare_sub_folder, _write_images, write_html
from data import create_dataset, create_dataloader
import math
from models.inpainting_model import InpaintingModel
#from tensorboardX import SummaryWriter
import torch


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Setup training configurations")

    # 기본 설정
    parser.add_argument('--fineSize', type=int, default=256, help='image dimensions')
    parser.add_argument('--img_shape', type=int, nargs=3, default=[3, 256, 256], help='[channel, height, width]')
    parser.add_argument('--is_train', type=bool, default=True, help='training mode')
    parser.add_argument('--gpu_ids', type=int, nargs='*', default=[], help='GPU IDs to use')
    parser.add_argument('--pretrained_model_G', type=str, default='outputs/celeba-hq/checkpoints/58000_G.pth', help='path to generator pretrained model')
    parser.add_argument('--pretrained_model_D', type=str, default='', help='path to discriminator pretrained model')
    parser.add_argument('--val_iter', type=int, default=200, help='validation interval')
    parser.add_argument('--log_iter', type=int, default=50, help='logging interval')
    parser.add_argument('--save_image_iter', type=int, default=500, help='save image interval')
    parser.add_argument('--save_model_iter', type=int, default=2000, help='save model interval')
    parser.add_argument('--display_num', type=int, default=8, help='number of images to display')
    parser.add_argument('--f',type=str, default=r"c:\Users\Serin Kim\AppData\Roaming\jupyter\runtime\kernel-v2-8212WMzxoKBEaBMj.json")

    # 생성기 네트워크 설정
    parser.add_argument('--which_model_G', type=str, default='inpainting_resnet', help='type of generator model')
    parser.add_argument('--in_nc', type=int, default=4, help='input channel number for G')
    parser.add_argument('--out_nc', type=int, default=3, help='output channel number for G')
    parser.add_argument('--nf', type=int, default=64, help='number of filters for G and D')
    parser.add_argument('--n_res', type=int, default=8, help='number of residual blocks in G')

    # 판별기 네트워크 설정
    parser.add_argument('--which_model_D', type=str, default='discriminator', help='type of discriminator model')
    parser.add_argument('--in_nc_D', type=int, default=3, help='input channel number for D')

    # 훈련 옵션

    parser.add_argument('--train',type=bool, default=True)
    # Adding arguments specifically related to training
    parser.add_argument('--pixel_weight', type=float, default=1, help='Weight for pixel loss')
    parser.add_argument('--pixel_criterion', type=str, default='l1', choices=['l1', 'ml1'], help='Criterion for pixel loss')
    parser.add_argument('--feature_weight', type=float, default=25, help='Weight for feature loss')
    parser.add_argument('--feature_criterion', type=str, default='l1', help='Criterion for feature loss')
    parser.add_argument('--center_weight', type=float, default=1, help='Weight for center loss')
    parser.add_argument('--dis_feature_weight', type=float, default=5, help='Weight for discriminator feature matching loss')
    parser.add_argument('--dis_feature_criterion', type=str, default='l1', help='Criterion for discriminator feature matching loss')
    parser.add_argument('--gan_weight', type=float, default=0.003, help='Weight for GAN loss')
    parser.add_argument('--gan_type', type=str, default='vanilla', choices=['vanilla', 'lsgan'], help='Type of GAN loss')
    parser.add_argument('--lr_G', type=float, default=0.0002, help='Learning rate for G')
    parser.add_argument('--lr_D', type=float, default=0.0002, help='Learning rate for D')
    parser.add_argument('--lr_policy', type=str, default='MultiStepLR', help='Learning rate policy')
    parser.add_argument('--lr_steps', type=int, nargs='*', default=[100000], help='Steps at which the learning rate is decayed')
    parser.add_argument('--lr_gamma', type=float, default=0.5, help='Gamma rate for learning rate decay')

    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    #config = get_config(args.config)
    torch.backends.cudnn.benchmark = True

    # tensorboard
    #model_name = os.path.splitext(os.path.basename(args))[0].split('_')[0]
    #train_writer = SummaryWriter(os.path.join(args.output_path + '/logs', model_name))

    #output_dir = os.path.join(args.output_path + '/outputs', model_name)
    #    checkpoint_dir, image_dir = prepare_sub_folder(output_dir)
    #args['checkpoint_dir'] = checkpoint_dir

    print(args)
    args_dict = vars(args)
    print(args_dict['train'])
    model = InpaintingModel(args_dict)




  from .autonotebook import tqdm as notebook_tqdm
  if load_path_G is not '':
  if load_path_D is not '':


Namespace(center_weight=1, dis_feature_criterion='l1', dis_feature_weight=5, display_num=8, f='"c:\\Users\\Serin Kim\\AppData\\Roaming\\jupyter\\runtime\\kernel-v2-82123qJ19yl6ROW3.json"', feature_criterion='l1', feature_weight=25, fineSize=256, gan_type='vanilla', gan_weight=0.003, gpu_ids=[], img_shape=[3, 256, 256], in_nc=4, in_nc_D=3, is_train=True, log_iter=50, lr_D=0.0002, lr_G=0.0002, lr_gamma=0.5, lr_policy='MultiStepLR', lr_steps=[100000], n_res=8, nf=64, out_nc=3, pixel_criterion='l1', pixel_weight=1, pretrained_model_D='', pretrained_model_G='outputs/celeba-hq/checkpoints/58000_G.pth', save_image_iter=500, save_model_iter=2000, train=True, val_iter=200, which_model_D='discriminator', which_model_G='inpainting_resnet')
True


  if load_path_G is not '':
  if load_path_D is not '':
  if load_path_G is not '':
  if load_path_D is not '':
  if load_path_G is not '':
  if load_path_D is not '':


NotImplementedError: Unsupported generator model: inpainting_resnet

: 

In [16]:
args['net_G']



TypeError: 'Namespace' object is not subscriptable