## import

In [5]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import pandas as pd

# load data

### define directory path

In [6]:
import os
import shutil

# 데이터셋 경로 설정하기
dataset_path = './usg1_dataset/train_images/'
dir = "./usg1_dataset/"
image_dir = dir+'train_images/'
train_csv_dir = dir+'train.csv'
train_copy_dir = './usg1_dataset/train_copy/'
test_dir = './usg1_dataset/test_images/'
split_label_dir = dir + 'train_split_label/'

### split train/valid/test

In [7]:
os.makedirs(split_label_dir, exist_ok=True)

train_dir = split_label_dir + 'train/'
valid_dir = split_label_dir + 'valid/'
test_dir = split_label_dir + 'test/'

os.makedirs(split_label_dir + '/train', exist_ok=True)
os.makedirs(split_label_dir + '/valid', exist_ok=True)
os.makedirs(split_label_dir + '/test', exist_ok=True)


shutil.rmtree(train_dir)
shutil.rmtree(valid_dir)
shutil.rmtree(test_dir)

os.makedirs(split_label_dir + '/train', exist_ok=True)
os.makedirs(split_label_dir + '/valid', exist_ok=True)
os.makedirs(split_label_dir + '/test', exist_ok=True)


# 레이블 이름 설정하기
train_labels = pd.read_csv(train_csv_dir)
labels = train_labels['label'].to_list() # 레이블 리스트
labels_str = []
for label in labels:
    labels_str.append(str(label))

# 하위 폴더 생성하기
for label in labels_str:
    os.makedirs(train_dir + label, exist_ok=True)
    os.makedirs(valid_dir + label, exist_ok=True)

### copy : train, test

In [8]:
import os
from natsort import natsorted

# 파일 이름 변경하기 ""

# 1) copy : train_images => train_copy (and test)

# train
#os.makedirs(train_copy_dir)
shutil.rmtree(train_copy_dir)
os.makedirs(train_copy_dir)

filenames = natsorted(os.listdir(image_dir))
for filename in filenames:
    if filename.endswith('.jpg'):  # 변경할 파일 확장자 지정

        name = os.path.join(image_dir, filename)
        shutil.copy(name, train_copy_dir)


# test
test_dir = './usg1_dataset/test_images/'
split_test_dir = split_label_dir + 'test/'

test_filenames = natsorted(os.listdir(test_dir))

for filename in test_filenames:
    if filename.endswith('.jpg'):  # 변경할 파일 확장자 지정

        name = os.path.join(test_dir, filename)
        shutil.copy(name, split_test_dir)

### rename : train

In [9]:
filenames = natsorted(os.listdir(train_copy_dir))

# 2) rename
for idx, filename in enumerate(filenames):
        label = labels_str[idx]

        name = os.path.join(train_copy_dir, filename)
        labeled_name = os.path.join(train_copy_dir, os.path.splitext(filename)[0]+ '_' +label+'.jpg')
        os.rename(name, labeled_name)

### split : train, valid

In [10]:
# 2) split 
from sklearn.model_selection import train_test_split

filenames = natsorted(os.listdir(train_copy_dir))

train_filenames, valid_filenames = train_test_split(filenames, test_size=0.1, random_state=42)

train_filenames = natsorted(train_filenames)
valid_filenames = natsorted(valid_filenames)

print('train\n',train_filenames[:5])
print('valid\n',valid_filenames[:5])

train
 ['000000_1.jpg', '000001_8.jpg', '000002_0.jpg', '000003_1.jpg', '000004_8.jpg']
valid
 ['000008_9.jpg', '000014_1.jpg', '000019_0.jpg', '000031_0.jpg', '000033_6.jpg']


In [11]:
# 이미지 파일 이동하기

# train
for file in train_filenames:
    if file.endswith('.jpg'):
        filename = os.path.splitext(file)[0]
        label = filename.split('_')[1]
        shutil.copy(train_copy_dir+ '/' +file, train_dir + label + '/' + file)

# valid
for file in valid_filenames:
    if file.endswith('.jpg'):
        filename = os.path.splitext(file)[0]
        label = filename.split('_')[1]
        shutil.copy(train_copy_dir+ '/' +file, valid_dir + label + '/' + file)

# Model

### transforms

In [12]:
import tqdm

import tqdm

from torchvision.transforms import Compose, ToTensor, Resize
from torchvision.transforms import RandomHorizontalFlip, RandomCrop, Normalize, RandomVerticalFlip, RandomRotation, RandomResizedCrop, RandomGrayscale, RandomSolarize
from torch.optim.adam import Adam

transforms = Compose([
   Resize(224),
   RandomCrop((224, 224), padding=4),
   #RandomGrayscale(p=0.75),
   #RandomSolarize,
   #RandomVerticalFlip(), # 상하반전
   # RandomRotation(180), # 회전
   RandomHorizontalFlip(p=0.5), # 좌우반전
   ToTensor(),
   Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
])

### load DataLoader

In [13]:
# 데이터셋 불러오기
train_dataset = datasets.ImageFolder(root=train_dir,
                                      transform=transforms)

valid_dataset = datasets.ImageFolder(root=valid_dir,
                                      transform=transforms)


### Define model

In [14]:
import torch
import torch.nn as nn
from torchvision.models.densenet import densenet121



device = torch.device('mps')

net = densenet121
model = net(pretrained=True) 
# densenet121 : (classifier): Linear(in_features=1024, out_features=1000, bias=True)

# fc
'''fc = nn.Sequential( # ❷ 분류층의 정의
       nn.Linear(1000, 1000),
       nn.ReLU(),
       nn.Dropout(), #❷ 드롭아웃층 정의
       nn.Linear(1000, 1000),
       nn.ReLU(),
       nn.Dropout(),
       nn.Linear(1000, 10),
   )


model.classifier = fc # ➍ VGG의 classifier를 덮어씀'''
model.to(device)



DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [26]:
# parameter
lr = 1e-4
epochs = 10
batch_size = 16

# model describe
#model_name = f"0311_model_{str(net)[10:21]}_(lr={lr}_epochs={epochs}_batch_size={batch_size})"


import datetime
now = str(datetime.datetime.now())[:16]
model_name = f"{now}_{str(net)[10:21]}_(lr={lr}_epochs={epochs}_batch_size={batch_size})"

print(model_name)

2023-03-11 04:47_densenet121_(lr=0.0001_epochs=10_batch_size=16)


### fit

In [23]:
from torch.optim.adam import Adam

optim = Adam(model.parameters(), lr=lr)


# 데이터로더 생성하기
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)


for epoch in range(epochs):
   iterator = tqdm.tqdm(train_loader)
   for data, label in iterator:
       # 최적화를 위해 기울기를 초기화
       optim.zero_grad()

       # 모델의 예측값
       preds = model(data.to(device))

       # 손실 계산 및 역전파
       loss = nn.CrossEntropyLoss()(preds, label.to(device))
       loss.backward()
       optim.step()

       iterator.set_description(f"epoch:{epoch+1} loss:{loss.item()}")


# save model
torch.save(model.state_dict(), model_name+".pth")

epoch:1 loss:0.011602206155657768: 100%|██████████| 716/716 [04:04<00:00,  2.93it/s]
epoch:2 loss:2.1642894744873047: 100%|██████████| 716/716 [04:06<00:00,  2.91it/s]  
epoch:3 loss:0.1884927898645401: 100%|██████████| 716/716 [04:05<00:00,  2.91it/s]  
epoch:4 loss:0.23507782816886902: 100%|██████████| 716/716 [04:04<00:00,  2.92it/s]  
epoch:5 loss:0.1390557885169983: 100%|██████████| 716/716 [04:04<00:00,  2.92it/s]   
epoch:6 loss:0.6421804428100586: 100%|██████████| 716/716 [04:03<00:00,  2.94it/s]   
epoch:7 loss:0.03201158717274666: 100%|██████████| 716/716 [04:05<00:00,  2.92it/s]  
epoch:8 loss:0.415319561958313: 100%|██████████| 716/716 [04:04<00:00,  2.92it/s]    
epoch:9 loss:0.02925320714712143: 100%|██████████| 716/716 [04:01<00:00,  2.96it/s]  
epoch:10 loss:0.21219846606254578: 100%|██████████| 716/716 [04:00<00:00,  2.97it/s]  


### Accuracy

In [25]:
print(model_name)
model.load_state_dict(torch.load(model_name+".pth", map_location=device))

num_corr = 0

with torch.no_grad():
   for data, label in valid_loader:

       output = model(data.to(device))
       preds = output.data.max(1)[1]
       corr = preds.eq(label.to(device).data).sum().item()
       num_corr += corr

   print(f"Accuracy:{num_corr/len(valid_dataset)}")

0311_model_densenet121_(lr=0.0001_epochs=10_batch_size=16)
Accuracy:0.9512578616352201


densenet121  
- (lr=0.0001_epochs=1_batch_size=16) Accuracy:0.9158805031446541
- 

resnet34
- (batch=16/epochs=10) Accuracy:0.9504716981132075  
- (batch=16/epochs=30) Accuracy:0.9559748427672956 👍


resnet50  
- (batch=16/epochs=10) Accuracy:0.9323899371069182
- (epochs=30_batch_size=32) Accuracy:0.9520440251572327

 
resnet152   
- 

### Submit

In [28]:
from torch.utils.data import Dataset
from PIL import Image

class UnlabeledDataset(Dataset):
    def __init__(self, data_path, transform=None):
        self.data_path = data_path
        self.transform = transform

        self.filelist = natsorted(os.listdir(data_path))

    def __len__(self):
        return len(self.filelist)

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_path, self.filelist[idx])
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image

In [29]:
test_dataset = UnlabeledDataset(data_path=split_test_dir, transform=transforms)

batch_size = batch_size

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=batch_size,
                                           shuffle=False)



# predict
model.load_state_dict(torch.load(model_name+".pth", map_location=device))

sub = []

with torch.no_grad():
   for data in test_loader:

       output = model(data.to(device))
       preds = output.data.max(1)[1]

       preds = preds.cpu()
       preds = preds.numpy()

       for i in range(len(preds)):
        sub.append(preds[i])

submit_df = pd.DataFrame(data=natsorted(os.listdir(split_test_dir)), columns=['image_name'])
submit_df['label'] = sub

In [30]:
submit_df.to_csv(f'{model_name}.csv')

In [31]:
model_name

'2023-03-11 04:47_densenet121_(lr=0.0001_epochs=10_batch_size=16)'