In [12]:
import torch # 파이토치 기본 라이브러리

# torchvision : 데이터셋, 모델 아키텍처, 컴퓨터 비전의 이미지 변환 기능 제공
from torchvision import datasets # torchvision에서 제공하는 데이터셋
from torchvision import transforms # 이미지 변환기능을 제공하는 패키지

# torch.utils.data : 파이토치 데이터 로딩 유틸리티
from torch.utils.data import DataLoader # 모델 훈련에 사용할 수 있는 미니 배치 구성하고
                                        # 매 epoch마다 데이터를 샘플링, 병렬처리 등의 일을 해주는 함수

from torch.utils.data import random_split

import numpy as np
import matplotlib.pyplot as plt

from torch.utils.tensorboard import SummaryWriter
from copy import deepcopy
from sklearn.model_selection import train_test_split


In [13]:
!nvidia-smi

Mon Apr 17 05:27:48 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [14]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [19]:
import glob
all_img_list = glob.glob('/kaggle/input/doabe/open/train/*/*')

In [20]:
test_img_list = glob.glob('/kaggle/input/doabe/open/test/*')

In [21]:
name_list = list(set([i.split('/')[-2] for i in all_img_list]))

In [174]:
name_list

['오타공',
 '피스',
 '석고수정',
 '가구수정',
 '울음',
 '틈새과다',
 '터짐',
 '몰딩수정',
 '이음부불량',
 '걸레받이수정',
 '오염',
 '면불량',
 '반점',
 '녹오염',
 '창틀,문틀수정',
 '꼬임',
 '들뜸',
 '곰팡이',
 '훼손']

In [23]:
label_dict = {v:i for i, v in enumerate(name_list)}
label_dict

{'오타공': 0,
 '피스': 1,
 '석고수정': 2,
 '가구수정': 3,
 '울음': 4,
 '틈새과다': 5,
 '터짐': 6,
 '몰딩수정': 7,
 '이음부불량': 8,
 '걸레받이수정': 9,
 '오염': 10,
 '면불량': 11,
 '반점': 12,
 '녹오염': 13,
 '창틀,문틀수정': 14,
 '꼬임': 15,
 '들뜸': 16,
 '곰팡이': 17,
 '훼손': 18}

In [24]:
train_list, valid_list = train_test_split(all_img_list, test_size= 0.2, random_state=42)

In [25]:
from torch.utils.data import Dataset
import glob
from PIL import Image # Image.open(path)

class Dobaehaja(Dataset):
    def __init__(self, img_list, transform):
        self.filepaths = img_list
        self.transform = transform


    def __len__(self):  # len(MyDataset)
        return len(self.filepaths)

    def __getitem__(self, index): # MyDataset[index]

        # (1) image 준비
        image_path = self.filepaths
        img_path = self.filepaths[index]
        image = Image.open(img_path)

        # https://pytorch.org/vision/stable/transforms.html
        # 이미지 변환 (파이토치 transforms 에서 제공하는 변환기들은 PIL, tensor 타입을 기대)
        transformed_image = self.transform(image) # Resize -> To Tensor

        # # (2) label 준비
        dir_label = img_path.split('/')[-2]
        name_list = list(set([i.split('/')[-2] for i in self.filepaths]))
        label_dict = {v:i for i, v in enumerate(name_list)}
        label = label_dict[dir_label]
        

        return transformed_image, label

In [26]:
class Dobaehaja_test(Dataset):
    def __init__(self, img_list, transform):
        self.filepaths = img_list
        self.transform = transform


    def __len__(self):  # len(MyDataset)
        return len(self.filepaths)

    def __getitem__(self, index): # MyDataset[index]

        # (1) image 준비
        image_path = self.filepaths
        img_path = self.filepaths[index]
        image = Image.open(img_path)

        # https://pytorch.org/vision/stable/transforms.html
        # 이미지 변환 (파이토치 transforms 에서 제공하는 변환기들은 PIL, tensor 타입을 기대)
        transformed_image = self.transform(image) # Resize -> To Tensor

        # # # (2) label 준비
        # dir_label = img_path.split('/')[-2]
        # name_list = list(set([i.split('/')[-2] for i in self.filepaths]))
        # label_dict = {v:i for i, v in enumerate(name_list)}
        # label = label_dict[dir_label]
        

        return transformed_image

In [27]:
from torchvision import datasets # torchvision에서 제공하는 데이터셋
from torchvision import transforms # 이미지 변환기능을 제공하는 패키지
transform = transforms.Compose([transforms.Resize([224, 224]), 
                                transforms.RandomHorizontalFlip(p=0.3),
                                transforms.ToTensor()])

In [28]:
dataset = Dobaehaja(img_list = all_img_list, transform=transform)

In [None]:
train

In [176]:
train_set = Dobaehaja(img_list = train_list, transform=transform)
valid_set = Dobaehaja(img_list = valid_list, transform=transform)

In [179]:
train_set[0][0]

tensor([[[0.7020, 0.6784, 0.6863,  ..., 0.5608, 0.5647, 0.5608],
         [0.6941, 0.6941, 0.6745,  ..., 0.5608, 0.5647, 0.5608],
         [0.6745, 0.6902, 0.6784,  ..., 0.5608, 0.5647, 0.5294],
         ...,
         [0.7647, 0.7529, 0.7529,  ..., 0.6157, 0.6235, 0.6078],
         [0.7529, 0.7490, 0.7569,  ..., 0.6196, 0.6157, 0.6000],
         [0.7373, 0.7529, 0.7529,  ..., 0.6235, 0.6000, 0.6039]],

        [[0.7098, 0.6863, 0.6941,  ..., 0.5373, 0.5451, 0.5412],
         [0.7020, 0.7020, 0.6824,  ..., 0.5412, 0.5451, 0.5412],
         [0.6824, 0.6980, 0.6863,  ..., 0.5412, 0.5451, 0.5098],
         ...,
         [0.7608, 0.7490, 0.7490,  ..., 0.6196, 0.6275, 0.6118],
         [0.7490, 0.7451, 0.7529,  ..., 0.6235, 0.6196, 0.6039],
         [0.7333, 0.7490, 0.7490,  ..., 0.6275, 0.6039, 0.6078]],

        [[0.7059, 0.6824, 0.6902,  ..., 0.5137, 0.5216, 0.5176],
         [0.6980, 0.6980, 0.6784,  ..., 0.5137, 0.5216, 0.5176],
         [0.6784, 0.6941, 0.6863,  ..., 0.5176, 0.5216, 0.

In [30]:
test_set = Dobaehaja_test(img_list = test_img_list, transform=transform)

In [31]:
len(test_set)

792

In [32]:
print(type(train_set), len(train_set))
print(type(valid_set), len(valid_set))

<class '__main__.Dobaehaja'> 2765
<class '__main__.Dobaehaja'> 692


In [33]:
print(train_set[0][0].size(), train_set[0][1])

torch.Size([3, 224, 224]) 9


## 3. 데이터 적재

In [34]:
batch_size = 4 # 16 -> 4
# dataloader = DataLoader(데이터셋, 배치사이즈, 셔플여부.....)
trainloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) # 훈련용 50000개의 데이터를 100개씩 준비
validloader = DataLoader(valid_set, batch_size=batch_size, shuffle=False) # 검증용 10000개의 데이터를 100개씩 준비
testloader = DataLoader(test_set, batch_size=batch_size, shuffle=False) # 테스트용 10000개의 데이터를 100개씩 준비

In [35]:
print(type(trainloader), len(trainloader))
print(type(validloader), len(validloader))
print(type(testloader), len(testloader))

<class 'torch.utils.data.dataloader.DataLoader'> 692
<class 'torch.utils.data.dataloader.DataLoader'> 173
<class 'torch.utils.data.dataloader.DataLoader'> 198


In [36]:
train_iter = iter(trainloader)
images, labels = next(train_iter)
images.size(), labels.size()

(torch.Size([4, 3, 224, 224]), torch.Size([4]))

## 4. 모델 생성

In [9]:
import torch.nn as nn # 파이토치에서 제공하는 다양한 계층 (Linear Layer, ....)
import torch.optim as optim # 옵티마이저 (경사하강법...)
import torch.nn.functional as F # 파이토치에서 제공하는 함수(활성화 함수...)

In [46]:
import torchvision.models as models

# https://github.com/pytorch/vision/tree/6db1569c89094cf23f3bc41f79275c45e9fcb3f3/torchvision/models

model = models.vgg19_bn(weights=True)

In [47]:
for parameter in model.parameters():
  print(parameter.requires_grad)
# parameter들의 requires_grad 속성이 True라는 것은 
# 오차 역전파를 통해 gradient를 전달할 수 있는 상태(즉, 학습이 가능한 상태태)  

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [48]:
for parameter in model.parameters():
  parameter.requires_grad = False # 학습이 안되게 고정

for parameter in model.classifier.parameters():
  parameter.requires_grad = True # 학습이 가능한 상태

In [49]:
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [50]:
model.classifier[3] = nn.Linear(in_features=4096, out_features=512, bias=True)
model.classifier[6] = nn.Linear(in_features=512, out_features=19, bias=True)
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=512, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=512, out_features=19, bias=True)
)

In [51]:
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [52]:
out = model(images.to(device))
out.shape

torch.Size([4, 19])

## 5. 모델 컴파일

In [54]:
learning_rate = 0.0001
# 손실함수
loss_fn = nn.CrossEntropyLoss()

# 옵티마이저(최적화함수, 예:경사하강법)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# 규제의 강도 설정 weight_decay
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)

# Learning Rate Schedule
# https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html

# 모니터링하고 있는 값(예:valid_loss)의 최소값(min) 또는 최대값(max) patience 기간동안 줄어들지 않을 때(OnPlateau) lr에 factor(0.1)를 곱해주는 전략
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, verbose=True)

In [55]:
pip install torchsummary

[0mNote: you may need to restart the kernel to use updated packages.


In [56]:
from torchsummary import summary

In [57]:
# summary(모델, (채널, 인풋사이즈))
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
            Conv2d-4         [-1, 64, 224, 224]          36,928
       BatchNorm2d-5         [-1, 64, 224, 224]             128
              ReLU-6         [-1, 64, 224, 224]               0
         MaxPool2d-7         [-1, 64, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]          73,856
       BatchNorm2d-9        [-1, 128, 112, 112]             256
             ReLU-10        [-1, 128, 112, 112]               0
           Conv2d-11        [-1, 128, 112, 112]         147,584
      BatchNorm2d-12        [-1, 128, 112, 112]             256
             ReLU-13        [-1, 128, 112, 112]               0
        MaxPool2d-14          [-1, 128,

## 6. 모델훈련

In [60]:
def validate(model, validloader, loss_fn):
    total = 0   
    correct = 0
    valid_loss = 0
    valid_accuracy = 0

  # 전방향 예측을 구할 때는 gradient가 필요가 없음음
    with torch.no_grad():
        for images, labels in validloader: # 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)      
            # images, labels : (torch.Size([16, 3, 224, 224]), torch.Size([16]))
            # 0. Data를 GPU로 보내기
            images, labels = images.to(device), labels.to(device)

            # 1. 입력 데이터 준비
            # not Flatten !!
            # images.resize_(images.size()[0], 784)

            # 2. 전방향(Forward) 예측
            logit = model(images) # 예측 점수
            _, preds = torch.max(logit, 1) # 배치에 대한 최종 예측
            # preds = logit.max(dim=1)[1] 
            correct += int((preds == labels).sum()) # 배치 중 맞은 것의 개수가 correct에 누적
            total += labels.shape[0] # 배치 사이즈만큼씩 total에 누적

            loss = loss_fn(logit, labels)
            valid_loss += loss.item() # tensor에서 값을 꺼내와서, 배치의 loss 평균값을 valid_loss에 누적

        valid_accuracy = correct / total
  
    return valid_loss, valid_accuracy

In [63]:
writer = SummaryWriter()

def train_loop(model, trainloader, loss_fn, epochs, optimizer):  
    steps = 0
    steps_per_epoch = len(trainloader) 
    min_loss = 1000000
    max_accuracy = 0
    trigger = 0
    patience = 7 

    for epoch in range(epochs):
        model.train() # 훈련 모드
        train_loss = 0
        for images, labels in trainloader: # 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)
            steps += 1
            # images, labels : (torch.Size([16, 3, 224, 224]), torch.Size([16]))
            # 0. Data를 GPU로 보내기
            images, labels = images.to(device), labels.to(device)

            # 1. 입력 데이터 준비
            # not Flatten !!
            # images.resize_(images.shape[0], 784) 

            # 2. 전방향(forward) 예측
            predict = model(images) # 예측 점수
            loss = loss_fn(predict, labels) # 예측 점수와 정답을 CrossEntropyLoss에 넣어 Loss값 반환

            # 3. 역방향(backward) 오차(Gradient) 전파
            optimizer.zero_grad() # Gradient가 누적되지 않게 하기 위해
            loss.backward() # 모델파리미터들의 Gradient 전파

            # 4. 경사 하강법으로 모델 파라미터 업데이트
            optimizer.step() # W <- W -lr*Gradient

            train_loss += loss.item()
            if (steps % steps_per_epoch) == 0 : 
                model.eval() # 평가 모드 : 평가에서 사용하지 않을 계층(배치 정규화, 드롭아웃)들을 수행하지 않게 하기 위해서
                valid_loss, valid_accuracy = validate(model, validloader, loss_fn)

                # tensorboard 시각화를 위한 로그 이벤트 등록
                writer.add_scalar('Train Loss', train_loss/len(trainloader), epoch+1)
                writer.add_scalar('Valid Loss', valid_loss/len(validloader), epoch+1)
                writer.add_scalars('Train Loss and Valid Loss',
                                  {'Train' : train_loss/len(trainloader),
                                    'Valid' : valid_loss/len(validloader)}, epoch+1)
                writer.add_scalar('Valid Accuracy', valid_accuracy, epoch+1)
                # -------------------------------------------

                print('Epoch : {}/{}.......'.format(epoch+1, epochs),            
                      'Train Loss : {:.3f}'.format(train_loss/len(trainloader)), 
                      'Valid Loss : {:.3f}'.format(valid_loss/len(validloader)), 
                      'Valid Accuracy : {:.3f}'.format(valid_accuracy)            
                      )
        
                # Best model 저장    
                # option 1 : valid_loss 모니터링
                # if valid_loss < min_loss: # 바로 이전 epoch의 loss보다 작으면 저장하기
                #   min_loss = valid_loss
                #   best_model_state = deepcopy(model.state_dict())          
                #   torch.save(best_model_state, 'best_checkpoint.pth')     

                # option 2 : valid_accuracy 모니터링      
                if valid_accuracy > max_accuracy : # 바로 이전 epoch의 accuracy보다 크면 저장하기
                    max_accuracy = valid_accuracy
                    best_model_state = deepcopy(model.state_dict())          
                    torch.save(best_model_state, 'best_checkpoint.pth')  
                # -------------------------------------------

                # Early Stopping (조기 종료)
                if valid_loss > min_loss: # valid_loss가 min_loss를 갱신하지 못하면
                    trigger += 1
                    print('trigger : ', trigger)
                    if trigger > patience:
                        print('Early Stopping !!!')
                        print('Training loop is finished !!')
                        writer.flush()   
                        return
                    else:
                        trigger = 0
                        min_loss = valid_loss
                # -------------------------------------------

                # Learning Rate Scheduler
                scheduler.step(valid_loss)
                # -------------------------------------------

    writer.flush()
    return  

In [64]:
epochs = 55
%time train_loop(model, trainloader, loss_fn, epochs, optimizer)
writer.close()

Epoch : 1/55....... Train Loss : 1.710 Valid Loss : 1.670 Valid Accuracy : 0.523
Epoch : 2/55....... Train Loss : 1.290 Valid Loss : 1.747 Valid Accuracy : 0.553
Epoch : 3/55....... Train Loss : 1.041 Valid Loss : 1.628 Valid Accuracy : 0.618
Epoch : 4/55....... Train Loss : 0.918 Valid Loss : 1.795 Valid Accuracy : 0.588
Epoch : 5/55....... Train Loss : 0.795 Valid Loss : 1.764 Valid Accuracy : 0.613
Epoch : 6/55....... Train Loss : 0.722 Valid Loss : 1.904 Valid Accuracy : 0.614
Epoch : 7/55....... Train Loss : 0.605 Valid Loss : 1.835 Valid Accuracy : 0.637
Epoch : 8/55....... Train Loss : 0.577 Valid Loss : 2.008 Valid Accuracy : 0.621
Epoch 00008: reducing learning rate of group 0 to 1.0000e-05.
Epoch : 9/55....... Train Loss : 0.396 Valid Loss : 2.017 Valid Accuracy : 0.627
Epoch : 10/55....... Train Loss : 0.348 Valid Loss : 1.945 Valid Accuracy : 0.645
Epoch : 11/55....... Train Loss : 0.328 Valid Loss : 2.028 Valid Accuracy : 0.647
Epoch : 12/55....... Train Loss : 0.298 Valid

## 7. 모델 예측

In [67]:
test_iter = iter(testloader)
images = next(test_iter)


images = images.to(device)
print(images.size())
rnd_idx = 2
image = images[rnd_idx:rnd_idx+1]
# key_pt = key_pts[rnd_idx:rnd_idx+1]

with torch.no_grad():
    model.eval() # 배치 정규화가 들어가면서 전방향 연산이 학습시와는 달라지므로 반드시 eval() 넣어야 함
    logit = model(image)


torch.Size([4, 3, 224, 224])


In [68]:
logit

tensor([[  0.3577,   5.7451,  -1.4568, -11.2089,  -7.0485,  -9.7361,  -8.1101,
           1.9421,  -7.2599,  -6.8746,  -2.3503,   0.1004, -13.6727,   1.3665,
          -9.5940,  -8.1931, -10.4324, -11.2269,   4.0575]], device='cuda:0')

In [70]:
model.eval()
with torch.no_grad():
    logit = model(images[rnd_idx].unsqueeze(0)) # model.forward()에서는 배치가 적용된 4차원 입력 기대

pred = logit.max(dim=1)[1]
print(pred == labels[rnd_idx]) # True : 잘 예측

tensor([False], device='cuda:0')


In [73]:
images[rnd_idx].shape

torch.Size([3, 224, 224])

In [77]:
model.eval()
with torch.no_grad():
    logit = model(images[rnd_idx].unsqueeze(0)) # model.forward()에서는 배치가 적용된 4차원 입력 기대

pred = logit.max(dim=1)[1]
print(pred == labels[rnd_idx])

tensor([False], device='cuda:0')


## 8. 모델 평가

In [103]:
pred_list = []
def predict_batch(model, testloader, loss_fn):
    total = 0   
    correct = 0
    test_loss = 0
    test_accuracy = 0

  # 전방향 예측을 구할 때는 gradient가 필요가 없음음
    with torch.no_grad():
        for images in testloader: # 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)
            # 0. Data를 GPU로 보내기
            images = images.to(device)
            # 1. 입력 데이터 준비
            # not Flatten
            # images.resize_(images.size()[0], 784)

            # 2. 전방향(Forward) 예측
            logit = model(images) # 예측 점수
            
            _, preds = torch.max(logit, 1) # 배치에 대한 최종 예측
            pred_list.append(list(preds.to(device = 'cpu').item()))
            # preds = logit.max(dim=1)[1] 
            
    return pred_list
model.eval()
semi_final_pred = predict_batch(model, testloader, loss_fn)  

ValueError: only one element tensors can be converted to Python scalars

In [143]:
semi_final_pred[1][2].item()

18

In [144]:
final_pred = []
for i in range(len(semi_final_pred)):
    for j in range(4):
        final_pred.append(semi_final_pred[i][j].item())

In [146]:
len(final_pred)

792

In [149]:
import pandas as pd

In [151]:
test = pd.read_csv('/kaggle/input/doabe/open/test.csv')

In [152]:
test

Unnamed: 0,id,img_path
0,TEST_000,./test/000.png
1,TEST_001,./test/001.png
2,TEST_002,./test/002.png
3,TEST_003,./test/003.png
4,TEST_004,./test/004.png
...,...,...
787,TEST_787,./test/787.png
788,TEST_788,./test/788.png
789,TEST_789,./test/789.png
790,TEST_790,./test/790.png


In [147]:
semi_final_pred[1].index(9)

3

In [118]:
a = [[1, 2, 3, 4], [5,6,7,8]]

In [139]:
import re
numbers = re.findall(r'\d+', semi_final_pred[1][2])
print(numbers)

TypeError: expected string or bytes-like object

In [153]:
submit = pd.read_csv('/kaggle/input/doabe/open/sample_submission.csv')

In [155]:
submit['label'] = 

Unnamed: 0,id,label
0,TEST_000,훼손
1,TEST_001,훼손
2,TEST_002,훼손
3,TEST_003,훼손
4,TEST_004,훼손
...,...,...
787,TEST_787,훼손
788,TEST_788,훼손
789,TEST_789,훼손
790,TEST_790,훼손


In [156]:
submit['label'] = final_pred

In [158]:
submit

Unnamed: 0,id,label
0,TEST_000,9
1,TEST_001,18
2,TEST_002,1
3,TEST_003,1
4,TEST_004,10
...,...,...
787,TEST_787,10
788,TEST_788,18
789,TEST_789,18
790,TEST_790,15


In [159]:
label_dict = {i:v for i, v in enumerate(name_list)}
label_dict

{0: '오타공',
 1: '피스',
 2: '석고수정',
 3: '가구수정',
 4: '울음',
 5: '틈새과다',
 6: '터짐',
 7: '몰딩수정',
 8: '이음부불량',
 9: '걸레받이수정',
 10: '오염',
 11: '면불량',
 12: '반점',
 13: '녹오염',
 14: '창틀,문틀수정',
 15: '꼬임',
 16: '들뜸',
 17: '곰팡이',
 18: '훼손'}

In [164]:
label_dict[0]

'오타공'

In [165]:
final_pred_kr = []
for i in final_pred:
    final_pred_kr.append(label_dict[i])

In [170]:
submit['label'] = final_pred

In [171]:
submit

Unnamed: 0,id,label
0,TEST_000,9
1,TEST_001,18
2,TEST_002,1
3,TEST_003,1
4,TEST_004,10
...,...,...
787,TEST_787,10
788,TEST_788,18
789,TEST_789,18
790,TEST_790,15


In [173]:
submit

Unnamed: 0,id,label
0,TEST_000,9
1,TEST_001,18
2,TEST_002,1
3,TEST_003,1
4,TEST_004,10
...,...,...
787,TEST_787,10
788,TEST_788,18
789,TEST_789,18
790,TEST_790,15


In [169]:
submit.to_csv('./baseline_submit.csv', index=False)

In [182]:
submit.loc[submit['label'] == 0, 'label'] = '가구수정'
submit.loc[submit['label'] == 1, 'label'] = '걸레받이수정'
submit.loc[submit['label'] == 2, 'label'] = '곰팡이'
submit.loc[submit['label'] == 3, 'label'] = '꼬임'
submit.loc[submit['label'] == 4, 'label'] = '녹오염'
submit.loc[submit['label'] == 5, 'label'] = '들뜸'
submit.loc[submit['label'] == 6, 'label'] = '면불량'
submit.loc[submit['label'] == 7, 'label'] = '몰딩수정'
submit.loc[submit['label'] == 8, 'label'] = '반점'
submit.loc[submit['label'] == 9, 'label'] = '석고수정'
submit.loc[submit['label'] == 10, 'label'] = '오염'
submit.loc[submit['label'] == 11, 'label'] = '오타공'
submit.loc[submit['label'] == 12, 'label'] = '울음'
submit.loc[submit['label'] == 13, 'label'] = '이음부불량'
submit.loc[submit['label'] == 14, 'label'] = '창틀,문틀수정'
submit.loc[submit['label'] == 15, 'label'] = '터짐'
submit.loc[submit['label'] == 16, 'label'] = '틈새과다'
submit.loc[submit['label'] == 17, 'label'] = '피스'
submit.loc[submit['label'] == 18, 'label'] = '훼손'

In [184]:
submit

Unnamed: 0,id,label
0,TEST_000,석고수정
1,TEST_001,훼손
2,TEST_002,걸레받이수정
3,TEST_003,걸레받이수정
4,TEST_004,오염
...,...,...
787,TEST_787,오염
788,TEST_788,훼손
789,TEST_789,훼손
790,TEST_790,터짐


In [186]:
submit.to_csv('./baseline_submit2.csv', index=False)