In [1]:
import os
import mmcv
import torch
from mmcv import Config
from mmseg.datasets import build_dataloader, build_dataset
from mmseg.models import build_segmentor
from mmseg.apis import single_gpu_test
from mmcv.runner import load_checkpoint
from mmcv.parallel import MMDataParallel

import pandas as pd
import numpy as np
import json

In [2]:
# Name of the checkpoint to evaluate (file name without the .pth extension).
epoch = 'best_mIoU_epoch_33'
# Work dir; the checkpoint is read from here and the submission CSV is written here.
work_dir = '/opt/ml/mmsegmentation/work_dirs/twins_svt-l_uperhead_8x2_512x512_160k_ade20k'
# Directory with the test images.
root='/opt/ml/input/mmseg/images/test'

# Load the config file; it is modified below before use.
cfg = Config.fromfile('/opt/ml/mmsegmentation/work_dirs/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py')

# Run-level overrides.
cfg.work_dir = work_dir
cfg.data.samples_per_gpu = 1

# Test dataset overrides.
cfg.data.test.test_mode = True
cfg.data.test.img_dir = root
# Optional override, currently disabled:
# cfg.data.test.pipeline[1]['img_scale'] = (512,512)

# Model / optimizer overrides.
cfg.model.train_cfg = None
cfg.optimizer_config.grad_clip = {'max_norm': 35, 'norm_type': 2}

In [3]:
# Build the test dataset from the modified config.
dataset = build_dataset(cfg.data.test)

# Non-distributed, unshuffled loader with one sample per GPU batch.
data_loader = build_dataloader(
    dataset,
    shuffle=False,
    dist=False,
    workers_per_gpu=cfg.data.workers_per_gpu,
    samples_per_gpu=1,
)

2022-05-11 01:05:03,490 - mmseg - INFO - Loaded 624 images


In [4]:
# Checkpoint file: <work_dir>/<epoch>.pth
checkpoint_path = os.path.join(cfg.work_dir, f'{epoch}.pth')

# Build the segmentor from the config and load the weights onto the CPU first.
model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
checkpoint = load_checkpoint(model, checkpoint_path, map_location='cpu')

# Attach the dataset's class names to the model.
model.CLASSES = dataset.CLASSES

# Move the model to the GPU, then wrap it for device 0.
model = model.cuda()
model = MMDataParallel(model, device_ids=[0])



load checkpoint from local path: /opt/ml/mmsegmentation/work_dirs/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/best_mIoU_epoch_33.pth


In [5]:
# Run the model over data_loader; the post-processing cells below iterate
# `output` as one prediction per image.
output = single_gpu_test(model, data_loader)

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 624/624, 8.4 task/s, elapsed: 74s, ETA:     0s

In [6]:
# Post-process the predictions into the submission format.
input_size = 512   # side length of each prediction map
output_size = 256  # side length written to the submission (presumably the required size -- confirm)

submission = pd.read_csv("/opt/ml/input/code/submission/sample_submission.csv", index_col=None)
json_dir = os.path.join("/opt/ml/input/data/test.json")

with open(json_dir, "r", encoding="utf8") as json_file:
    datas = json.load(json_file)

# Predictions are paired with the test.json entries purely by position.
# NOTE(review): this assumes the dataset yields images in test.json order -- confirm.
images = datas["images"]
if len(output) != len(images):
    raise ValueError(
        f"got {len(output)} predictions but test.json lists {len(images)} images"
    )

# Each output pixel is the max over a block x block window of the prediction.
block = input_size // output_size

# Collect the rows first and build the frame once; concatenating inside the
# loop copies the whole frame on every iteration.
records = []
for image_info, predict in zip(images, output):
    # Downsample input_size x input_size -> output_size x output_size.
    mask = predict.reshape(output_size, block, output_size, block).max(3).max(1)
    pixels = mask.astype(int).flatten()
    records.append({
        "image_id": image_info["file_name"],
        "PredictionString": ' '.join(str(e) for e in pixels.tolist()),
    })

# Fill in PredictionString.
submission = pd.concat(
    [submission, pd.DataFrame(records, columns=["image_id", "PredictionString"])],
    ignore_index=True,
)

submission.to_csv(os.path.join(cfg.work_dir, f'submission_{epoch}.csv'), index=False)

In [8]:
# Display the first rows of the submission frame as a quick visual check.
submission.head()

Unnamed: 0,image_id,PredictionString
0,batch_01_vt/0021.jpg,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
1,batch_01_vt/0028.jpg,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
2,batch_01_vt/0031.jpg,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
3,batch_01_vt/0032.jpg,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
4,batch_01_vt/0076.jpg,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...


In [19]:
# Post-process the predictions into the submission format and, in addition,
# write a CSV with the full-resolution predictions for checking.
# NOTE(review): plt, Image and palette are not used in this cell; they are kept
# because other cells may rely on them.
import matplotlib.pyplot as plt
from PIL import Image

input_size = 512   # side length of each prediction map
output_size = 256  # side length written to the submission

submission = pd.read_csv("/opt/ml/mmsegmentation/_MyModel/submission/sample_submission.csv", index_col=None)
json_dir = os.path.join("/opt/ml/input/data/test.json")

submission_check = pd.read_csv("/opt/ml/mmsegmentation/_MyModel/submission/sample_submission.csv", index_col=None)
check_dir = '/opt/ml/input/mmseg/test_check'
os.makedirs(check_dir, exist_ok=True)
os.makedirs(os.path.join(check_dir, 'csv'), exist_ok=True)
os.makedirs(os.path.join(check_dir, 'image'), exist_ok=True)

with open(json_dir, "r", encoding="utf8") as json_file:
    datas = json.load(json_file)

palette = [[0,0,0],[192,0,128],[0,128,192],[0,128,64],[128,0,0],[64,0,128],[64,0,192],[192,128,64],[192,192,128],[64,64,128],[128,0,192]]

# Predictions are paired with the test.json entries purely by position.
# NOTE(review): this assumes the dataset yields images in test.json order -- confirm.
images = datas["images"]
if len(output) != len(images):
    raise ValueError(
        f"got {len(output)} predictions but test.json lists {len(images)} images"
    )

# Each output pixel is the max over a block x block window of the prediction.
block = input_size // output_size

# Collect rows and build each frame once; concatenating inside the loop
# copies the whole frame on every iteration.
records = []
records_check = []
for image_info, predict in zip(images, output):
    file_name = image_info["file_name"]
    predict = predict.reshape(input_size, input_size)

    # Downsample input_size x input_size -> output_size x output_size.
    mask = predict.reshape(output_size, block, output_size, block).max(3).max(1)
    pixels = mask.astype(int).flatten()
    records.append({
        "image_id": file_name,
        "PredictionString": ' '.join(str(e) for e in pixels.tolist()),
    })

    # Full-resolution prediction, flattened row by row.
    pixels_check = predict.astype(int).flatten()
    records_check.append({
        "image_id": file_name,
        "PredictionString": ' '.join(str(e) for e in pixels_check.tolist()),
    })

# Fill in PredictionString for both frames.
columns = ["image_id", "PredictionString"]
submission = pd.concat(
    [submission, pd.DataFrame(records, columns=columns)],
    ignore_index=True,
)
submission_check = pd.concat(
    [submission_check, pd.DataFrame(records_check, columns=columns)],
    ignore_index=True,
)

submission.to_csv(os.path.join(cfg.work_dir, f'submission_{epoch}.csv'), index=False)
submission_check.to_csv(os.path.join(check_dir, f'csv/submission_{epoch}.csv'), index=False)

In [None]:
import os
from collections import Counter

import pandas as pd
from tqdm import tqdm

# Put every output CSV to be ensembled into one folder.
# File names start with the score, so a reverse sort puts the best model first.
output_dir = './output_csv'
# Only keep .csv entries: the folder may also contain things such as
# .ipynb_checkpoints, which pd.read_csv cannot read.
output_list = sorted(
    (name for name in os.listdir(output_dir) if name.endswith('.csv')),
    reverse=True,
)
if not output_list:
    raise FileNotFoundError(f'no .csv files found in {output_dir}')

# Load each CSV into a DataFrame. A comprehension is used so that no loop
# variable (the original used `output`) leaks into the notebook namespace and
# overwrites the inference results of an earlier cell.
df_list = [pd.read_csv(os.path.join(output_dir, csv_name)) for csv_name in output_list]


def hard_vote(votes):
    """Return the winning label for one pixel.

    Args:
        votes: one label per model, ordered from the best model to the worst.

    Returns:
        The most frequent label. On a tie, the label of the first (best)
        model whose vote is among the most frequent labels.
    """
    counts = Counter(votes)
    top = max(counts.values())
    # The first vote that reaches the top count is the unique winner when
    # there is no tie, and the best model's pick among the tied labels otherwise.
    for label in votes:
        if counts[label] == top:
            return label


# Submission dataframe.
submission = pd.DataFrame()
submission['image_id'] = df_list[0]['image_id']

# Pixel-wise hard voting.
PredictionString = []

for idx in tqdm(range(len(df_list[0]))):
    # Labels predicted by each model for this image, best model first.
    pixel_list = [df['PredictionString'][idx].split() for df in df_list]

    n_pixels = len(pixel_list[0])
    if any(len(pixels) != n_pixels for pixels in pixel_list):
        raise ValueError(
            f'row {idx}: the CSV files disagree on the number of pixels'
        )

    # zip(*pixel_list) yields, for every pixel, the votes in model order.
    PredictionString.append(
        ' '.join(hard_vote(votes) for votes in zip(*pixel_list))
    )

# Write the submission CSV.
submission['PredictionString'] = PredictionString
submission.to_csv('./hard_voted_output.csv', index=False)