# 대회 데이터셋 시각화 작업

In [2]:
import importlib
import labs

importlib.reload(labs)
from labs import *

# 자기 할당 작업 가져오기

In [35]:
def get_my_turn(csv_path, idx=0, chunk_size=100, encoding='utf-8'):
    """Return one fixed-size chunk of the sorted ``ID`` column of a CSV file.

    Every row's ``ID`` value is collected, the values are sorted as strings,
    and the slice ``[idx * chunk_size, (idx + 1) * chunk_size)`` is returned.

    Args:
        csv_path: Path of the CSV file; it must have a header row containing
            an ``ID`` column.
        idx: Zero-based index of the chunk to return.
        chunk_size: Number of IDs per chunk.
        encoding: Text encoding used to read the file.

    Returns:
        A list of at most ``chunk_size`` ID strings. The list is shorter for
        the final partial chunk and empty when ``idx`` is past the end.

    Raises:
        KeyError: If a row has no ``ID`` column.
        OSError: If the file cannot be opened.
    """
    # newline='' hands raw line endings to the csv module, as its documentation
    # requires; without it, line breaks embedded in quoted fields are rewritten
    # by universal-newline translation before the parser sees them.
    with open(csv_path, mode='r', encoding=encoding, newline='') as f:
        targets = sorted(row['ID'] for row in csv.DictReader(f))

    start_idx = idx * chunk_size
    return targets[start_idx:start_idx + chunk_size]

In [111]:
# Fetch chunk 3 of the sorted ID list; with the default chunk_size=100 this is
# the slice [300, 400) of the IDs found in tts.csv.
targets = get_my_turn('./tts.csv', idx=3)

In [112]:
targets[:3]

['1a6b433e34bc9a9d.jpg', '1a6c830fc871d21a.jpg', '1a85c97dc8b3f6a1.jpg']

# 시각화 레이블

In [113]:
import ipywidgets as widgets
from IPython.display import display, clear_output

In [142]:
class Inspector:
    """Jupyter widget UI for labelling a list of images one at a time.

    The UI consists of a line showing the currently stored label, Previous/Next
    navigation buttons, one button per class (plus an "unknown" button) and an
    output area in which the current image is rendered.

    Answers are kept in ``label_answers``, a list parallel to ``image_paths``.
    Each entry is either ``None`` (not labelled yet) or an
    ``(image_path, label_id)`` pair; a label id of ``-1`` means "unknown / hard
    to tell".

    Relies on ``os``, ``widgets`` (ipywidgets), ``display``/``clear_output``
    (IPython.display) and ``Image`` being available in the enclosing namespace.
    ``Image`` is assumed to be ``PIL.Image`` -- TODO confirm against ``labs``.
    """

    # Label id stored when the annotator cannot decide on a class.
    UNKNOWN_LABEL_ID = -1
    # Text shown for UNKNOWN_LABEL_ID in the "selected label" line.
    UNKNOWN_LABEL_TEXT = "모름 (어려움)"
    # Number of label buttons laid out per row.
    BUTTONS_PER_ROW = 7

    def __init__(self, target_names, ds_dir_path, label_dict, label_trans, prev_answers=None):
        """Create the widgets and the answer list.

        Args:
            target_names: Image file names, joined onto ``ds_dir_path``.
            ds_dir_path: Directory containing the images.
            label_dict: Mapping of class name -> label id.
            label_trans: Mapping of class name -> text shown on the buttons;
                classes missing from it fall back to the class name.
            prev_answers: Optional answer list from an earlier session, one
                entry per target. It is used as-is (not copied), so the caller
                sees new answers in the same list object.

        Raises:
            ValueError: If ``prev_answers`` does not have exactly one entry per
                target; such a list would otherwise fail later with an
                ``IndexError`` or silently pair answers with the wrong images.
        """
        self.image_paths = [os.path.join(ds_dir_path, n) for n in target_names]
        self.label_dict = label_dict
        self.label_trans = label_trans

        self.current_index = 0
        if prev_answers is None:
            self.label_answers = [None] * len(self.image_paths)
        else:
            if len(prev_answers) != len(self.image_paths):
                raise ValueError(
                    f"prev_answers has {len(prev_answers)} entries but there are "
                    f"{len(self.image_paths)} target images"
                )
            self.label_answers = prev_answers

        self.buttons = []
        self.next_button = widgets.Button(description='Next')
        self.prev_button = widgets.Button(description='Previous')
        self.selected_label_widget = widgets.HTML()

        self.out = widgets.Output()
        self._setup_buttons()

    def _make_label_button(self, description, label_id):
        """Build one styled label button that stores ``label_id`` when clicked."""
        button = widgets.Button(
            description=description,
            layout=widgets.Layout(width='auto', min_width='80px', max_width='300px', margin='3px 3px 3px 3px'),
            style={'button_color': '#f0f0f0', 'font_size': '18px'},
        )
        button.on_click(self._make_label_handler(label_id))
        return button

    def _setup_buttons(self):
        """Create the label buttons and wire up the navigation buttons."""
        self.buttons = [
            self._make_label_button(f"{self.label_trans.get(label, label)} ({label_id})", label_id)
            for label, label_id in self.label_dict.items()
        ]

        # Extra button for "unknown / hard to tell".
        self.buttons.append(self._make_label_button('모름/어려움 (-1)', self.UNKNOWN_LABEL_ID))

        self.next_button.layout = widgets.Layout(width='80px', margin='0 0 10px 10px')
        self.prev_button.layout = widgets.Layout(width='80px', margin='0 10px 10px 0')
        self.next_button.on_click(self._on_next)
        self.prev_button.on_click(self._on_prev)

    def _make_label_handler(self, label_id):
        """Return a click callback that records ``label_id`` for the current image."""
        def handler(b):
            if not self.image_paths:
                # Nothing to label; avoid an IndexError on an empty target list.
                return
            index = self.current_index
            self.label_answers[index] = (self.image_paths[index], label_id)

            kor_label = self._describe_label(label_id)
            self.selected_label_widget.value = f"<b>선택된 레이블:</b> {kor_label} ({label_id})"
            # As in the original UI, the confirmation message replaces the
            # content of the output area (including the image).
            with self.out:
                clear_output(wait=True)
                print(f"Image {index+1}/{len(self.image_paths)} 레이블: '{kor_label}' ({label_id})")

        return handler

    def _get_label_name(self, label_id):
        """Return the class name mapped to ``label_id``, or ``str(label_id)`` if none is."""
        for k, v in self.label_dict.items():
            if v == label_id:
                return k
        return str(label_id)

    def _describe_label(self, label_id):
        """Return the display text for ``label_id``.

        Shared by the click handler and the image refresh so that a stored
        "unknown" answer reads the same when it is revisited as when it was
        first selected.
        """
        if label_id == self.UNKNOWN_LABEL_ID:
            return self.UNKNOWN_LABEL_TEXT
        label_name = self._get_label_name(label_id)
        return self.label_trans.get(label_name, label_name)

    def _stored_label_id(self, index):
        """Return the label id stored for image ``index``, or ``None`` if unlabelled.

        Older sessions stored a bare label id instead of a ``(path, label_id)``
        pair; such entries are upgraded in place. Pairs stored as lists (for
        example after a JSON round trip) are read as pairs rather than being
        mistaken for a bare id.
        """
        item = self.label_answers[index]
        if item is None:
            return None
        if isinstance(item, (tuple, list)):
            return item[1]
        self.label_answers[index] = (self.image_paths[index], item)
        return item

    def _display_image(self):
        """Render the current image into the output area and refresh the label line."""
        if not self.image_paths:
            with self.out:
                clear_output(wait=True)
                print("No images to inspect.")
            self.selected_label_widget.value = "<b>선택된 레이블:</b> 없음"
            return

        index = self.current_index
        path = self.image_paths[index]
        with self.out:
            clear_output(wait=True)
            print(f"Image {index+1} / {len(self.image_paths)}")
            if os.path.exists(path):
                # Close the file once it has been rendered so that browsing many
                # images does not leave a file handle open per image.
                with Image.open(path) as img:
                    display(img)
            else:
                print(f"Image file {path} does not exist.")

        label_id = self._stored_label_id(index)
        if label_id is None:
            self.selected_label_widget.value = "<b>선택된 레이블:</b> 없음"
        else:
            kor_label = self._describe_label(label_id)
            self.selected_label_widget.value = f"<b>선택된 레이블:</b> {kor_label} ({label_id})"

    def _on_next(self, b):
        """Advance to the next image, if there is one."""
        if self.current_index < len(self.image_paths) - 1:
            self.current_index += 1
            # The widgets are already on screen; updating their state is enough.
            # Re-displaying the whole layout here would emit a duplicate UI on
            # every click.
            self._display_image()
        else:
            with self.out:
                print("This is the last image.")

    def _on_prev(self, b):
        """Go back to the previous image, if there is one."""
        if self.current_index > 0:
            self.current_index -= 1
            self._display_image()
        else:
            with self.out:
                print("This is the first image.")

    def _show(self):
        """Render the current image and display the complete widget layout once."""
        self._display_image()
        per_row = self.BUTTONS_PER_ROW
        rows = [
            widgets.HBox(self.buttons[i:i + per_row])
            for i in range(0, len(self.buttons), per_row)
        ]
        nav = widgets.HBox([self.prev_button, self.next_button])
        display(self.selected_label_widget)  # selected label goes at the very top
        display(nav)
        for row in rows:
            display(row)
        display(self.out)

    def inspect(self):
        """Display the labelling UI in the current notebook cell."""
        self._show()

In [115]:
# Directory containing the images to label; Inspector joins each target file
# name onto this path.
ds_dir_path = "/data/ephemeral/home/dataset/dtc/test/"
# Presumably returns (class name -> id, id -> class name) mappings, judging by
# the variable names -- confirm against make_doc_class_mapper in labs.
label2id, id2label = make_doc_class_mapper("/data/ephemeral/home/dataset/dtc/doc_classes.json")

# Class name -> Korean text shown on the label buttons. Inspector falls back to
# the raw class name for any class missing from this table.
# NOTE(review): "driver_lisence" is presumably spelled this way to match the
# class name in doc_classes.json -- confirm before correcting the key.
label_trans = {
    "account_number":"계좌번호",
    "application_for_payment_of_pregnancy_medical_expenses": "임신 의료비 신청서",
    "car_dashboard": "자동차 계기판",
    "confirmation_of_admission_and_discharge": "입퇴원 확인서",
    "diagnosis": "진단서",
    "driver_lisence": "운전면허증",
    "medical_bill_receipts": "의료비 영수증",
    "medical_outpatient_certificate": "외래 진료(통원치료) 확인서",
    "national_id_card": "주민등록증",
    "passport": "여권",
    "payment_confirmation": "납입 확인서",
    "pharmaceutical_receipt": "약국 영수증",
    "prescription": "처방전",
    "resume": "이력서",
    "statement_of_opinion": "소견서",
    "vehicle_registration_certificate": "자동차 등록증",
    "vehicle_registration_plate": "자동차 번호판"
}

# 혼동
- 약제비 영수증은 납입 확인서인가?

In [148]:
# Re-create the inspector on top of earlier answers. prev_answers must already
# be defined (it is taken from a previous inspector's label_answers); pass None
# instead to start with every image unlabelled.
inspector = Inspector(targets, ds_dir_path, label2id, label_trans, prev_answers)

In [149]:
inspector.inspect()

HTML(value='<b>선택된 레이블:</b> 소견서 (14)')

HBox(children=(Button(description='Previous', layout=Layout(margin='0 10px 10px 0', width='80px'), style=Butto…

HBox(children=(Button(description='계좌번호 (0)', layout=Layout(margin='3px 3px 3px 3px', max_width='300px', min_w…

HBox(children=(Button(description='외래 진료(통원치료) 확인서 (7)', layout=Layout(margin='3px 3px 3px 3px', max_width='30…

HBox(children=(Button(description='소견서 (14)', layout=Layout(margin='3px 3px 3px 3px', max_width='300px', min_w…

Output()

In [122]:
# Keep a handle on the answers so a re-created Inspector can resume from them.
# This is the inspector's own list object, not a copy, so later clicks in the
# UI show up in prev_answers as well.
prev_answers = inspector.label_answers

In [151]:
prev_answers

[('/data/ephemeral/home/dataset/dtc/test/1a6b433e34bc9a9d.jpg', 14),
 ('/data/ephemeral/home/dataset/dtc/test/1a6c830fc871d21a.jpg', 11),
 ('/data/ephemeral/home/dataset/dtc/test/1a85c97dc8b3f6a1.jpg', 6),
 ('/data/ephemeral/home/dataset/dtc/test/1a8a90676e40da26.jpg', 5),
 ('/data/ephemeral/home/dataset/dtc/test/1ab901039c1c7950.jpg', 13),
 ('/data/ephemeral/home/dataset/dtc/test/1ac5d385313e414b.jpg', 16),
 ('/data/ephemeral/home/dataset/dtc/test/1acade48e30258c8.jpg', 13),
 ('/data/ephemeral/home/dataset/dtc/test/1acbab3967fe133b.jpg', 15),
 ('/data/ephemeral/home/dataset/dtc/test/1acc40852f90aa76.jpg', 5),
 ('/data/ephemeral/home/dataset/dtc/test/1ad9ed5392307687.jpg', 9),
 ('/data/ephemeral/home/dataset/dtc/test/1b45268518d29b07.jpg', 3),
 ('/data/ephemeral/home/dataset/dtc/test/1b5039c18de0c674.jpg', 6),
 ('/data/ephemeral/home/dataset/dtc/test/1b7d7bfb252d66f9.jpg', 2),
 ('/data/ephemeral/home/dataset/dtc/test/1b808e977d7299f9.jpg', 16),
 ('/data/ephemeral/home/dataset/dtc/test/