#  labs.py와 augs.py 불러오기

In [1]:
import importlib
import augs
import labs

importlib.reload(labs)
importlib.reload(augs)

from labs import *
from augs import *

# INIT. Processor 

In [2]:
from transformers import LayoutLMv3Processor

In [3]:
processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)

# DEF. Dataset and DataModule 
### 주의
- 또한, bbox의 좌표가 layoutlm v3 (0,1000) 스케일이 아닌 픽셀 스케일을 원함
  - 이에 따라, norm_box 변환 시점을 augment 뒤로 미뤄야 함

```
return_tensors (str, optional, defaults to "pt") — The type of Tensor to return. Allowable values are “np”, “pt” and “tf”.
```

In [4]:
def prepare_example(image_path, processor, transform=None):
    # load image
    if not transform:
        image = Image.open(image_path).convert("RGB")
        image = ImageOps.exif_transpose(image)  # correct orientation
    else:
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # load metas
    json_path = Path(image_path).with_suffix(".json")
    meta = load_json(json_path)

    # words and boxes
    words, boxes = get_words_and_boxes(image, meta, use_norm=transform is None)

    if transform is not None:
        try:
            augmented = transform(image=image, bboxes=boxes, words=words)
            image = augmented['image']
            words = augmented['words']
            boxes = augmented['bboxes']
            h, w = image.shape[:2]
            boxes = [to_norm_box_with_size(b, h, w) for b in boxes]
        except Exception as e:
            print(">>>>>>>>>>>>>>>>>>>>>..", image_path)

    encoding = processor(
        images=image,
        text=words,
        boxes=boxes,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )
    
    return encoding

In [5]:
class D4Dataset(Dataset):
    def __init__(self, image_paths, targets, processor, transform=None):
        self.targets = targets
        self.processor = processor
        self.transform = transform
        self.image_paths = image_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        encoding = prepare_example(image_path, self.processor, self.transform)
        target = int(self.targets[os.path.basename(image_path)])

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "bbox": encoding["bbox"].squeeze(0),
            "pixel_values": encoding["pixel_values"].squeeze(0),
            "labels": torch.tensor(target, dtype=torch.long)
        }

In [6]:
class D4DataModule(LightningDataModule):
    def __init__(
        self,
        train_paths,
        valid_paths,
        trial_paths,
        target_dict,
        processor,
        batch_size=32,
        num_workers=4,
    ):
        super().__init__()
        self.train_paths = train_paths
        self.valid_paths = valid_paths
        self.trial_paths = trial_paths
        self.targets = target_dict
        self.processor = processor
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.transforms = Transforms(target_size=384)

    def setup(self, stage=None):
        if stage == "fit":
            self.train_ds = D4Dataset(self.train_paths, 
                                      self.targets, 
                                      self.processor,
                                      self.transforms.make(50))
            self.valid_ds = D4Dataset(self.valid_paths, 
                                      self.targets, 
                                      self.processor,
                                      self.transforms.make(50))
        if stage == "test" or stage is None:
            self.trial_ds = D4Dataset(self.trial_paths, self.targets, self.processor)

    def train_dataloader(self):
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            collate_fn=default_data_collator
        )

    def val_dataloader(self):
        return DataLoader(
            self.valid_ds,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            collate_fn=default_data_collator 
        )

    def test_dataloader(self):
        return DataLoader(
            self.trial_ds,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            collate_fn=default_data_collator 
        )

# INIT. DM

In [7]:
image_paths = grep_files("/root/upstg_CV/data/train", exts=['jpg'])
target_dict = load_csv_targets("/root/upstg_CV/data/train.csv")
label_path = "/root/upstg_CV/data/doc_classes.json"
label2id, id2label = make_doc_class_mapper(label_path)

0it [00:00, ?it/s]

In [8]:
train_images, valid_images, trial_images = split_ds(image_paths,  train_ratio=0.6,  valid_ratio=0.4, test_ratio=0)

data_module = D4DataModule(
    train_paths=train_images,
    valid_paths=valid_images,
    trial_paths=trial_images,
    target_dict=target_dict,
    processor=processor,
    batch_size=16,
    num_workers=8
)

# DEF) Model
- ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight'] 크기 조절

In [9]:
from transformers import LayoutLMv3ForSequenceClassification as LyLmv3, LayoutLMv3Processor

In [10]:
class Lym(pl.LightningModule):
    def __init__(self, label2id, id2label):
        super().__init__()
        num_classes = len(label2id)
        self.model = LyLmv3.from_pretrained("microsoft/layoutlmv3-base", num_labels=num_classes)
        self.model.config.label2id = label2id
        self.model.config.id2label = id2label

        metrics = {
            "accuracy": Accuracy(task="multiclass", num_classes=num_classes),
            "per-class-accuracy" : MulticlassAccuracy(num_classes=num_classes, average=None), 
            "roc_auc": AUROC(task="multiclass", num_classes=num_classes),
            "precision": Precision(task="multiclass", num_classes=num_classes, average="macro"),
            "recall": Recall(task="multiclass", num_classes=num_classes, average="macro"),
            "F1": F1Score(task="multiclass", num_classes=num_classes, average="macro"),
        }

        self.train_metrics = MetricCollection(metrics, prefix="train_")
        self.valid_metrics = MetricCollection(metrics, prefix="valid_")

    def forward(self, input_ids, attention_mask, bbox, pixel_values, labels=None):
        return self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            bbox=bbox,
            pixel_values=pixel_values,
            labels=labels
        )

    def feed(self, batch):
        return self(
            batch["input_ids"],
            batch["attention_mask"],
            batch["bbox"],
            batch["pixel_values"],
            batch["labels"]
        )

    def training_step(self, batch, batch_idx):
        labels = batch["labels"]
        outputs = self.feed(batch)
    
        self.train_metrics.update(outputs.logits, labels)
        
        self.log("train_loss", outputs.loss)
        for name, metric in self.train_metrics.items():
            if name == 'train_per-class-accuracy':
                continue
            self.log(name, metric.compute(), prog_bar=True)
        
        return outputs.loss

    def validation_step(self, batch, batch_idx):
        labels = batch["labels"]
        outputs = self.feed(batch)

        self.valid_metrics.update(outputs.logits, labels)
        #for 루프를 돔, 출력을 할 때 if문이 필요함 vaild-per class acuuracy면 출력을 안하도록 
        self.log("valid_loss", outputs.loss)
        for name, metric in self.valid_metrics.items():
            if name == 'valid_per-class-accuracy':
                continue
            self.log(name, metric.compute(), prog_bar=True)
        return outputs.loss

    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters(), lr=1e-5)
        
    def on_train_epoch_start(self):
        self.train_metrics.reset()

    def on_train_epoch_end(self):
        metrics = self.train_metrics.compute()
        for name, value in metrics.items():
            if name == 'train_per-class-accuracy':  # 일단 단일 지표들만 모두 출력
                continue
            self.log(name, value)
        # 아래 모델 id2label말고 한글 레이블로 하면 wandb에서 보기는 더 편함
        per_class_acc = metrics['train_per-class-accuracy'] # 클래스별 지표
        for i, acc in enumerate(per_class_acc):  # 각 지표별로 class_id -> 레이블로 변환하여 로그
            label_name = self.model.config.id2label[i]  
            self.log(f'train_acc_class_{label_name}', acc)

    def on_validation_epoch_start(self):
        self.valid_metrics.reset()
    
    def on_validation_epoch_end(self):
        try:
            metrics = self.valid_metrics.compute()
            for name, value in metrics.items():
                if name == 'valid_per-class-accuracy':  # 일단 단일 지표들만 모두 출력
                    continue
                self.log(name, value)
        except Exception as e:
            print(f"Metric compute error: {e}")
            
        # 아래 모델 id2label말고 한글 레이블로 하면 wandb에서 보기는 더 편함
        per_class_acc = metrics['valid_per-class-accuracy'] # 클래스별 지표
        for i, acc in enumerate(per_class_acc):  # 각 지표별로 class_id -> 레이블로 변환하여 로그
            label_name = self.model.config.id2label[i]  
            self.log(f'valid_acc_class_{label_name}', acc)

# Init Dashboard

In [11]:
import wandb

In [12]:
exp_name = 'exp-llv3-aug-ost-test4'
wandb.init(project='docsy', name=exp_name)
wandb_logger = WandbLogger()

[34m[1mwandb[0m: Currently logged in as: [33mdhtmdxo12345[0m ([33mdhtmdxo12345-kyonggi-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# RUN. Train

In [13]:
early_stopping = EarlyStopping(monitor='valid_loss', patience=5, mode='min')
model_checkpoint = ModelCheckpoint(monitor="valid_loss", mode="min", save_top_k=3)

trainer = pl.Trainer(
    accelerator="gpu",
    precision="16-mixed",
    max_epochs=100,
    logger=wandb_logger,
    callbacks=[model_checkpoint, early_stopping]
)

model = Lym(label2id, id2label)
trainer.fit(model, datamodule=data_module)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at microsoft/layoutlmv3-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/data/ephemeral/home/.pyenv/versions/py12/lib/python3.12/site-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finis

Sanity Checking: |          | 0/? [00:00<?, ?it/s]



Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

### 체크포인트 저장

In [14]:
trainer.save_checkpoint(f"./{exp_name}-last_epoch.ckpt")

In [15]:
wandb.finish()

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██████
train_F1,▁▄▅▆▆▆▆▆▆▇▆▆▇▇▆▇▇▇▇▇▇▇▇▇██▇█▇█▇█▇████▇█▇
train_acc_class_account_number,▁▇▄█▇█▇██▇▇▇██████████████████████
train_acc_class_application_for_payment_of_pregnancy_medical_expenses,▁▁▁▂▅▂▇██▇█████████████▇██████▇███
train_acc_class_car_dashboard,▁▅▅█▇█████████████████▇█▇█████████
train_acc_class_confirmation_of_admission_and_discharge,██▂▁▄▅▇█▂▇▅▆▄▁▇▅▆▄▂▅▄▆▆▂▄▇▆▆▄▇▇▅▇▅
train_acc_class_diagnosis,▁▂▅▅▄▇▇▄▇▇█▇▇▇▆▆█▆▇▇▇▆▅▇▅▆▆▆█▆▇▆▇▇
train_acc_class_driver_lisence,▁▄▅██▇████████████████████████████
train_acc_class_medical_bill_receipts,▁▅▇▆████▇█████████████████████████
train_acc_class_medical_outpatient_certificate,▁▁▇▇▅▂▂▂▆▂▄▁▆▇▂▆▅▇▅▅▅▆▇█▆▅▆▆▆▆▃▆▅▆

0,1
epoch,33.0
train_F1,0.90351
train_acc_class_account_number,1.0
train_acc_class_application_for_payment_of_pregnancy_medical_expenses,1.0
train_acc_class_car_dashboard,0.97143
train_acc_class_confirmation_of_admission_and_discharge,0.55556
train_acc_class_diagnosis,0.80488
train_acc_class_driver_lisence,1.0
train_acc_class_medical_bill_receipts,0.97872
train_acc_class_medical_outpatient_certificate,0.7381


## 테스트 데이터 예측

### bbox정규화 및 json파일에서 추출하기

In [23]:
def normalize_bbox(bbox, width, height):
    """
    LayoutLMv3에서 요구하는 0~1000 정규화된 bbox
    """
    x1, y1, x2, y2 = bbox
    return [
        int(x1 / width * 1000),
        int(y1 / height * 1000),
        int(x2 / width * 1000),
        int(y2 / height * 1000)
    ]

    
def to_norm_box_with_size(box, h, w):
    x0, y0, x1, y1 = box

    x0_norm = int((x0 / w) * 1000)
    y0_norm = int((y0 / h) * 1000)
    x1_norm = int((x1 / w) * 1000)
    y1_norm = int((y1 / h) * 1000)

    x0_norm = max(0, min(1000, x0_norm))
    y0_norm = max(0, min(1000, y0_norm))
    x1_norm = max(0, min(1000, x1_norm))
    y1_norm = max(0, min(1000, y1_norm))

    return [x0_norm, y0_norm, x1_norm, y1_norm]

def load_ocr_from_json(image_path):
    """
    test 이미지 경로를 입력받아 동일한 이름의 JSON에서 OCR 정보를 추출하고,
    LayoutLMv3에 맞게 정규화된 text와 bbox 리스트를 반환
    """
    json_path = image_path.replace(".jpg", ".json")

    # 이미지 크기 확인
    img = Image.open(image_path).convert("RGB")
    width, height = img.size

    # JSON 읽기
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    words = []
    boxes = []

    for ann in data["annotation"]:
        word = ann["text"]
        bbox = ann["box"]  # [x1, y1, x2, y2]
        score = ann.get("score", 1.0)

        # 조건 필터링 (예: score가 너무 낮은 항목 제외)
        if word.strip() and score > 0.5:
            words.append(word)
            boxes.append(to_norm_box_with_size(bbox, width, height))

    return words, boxes

### processor 구성하기

In [17]:
from transformers import LayoutLMv3ImageProcessor, AutoTokenizer, LayoutLMv3Processor

# 1. ImageProcessor는 apply_ocr=False로 생성
image_processor = LayoutLMv3ImageProcessor(apply_ocr=False)

# 2. 토크나이저 로드
tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlmv3-base")

# 3. 최종 processor 구성
processor = LayoutLMv3Processor(image_processor=image_processor, tokenizer=tokenizer)


### 테스트 데이터 예측

In [18]:
# ckpt_path = "/root/upstg_CV/chy/llv3-aug/exp-llv3-aug-ost-test1-last_epoch.ckpt"
# model = Lym.load_from_checkpoint(ckpt_path, id2label=id2label, label2id=label2id)

In [24]:
# 모델 및 processor 불러오기
model.eval().cuda()
# 테스트 이미지 경로
test_img_paths = grep_files("/root/upstg_CV/deskewed-test", exts=['jpg'])

preds = []

# ✅ 추론 루프
for path in tqdm(test_img_paths):
    img = cv2.imread(path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    words, boxes = load_ocr_from_json(path)

    inputs = processor(
        images=img,
        text=words,
        boxes=boxes,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512
    )


    inputs = {k: v.cuda() for k, v in inputs.items()}

    # 추론
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        pred_class = torch.argmax(logits, dim=1).item()

    preds.append((path, pred_class))



0it [00:00, ?it/s]

  0%|          | 0/3140 [00:00<?, ?it/s]

### submission 데이터에 저장

In [25]:
import pandas as pd
# ✅ 예측 결과를 sample_submission.csv에 반영
def write_csv_value(csv_path, preds):
    df = pd.read_csv(csv_path)
    for path, pred in preds:
        fname = os.path.basename(path)
        df.loc[df['ID'] == fname, 'target'] = pred
    df.to_csv(csv_path, index=False, encoding='utf-8')

write_csv_value("/root/upstg_CV/data/lymv3_ver3.csv", preds)

# 분류기가 예측한 데이터의 클래스별 확률분포

In [26]:
def make_proba_map_layoutlmv3(image_paths, model, processor, load_ocr_from_json_no_normalize, class_names=None):
    """
    LayoutLMv3 모델을 사용하여 이미지 분류 확률 맵을 생성하는 함수
    
    Args:
        image_paths: 이미지 경로 리스트
        model: 학습된 LayoutLMv3 모델
        processor: LayoutLMv3 processor
        load_ocr_from_json: OCR 데이터를 로드하는 함수
        class_names: 클래스 이름 리스트 (인덱스 순서대로). None이면 기본값 사용
    
    Returns:
        rows: 각 이미지별 분류 확률 정보가 담긴 딕셔너리 리스트
    """
    # 17개 클래스에 맞게 수정
    ids = list(range(17))  # [0, 1, 2, ..., 16]
    
    # 클래스 이름이 제공되지 않으면 기본값 사용
    if class_names is None:
        # 주의: 이 순서가 모델 학습 시 사용한 순서와 일치해야 합니다!
        class_names = [
            "계좌번호",
            "임신/출산 신청서", 
            "자동차 계기판",
            "입/퇴원 확인서",
            "진단서",
            "운전면허증",
            "진료/의료비 영수증",
            "외래/진료/통원/치료 확인서",
            "주민등록증",
            "여권",
            "(진료비/약제비) 납입 확인서",
            "약국/영수증",
            "처방전",
            "이력서",
            "소견서",
            "자동차 등록증",
            "자동차 번호판"
        ]
    
    labels = class_names
    
    model.eval()
    rows = []
    
    # ✅ 추론 루프 - 확률 분포 계산
    for path in tqdm(image_paths):
        img = cv2.imread(path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        words, boxes = load_ocr_from_json_no_normalize(path)

        inputs = processor(
            images=img,
            text=words,
            boxes=boxes,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512
        )

        inputs = {k: v.cuda() for k, v in inputs.items()}

        # 추론 - 확률 분포 계산
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            
            # 소프트맥스를 적용하여 확률 분포 계산
            probs = F.softmax(logits, dim=1)
            probs_np = probs.cpu().numpy().squeeze()  # (num_classes,)
            
            # 가장 높은 확률의 클래스 찾기
            pred_class = torch.argmax(logits, dim=1).item()
        
        # 확률을 소수점 2자리로 반올림
        probs_rounded = [float(f"{p:.2f}") for p in probs_np]
        
        # 예측 결과 생성
        guess = f"{labels[pred_class]} [{ids[pred_class]}]"
        
        # 결과 딕셔너리 생성
        item = {
            'code': os.path.basename(path),
            'guess': guess
        }
        
        # 각 클래스별 확률 추가
        for j in range(len(ids)):
            label = f"{labels[j]} [{ids[j]}]"
            item[label] = probs_rounded[j]
        
        rows.append(item)
    
    # 파일명 기준으로 정렬
    rows = sorted(rows, key=lambda x: x['code'])
    return rows

In [27]:
def save_results_to_csv(results, filename):
    """
    결과를 CSV 파일로 저장하는 함수
    
    Args:
        results: make_proba_map_layoutlmv3의 결과 (딕셔너리 리스트)
        filename: 저장할 파일명
    """
    import pandas as pd
    
    # 딕셔너리 리스트를 DataFrame으로 변환
    df = pd.DataFrame(results)
    
    # CSV로 저장
    df.to_csv(filename, index=False, encoding='utf-8-sig')  # 한글 지원을 위해 utf-8-sig 사용
    print(f"결과가 {filename}에 저장되었습니다.")
    
    return df

### 확률 분포 및 추론 함수 실행

In [28]:
# 1. 추론 실행
results = make_proba_map_layoutlmv3(
    test_img_paths, 
    model, 
    processor, 
    load_ocr_from_json
)

# 2. CSV 저장
df = save_results_to_csv(results, "[ost]lym3-proba-map2.csv")

# 3. 결과 확인
print(f"총 {len(df)}개 이미지 처리 완료")
print(df.head())

  0%|          | 0/3140 [00:00<?, ?it/s]



결과가 [ost]lym3-proba-map2.csv에 저장되었습니다.
총 3140개 이미지 처리 완료
                   code         guess  계좌번호 [0]  임신/출산 신청서 [1]  자동차 계기판 [2]  \
0  0008fdb22ddce0ce.jpg  자동차 번호판 [16]      0.01            0.0         0.01   
1  00091bffdffd83de.jpg      처방전 [12]      0.00            0.0         0.00   
2  00396fbc1f6cc21d.jpg     운전면허증 [5]      0.00            0.0         0.00   
3  00471f8038d9c4b6.jpg      이력서 [13]      0.01            0.0         0.00   
4  00901f504008d884.jpg   자동차 계기판 [2]      0.00            0.0         0.99   

   입/퇴원 확인서 [3]  진단서 [4]  운전면허증 [5]  진료/의료비 영수증 [6]  외래/진료/통원/치료 확인서 [7]  \
0          0.00     0.00       0.00            0.00                 0.00   
1          0.00     0.00       0.00            0.01                 0.01   
2          0.00     0.00       0.97            0.00                 0.00   
3          0.34     0.01       0.01            0.01                 0.04   
4          0.00     0.00       0.00            0.00                 0.00   

   주민등록증 [8