### 0. Load Library & Data

In [1]:
# Libraries
import os
import re
import jsonlines
import pandas as pd
import ast
import ast
import copy
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load Original LitBank Json Data
litbank_jsons=[]
with jsonlines.open("../data/LitBank/train.english.jsonlines") as read_file:
    for line in read_file.iter():
        litbank_jsons.append(line)

# Load IAA LitBank Data
# 폴더 내 파일 정렬
litbank_dfs_folder = os.listdir("../data/NewLitBank_IAA")
litbank_dfs_folder = sorted(litbank_dfs_folder, key=lambda x: int(re.search(r'\d+', x).group()))

# 케이스별 처리 함수
def safe_literal_eval(val):
    try:
        return ast.literal_eval(val)
    except (ValueError, SyntaxError):
        return val  

def load_and_process_litbank(case_value):
    dfs = []
    for fname in litbank_dfs_folder:
        df = pd.read_csv(f"../data/NewLitBank_IAA/{fname}").drop(
            columns=['Unnamed: 0', 'labels', 'inference_offsets'], errors='ignore'
        )
        df = df.loc[df['cases'] == case_value].copy()
        for col in df.columns:
            df[col] = df[col].apply(safe_literal_eval)
        dfs.append(df)
    return dfs

best_litbank_dfs = load_and_process_litbank(0)
worst_litbank_dfs = load_and_process_litbank(1)
weird_litbank_dfs = load_and_process_litbank(2)

### 1. 증강된 coref의 offset 변화량을 json cluster에 반영 

In [3]:
file_idx = 0
litbank_df = best_litbank_dfs[file_idx]
litbank_json = litbank_jsons[file_idx]

clusters = copy.deepcopy(litbank_json['clusters'])
for cluster in clusters:
    cluster.sort(key=lambda x: x[0])

In [4]:
## 1. update_coref의 offset 변화량을 coref에 반영 -> reverse_coref 생성

# 하나의 데이터프레임 적용
reverse_coref = []
# 정렬
litbank_df['coref'] = litbank_df['coref'].apply(lambda cluster: sorted(cluster, key=lambda x: x[0]))
for i in range(len(litbank_df)):
    # 문자열 → 리스트 변환
    coref = litbank_df.iloc[i]['coref']
    adjusted_offsets = litbank_df.iloc[i]['adjusted_offsets']
    update_coref = litbank_df.iloc[i]['update_coref']

    reverse = []
    for j in range(len(update_coref)):
        start_delta = update_coref[j][0] - adjusted_offsets[j][0] # start offset 증가량
        end_delta = update_coref[j][1] - adjusted_offsets[j][1] # end offset 증가량
        new_start = coref[j][0] + start_delta # start offset 증가량 반영
        new_end = coref[j][1] + end_delta # end offset 증가량 반영
        reverse.append([new_start, new_end]) # 증가량 반영해서 coref offset 변형한 offset
        
    reverse_coref.append(reverse)
litbank_df['reverse_coref'] = reverse_coref

litbank_df[['coref','adjusted_offsets','update_coref','reverse_coref']]

Unnamed: 0,coref,adjusted_offsets,update_coref,reverse_coref
1,"[[43, 53], [144, 145], [605, 606]]","[[2, 12], [30, 31], [39, 40]]","[[2, 12], [30, 32], [40, 42]]","[[43, 53], [144, 146], [606, 608]]"
2,"[[209, 209], [393, 393]]","[[8, 8], [92, 92]]","[[8, 8], [92, 93]]","[[209, 209], [393, 394]]"
4,"[[296, 296], [382, 382]]","[[69, 69], [125, 125]]","[[69, 69], [125, 127]]","[[296, 296], [382, 384]]"


In [5]:
## 2. reverse_coref를 json의 clusters로 교체

# 변화 발생 기준점 및 shift 저장용 리스트
change_points = []

# coref → reverse_coref 변환 + 변화량 기록
for row in litbank_df.itertuples():
    coref = row.coref
    reverse_coref = row.reverse_coref

    # 문자열일 경우 변환
    if isinstance(coref, str):
        coref = ast.literal_eval(coref)
    if isinstance(reverse_coref, str):
        reverse_coref = ast.literal_eval(reverse_coref)

    # 변화량 확인
    for j in range(len(coref)):
        original = coref[j]
        updated = reverse_coref[j]

        original_length = original[1] - original[0]
        updated_length = updated[1] - updated[0]
        shift = updated_length - original_length

        # 변화량이 있을 경우만 기록
        if shift != 0:
            change_points.append((original[0], shift))  # 기준 offset = original start

    # coref 일치하는 클러스터 교체
    for idx, cluster in enumerate(clusters):
        if cluster == coref:
            clusters[idx] = reverse_coref
            break

# shift를 기준 offset보다 큰 mention 전체에 적용 (간격 유지)
for base_offset, shift in change_points:
    for cluster in clusters:
        for mention in cluster:
            if mention[0] > base_offset:
                mention[0] += shift
                mention[1] += shift

# linkbank_json의 clusters에 반영  
litbank_json['clusters'] = clusters

In [6]:
def apply_reverse_coref_to_json(litbank_df, litbank_json):
    # litbank_json의 cluster 정렬
    clusters = copy.deepcopy(litbank_json['clusters'])
    for cluster in clusters:
        cluster.sort(key=lambda x: x[0])

    # litbank_df의 coref 정렬
    litbank_df['coref'] = litbank_df['coref'].apply(
        lambda cluster: sorted(cluster, key=lambda x: x[0])
    )

    # 1. update_coref의 offset 변화량을 coref에 반영 -> reverse_coref 생성
    reverse_coref = []
    for i in range(len(litbank_df)):
        coref = litbank_df.iloc[i]['coref']
        adjusted_offsets = litbank_df.iloc[i]['adjusted_offsets']
        update_coref = litbank_df.iloc[i]['update_coref']

        if isinstance(coref, str):
            coref = ast.literal_eval(coref)
        if isinstance(adjusted_offsets, str):
            adjusted_offsets = ast.literal_eval(adjusted_offsets)
        if isinstance(update_coref, str):
            update_coref = ast.literal_eval(update_coref)

        reverse = []
        for j in range(len(update_coref)):
            start_delta = update_coref[j][0] - adjusted_offsets[j][0]
            end_delta = update_coref[j][1] - adjusted_offsets[j][1]
            new_start = coref[j][0] + start_delta
            new_end = coref[j][1] + end_delta
            reverse.append([new_start, new_end])
        reverse_coref.append(reverse)

    litbank_df['reverse_coref'] = reverse_coref

    # 2. reverse_coref를 json의 clusters로 교체
    change_points = []
    for row in litbank_df.itertuples():
        coref = row.coref
        reverse_coref = row.reverse_coref

        if isinstance(coref, str):
            coref = ast.literal_eval(coref)
        if isinstance(reverse_coref, str):
            reverse_coref = ast.literal_eval(reverse_coref)

        for j in range(len(coref)):
            original = coref[j]
            updated = reverse_coref[j]

            original_length = original[1] - original[0]
            updated_length = updated[1] - updated[0]
            shift = updated_length - original_length

            if shift != 0:
                print(f"✅ coref[{original[0]}, {original[1]}] → [{updated[0]}, {updated[1]}] | shift: +{shift}")
                change_points.append((original[0], shift))

        for idx, cluster in enumerate(clusters):
            if cluster == coref:
                clusters[idx] = reverse_coref
                print(f" 클러스터 교체 완료: {coref} → {reverse_coref}")
                break

    for base_offset, shift in change_points:
        for cluster in clusters:
            for mention in cluster:
                if mention[0] > base_offset:
                    mention[0] += shift
                    mention[1] += shift
                    print(f"↪ 이후 클러스터 mention 업데이트: base > {base_offset} → [{mention[0] - shift}, {mention[1] - shift}] → [{mention[0]}, {mention[1]}]")

    litbank_json['clusters'] = clusters
    return litbank_json

In [7]:
# 예: 첫 번째 파일에 적용
test_json = litbank_jsons[0]
test_json_result = apply_reverse_coref_to_json(best_litbank_dfs[0], test_json)

✅ coref[144, 145] → [144, 146] | shift: +1
✅ coref[605, 606] → [606, 608] | shift: +1
✅ coref[393, 393] → [393, 394] | shift: +1
✅ coref[382, 382] → [382, 384] | shift: +2
↪ 이후 클러스터 mention 업데이트: base > 144 → [905, 905] → [906, 906]
↪ 이후 클러스터 mention 업데이트: base > 144 → [910, 910] → [911, 911]
↪ 이후 클러스터 mention 업데이트: base > 144 → [462, 463] → [463, 464]
↪ 이후 클러스터 mention 업데이트: base > 144 → [465, 466] → [466, 467]
↪ 이후 클러스터 mention 업데이트: base > 144 → [685, 686] → [686, 687]
↪ 이후 클러스터 mention 업데이트: base > 144 → [730, 738] → [731, 739]
↪ 이후 클러스터 mention 업데이트: base > 144 → [749, 749] → [750, 750]
↪ 이후 클러스터 mention 업데이트: base > 144 → [879, 880] → [880, 881]
↪ 이후 클러스터 mention 업데이트: base > 144 → [248, 251] → [249, 252]
↪ 이후 클러스터 mention 업데이트: base > 144 → [454, 458] → [455, 459]
↪ 이후 클러스터 mention 업데이트: base > 144 → [449, 451] → [450, 452]
↪ 이후 클러스터 mention 업데이트: base > 144 → [611, 613] → [612, 614]
↪ 이후 클러스터 mention 업데이트: base > 144 → [163, 164] → [164, 165]
↪ 이후 클러스터 mention 업데이트: base > 144 

In [8]:
final_litbank_jsons = [
    apply_reverse_coref_to_json(best_litbank_dfs[i], litbank_jsons[i])
    for i in range(len(best_litbank_dfs))
]

✅ coref[144, 145] → [144, 146] | shift: +1
✅ coref[605, 606] → [606, 608] | shift: +1
✅ coref[393, 393] → [393, 394] | shift: +1
✅ coref[382, 382] → [382, 384] | shift: +2
↪ 이후 클러스터 mention 업데이트: base > 144 → [910, 910] → [911, 911]
↪ 이후 클러스터 mention 업데이트: base > 144 → [915, 915] → [916, 916]
↪ 이후 클러스터 mention 업데이트: base > 144 → [466, 467] → [467, 468]
↪ 이후 클러스터 mention 업데이트: base > 144 → [469, 470] → [470, 471]
↪ 이후 클러스터 mention 업데이트: base > 144 → [690, 691] → [691, 692]
↪ 이후 클러스터 mention 업데이트: base > 144 → [735, 743] → [736, 744]
↪ 이후 클러스터 mention 업데이트: base > 144 → [754, 754] → [755, 755]
↪ 이후 클러스터 mention 업데이트: base > 144 → [884, 885] → [885, 886]
↪ 이후 클러스터 mention 업데이트: base > 144 → [249, 252] → [250, 253]
↪ 이후 클러스터 mention 업데이트: base > 144 → [458, 462] → [459, 463]
↪ 이후 클러스터 mention 업데이트: base > 144 → [453, 455] → [454, 456]
↪ 이후 클러스터 mention 업데이트: base > 144 → [616, 618] → [617, 619]
↪ 이후 클러스터 mention 업데이트: base > 144 → [164, 165] → [165, 166]
↪ 이후 클러스터 mention 업데이트: base > 144 

### 2. 증강된 update sentence를 json sentences에 반영 

In [9]:
def apply_update_text_mentions(litbank_df, litbank_json):

    # 문장별 토큰 접근
    for row in litbank_df.itertuples():
        text = row.text
        update_text = row.update_text

        # 문자열이면 리스트로 변환
        if isinstance(text, str):
            text = ast.literal_eval(text)
        if isinstance(update_text, str):
            update_text = ast.literal_eval(update_text)

        # mention 단위 비교
        for original, updated in zip(text, update_text):
            for s_idx, sentence in enumerate(litbank_json['sentences']):
                n = len(original)
                for i in range(len(sentence) - n + 1):
                    if sentence[i:i + n] == original:
                        litbank_json['sentences'][s_idx] = (
                            sentence[:i] + updated + sentence[i + n:]
                        )
                        print(f"✅ 문장 {s_idx}에서 '{' '.join(original)}' → '{' '.join(updated)}'")
                        break
                else:
                    continue
                break  # 한 문장에 하나만 교체
    return litbank_json


In [16]:
# 예: 첫 번째 파일에 적용
# test_json = litbank_jsons[0] 
# test_df = best_litbank_dfs[0]
# test_json_result = apply_update_text_mentions(test_df, test_json)

for i in range(80):
    litbank_jsons[i] = apply_update_text_mentions(best_litbank_dfs[i], litbank_jsons[i])

✅ 문장 7에서 'Derbyshire' → 'Derbyshire'
✅ 문장 7에서 'Derbyshire' → 'picturesque Derbyshire'
✅ 문장 11에서 'Selby' → 'Selby'
✅ 문장 11에서 'Selby' → 'the nearby Selby'
✅ 문장 2에서 'Chicago' → 'Chicago'
✅ 문장 2에서 'Chicago' → 'the thriving Chicago'
✅ 문장 11에서 'America' → 'America'
✅ 문장 11에서 'America' → 'the vast America'
✅ 문장 70에서 'Baker Street' → 'Baker Street'
✅ 문장 70에서 'Baker Street' → 'the famous Baker Street'
✅ 문장 80에서 'Bradshaw' → 'Bradshaw'
✅ 문장 50에서 'Kirby' → 'Kirby'
✅ 문장 50에서 'Kirby' → 'the busy Kirby'
✅ 문장 20에서 'here' → 'here'
✅ 문장 5에서 'a large rabbit-hole under the hedge' → 'a large rabbit-hole under the hedge'
✅ 문장 16에서 'Normandy' → 'Normandy'
✅ 문장 16에서 'Normandy' → 'the historic Normandy'
✅ 문장 22에서 'England' → 'England'
✅ 문장 22에서 'England' → 'historic England'
✅ 문장 23에서 'Brian' → 'Brian'
✅ 문장 0에서 'Bramblehurst' → 'Bramblehurst'
✅ 문장 0에서 'Bramblehurst' → 'the busy Bramblehurst'
✅ 문장 7에서 'they' → 'they'
✅ 문장 12에서 'It' → 'It'
✅ 문장 12에서 'a queer little garden' → 'a queer little garden'
✅ 문장 6에서 'th

- 변경된 coref와 sentence를 반영한 litbank_json을 저장

In [17]:
with jsonlines.open("../data/train.litbank_iaa.jsonlines", mode='w') as writer:
    for item in final_litbank_jsons:
        writer.write(item)

- LitBank IAA Json 복원 잘 되었는지 확인

In [82]:
# Load NewLitBank Data
litbank_dfs_folder = os.listdir("../data/NewLitBank_IAA")
litbank_dfs_folder = sorted(litbank_dfs_folder, key=lambda x: int(re.search(r'\d+', x).group()))
def safe_literal_eval(val):
    try:
        return ast.literal_eval(val)
    except (ValueError, SyntaxError):
        return val  

def load_and_process_litbank(case_value):
    dfs = []
    for fname in litbank_dfs_folder:
        df = pd.read_csv(f"../data/NewLitBank_IAA/{fname}").drop(
            columns=['Unnamed: 0', 'labels', 'inference_offsets'], errors='ignore'
        )
        df = df.loc[df['cases'] == case_value].copy()
        for col in df.columns:
            df[col] = df[col].apply(safe_literal_eval)
        dfs.append(df)
    return dfs

best_litbank_dfs = load_and_process_litbank(0)
litbank_df = best_litbank_dfs[0]
litbank_df[0:1]

Unnamed: 0,coref,extracted_sentence,adjusted_offsets,text,update_text,update_sentence,update_coref,cases
1,"[[43, 53], [605, 606], [144, 145]]","[There, lived, the, colliers, who, worked, in,...","[[2, 12], [30, 31], [39, 40]]","[[the, colliers, who, worked, in, the, little,...","[[the, colliers, who, worked, in, the, little,...","[There, lived, the, colliers, who, worked, in,...","[[2, 12], [30, 32], [40, 42]]",0


In [87]:
# 원본 text
litbank_df[0:1]['text'].iloc[0]

[['the',
  'colliers',
  'who',
  'worked',
  'in',
  'the',
  'little',
  'gin-pits',
  'two',
  'fields',
  'away'],
 ['the', 'colliers'],
 ['these', 'coal-miners']]

In [84]:
# 업데이트된 text
# hardworking, dedicated 수식어구 추가됨
litbank_df[0:1]['update_text'].iloc[0]

[['the',
  'colliers',
  'who',
  'worked',
  'in',
  'the',
  'little',
  'gin-pits',
  'two',
  'fields',
  'away'],
 ['the', 'hardworking', 'colliers'],
 ['these', 'dedicated', 'coal-miners']]

In [97]:
# Load LitBank IAA Json Data
original_docs=[]
with jsonlines.open("../data/train.litbank_iaa.jsonlines") as read_file:
    for line in read_file.iter():
        original_docs.append(line)
flat_tokens = sum(original_docs[0]['sentences'], [])    
        
litbank_iaa_json = original_docs[0]
litbank_iaa_df = pd.DataFrame(original_docs)[['doc_key', 'sentences', 'clusters']]
litbank_iaa_df[:1]

Unnamed: 0,doc_key,sentences,clusters
0,217_sons_and_lovers_brat_0,"[[PART, ONE, CHAPTER, I, THE, EARLY, MARRIED, ...","[[[9, 10], [915, 915], [920, 920]], [[12, 13],..."


In [100]:
print(litbank_iaa_df['sentences'].iloc[0][5])

['And', 'the', 'cottages', 'of', 'these', 'dedicated', 'coal-miners', ',', 'in', 'blocks', 'and', 'pairs', 'here', 'and', 'there', ',', 'together', 'with', 'odd', 'farms', 'and', 'homes', 'of', 'the', 'stockingers', ',', 'straying', 'over', 'the', 'parish', ',', 'formed', 'the', 'village', 'of', 'Bestwood', '.']


In [None]:
def get_mentions_from_coref(coref_spans, flat_tokens):
    mentions = []
    for span in coref_spans:
        start, end = span
        mention = flat_tokens[start:end+1]  # end 포함
        mentions.append(mention)
    return mentions

In [101]:
# 0번째 문서의 문장과 클러스터
litbank_iaa_df_0 = litbank_iaa_df.iloc[0]
sentences_0 = litbank_iaa_df_0['sentences']
clusters_0 = litbank_iaa_df_0['clusters']

In [108]:
flat_tokens[605:606+1]

['for', 'the']

In [78]:
for i, cluster in enumerate(clusters_0):
    mentions = get_mentions_from_coref(cluster, flat_tokens)
    print(f"{i}번째 cluster offset : {cluster}")
    print(f"{i}번째 cluster에 해당하는 mention : {mentions}")

0번째 cluster offset : [[9, 10], [915, 915], [920, 920]]
0번째 cluster에 해당하는 mention : [['THE', 'MORELS'], ['Her'], ['.']]
1번째 cluster offset : [[12, 13], [470, 471], [473, 474], [695, 696], [740, 748], [759, 759], [889, 890]]
1번째 cluster에 해당하는 mention : [['THE', 'BOTTOMS'], ['the', 'site'], ['Hell', 'Row'], ['the', 'actual'], ['was', 'not', 'anxious', 'to', 'move', 'into', 'the', 'Bottoms', ','], ['path'], ['little', 'from']]
2번째 cluster offset : [[18, 19], [22, 23], [41, 41], [250, 253], [462, 466]]
2번째 cluster에 해당하는 mention : [['Hell', 'Row'], ['Hell', 'Row'], ['There'], ['Forest', '.', 'About', 'this'], ['then', ',', 'in', 'the', 'brook']]
3번째 cluster offset : [[35, 36], [457, 459]]
3번째 cluster에 해당하는 mention : [['the', 'brookside'], ['hillside', 'of', 'Bestwood']]
4번째 cluster offset : [[38, 39]]
4번째 cluster에 해당하는 mention : [['Greenhill', 'Lane']]
5번째 cluster offset : [[25, 39]]
5번째 cluster에 해당하는 mention : [['a', 'block', 'of', 'thatched', ',', 'bulging', 'cottages', 'that', 'stood', 'b

In [72]:
coref_spans = clusters_0
coref_spans

[[9, 10], [915, 915], [920, 920]]

In [73]:
get_mentions_from_coref(coref_spans, flat_tokens)

[['THE', 'MORELS'], ['Her'], ['.']]

### 3. Splitt

In [904]:
# Load LitBank Json Data
litbank_jsons=[]
with jsonlines.open("../data/LitBank/train.english.jsonlines") as read_file:
    for line in read_file.iter():
        litbank_jsons.append(line)

# Load LitBank Splitted Json Data
litbank_splitted_jsons=[]
with jsonlines.open("../data/LitBank/train-splitted.english.jsonlines") as read_file:
    for line in read_file.iter():
        litbank_splitted_jsons.append(line)       
        
# Load LitBank IAA Json Data
litbank_iaa_jsons=[]
with jsonlines.open("../data/train.litbank_iaa.jsonlines") as read_file:
    for line in read_file.iter():
        litbank_iaa_jsons.append(line)

In [905]:
print("LitBank Json Data : ", len(litbank_jsons))
print("LitBank Splitted Json Data : ", len(litbank_splitted_jsons))
print("LitBank IAA Json Data : ", len(litbank_iaa_jsons))

LitBank Json Data :  80
LitBank Splitted Json Data :  160
LitBank IAA Json Data :  80


In [922]:
# Preprocess: flatten sentences & speakers (for internal use)
def preprocess_litbank_doc(doc):
    if "sentences" not in doc or "speakers" not in doc or "clusters" not in doc:
        print(f"❌ Skipping {doc.get('doc_key', 'unknown')} due to missing keys.")
        return None

    tokens = []
    speakers = []
    sentence_spans = []  # (start_token_index, end_token_index)
    idx = 0
    for sentence, speaker_line in zip(doc["sentences"], doc["speakers"]):
        tokens.extend(sentence)
        speakers.extend(speaker_line)
        start_idx = idx
        end_idx = idx + len(sentence) - 1
        sentence_spans.append((start_idx, end_idx))
        idx = end_idx + 1

    return {
        "doc_key": doc["doc_key"],
        "sentences": doc["sentences"],
        "speakers": doc["speakers"],
        "clusters": doc["clusters"],
        "tokens": tokens,
        "sentence_spans": sentence_spans
    }


# Split into two halves based on tokens
def split_document(doc):
    tokens = doc["tokens"]
    sentence_spans = doc["sentence_spans"]
    num_tokens = len(tokens)
    split_point = num_tokens // 2

    # 문장을 기준으로 앞뒤 반 자르기
    split_idx = 0
    for i, (start, end) in enumerate(sentence_spans):
        if end >= split_point:
            split_idx = i + 1
            break

    split_docs = []
    for i in range(2):
        if i == 0:
            sent_start, sent_end = 0, split_idx
            token_offset = 0
        else:
            sent_start, sent_end = split_idx, len(doc["sentences"])
            token_offset = sentence_spans[split_idx][0]

        new_sentences = doc["sentences"][sent_start:sent_end]
        new_speakers = doc["speakers"][sent_start:sent_end]

        # 문장 전체 토큰 이어붙여서 새 token index 범위 만듦
        new_token_count = sum(len(s) for s in new_sentences)

        # clusters 재구성 (해당 범위 안에 완전히 포함된 span만)
        new_clusters = []
        for cluster in doc["clusters"]:
            new_cluster = []
            for span in cluster:
                if token_offset <= span[0] <= span[1] < token_offset + new_token_count:
                    new_cluster.append([span[0] - token_offset, span[1] - token_offset])
            if new_cluster:
                new_clusters.append(new_cluster)

        new_doc = {
            "doc_key": f"{doc['doc_key']}_{i}",
            "sentences": new_sentences,
            "speakers": new_speakers,
            "clusters": new_clusters,
        }
        split_docs.append(new_doc)

    return split_docs

In [923]:
# 전체 파이프라인 실행
litbank_iaa_jsons = []
for doc in litbank_jsons:
    processed = preprocess_litbank_doc(doc)
    if processed:
        litbank_iaa_jsons.append(processed)

litbank_iaa_splitted_jsons = []
for doc in litbank_iaa_jsons:
    litbank_iaa_splitted_jsons.extend(split_document(doc))


# 결과 저장
print(f"✅ 원래 문서 수: {len(litbank_iaa_jsons)}")
print(f"✅ 분할된 문서 수: {len(litbank_iaa_splitted_jsons)}")  # 기대: 160

with jsonlines.open("../data/train-splitted.litbank_iaa.jsonlines", mode='w') as writer:
    for doc in litbank_iaa_splitted_jsons:
        writer.write(doc)

✅ 원래 문서 수: 80
✅ 분할된 문서 수: 160
