# KG 성능 비교 실험 (ROUGE 기반 평가)


In [1]:
import pandas as pd
from rouge_score import rouge_scorer

# 데이터 불러오기
kg = pd.read_csv("kg_triples_test.csv")
test_df = pd.read_csv("test.csv")

# 트리플 결합 텍스트 만들기
kg["triple_text"] = kg["subject"] + " " + kg["predicate"] + " " + kg["object"]


In [2]:
# 간단한 유사도 기반 KG 검색 함수
def find_relevant_kg(text, kg_texts, top_k=3):
    matches = []
    for t in kg_texts:
        score = sum([1 for w in t.split() if w in text])
        matches.append((t, score))
    matches.sort(key=lambda x: x[1], reverse=True)
    return [x[0] for x in matches[:top_k]]


In [3]:
# 프롬프트 구성 함수
def build_prompt(text, kg_hits):
    context = "\n".join(kg_hits)
    return f"다음은 참고 지식입니다:\n{context}\n\n사용자 입력: {text}\n답변:"


In [4]:
# ROUGE 평가 함수
def evaluate_with_rouge(predictions, references):
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    scores = [scorer.score(ref, pred)["rougeL"].fmeasure for pred, ref in zip(predictions, references)]
    return sum(scores) / len(scores)


In [5]:
from random import randint

# 샘플 10개 추출
sample = test_df.sample(10, random_state=42)

# 1. KG 없이 (baseline)
preds_no_kg = ["답변: " + text[:20] for text in sample["input"]]

# 2. KG 사용
preds_with_kg = []
for text in sample["input"]:
    hits = find_relevant_kg(text, kg["triple_text"])
    prompt = build_prompt(text, hits)
    preds_with_kg.append("답변: " + text[:20] + " (지식반영)")

# 평가
rouge_no_kg = evaluate_with_rouge(preds_no_kg, sample["output"])
rouge_with_kg = evaluate_with_rouge(preds_with_kg, sample["output"])

print(f"KG 미사용 시 ROUGE-L: {rouge_no_kg:.4f}")
print(f"KG 사용 시 ROUGE-L: {rouge_with_kg:.4f}")


KeyError: 'input'

In [24]:
# ✅ 1. 필요한 라이브러리 로딩
import pandas as pd
import subprocess

# ✅ 2. Knowledge Graph 로딩 및 텍스트 구성
kg = pd.read_csv("kg_triples_test.csv")
kg["triple_text"] = kg["subject"] + " " + kg["predicate"] + " " + kg["object"]

# ✅ 3. 테스트 질문 로딩
test_df = pd.read_csv("test.csv")

# ✅ 4. KG 검색 함수
def find_relevant_kg(user_input, kg_texts, topk=3):
    return [t for t in kg_texts if any(word in user_input for word in t.split())][:topk]

# ✅ 5. 프롬프트 생성 함수
def build_prompt(user_input, kg_hits=None):
    prompt = f"질문: {user_input}\n"
    if kg_hits:
        prompt += "배경지식:\n"
        for hit in kg_hits:
            prompt += f"- {hit}\n"
    prompt += "답변:"
    return prompt

# ✅ 6. 로컬 Ollama 모델 호출 함수 (예: llama3)
def run_ollama(prompt, model="llama3"):
    result = subprocess.run(
        ["ollama", "run", model],
        input=prompt.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=30
    )
    return result.stdout.decode("utf-8").strip()

# ✅ 7. 질문 입력 → KG 사용 여부에 따른 비교
sample = test_df.sample(1, random_state=42)
user_input = sample["sentence1"].values[0]
true_answer = sample["sentence2"].values[0]

kg_hits = find_relevant_kg(user_input, kg["triple_text"])
prompt_kg = build_prompt(user_input, kg_hits)
prompt_no_kg = build_prompt(user_input)

print("📌 질문:", user_input)
print("\n[프롬프트 - KG 미사용]\n", prompt_no_kg)
print("\n[프롬프트 - KG 사용]\n", prompt_kg)

# ✅ 8. Ollama 실행 및 출력 결과
print("\n🤖 [KG 미사용 답변]\n")
print(run_ollama(prompt_no_kg))

print("\n🤖 [KG 사용 답변]\n")
print(run_ollama(prompt_kg))

📌 질문: 라이스 페이퍼의 두께나 구성이 일반적이지 않아 조금 놀랐습니다. 아마, 식사량을 충분히 하는 것이 목표였을건데, 이로 인해 월남쌈의 질감이나 맛을 느끼는 부분에서 조금 힘들었습니다. 개인적으로는 훠궈 육수는 조금 아쉬움이 남았지만, 다른 메뉴들 중에서는 고기가 빼어난 맛을 보였습니다.

[프롬프트 - KG 미사용]
 질문: 라이스 페이퍼의 두께나 구성이 일반적이지 않아 조금 놀랐습니다. 아마, 식사량을 충분히 하는 것이 목표였을건데, 이로 인해 월남쌈의 질감이나 맛을 느끼는 부분에서 조금 힘들었습니다. 개인적으로는 훠궈 육수는 조금 아쉬움이 남았지만, 다른 메뉴들 중에서는 고기가 빼어난 맛을 보였습니다.
답변:

[프롬프트 - KG 사용]
 질문: 라이스 페이퍼의 두께나 구성이 일반적이지 않아 조금 놀랐습니다. 아마, 식사량을 충분히 하는 것이 목표였을건데, 이로 인해 월남쌈의 질감이나 맛을 느끼는 부분에서 조금 힘들었습니다. 개인적으로는 훠궈 육수는 조금 아쉬움이 남았지만, 다른 메뉴들 중에서는 고기가 빼어난 맛을 보였습니다.
배경지식:
- 사용자 질문한다 다른 사람의 의도
답변:

🤖 [KG 미사용 답변]



FileNotFoundError: [WinError 2] 지정된 파일을 찾을 수 없습니다

In [None]:
import torch
from transformers import BertTokenizer, BertForMaskedLM
import pandas as pd

# CPU로 강제 설정
device = torch.device("cpu")
torch.set_default_tensor_type(torch.FloatTensor)

# KoBERT 모델과 토크나이저 로드
tokenizer = BertTokenizer.from_pretrained('monologg/kobert')
model = BertForMaskedLM.from_pretrained('monologg/kobert')
model.to(device)
model.eval()

# KG 데이터 불러오기
kg_df = pd.read_csv("kg_triples_test.csv")
if 'triple_text' not in kg_df.columns:
    kg_df['triple_text'] = kg_df['subject'] + " " + kg_df['predicate'] + " " + kg_df['object']

# 배경지식 찾기
def find_relevant_kg(question, kg_df):
    hits = []
    for triple in kg_df['triple_text']:
        if any(word in triple for word in question.split()):
            hits.append(triple)
    return hits[:3]

# 프롬프트 생성
def build_prompt(question, kg_hits=None):
    prompt = ""
    if kg_hits:
        prompt += "배경지식:\n"
        for hit in kg_hits:
            prompt += f"- {hit}\n"
    prompt += f"질문: {question}"
    return prompt

# 답변 생성
def generate_answer(prompt):
    if "[MASK]" not in prompt:
        return "질문에 [MASK]가 포함되어야 합니다."
    
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    
    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
    mask_token_logits = outputs.logits[0, mask_token_index, :]
    top_token = torch.argmax(mask_token_logits, dim=1)
    predicted_token = tokenizer.decode(top_token)
    
    return prompt.replace("[MASK]", predicted_token)

# 사용자 입력
user_question = input("질문을 입력하세요 (예: 나는 [MASK] 기분이다): ")

# KG 미사용
prompt_no_kg = build_prompt(user_question)
answer_no_kg = generate_answer(prompt_no_kg)

# KG 사용
kg_hits = find_relevant_kg(user_question, kg_df)
prompt_with_kg = build_prompt(user_question, kg_hits)
answer_with_kg = generate_answer(prompt_with_kg)

print("\n KG 미사용:")
print("프롬프트:", prompt_no_kg)
print("답변:", answer_no_kg)

print("\n KG 사용:")
print("프롬프트:", prompt_with_kg)
print("답변:", answer_with_kg)


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'KoBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Some weights of BertForMaskedLM were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📌 KG 미사용:
프롬프트: 질문: 질문: 나는 [MASK] 기분이다
답변: 질문: 질문: 나는 꽉 기분이다

📌 KG 사용:
프롬프트: 질문: 질문: 나는 [MASK] 기분이다
답변: 질문: 질문: 나는 꽉 기분이다


In [None]:
import subprocess
import pandas as pd

# Ollama 실행 경로 (직접 지정)
OLLAMA_PATH = r"C:\Users\lsm40\AppData\Local\Programs\Ollama\ollama.exe"

# 사용자 질문 입력
user_input = input("질문을 입력하세요: ")

# KG 파일 불러오기
kg_df = pd.read_csv("kg_triples_test.csv")  # 경로는 필요에 따라 수정

# KG에서 관련 정보 추출 (간단한 키워드 기반 검색)
def find_relevant_kg(question, kg_df):
    hits = []
    for idx, row in kg_df.iterrows():
        triple = f"{row['subject']}, {row['predicate']}, {row['object']}"
        if any(word in triple for word in question.split()):
            hits.append(triple)
    return hits[:3]  # 최대 3개까지만 사용

# 프롬프트 생성
def build_prompt(user_input, kg_hits=None):
    prompt = ""
    if kg_hits:
        prompt += "배경지식:\n"
        for hit in kg_hits:
            prompt += f"- {hit}\n"
    prompt += f"질문: {user_input}\n답변:"
    return prompt

#  Llama3로 답변 생성
def run_ollama(prompt, model="llama3"):
    try:
        result = subprocess.run(
            [OLLAMA_PATH, "run", model],
            input=prompt.encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=60
        )
        return result.stdout.decode("utf-8").strip()
    except FileNotFoundError:
        return " Ollama 실행 파일을 찾을 수 없습니다. 경로를 다시 확인하세요."
    except subprocess.TimeoutExpired:
        return " 실행 시간이 초과되었습니다."
    except Exception as e:
        return f" 오류 발생: {e}"

#  KG 검색 결과 가져오기
kg_hits = find_relevant_kg(user_input, kg_df)

#  프롬프트 생성
prompt_kg = build_prompt(user_input, kg_hits)
prompt_no_kg = build_prompt(user_input)

#  출력
print("\n [KG 미사용 프롬프트]\n", prompt_no_kg)
print("\n [KG 미사용 답변]\n", run_ollama(prompt_no_kg))

print("\n [KG 사용 프롬프트]\n", prompt_kg)
print("\n [KG 사용 답변]\n", run_ollama(prompt_kg))



📌 [KG 미사용 프롬프트]
 질문: 피곤해
답변:

🤖 [KG 미사용 답변]
 ⏱️ 실행 시간이 초과되었습니다.

📌 [KG 사용 프롬프트]
 질문: 피곤해
답변:

🤖 [KG 사용 답변]
 ⏱️ 실행 시간이 초과되었습니다.


In [None]:
import subprocess
import pandas as pd
import time

#  Ollama 실행 파일 경로 (직접 확인된 경로로 수정 필요)
OLLAMA_PATH = r"C:\Users\lsm40\AppData\Local\Programs\Ollama\ollama.exe"

#  사용자 질문 실시간 입력
user_input = input("질문을 입력하세요: ").strip()

#  KG 그래프 CSV 로딩 (컬럼명 자동 감지)
kg_path = "./kg_triples_test.csv"  # 예: 'triple_text' 열이 있는 파일
kg_df = pd.read_csv(kg_path)
col = kg_df.columns[0]  # 첫 번째 열 자동 인식

#  KG에서 관련된 배경지식 추출 함수
def find_relevant_kg(question, df, column):
    hits = []
    for triple in df[column]:
        if any(word in triple for word in question.split()):
            hits.append(triple)
    return hits[:3]

#  프롬프트 생성 함수
def build_prompt(question, kg_hits=None):
    prompt = ""
    if kg_hits:
        prompt += "배경지식:\n"
        for hit in kg_hits:
            prompt += f"- {hit}\n"
    prompt += f"질문: {question}\n답변:"
    return prompt

#  Llama3 실행 함수
def run_ollama(prompt, timeout=60):
    try:
        result = subprocess.run(
            [OLLAMA_PATH, "run", "llama3"],
            input=prompt.encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout
        )
        if result.stderr:
            print(" stderr:", result.stderr.decode("utf-8").strip())
        return result.stdout.decode("utf-8").strip()
    except subprocess.TimeoutExpired:
        return " 실행 시간이 초과되었습니다."
    except FileNotFoundError:
        return " Ollama 실행 파일을 찾을 수 없습니다. 경로를 확인하세요."

#  KG 기반 프롬프트 구성
kg_hits = find_relevant_kg(user_input, kg_df, col)
prompt_no_kg = build_prompt(user_input)
prompt_kg = build_prompt(user_input, kg_hits)

#  실행 및 결과 출력
print("\n [KG 미사용 프롬프트]\n", prompt_no_kg)
print("\n [KG 미사용 답변]\n", run_ollama(prompt_no_kg))

print("\n [KG 사용 프롬프트]\n", prompt_kg)
print("\n [KG 사용 답변]\n", run_ollama(prompt_kg))



📌 [KG 미사용 프롬프트]
 질문: 나 피곤해
답변:

🤖 [KG 미사용 답변]
 ⏱️ 실행 시간이 초과되었습니다.

📌 [KG 사용 프롬프트]
 질문: 나 피곤해
답변:

🤖 [KG 사용 답변]
 ⏱️ 실행 시간이 초과되었습니다.


In [17]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import json

model_name = 'monologg/kobert'
tokenizer = BertTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

data = load_jsonl("poetry.jsonl")
df = pd.DataFrame(data)

# 라벨 처리
label_map = {"human": 0, "ai": 1}
if df['label'].dtype == object:
    df['label'] = df['label'].map(label_map)
df = df.dropna(subset=['label'])
df['label'] = df['label'].astype(int)

# KG 로드
kg_df = pd.read_csv("kg_triples_test.csv")
kg_texts = [
    f"{r['subject']} {r['predicate']} {r['object']}"
    for _, r in kg_df.iterrows()
]

def find_relevant_kg(text, kg_texts, topk=3):
    hits = [kg for kg in kg_texts if any(tok in kg for tok in text.split())]
    return " ".join(hits[:topk])

class PoetryDataset(Dataset):
    def __init__(self, df, tokenizer, use_kg=False):
        self.df = df.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.use_kg = use_kg

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        text = row['text']
        kg = find_relevant_kg(text, kg_texts) if self.use_kg else None

        # 수정된 부분: truncation=True 로 통일
        encoded = self.tokenizer(
            text,
            kg,
            padding='max_length',
            truncation=True,
            max_length=128,
            return_tensors='pt'
        )
        item = {k: v.squeeze(0) for k, v in encoded.items()}
        item['labels'] = torch.tensor(row['label'], dtype=torch.long)
        return item

train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=42
)

def get_model():
    return BertForSequenceClassification.from_pretrained(
        model_name, num_labels=2
    ).to(device)

def train_model(model, loader, epochs=3):
    optimizer = AdamW(model.parameters(), lr=5e-5)
    model.train()
    for epoch in range(epochs):
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            outputs.loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        print(f"Epoch {epoch+1} complete")

def evaluate(model, loader):
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            preds += torch.argmax(logits, axis=1).cpu().tolist()
            labels += batch['labels'].cpu().tolist()
    print(classification_report(labels, preds, target_names=["human", "ai"], zero_division=0))

# KG 미사용
model = get_model()
train_loader = DataLoader(
    PoetryDataset(train_df, tokenizer, use_kg=False),
    batch_size=8, shuffle=True, pin_memory=True
)
test_loader = DataLoader(
    PoetryDataset(test_df, tokenizer, use_kg=False),
    batch_size=8, shuffle=False, pin_memory=True
)
print("Training without KG")
train_model(model, train_loader, epochs=3)
print("Evaluation without KG")
evaluate(model, test_loader)

# KG 사용
model = get_model()
train_loader = DataLoader(
    PoetryDataset(train_df, tokenizer, use_kg=True),
    batch_size=8, shuffle=True, pin_memory=True
)
test_loader = DataLoader(
    PoetryDataset(test_df, tokenizer, use_kg=True),
    batch_size=8, shuffle=False, pin_memory=True
)
print("Training with KG")
train_model(model, train_loader, epochs=3)
print("Evaluation with KG")
evaluate(model, test_loader)


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'KoBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training without KG
Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Evaluation without KG
              precision    recall  f1-score   support

       human       0.00      0.00      0.00        38
          ai       0.80      1.00      0.89       151

    accuracy                           0.80       189
   macro avg       0.40      0.50      0.44       189
weighted avg       0.64      0.80      0.71       189



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Training with KG


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Epoch 1 complete


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Epoch 2 complete


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Epoch 3 complete
Evaluation with KG


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

              precision    recall  f1-score   support

       human       0.00      0.00      0.00        38
          ai       0.80      1.00      0.89       151

    accuracy                           0.80       189
   macro avg       0.40      0.50      0.44       189
weighted avg       0.64      0.80      0.71       189



In [18]:
import pandas as pd
import torch
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import json

# 1) 데이터 로드
def load_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]
data = load_jsonl("poetry.jsonl")
df = pd.DataFrame(data)
print(" 로드 후 샘플 수:", len(df))
print(" 라벨 분포 (로드 직후):\n", df['label'].value_counts())

# 2) 문자열 'human'/'ai' → 0/1 매핑 (이미 숫자면 건너뛰기)
label_map = {"human": 0, "ai": 1}
if df['label'].dtype == object:
    df['label'] = df['label'].map(label_map)

# 3) 숫자형으로 변환 & 0·1 외 나머지(NaN 포함) 삭제
df['label'] = pd.to_numeric(df['label'], errors='coerce').astype('Int64')
df = df[df['label'].isin([0, 1])].reset_index(drop=True)
df['label'] = df['label'].astype(int)

print(" 정제 후 샘플 수:", len(df))
print(" 라벨 분포 (정제 후):\n", df['label'].value_counts())

# 4) KG 불러오기
kg_df = pd.read_csv("kg_triples_test.csv")
kg_texts = (
    kg_df
    .apply(lambda r: f"{r['subject']} {r['predicate']} {r['object']}", axis=1)
    .tolist()
)
def find_relevant_kg(text, kg_texts, topk=3):
    hits = [kg for kg in kg_texts if any(tok in kg for tok in text.split())]
    return " ".join(hits[:topk])

# 5) Dataset 정의
class PoetryDataset(Dataset):
    def __init__(self, df, tokenizer, use_kg=False):
        self.df = df
        self.tokenizer = tokenizer
        self.use_kg = use_kg

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        text = self.df.iloc[idx]['text']
        if self.use_kg:
            kg = find_relevant_kg(text, kg_texts)
            text = f"{text} [SEP] {kg}"
        inputs = self.tokenizer(
            text,
            padding='max_length',
            max_length=128,
            truncation=True,
            return_tensors='pt'
        )
        item = {k: v.squeeze(0) for k, v in inputs.items()}
        item['labels'] = torch.tensor(self.df.iloc[idx]['label'], dtype=torch.long)
        return item

# 6) 데이터 분리
train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=42
)

# 7) 토크나이저 및 모델 준비
model_name = 'monologg/kobert'
tokenizer = BertTokenizer.from_pretrained(model_name)
device = torch.device("cpu")
def get_model():
    return BertForSequenceClassification.from_pretrained(
        model_name, num_labels=2
    ).to(device)

# 8) 학습/평가 함수
def train_model(model, loader, epochs=3):
    optim = AdamW(model.parameters(), lr=5e-5)
    model.train()
    for e in range(epochs):
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            loss = model(**batch).loss
            loss.backward()
            optim.step(); optim.zero_grad()
        print(f"Epoch {e+1} 완료")

def evaluate(model, loader):
    model.eval()
    preds, labs = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            preds += torch.argmax(logits, axis=1).cpu().tolist()
            labs  += batch['labels'].cpu().tolist()
    print(classification_report(labs, preds, target_names=['human','ai']))

# 9) KG 미사용
model = get_model()
dl_train = DataLoader(PoetryDataset(train_df, tokenizer, use_kg=False),
                      batch_size=8, shuffle=True)
dl_test  = DataLoader(PoetryDataset(test_df,  tokenizer, use_kg=False),
                      batch_size=8)
print(" KG 미사용 학습 시작")
train_model(model, dl_train)
print(" KG 미사용 결과")
evaluate(model, dl_test)

# 10) KG 사용
model = get_model()
dl_train = DataLoader(PoetryDataset(train_df, tokenizer, use_kg=True),
                      batch_size=8, shuffle=True)
dl_test  = DataLoader(PoetryDataset(test_df,  tokenizer, use_kg=True),
                      batch_size=8)
print("\n KG 사용 학습 시작")
train_model(model, dl_train)
print(" KG 사용 결과")
evaluate(model, dl_test)


 로드 후 샘플 수: 945
 라벨 분포 (로드 직후):
 label
1    756
0    189
Name: count, dtype: int64
 정제 후 샘플 수: 945
 라벨 분포 (정제 후):
 label
1    756
0    189
Name: count, dtype: int64


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'KoBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 KG 미사용 학습 시작
Epoch 1 완료


KeyboardInterrupt: 

In [14]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup
)
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import json

# 1) 데이터 로드 함수
def load_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

# 2) 데이터 불러와서 DataFrame 생성
data = load_jsonl("poetry.jsonl")
df = pd.DataFrame(data)

# 3) 라벨 정제
label_map = {"human": 0, "ai": 1}
if df['label'].dtype == object:
    df['label'] = df['label'].map(label_map)
df['label'] = pd.to_numeric(df['label'], errors='coerce').astype('Int64')
df = df[df['label'].isin([0, 1])].reset_index(drop=True)
df['label'] = df['label'].astype(int)

print(" 최종 샘플 수:", len(df))
print(" 라벨 분포:\n", df['label'].value_counts())

# 4) KG 불러오기
kg_df = pd.read_csv("kg_triples_test.csv")
kg_texts = [
    f"{r['subject']} {r['predicate']} {r['object']}"
    for _, r in kg_df.iterrows()
]

def find_relevant_kg(text, kg_texts, topk=3):
    hits = [kg for kg in kg_texts if any(tok in kg for tok in text.split())]
    return " ".join(hits[:topk])

# 5) Dataset 정의
class PoetryDataset(Dataset):
    def __init__(self, df, tokenizer, use_kg=False):
        self.df = df.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.use_kg = use_kg

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        text = self.df.loc[idx, 'text']
        kg_text = find_relevant_kg(text, kg_texts) if self.use_kg else None

        inputs = self.tokenizer(
            text,
            kg_text,
            padding='max_length',
            max_length=128,
            truncation='only_first' if kg_text else True,
            return_tensors='pt'
        )
        item = {k: v.squeeze(0) for k, v in inputs.items()}
        item['labels'] = torch.tensor(self.df.loc[idx, 'label'], dtype=torch.long)
        return item

# 6) train/test split
train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=42
)

# 7) GPU 세팅
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(" Using device:", device)

# 8) tokenizer & model 함수
model_name = 'monologg/kobert'
tokenizer = BertTokenizer.from_pretrained(model_name)

def get_model():
    model = BertForSequenceClassification.from_pretrained(
        model_name, num_labels=2
    )
    return model.to(device)

# 9) Oversampling sampler
counts = train_df['label'].value_counts().sort_index().tolist()  # [#human, #ai]
sample_weights = [1.0 / counts[label] for label in train_df['label']]
sampler = WeightedRandomSampler(
    sample_weights, num_samples=len(sample_weights), replacement=True
)

# 10) DataLoader 생성 함수
def make_train_loader(df, use_kg):
    return DataLoader(
        PoetryDataset(df, tokenizer, use_kg=use_kg),
        batch_size=8,
        sampler=sampler,
        num_workers=4,
        pin_memory=True
    )

def make_test_loader(df, use_kg):
    return DataLoader(
        PoetryDataset(df, tokenizer, use_kg=use_kg),
        batch_size=8,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )

train_loader_no_kg = make_train_loader(train_df, use_kg=False)
test_loader_no_kg  = make_test_loader(test_df,  use_kg=False)
train_loader_kg    = make_train_loader(train_df, use_kg=True)
test_loader_kg     = make_test_loader(test_df,  use_kg=True)

# 11) 학습/평가 함수
def train_model(model, loader, epochs=5):
    class_weights = torch.tensor([counts[1], counts[0]], dtype=torch.float).to(device)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = AdamW(model.parameters(), lr=3e-5)
    total_steps = len(loader) * epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1 * total_steps),
        num_training_steps=total_steps
    )

    model.train()
    for epoch in range(epochs):
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != 'labels'}).logits
            loss = loss_fn(logits, batch['labels'])
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
        print(f"Epoch {epoch+1}/{epochs} 완료")

def evaluate(model, loader, title=""):
    print(f"\n {title} 평가 결과")
    model.eval()
    preds, labs = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != 'labels'}).logits
            preds += torch.argmax(logits, dim=1).cpu().tolist()
            labs  += batch['labels'].cpu().tolist()
    print(classification_report(labs, preds, target_names=['human','ai'], zero_division=0))

# 12) 실험 1: KG 미사용
model = get_model()
print("\n[실험 1] KG 미사용 학습 시작")
train_model(model, train_loader_no_kg, epochs=5)
evaluate(model, test_loader_no_kg, title="KG 미사용")

# 13) 실험 2: KG 사용
model = get_model()
print("\n[실험 2] KG 사용 학습 시작")
train_model(model, train_loader_kg, epochs=5)
evaluate(model, test_loader_kg, title="KG 사용")


 최종 샘플 수: 945
 라벨 분포:
 label
1    756
0    189
Name: count, dtype: int64
 Using device: cuda


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'KoBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



[실험 1] KG 미사용 학습 시작
Epoch 1/5 완료
Epoch 2/5 완료
Epoch 3/5 완료
Epoch 4/5 완료
Epoch 5/5 완료

 KG 미사용 평가 결과
              precision    recall  f1-score   support

       human       0.41      0.79      0.54        38
          ai       0.93      0.72      0.81       151

    accuracy                           0.73       189
   macro avg       0.67      0.75      0.67       189
weighted avg       0.83      0.73      0.76       189



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



[실험 2] KG 사용 학습 시작


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch 1/5 완료
Epoch 2/5 완료


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch 3/5 완료


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch 4/5 완료
Epoch 5/5 완료

 KG 사용 평가 결과
              precision    recall  f1-score   support

       human       0.36      0.82      0.50        38
          ai       0.93      0.64      0.76       151

    accuracy                           0.67       189
   macro avg       0.65      0.73      0.63       189
weighted avg       0.82      0.67      0.70       189



In [24]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup,
    logging as hf_logging
)
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import Dataset, DataLoader
import json

# 경고 메시지 최소화
hf_logging.set_verbosity_error()

# 1) JSONL 파일 로드
def load_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

data = load_jsonl("poetry.jsonl")
df = pd.DataFrame(data)

# 2) 라벨 인코딩
label_map = {"human": 0, "ai": 1}
if df["label"].dtype == object:
    df["label"] = df["label"].map(label_map)
df = df.dropna(subset=["label"])
df["label"] = df["label"].astype(int)

# 3) KG 트리플 로드 및 TF-IDF 준비
kg_df = pd.read_csv("kg_triples_test.csv")
kg_texts = kg_df.apply(
    lambda r: f"{r['subject']} {r['predicate']} {r['object']}",
    axis=1
).tolist()

tfidf = TfidfVectorizer(max_features=10000)
kg_tfidf = tfidf.fit_transform(kg_texts)

# 4) TF-IDF 기반 상위 3개 KG 검색
def find_relevant_kg(text, topk=3):
    vec = tfidf.transform([text])
    sims = cosine_similarity(vec, kg_tfidf)[0]
    idxs = sims.argsort()[-topk:][::-1]
    return " ".join(kg_texts[i] for i in idxs)

# 5) 토크나이저 로드
tokenizer = BertTokenizer.from_pretrained("monologg/kobert")

# 6) Dataset 정의 (항상 max_length로 패딩/트렁크)
class PoetryDataset(Dataset):
    def __init__(self, df, use_kg=False):
        self.df = df.reset_index(drop=True)
        self.use_kg = use_kg

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        text = self.df.loc[idx, "text"]
        kg   = find_relevant_kg(text) if self.use_kg else None

        # ➤ text, kg 둘 다 max_length=128로 패딩/자르기
        enc = tokenizer(
            text,
            kg,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt"
        )
        item = {k: v.squeeze(0).clone() for k, v in enc.items()}
        item["labels"] = torch.tensor(self.df.loc[idx, "label"], dtype=torch.long).clone()
        return item

# 7) 학습/테스트 분할
train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df["label"], random_state=42
)

# 8) 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 9) 모델 로드 함수
def get_model():
    model = BertForSequenceClassification.from_pretrained(
        "monologg/kobert", num_labels=2
    )
    return model.to(device)

# 10) 학습 및 평가 함수
def train_model(model, loader, epochs=5):
    optimizer = AdamW(model.parameters(), lr=2e-5)
    total_steps = len(loader) * epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.2 * total_steps),
        num_training_steps=total_steps
    )
    loss_fn = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(epochs):
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != "labels"}).logits
            loss = loss_fn(logits, batch["labels"])
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
        print(f"Epoch {epoch+1} 완료")

def evaluate(model, loader):
    model.eval()
    preds, labs = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != "labels"}).logits
            preds += torch.argmax(logits, dim=1).cpu().tolist()
            labs  += batch["labels"].cpu().tolist()
    print(classification_report(labs, preds, target_names=["human","ai"], zero_division=0))

# 11) KG 미사용 vs KG 사용 실험
for use_kg in (False, True):
    model = get_model()
    train_loader = DataLoader(
        PoetryDataset(train_df, use_kg=use_kg),
        batch_size=16, shuffle=True, num_workers=4, pin_memory=True
    )
    test_loader = DataLoader(
        PoetryDataset(test_df, use_kg=use_kg),
        batch_size=16, shuffle=False, num_workers=4, pin_memory=True
    )
    mode = "KG 미사용" if not use_kg else "KG 사용"
    print(f"\n[{mode}] 학습 시작")
    train_model(model, train_loader, epochs=5)
    print(f"[{mode}] 평가 결과")
    evaluate(model, test_loader)



[KG 미사용] 학습 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 미사용] 평가 결과
              precision    recall  f1-score   support

       human       0.75      0.08      0.14        38
          ai       0.81      0.99      0.89       151

    accuracy                           0.81       189
   macro avg       0.78      0.54      0.52       189
weighted avg       0.80      0.81      0.74       189


[KG 사용] 학습 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 사용] 평가 결과
              precision    recall  f1-score   support

       human       0.50      0.11      0.17        38
          ai       0.81      0.97      0.89       151

    accuracy                           0.80       189
   macro avg       0.66      0.54      0.53       189
weighted avg       0.75      0.80      0.74       189



In [25]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup,
    logging as hf_logging
)
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import Dataset, DataLoader
import json

# 경고 메시지 숨기기
hf_logging.set_verbosity_error()

# 1) JSONL 로드 함수
def load_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

# 2) 데이터 읽기 및 라벨 인코딩
data = load_jsonl("poetry.jsonl")
df = pd.DataFrame(data)
label_map = {"human": 0, "ai": 1}
if df["label"].dtype == object:
    df["label"] = df["label"].map(label_map)
df = df.dropna(subset=["label"])
df["label"] = df["label"].astype(int)

# 3) KG 트리플 로드 및 TF-IDF 인덱스 생성
kg_df = pd.read_csv("kg_triples_test.csv")
kg_texts = kg_df.apply(
    lambda r: f"{r['subject']} {r['predicate']} {r['object']}",
    axis=1
).tolist()
tfidf = TfidfVectorizer(max_features=10000)
kg_tfidf = tfidf.fit_transform(kg_texts)

# 4) TF-IDF + 코사인 유사도로 KG 검색
def find_relevant_kg(text, topk=3):
    query_vec = tfidf.transform([text])
    sims = cosine_similarity(query_vec, kg_tfidf)[0]
    idxs = sims.argsort()[-topk:][::-1]
    return " ".join(kg_texts[i] for i in idxs)

# 5) KoBERT 토크나이저 로드
tokenizer = BertTokenizer.from_pretrained("monologg/kobert")

# 6) PyTorch Dataset 정의
class PoetryDataset(Dataset):
    def __init__(self, df, use_kg=False):
        self.df = df.reset_index(drop=True)
        self.use_kg = use_kg

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        text = row["text"]
        kg = find_relevant_kg(text) if self.use_kg else None

        # 본문과 KG를 모두 max_length=128로 패딩/트렁케이트
        enc = tokenizer(
            text,
            kg,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt"
        )
        item = {k: v.squeeze(0).clone() for k, v in enc.items()}
        item["labels"] = torch.tensor(row["label"], dtype=torch.long).clone()
        return item

# 7) 학습/테스트 분할
train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df["label"], random_state=42
)

# 8) 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 9) 모델 로드 함수
def get_model():
    model = BertForSequenceClassification.from_pretrained(
        "monologg/kobert",
        num_labels=2
    )
    return model.to(device)

# 10) 학습 함수
def train_model(model, loader, epochs=5):
    optimizer = AdamW(model.parameters(), lr=2e-5)
    total_steps = len(loader) * epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.2 * total_steps),
        num_training_steps=total_steps
    )
    loss_fn = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(epochs):
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != "labels"}).logits
            loss = loss_fn(logits, batch["labels"])
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
        print(f"Epoch {epoch+1} 완료")

# 11) 평가 함수 (전체 리포트 보여주기)
def evaluate(model, loader):
    model.eval()
    preds, labs = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != "labels"}).logits
            preds += torch.argmax(logits, dim=1).cpu().tolist()
            labs  += batch["labels"].cpu().tolist()

    # pandas 출력 옵션: 모든 열과 넓이를 보여 줌
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 1000)

    # classification_report를 dict로 받아 DataFrame으로 출력
    report_dict = classification_report(labs, preds, target_names=["human","ai"],
                                        output_dict=True, digits=4)
    df_report = pd.DataFrame(report_dict).T
    print(df_report)

# 12) KG 미사용 vs KG 사용 실험
for use_kg in (False, True):
    model = get_model()
    train_loader = DataLoader(
        PoetryDataset(train_df, use_kg=use_kg),
        batch_size=16, shuffle=True,
        num_workers=4, pin_memory=True
    )
    test_loader = DataLoader(
        PoetryDataset(test_df, use_kg=use_kg),
        batch_size=16, shuffle=False,
        num_workers=4, pin_memory=True
    )
    mode = "KG 미사용" if not use_kg else "KG 사용"
    print(f"\n[{mode}] 학습 시작")
    train_model(model, train_loader, epochs=5)
    print(f"[{mode}] 평가 결과")
    evaluate(model, test_loader)



[KG 미사용] 학습 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 미사용] 평가 결과
              precision    recall  f1-score     support
human          0.538462  0.184211  0.274510   38.000000
ai             0.823864  0.960265  0.886850  151.000000
accuracy       0.804233  0.804233  0.804233    0.804233
macro avg      0.681163  0.572238  0.580680  189.000000
weighted avg   0.766481  0.804233  0.763734  189.000000

[KG 사용] 학습 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 사용] 평가 결과
              precision    recall  f1-score    support
human          0.400000  0.105263  0.166667   38.00000
ai             0.810056  0.960265  0.878788  151.00000
accuracy       0.788360  0.788360  0.788360    0.78836
macro avg      0.605028  0.532764  0.522727  189.00000
weighted avg   0.727611  0.788360  0.735610  189.00000


In [26]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup,
    logging as hf_logging
)
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import json

# 경고 메시지 최소화
hf_logging.set_verbosity_error()

# 1) JSONL 파일 로드
def load_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

data = load_jsonl("poetry.jsonl")
df = pd.DataFrame(data)

# 2) 라벨 인코딩
label_map = {"human": 0, "ai": 1}
if df["label"].dtype == object:
    df["label"] = df["label"].map(label_map)
df = df.dropna(subset=["label"])
df["label"] = df["label"].astype(int)

# 3) KG 트리플 로드 및 TF-IDF 인덱스 생성
kg_df = pd.read_csv("kg_triples_test.csv")
kg_texts = kg_df.apply(
    lambda r: f"{r['subject']} {r['predicate']} {r['object']}",
    axis=1
).tolist()

tfidf = TfidfVectorizer(max_features=10000)
kg_tfidf = tfidf.fit_transform(kg_texts)

# 4) TF-IDF + 코사인 유사도로 top3 KG 검색
def find_relevant_kg(text, topk=3):
    qv = tfidf.transform([text])
    sims = cosine_similarity(qv, kg_tfidf)[0]
    idxs = sims.argsort()[-topk:][::-1]
    return " ".join(kg_texts[i] for i in idxs)

# 5) KoBERT 토크나이저 로드
tokenizer = BertTokenizer.from_pretrained("monologg/kobert")

# 6) Dataset 정의 (padding+truncation 통일)
class PoetryDataset(Dataset):
    def __init__(self, df, use_kg=False):
        self.df = df.reset_index(drop=True)
        self.use_kg = use_kg

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        text = row["text"]
        kg   = find_relevant_kg(text) if self.use_kg else None

        enc = tokenizer(
            text,
            kg,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt"
        )
        item = {k: v.squeeze(0).clone() for k, v in enc.items()}
        item["labels"] = torch.tensor(row["label"], dtype=torch.long).clone()
        return item

# 7) 학습/테스트 분할
train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df["label"], random_state=42
)

# 8) oversampling 위한 WeightedRandomSampler 생성
counts = train_df["label"].value_counts().sort_index().tolist()  # [human_count, ai_count]
# 각 샘플에 inverse frequency 가중치 부여
sample_weights = [1.0 / counts[label] for label in train_df["label"]]
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# 9) 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 10) 모델 로드 함수
def get_model():
    model = BertForSequenceClassification.from_pretrained(
        "monologg/kobert", num_labels=2
    )
    return model.to(device)

# 11) 학습 함수 (클래스 가중치 + 스케줄러)
def train_model(model, loader, epochs=5):
    # 클래스 가중치는 [ai_count, human_count] 순이 되도록
    class_weights = torch.tensor([counts[1], counts[0]], dtype=torch.float).to(device)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = AdamW(model.parameters(), lr=2e-5)
    total_steps = len(loader) * epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.2 * total_steps),
        num_training_steps=total_steps
    )

    model.train()
    for epoch in range(1, epochs+1):
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != "labels"}).logits
            loss = loss_fn(logits, batch["labels"])
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
        print(f"Epoch {epoch} 완료")

# 12) 평가 함수 (전체 리포트 출력)
def evaluate(model, loader):
    model.eval()
    preds, labs = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != "labels"}).logits
            preds += torch.argmax(logits, dim=1).cpu().tolist()
            labs  += batch["labels"].cpu().tolist()

    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)
    report = classification_report(
        labs, preds,
        target_names=["human","ai"],
        output_dict=True,
        digits=4
    )
    print(pd.DataFrame(report).T)

# 13) KG 미사용 vs KG 사용 실험
for use_kg in (False, True):
    model = get_model()
    train_loader = DataLoader(
        PoetryDataset(train_df, use_kg=use_kg),
        batch_size=16,
        sampler=sampler,
        num_workers=4,
        pin_memory=True
    )
    test_loader = DataLoader(
        PoetryDataset(test_df, use_kg=use_kg),
        batch_size=16,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )

    mode = "KG 미사용" if not use_kg else "KG 사용"
    print(f"\n[{mode}] 학습 시작")
    train_model(model, train_loader, epochs=5)
    print(f"[{mode}] 평가 결과")
    evaluate(model, test_loader)



[KG 미사용] 학습 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 미사용] 평가 결과
              precision    recall  f1-score     support
human          0.336634  0.894737  0.489209   38.000000
ai             0.954545  0.556291  0.702929  151.000000
accuracy       0.624339  0.624339  0.624339    0.624339
macro avg      0.645590  0.725514  0.596069  189.000000
weighted avg   0.830309  0.624339  0.659959  189.000000

[KG 사용] 학습 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 사용] 평가 결과
              precision    recall  f1-score     support
human          0.282443  0.973684  0.437870   38.000000
ai             0.982759  0.377483  0.545455  151.000000
accuracy       0.497354  0.497354  0.497354    0.497354
macro avg      0.632601  0.675584  0.491662  189.000000
weighted avg   0.841954  0.497354  0.523824  189.000000


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from transformers import (
    BertTokenizer,
    BertModel,
    get_linear_schedule_with_warmup,
    logging as hf_logging
)
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import classification_report
from torch.utils.data import Dataset, DataLoader
import json

# 경고 메시지 최소화
hf_logging.set_verbosity_error()

# 1) 데이터 로드 및 전처리
def load_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

data = load_jsonl("poetry.jsonl")
df = pd.DataFrame(data)

label_map = {"human": 0, "ai": 1}
if df["label"].dtype == object:
    df["label"] = df["label"].map(label_map)
df = df.dropna(subset=["label"])
df["label"] = df["label"].astype(int)

# 2) KG 트리플 로드 및 TF-IDF 준비
kg_df = pd.read_csv("kg_triples_test.csv")
kg_texts = kg_df.apply(
    lambda r: f"{r['subject']} {r['predicate']} {r['object']}",
    axis=1
).tolist()

tfidf = TfidfVectorizer(max_features=5000)
tfidf.fit(kg_texts)

# 3) KG 검색 함수 ( 코사인 유사도 )
def find_relevant_kg(text, topk=3):
    qv = tfidf.transform([text])
    sims = cosine_similarity(qv, tfidf.transform(kg_texts))[0]
    idxs = sims.argsort()[-topk:][::-1]
    return " ".join(kg_texts[i] for i in idxs)

# 4) Dataset 정의 (late-fusion)
tokenizer = BertTokenizer.from_pretrained("monologg/kobert")
tfidf_dim = len(tfidf.vocabulary_)

class LateFusionDataset(Dataset): # 모델 KoBERT
    def __init__(self, df, use_kg=False):
        self.df = df.reset_index(drop=True)
        self.use_kg = use_kg

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        text = row["text"]
        kg_text = find_relevant_kg(text) if self.use_kg else ""
        enc = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt"
        )
        input_ids = enc["input_ids"].squeeze(0)
        attention_mask = enc["attention_mask"].squeeze(0)
        kg_vec = torch.tensor(tfidf.transform([kg_text]).toarray()[0], dtype=torch.float)
        label = torch.tensor(row["label"], dtype=torch.long)
        return input_ids, attention_mask, kg_vec, label

# 5) Late-fusion 모델 정의
class LateFusionModel(nn.Module):
    def __init__(self, tfidf_dim):
        super().__init__()
        self.bert = BertModel.from_pretrained("monologg/kobert")
        hidden_size = self.bert.config.hidden_size
        self.kg_mlp = nn.Sequential(
            nn.Linear(tfidf_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 2)
        )

    def forward(self, input_ids, attention_mask, kg_vec):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        text_pool = out.pooler_output                 # (batch, hidden)
        kg_emb = self.kg_mlp(kg_vec)                  # (batch, hidden)
        fusion = torch.cat([text_pool, kg_emb], dim=1)  # (batch, hidden*2)
        logits = self.classifier(fusion)              # (batch, 2)
        return logits

# 6) 학습·평가 함수
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train(model, loader, epochs=5):
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=2e-5)
    total_steps = len(loader) * epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=int(0.1 * total_steps), num_training_steps=total_steps
    )
    loss_fn = nn.CrossEntropyLoss()
    model.train()
    for ep in range(1, epochs+1):
        for input_ids, attention_mask, kg_vec, labels in loader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            kg_vec = kg_vec.to(device)
            labels = labels.to(device)
            logits = model(input_ids, attention_mask, kg_vec)
            loss = loss_fn(logits, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
        print(f"Epoch {ep} 완료")

def evaluate(model, loader):
    model.to(device)
    model.eval()
    preds, labs = [], []
    with torch.no_grad():
        for input_ids, attention_mask, kg_vec, labels in loader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            kg_vec = kg_vec.to(device)
            logits = model(input_ids, attention_mask, kg_vec)
            preds += torch.argmax(logits, dim=1).cpu().tolist()
            labs  += labels.tolist()
    print(classification_report(labs, preds, target_names=["human","ai"], digits=4))

# 7) 데이터로더 및 실험 실행
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)

for use_kg in (False, True):
    mode = "KG 없이" if not use_kg else "KG 포함"
    print(f"\n[{mode}] 실험 시작")

    train_loader = DataLoader(
        LateFusionDataset(train_df, use_kg=use_kg),
        batch_size=16, shuffle=True, num_workers=4, pin_memory=True
    )
    test_loader = DataLoader(
        LateFusionDataset(test_df, use_kg=use_kg),
        batch_size=16, shuffle=False, num_workers=4, pin_memory=True
    )

    model = LateFusionModel(tfidf_dim)
    train(model, train_loader, epochs=5)
    print(f"[{mode}] 평가 결과")
    evaluate(model, test_loader)



[KG 없이] 실험 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 없이] 평가 결과


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       human     0.0000    0.0000    0.0000        38
          ai     0.7989    1.0000    0.8882       151

    accuracy                         0.7989       189
   macro avg     0.3995    0.5000    0.4441       189
weighted avg     0.6383    0.7989    0.7096       189


[KG 포함] 실험 시작
Epoch 1 완료
Epoch 2 완료
Epoch 3 완료
Epoch 4 완료
Epoch 5 완료
[KG 포함] 평가 결과
              precision    recall  f1-score   support

       human     0.5000    0.0263    0.0500        38
          ai     0.8021    0.9934    0.8876       151

    accuracy                         0.7989       189
   macro avg     0.6511    0.5098    0.4688       189
weighted avg     0.7414    0.7989    0.7192       189

