In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import joblib, torch

dataset_name = "templete"
model_dir = f"/content/drive/MyDrive/졸업논문/models/{dataset_name}_model"

# 모델/토크나이저 로드
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)  # 이제 오류 없음

# LabelEncoder 로드
le = joblib.load(f"{model_dir}/label_encoder_{dataset_name}.pkl")

# 새 문장 예측
new_text = "This is fantastic!"
inputs = tokenizer(new_text, return_tensors="pt", padding=True, truncation=True)

with torch.no_grad():
    logits = model(**inputs).logits

pred_id = torch.argmax(logits, dim=-1).item()
pred_label = le.inverse_transform([pred_id])[0]

print(f"예측 결과: {pred_label}")

예측 결과: Not Dark Pattern


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import joblib, torch

dataset_name = "contextual"
model_dir = f"/content/drive/MyDrive/졸업논문/models/{dataset_name}_model"

# 모델/토크나이저 로드
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)  # 이제 오류 없음

# LabelEncoder 로드
le = joblib.load(f"{model_dir}/label_encoder_{dataset_name}.pkl")

# 새 문장 예측
new_text = "This is fantastic!"
inputs = tokenizer(new_text, return_tensors="pt", padding=True, truncation=True)

with torch.no_grad():
    logits = model(**inputs).logits

pred_id = torch.argmax(logits, dim=-1).item()
pred_label = le.inverse_transform([pred_id])[0]

print(f"예측 결과: {pred_label}")

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import joblib, torch

dataset_name = "paraphrase"
model_dir = f"/content/drive/MyDrive/졸업논문/models/{dataset_name}_model"

# 모델/토크나이저 로드
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# LabelEncoder 로드
le = joblib.load(f"{model_dir}/label_encoder_{dataset_name}.pkl")

# 새 문장 예측
new_text = "This is fantastic!"
inputs = tokenizer(new_text, return_tensors="pt", padding=True, truncation=True)

with torch.no_grad():
    logits = model(**inputs).logits

pred_id = torch.argmax(logits, dim=-1).item()
pred_label = le.inverse_transform([pred_id])[0]

print(f"예측 결과: {pred_label}")

# DarkPatternGuiltyFeeds 의 데이터셋에서 테스트

In [12]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import joblib, torch
from datasets import load_dataset

# ----------------------
# 1. 모델 / 토크나이저 로드
# ----------------------
dataset_name = "paraphrase"
model_dir = f"/content/drive/MyDrive/졸업논문/models/{dataset_name}_model"

model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)
le = joblib.load(f"{model_dir}/label_encoder_{dataset_name}.pkl")

# ----------------------
# 2. 새로운 데이터셋 불러오기
# ----------------------
ds = load_dataset("Anshusinghh/DarkPatternGuiltyFeeds")
sentences = ds["train"]["No, I want to lose this offer"]   # 문장 전체
labels = ds["train"]["0"]                                  # 라벨 전체

# ----------------------
# 3. 예측 함수 정의 (배치 처리)
# ----------------------
def predict(texts, batch_size=32):
    preds = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        pred_ids = torch.argmax(logits, dim=-1).cpu().numpy()
        batch_labels = le.inverse_transform(pred_ids)
        preds.extend(batch_labels)
    return preds

# ----------------------
# 4. 전체 데이터셋 예측 실행
# ----------------------
preds = predict(sentences)

# ----------------------
# 5. 결과 DataFrame 생성 및 저장
# ----------------------
df = pd.DataFrame({
    "sentence": sentences,
    "true_label": labels,
    "pred_label": preds
})

output_path = "/content/drive/MyDrive/졸업논문/Test_Result/DarkPatternGuiltyFeeds_predictions.csv"
df.to_csv(output_path, index=False, encoding="utf-8-sig")

print(f"✅ 전체 예측 결과 저장 완료: {output_path}")
print(df.head(10))

✅ 전체 예측 결과 저장 완료: /content/drive/MyDrive/졸업논문/Test_Result/DarkPatternGuiltyFeeds_predictions.csv
                                           sentence  true_label  \
0  Nope, I'd prefer not to take up this opportunity           0   
1                  I'd rather not accept this offer           0   
2   No, thanks; I'm not interested in this proposal           0   
3              I'm not keen on accepting this offer           0   
4     I'm inclined to decline this particular offer           0   
5                No, I'd like to pass on this offer           0   
6    I'm not looking to accept this offer right now           0   
7        I'd rather not go for this particular deal           0   
8        I'm not interested in taking up this offer           0   
9    I'm leaning towards rejecting this opportunity           0   

        pred_label  
0   Confirmshaming  
1   Confirmshaming  
2   Confirmshaming  
3  Trick Questions  
4  Trick Questions  
5   Confirmshaming  
6  Trick Q

In [7]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import joblib, torch
from datasets import load_dataset

# ----------------------
# 1. 모델 / 토크나이저 로드
# ----------------------
dataset_name = "paraphrase"
model_dir = f"/content/drive/MyDrive/졸업논문/models/{dataset_name}_model"

model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)
le = joblib.load(f"{model_dir}/label_encoder_{dataset_name}.pkl")

# ----------------------
# 2. 새로운 데이터셋 불러오기
# ----------------------

ds = load_dataset("Anshusinghh/DarkPatternGuiltyFeeds")

# validation split 사용 (test용)
new_sentences = ds["validation"]["sentence"][:10]   # 샘플 10개만 확인

# ----------------------
# 3. 예측 함수 정의
# ----------------------
def predict(texts):
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    pred_ids = torch.argmax(logits, dim=-1).cpu().numpy()
    return le.inverse_transform(pred_ids)

# ----------------------
# 4. 테스트 실행
# ----------------------
preds = predict(new_sentences)

for sent, pred in zip(new_sentences, preds):
    print(f"[문장] {sent}")
    print(f"[예측 결과] {pred}")
    print("-" * 50)

trainData.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/423 [00:00<?, ? examples/s]

KeyError: 'validation'

In [6]:
preds

array(['Low-stock Messages', 'Not Dark Pattern', 'High-demand Messages',
       'Activity Notifications', 'Limited-time Messages',
       'Not Dark Pattern', 'Not Dark Pattern', 'Countdown Timers',
       'Activity Notifications', 'Not Dark Pattern'], dtype=object)