In [None]:
# 라이브러리 설치
!pip install transformers datasets torch
!pip install evaluate

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [None]:
# Task 1

# 라이브러리 불러오기
from datasets import load_dataset
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer
import random

# 데이터셋 로드 및 샘플 문장 선택
dataset = load_dataset("KaungHtetCho/Harry_Potter_LSTM")
sentences = dataset['train']['text']
random_sentence = random.choice(sentences)
print(f"Selected Sentence: {random_sentence}")

# 토크나이저 초기화
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# 데이터셋 정의
class TextDataset(Dataset):
    def __init__(self, sentences, tokenizer, max_length=50):
        self.sentences = sentences
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        text = self.sentences[idx]
        tokens = self.tokenizer(text, truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt")
        input_ids = tokens['input_ids'].squeeze(0)
        attention_mask = tokens['attention_mask'].squeeze(0)
        return input_ids, attention_mask

# 데이터셋 및 데이터로더 생성
max_length = 50
dataset = TextDataset([random_sentence], tokenizer, max_length)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input_ids, attention_mask):
        embeddings = self.embedding(input_ids)
        lstm_out, _ = self.lstm(embeddings)
        output = self.fc(lstm_out[:, -1, :])  # 마지막 time step 사용
        return output

# 하이퍼파라미터 정의
vocab_size = tokenizer.vocab_size
embedding_dim = 128
hidden_dim = 256
output_dim = 2  # 임의의 분류 문제 예제
learning_rate = 0.001

# 모델, 손실 함수, 옵티마이저 초기화
model = LSTMModel(vocab_size, embedding_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 학습 루프
num_epochs = 5
for epoch in range(num_epochs):
    for input_ids, attention_mask in dataloader:
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        labels = torch.zeros(outputs.size(0), dtype=torch.long)  # 임의의 타겟 (예제용)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

print("Training complete!")


Selected Sentence: 


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Epoch 1/5, Loss: 0.5927
Epoch 2/5, Loss: 0.2626
Epoch 3/5, Loss: 0.1047
Epoch 4/5, Loss: 0.0396
Epoch 5/5, Loss: 0.0158
Training complete!


In [12]:
# Task 2

from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import evaluate
import os
import random
from datasets import load_dataset

# Hugging Face 인증 토큰 설정
os.environ["HF_TOKEN"] = "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"

# 비교할 Hugging Face 모델 리스트
models = [
    "monologg/kobert",        # KoBERT
    "klue/roberta-base",      # KLUE-RoBERTa
    "beomi/kcbert-base"       # KC-BERT
]

# 평가 함수 정의
def evaluate_models(models, dataset, num_samples=100, num_iterations=3):
    # 평가 지표 로드
    accuracy = evaluate.load("accuracy")
    final_results = {model_name: [] for model_name in models}

    for iteration in range(num_iterations):
        print(f"\n=== Evaluation Iteration {iteration + 1} ===")

        # Random한 100개의 샘플 선택
        samples = dataset.shuffle(seed=iteration).select(range(num_samples))
        test_data = [{"text": item["text"], "label": item["label"]} for item in samples]

        for model_name in models:
            print(f"Evaluating model: {model_name}")

            # 토크나이저 및 모델 로드
            tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=os.getenv("HF_TOKEN"))
            model = AutoModelForSequenceClassification.from_pretrained(model_name, use_auth_token=os.getenv("HF_TOKEN"), num_labels=2)
            classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

            # 예측 수행 및 정확도 계산
            predictions, references = [], []
            for item in test_data:
                pred = classifier(item["text"])[0]["label"]
                pred_label = 1 if "1" in pred else 0  # 모델 출력값을 숫자로 변환
                predictions.append(pred_label)
                references.append(item["label"])

            # 정확도 계산
            accuracy_score = accuracy.compute(predictions=predictions, references=references)["accuracy"]
            print(f"Accuracy for {model_name} (Iteration {iteration + 1}): {accuracy_score:.2f}")

            # 결과 저장
            final_results[model_name].append(accuracy_score)

    return final_results

# 메인 실행
if __name__ == "__main__":
    # 데이터셋 로드 (Korean Sentiment Dataset)
    dataset = load_dataset("sepidmnorozy/Korean_sentiment")["test"]

    # 모델 평가 수행
    results = evaluate_models(models, dataset)

    # 평균 정확도 출력
    print("\n=== Final Results (Average Accuracy) ===")
    sorted_results = []
    for model_name, scores in results.items():
        avg_accuracy = sum(scores) / len(scores)
        sorted_results.append((model_name, avg_accuracy))
        print(f"Model: {model_name}, Average Accuracy: {avg_accuracy:.2f}")

    # 모델 순위 출력
    sorted_results.sort(key=lambda x: x[1], reverse=True)
    print("\n=== Model Ranking ===")
    for rank, (model_name, avg_accuracy) in enumerate(sorted_results, 1):
        print(f"{rank}. {model_name} - Average Accuracy: {avg_accuracy:.2f}")

    # 최우수 모델 출력
    best_model, best_accuracy = sorted_results[0]
    print(f"\nThe best model is '{best_model}' with an average accuracy of {best_accuracy:.2f}.")



=== Evaluation Iteration 1 ===
Evaluating model: monologg/kobert
The repository for monologg/kobert contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/monologg/kobert.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] ㅛ
The repository for monologg/kobert contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/monologg/kobert.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Accuracy for monologg/kobert (Iteration 1): 0.41
Evaluating model: klue/roberta-base


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


KeyboardInterrupt: 