In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import tensorflow_datasets as tfds

# Model and tokenizer setup
# Single source of truth for the checkpoint id, shared by the model and tokenizer loads.
MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

# Seed torch's global RNG up front so repeated runs start from the same state.
torch.manual_seed(0)

# trust_remote_code=True lets transformers run the modeling code shipped with the
# checkpoint repository; pinning a revision is advisable — TODO confirm which one.
model_load_options = dict(
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_load_options)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/3.45k [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.98k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
# Pipeline setup
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

In [3]:
# Load the MNLI dataset
def load_mnli_dataset():
    """Load GLUE/MNLI through TFDS and return two of its splits.

    Returns:
        A ``(train, validation_matched)`` tuple taken from the mapping of
        splits that ``tfds.load`` returns.
    """
    # with_info=True makes tfds.load return (splits, info); the info object is not used.
    splits, _info = tfds.load('glue/mnli', with_info=True)
    train_split = splits['train']
    matched_validation_split = splits['validation_matched']
    return train_split, matched_validation_split

In [4]:
# MNLI example processing function
def process_mnli_example(example):
    """Turn one raw MNLI record into plain Python values.

    Args:
        example: Mapping with 'premise', 'hypothesis' and 'label' entries whose
            values expose ``.numpy()``; the two text fields must yield UTF-8 bytes.

    Returns:
        A ``(premise, hypothesis, label_name)`` tuple where ``label_name`` is
        "entailment", "neutral" or "contradiction".

    Raises:
        KeyError: If the label id is not 0, 1 or 2.
    """
    id_to_name = {0: "entailment", 1: "neutral", 2: "contradiction"}
    # Decode both text fields the same way: tensor -> bytes -> str.
    premise_text, hypothesis_text = (
        example[field].numpy().decode('utf-8')
        for field in ('premise', 'hypothesis')
    )
    label_id = example['label'].numpy()
    return premise_text, hypothesis_text, id_to_name[label_id]

In [5]:
# Function that performs the MNLI task
def _extract_nli_label(response, default="neutral"):
    """Return the NLI label mentioned earliest in ``response``, or ``default`` if none is."""
    positions = {
        label: response.find(label)
        for label in ("entailment", "neutral", "contradiction")
    }
    mentioned = {label: pos for label, pos in positions.items() if pos >= 0}
    if not mentioned:
        return default
    # Earliest mention wins, so "neutral, not entailment" is read as "neutral"
    # instead of being captured by a fixed entailment-first priority.
    return min(mentioned, key=mentioned.get)


def perform_nli(premise, hypothesis):
    """Ask the module-level ``pipe`` to classify a premise/hypothesis pair.

    Args:
        premise: Premise sentence inserted into the prompt.
        hypothesis: Hypothesis sentence inserted into the prompt.

    Returns:
        "entailment", "neutral" or "contradiction". When the generated text
        names none of the three labels, "neutral" is returned as the default.
    """
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant skilled in natural language inference."},
        {"role": "user", "content": f"Determine the relationship between the following premise and hypothesis. The relationship can be either entailment, neutral, or contradiction.\n\nPremise: {premise}\nHypothesis: {hypothesis}\nRelationship:"}
    ]

    # Greedy decoding. No temperature is passed because it only matters when
    # sampling is enabled, and do_sample=False disables sampling.
    # NOTE(review): only 10 new tokens are generated; if the model opens with a
    # sentence rather than the bare label, the label may be cut off and the
    # "neutral" default below is used — worth confirming on real outputs.
    generation_args = {
        "max_new_tokens": 10,
        "return_full_text": False,
        "do_sample": False,
    }

    output = pipe(messages, **generation_args)
    response = output[0]['generated_text'].strip().lower()

    # Falls back to neutral when no label is present in the response.
    return _extract_nli_label(response, default="neutral")

In [6]:
# Evaluation function
def evaluate_mnli(dataset, num_samples=100):
    """Measure prediction accuracy over the first ``num_samples`` examples.

    Each example is decoded with ``process_mnli_example`` and classified with
    ``perform_nli``; a progress line is printed after every 10 examples.

    Args:
        dataset: Object exposing ``take(n)`` that yields raw MNLI examples.
        num_samples: Maximum number of examples to evaluate.

    Returns:
        The fraction of examples whose predicted label equals the true label.
        Returns 0.0 when no examples were evaluated (empty dataset or
        ``num_samples`` of 0) instead of dividing by zero.
    """
    correct = 0
    total = 0
    for example in dataset.take(num_samples):
        premise, hypothesis, true_label = process_mnli_example(example)
        predicted_label = perform_nli(premise, hypothesis)
        if predicted_label == true_label:
            correct += 1
        total += 1
        if total % 10 == 0:
            print(f"Processed {total} examples...")

    # Nothing was evaluated, so there is no ratio to compute; avoid ZeroDivisionError.
    if total == 0:
        print("No examples were evaluated; reporting accuracy 0.00.")
        return 0.0

    accuracy = correct / total
    print(f"Accuracy: {accuracy:.2f}")
    return accuracy

In [7]:
# Main execution section
def main(num_samples=100):
    """Load MNLI and evaluate the model on its validation split.

    Args:
        num_samples: Maximum number of validation examples to evaluate.
            Defaults to 100.

    Returns:
        The accuracy value returned by ``evaluate_mnli``.
    """
    print("Loading MNLI dataset...")
    # Only the validation split is scored here; the training split is not used.
    _, val_dataset = load_mnli_dataset()

    print("Evaluating on validation set...")
    # Return the result so callers can use it instead of it being discarded.
    return evaluate_mnli(val_dataset, num_samples=num_samples)

if __name__ == "__main__":
    main()

Loading MNLI dataset...
Downloading and preparing dataset 298.29 MiB (download: 298.29 MiB, generated: 100.56 MiB, total: 398.85 MiB) to /root/tensorflow_datasets/glue/mnli/2.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/5 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/392702 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/glue/mnli/incomplete.S6VOJE_2.0.0/glue-train.tfrecord*...:   0%|          …

Generating validation_matched examples...:   0%|          | 0/9815 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/glue/mnli/incomplete.S6VOJE_2.0.0/glue-validation_matched.tfrecord*...:   …

Generating validation_mismatched examples...:   0%|          | 0/9832 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/glue/mnli/incomplete.S6VOJE_2.0.0/glue-validation_mismatched.tfrecord*...:…

Generating test_matched examples...:   0%|          | 0/9796 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/glue/mnli/incomplete.S6VOJE_2.0.0/glue-test_matched.tfrecord*...:   0%|   …

Generating test_mismatched examples...:   0%|          | 0/9847 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/glue/mnli/incomplete.S6VOJE_2.0.0/glue-test_mismatched.tfrecord*...:   0%|…

Dataset glue downloaded and prepared to /root/tensorflow_datasets/glue/mnli/2.0.0. Subsequent calls will reuse this data.
Evaluating on validation set...


The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Processed 10 examples...
Processed 20 examples...
Processed 30 examples...
Processed 40 examples...
Processed 50 examples...
Processed 60 examples...
Processed 70 examples...
Processed 80 examples...
Processed 90 examples...
Processed 100 examples...
Accuracy: 0.35
