In [None]:
!pip install transformers



In [1]:
# Open Colab's upload dialog. The evaluation cells below read
# dev_matched_sampled-1.jsonl and dev_mismatched_sampled-1.jsonl from the
# working directory, so both files must be uploaded here.
from google.colab import files
uploaded = files.upload()

Saving dev_matched_sampled-1.jsonl to dev_matched_sampled-1.jsonl
Saving dev_mismatched_sampled-1.jsonl to dev_mismatched_sampled-1.jsonl


In [2]:
from transformers import pipeline
import json

# Load model
def load_model(model_name="facebook/bart-large-mnli"):
    """Build a ``text-classification`` pipeline for the given checkpoint.

    Args:
        model_name: Model identifier forwarded to ``pipeline`` as ``model``.
            Defaults to ``"facebook/bart-large-mnli"``.

    Returns:
        The object returned by ``pipeline("text-classification", model=model_name)``.
    """
    task = "text-classification"
    classifier_pipeline = pipeline(task, model=model_name)
    return classifier_pipeline

# Load jsonl data
def load_nli_data(filepath):
    """Read an NLI dataset stored as JSON Lines.

    Args:
        filepath: Path to a ``.jsonl`` file in which every non-blank line is a
            JSON object with ``sentence1``, ``sentence2`` and ``gold_label`` keys.

    Returns:
        A list of ``(sentence1, sentence2, gold_label)`` tuples in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the three expected keys.
    """
    data = []
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            d = json.loads(line)
            data.append((d["sentence1"], d["sentence2"], d["gold_label"]))
    return data

# Evaluate accuracy
def evaluate_nli(model, data):
    """Compute the accuracy of ``model`` on a list of NLI examples.

    Each example is rendered into a single prompt string and all prompts are
    sent to ``model`` in one call. The predicted ``label`` is compared
    case-insensitively with the gold label.

    Args:
        model: Callable that accepts a list of strings and returns one dict
            with a ``"label"`` key per string.
        data: Iterable of ``(sentence1, sentence2, gold_label)`` tuples.

    Returns:
        Fraction of examples whose predicted label equals the gold label, or
        ``0.0`` when ``data`` is empty.
    """
    # NOTE(review): the premise/hypothesis pair is packed into one prompt
    # string; this is kept as-is -- confirm it is the intended input format
    # for an MNLI checkpoint.
    samples = list(data)
    if not samples:
        # Avoid ZeroDivisionError on an empty dataset.
        return 0.0

    prompts = [
        f"{sentence1} \n\nQuestion: Does this imply that \"{sentence2}\"?\nAnswer:"
        for sentence1, sentence2, _ in samples
    ]
    # One call with the whole list instead of one call per example.
    outputs = model(prompts)

    correct = sum(
        output["label"].lower() == gold_label.lower()
        for output, (_, _, gold_label) in zip(outputs, samples)
    )
    return correct / len(samples)

In [3]:
# Load the model
model = load_model("facebook/bart-large-mnli")

# Load the matched dataset and report accuracy on it
matched_data = load_nli_data("dev_matched_sampled-1.jsonl")
matched_acc = evaluate_nli(model, matched_data)
print(f"[Matched] Accuracy: {matched_acc:.2%}")

# Load the mismatched dataset and report accuracy on it
mismatched_data = load_nli_data("dev_mismatched_sampled-1.jsonl")
mismatched_acc = evaluate_nli(model, mismatched_data)
print(f"[Mismatched] Accuracy: {mismatched_acc:.2%}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[Matched] Accuracy: 72.56%
[Mismatched] Accuracy: 74.84%


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): none of the visible cells read from or write to /content/drive;
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Re-read the matched split and collect one lowercase predicted label per
# example, using the same prompt template as evaluate_nli.
matched_data = load_nli_data("dev_matched_sampled-1.jsonl")
matched_preds = [
    model(
        f"{premise} \n\nQuestion: Does this imply that \"{hypothesis}\"?\nAnswer:"
    )[0]["label"].lower()
    for premise, hypothesis, _ in matched_data
]

# Save to CSV.
# NOTE(review): save_predictions is defined in a later cell, so that cell must
# have been executed before this one.
save_predictions(matched_data, matched_preds, filename="matched_results.csv")

# Download
from google.colab import files
files.download("matched_results.csv")

Saved to matched_results.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Re-read the mismatched split and collect one lowercase predicted label per
# example. The results are stored under mismatched_* names so that the
# matched-split variables (matched_data / matched_preds) are not overwritten.
mismatched_data = load_nli_data("dev_mismatched_sampled-1.jsonl")
mismatched_preds = []

for sentence1, sentence2, _ in mismatched_data:
    prompt = f"{sentence1} \n\nQuestion: Does this imply that \"{sentence2}\"?\nAnswer:"
    result = model(prompt)[0]
    mismatched_preds.append(result["label"].lower())

# Save to CSV.
# NOTE(review): save_predictions is defined in a later cell, so that cell must
# have been executed before this one.
save_predictions(mismatched_data, mismatched_preds, filename="mismatched_results.csv")

# Download
from google.colab import files
files.download("mismatched_results.csv")

Saved to mismatched_results.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
from transformers import pipeline
from tqdm import tqdm
import json, torch

# Pick the first CUDA device when one is available; -1 means CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# batch_size is the key setting for batched inference.
classifier = pipeline(
    "text-classification",
    model="roberta-large-mnli",
    device=device,
    batch_size=32,
)

def load_nli(file):
    """Read an NLI dataset stored as JSON Lines.

    Args:
        file: Path to a ``.jsonl`` file in which every non-blank line is a
            JSON object with ``sentence1``, ``sentence2`` and ``gold_label`` keys.

    Returns:
        A list of ``(sentence1, sentence2, gold_label)`` tuples in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the three expected keys.
    """
    data = []
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(file, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)
            data.append((record["sentence1"], record["sentence2"], record["gold_label"]))
    return data

def evaluate(samples):
    """Score the module-level ``classifier`` on a list of NLI examples.

    Args:
        samples: list of ``(premise, hypothesis, label)`` tuples.

    Returns:
        ``(acc, preds)`` where ``preds`` holds the lowercase predicted label
        for each sample and ``acc`` is the fraction of predictions equal to
        the (lowercased) gold label. Returns ``(0.0, [])`` for empty input.
    """
    if not samples:
        # Avoid ZeroDivisionError and skip the model call on empty input.
        return 0.0, []

    prompts = [f"{p}\n\nQuestion: Does this imply that \"{h}\"?\nAnswer:"
               for p, h, _ in samples]
    outputs = classifier(prompts)
    preds = [o["label"].lower() for o in outputs]
    # Compare case-insensitively, matching how evaluate_nli treats gold labels.
    hits = sum(p == g.lower() for p, (_, _, g) in zip(preds, samples))
    acc = hits / len(samples)
    return acc, preds

# ======= Run =======
# Evaluate on the matched split; `matched` and `preds_m` are reused by the
# CSV export cell further down.
matched = load_nli("dev_matched_sampled-1.jsonl")
acc_m, preds_m = evaluate(matched)
print(f"[RoBERTa] Matched ACC: {acc_m:.2%}")

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


[RoBERTa] Matched ACC: 76.60%


In [8]:
# Evaluate on the mismatched split with the same classifier and prompt.
mismatched = load_nli("dev_mismatched_sampled-1.jsonl")
acc_mis, preds_mis = evaluate(mismatched)
print(f"[RoBERTa] Mismatched Accuracy: {acc_mis:.2%}")

[RoBERTa] Mismatched Accuracy: 75.00%


In [10]:
import pandas as pd

def save_predictions(data, predictions, filename="results.csv"):
    """Write examples and their predicted labels to a CSV file.

    Args:
        data: Sequence of ``(premise, hypothesis, gold_label)`` tuples.
        predictions: Predicted labels, one per entry of ``data``.
        filename: Destination CSV path. Defaults to ``"results.csv"``.

    The CSV has the columns ``premise``, ``hypothesis``, ``gold_label`` and
    ``predicted_label`` and no index column. A confirmation line is printed
    once the file has been written.
    """
    premises, hypotheses, gold_labels = [], [], []
    for premise, hypothesis, gold in data:
        premises.append(premise)
        hypotheses.append(hypothesis)
        gold_labels.append(gold)

    columns = {
        "premise": premises,
        "hypothesis": hypotheses,
        "gold_label": gold_labels,
        "predicted_label": predictions,
    }
    table = pd.DataFrame(columns)
    table.to_csv(filename, index=False)
    print(f"Saved to {filename}")

# Export the RoBERTa predictions for the matched split and download the CSV.
# `files` comes from the `from google.colab import files` import in an earlier cell.
save_predictions(matched, preds_m, filename="roberta_matched_results.csv")
files.download("roberta_matched_results.csv")

Saved to roberta_matched_results.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
# Export the RoBERTa predictions for the mismatched split and download the CSV.
# Pair preds_mis with `mismatched`, the list it was computed from, rather than
# with `mismatched_data` left over from the BART evaluation cell.
save_predictions(mismatched, preds_mis, filename="roberta_mismatched_results.csv")
files.download("roberta_mismatched_results.csv")

Saved to roberta_mismatched_results.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>