In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint identifier shared by the model weights and the tokenizer.
MODEL_ID = "Qwen/Qwen2-1.5B-Instruct"

# Load the causal LM onto the GPU; "auto" defers the dtype choice to the library.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="cuda")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
# Device the tokenized prompt tensors are moved to inside predict(); the model
# itself is placed via device_map when it is loaded.
device = "cuda"
def generate_message(premise, hypothesis):
    """Build the chat messages for one premise/hypothesis pair.

    Returns a two-element list: a system message carrying the classification
    instructions, followed by a user message that contains both sentences.
    """
    system_turn = {
        "role": "system",
        "content": "You are a classifier. Your goal is to classify two sentences as entailment 0, neutral 1, or contradiction 2. You only answer with the labels 0, 1, or 2.",
    }
    user_turn = {
        "role": "user",
        "content": "Premise: {}\nHypothesis: {}".format(premise, hypothesis),
    }
    return [system_turn, user_turn]

def _parse_label(response):
    """Map the model's free-text reply to a class id.

    Returns 0 (entailment), 1 (neutral) or 2 (contradiction) when the reply
    contains a label word or a standalone label digit, and 3 otherwise.
    """
    import re  # local import so this cell does not depend on another cell's imports

    text = response.strip().lower()

    # A reply that is a bare integer is accepted only if it is a valid label,
    # so stray values such as "7" or "-1" never leak into the label space.
    try:
        value = int(text)
    except ValueError:
        pass
    else:
        return value if value in (0, 1, 2) else 3

    # Otherwise use the first label word or standalone 0/1/2 in the reply;
    # digits that are part of a longer number (e.g. "12", "1.5") are ignored.
    match = re.search(
        r"entailment|neutral|contradiction|(?<![\d.])[012](?!\.?\d)", text
    )
    if match is None:
        return 3

    names = {"entailment": 0, "neutral": 1, "contradiction": 2}
    token = match.group(0)
    return names[token] if token in names else int(token)


def predict(premise, hypothesis, max_new_tokens=512):
    """Classify the relation between two sentences with the chat model.

    Args:
        premise: First sentence.
        hypothesis: Second sentence, judged against the premise.
        max_new_tokens: Upper bound on generated tokens. The default keeps the
            previous hard-coded value; a label needs only a few tokens, so a
            smaller bound speeds up evaluation when the model is verbose.

    Returns:
        0 for entailment, 1 for neutral, 2 for contradiction, or 3 when the
        reply cannot be interpreted as one of those labels.

    NOTE(review): decoding uses the model's own generation settings; if those
    enable sampling, repeated calls may give different labels. Confirm, and
    consider greedy decoding for reproducible evaluation.
    """
    messages = generate_message(premise, hypothesis)

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    # Forward the whole encoding so generate() also receives the attention
    # mask instead of having to infer it from the input ids.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens
    )

    # generate() returns prompt + completion; keep only the completion tokens.
    completions = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(completions, skip_special_tokens=True)[0]

    return _parse_label(response)

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read both CSV files, using their "id" column as the index.
raw_data = pd.read_csv('../data/sentence-relations/train.csv', index_col='id')
raw_submissions = pd.read_csv('../data/sentence-relations/test.csv', index_col='id')

# Keep only the rows whose language code is English.
raw_data = raw_data.loc[raw_data['lang_abv'].eq('en')]

# Fixed-seed 80/20 split so the evaluation subset is the same on every run.
training_data, test_data = train_test_split(raw_data, random_state=42, test_size=0.2)

In [4]:
from transformers import TextDataset, DataCollatorForLanguageModeling

def dataset_to_file(df, file_path):
    """Write premise/hypothesis/label triples from ``df`` to a text file.

    Each row becomes three lines (premise, hypothesis, label) followed by a
    blank separator line.

    Args:
        df: DataFrame with "premise", "hypothesis" and "label" columns.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If a premise or hypothesis value is not a string.
    """
    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform's
    # default encoding.
    with open(file_path, "w", encoding="utf-8") as file:
        # Iterating the three columns directly avoids building a Series per row.
        for premise, hypothesis, label in zip(df["premise"], df["hypothesis"], df["label"]):
            file.write(premise + "\n" + hypothesis + "\n" + str(label) + "\n\n")


# Dump the training split to a plain-text file, then build the dataset from
# that same file.
# NOTE(review): TextDataset is reported as deprecated in recent transformers
# releases — confirm against the installed version before relying on it.
train_text_path = "train_dataset.txt"
dataset_to_file(training_data, train_text_path)

train_dataset = TextDataset(tokenizer=tokenizer, file_path=train_text_path, block_size=128)



In [5]:
# Fine-tune the model on train_dataset (disabled: the training code below is commented out)
# from transformers import TrainingArguments, Trainer

# training_args = TrainingArguments(
#     output_dir="./models/sentence-relations",
#     num_train_epochs=2,
#     per_device_train_batch_size=2,
#     per_device_eval_batch_size=2,
#     warmup_steps=100,
#     weight_decay=0.01,
#     logging_dir="./logs",
#     learning_rate=2e-5,
# )

# data_collator = DataCollatorForLanguageModeling(
#     tokenizer=tokenizer,
#     mlm=False,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=test_data,
#     data_collator=data_collator,
# )

# trainer.train()

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load local model
# model = AutoModelForCausalLM.from_pretrained("./best-checkpoints/quwen-checkpoint-1500", device_map="cuda")
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")

# Smoke test: classify a single pair with the currently loaded model.
predict(premise="I am a student", hypothesis="I am a teacher")

2

In [7]:
# Evaluate predict() on the held-out split, printing the running accuracy
# every 10 rows and the overall accuracy at the end.
correct = 0
total = len(test_data)

index = 0  # stays 0 when the split is empty

# Walk the three needed columns in lockstep; enumerate supplies the 1-based
# count of rows processed so far.
pairs = zip(test_data['premise'], test_data['hypothesis'], test_data['label'])
for index, (premise, hypothesis, label) in enumerate(pairs, start=1):
    prediction = predict(premise, hypothesis)

    if prediction == label:
        correct += 1

    if index % 10 == 0:
        print(f"Accuracy: {correct / index} | {index} / {total}")

# Guard the final division: an empty split would otherwise raise ZeroDivisionError.
if total:
    print(f"Accuracy: {correct / total}")
else:
    print("Accuracy: n/a (test set is empty)")


Accuracy: 0.2 | 10 / 1374
Accuracy: 0.35 | 20 / 1374
Accuracy: 0.4 | 30 / 1374
Accuracy: 0.425 | 40 / 1374
Accuracy: 0.46 | 50 / 1374
Accuracy: 0.4 | 60 / 1374
Accuracy: 0.45714285714285713 | 70 / 1374
Accuracy: 0.4375 | 80 / 1374
Accuracy: 0.43333333333333335 | 90 / 1374
Accuracy: 0.45 | 100 / 1374
Accuracy: 0.4636363636363636 | 110 / 1374
Accuracy: 0.4666666666666667 | 120 / 1374
Accuracy: 0.46923076923076923 | 130 / 1374
Accuracy: 0.4857142857142857 | 140 / 1374
Accuracy: 0.48 | 150 / 1374
Accuracy: 0.49375 | 160 / 1374
Accuracy: 0.4823529411764706 | 170 / 1374
Accuracy: 0.4777777777777778 | 180 / 1374
Accuracy: 0.5 | 190 / 1374
Accuracy: 0.5 | 200 / 1374
Accuracy: 0.5 | 210 / 1374
Accuracy: 0.4909090909090909 | 220 / 1374
Accuracy: 0.4826086956521739 | 230 / 1374
Accuracy: 0.48333333333333334 | 240 / 1374
Accuracy: 0.484 | 250 / 1374
Accuracy: 0.48846153846153845 | 260 / 1374
Accuracy: 0.5 | 270 / 1374
Accuracy: 0.5035714285714286 | 280 / 1374
Accuracy: 0.49310344827586206 | 290 / 

KeyboardInterrupt: 