In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    torch_dtype="auto",
    device_map="cuda"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")

In [8]:
device = "cuda" # the device to load the model onto
def generate_message(premise, hypothesis):

    prompt = f"Premise: {premise}\nHypothesis: {hypothesis}"

    messages = [
        {"role": "system", "content": "You are a classifier. Your goal is to classify two sentences as entailment 0, neutral 1, or contradiction 2. You only answer with the labels 0, 1, or 2."},
        {"role": "user", "content": prompt}
    ]

    return messages

def predict(premise, hypothesis):
    messages = generate_message(premise, hypothesis)

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=512
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    if response == "entailment":
        return 0

    if response == "neutral":
        return 1

    if response == "contradiction":
        return 2

    try:
        response = int(response)
    except ValueError:
        response = 3

    return response

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

raw_data      = pd.read_csv('../data/sentence-relations/train.csv', index_col='id')
raw_submissions = pd.read_csv('../data/sentence-relations/test.csv', index_col='id')

# Only take english
raw_data = raw_data[raw_data['lang_abv'] == 'en']

training_data, test_data = train_test_split(raw_data, test_size=0.2, random_state=42)

In [3]:
from transformers import TextDataset, DataCollatorForLanguageModeling

def dataset_to_file(df, file_path):
    with open(file_path, "w") as file:
        for index, example in df.iterrows():
            file.write(example["premise"] + "\n" + example["hypothesis"] + "\n" + str(example["label"]) + "\n\n")


dataset_to_file(training_data, "train_dataset.txt")

train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="train_dataset.txt",
    block_size=128
)



In [4]:
# Retrain the model
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./models/sentence-relations",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_data,
    data_collator=data_collator,
)

trainer.train()

 10%|▉         | 500/5253 [01:18<12:45,  6.21it/s]

{'loss': 3.2222, 'grad_norm': 31.25, 'learning_rate': 5e-05, 'epoch': 0.29}


 19%|█▉        | 1000/5253 [02:41<11:48,  6.00it/s] 

{'loss': 2.9458, 'grad_norm': 26.0, 'learning_rate': 4.474016410687987e-05, 'epoch': 0.57}


 29%|██▊       | 1500/5253 [04:04<10:09,  6.16it/s]  

{'loss': 2.6087, 'grad_norm': 23.0, 'learning_rate': 3.9480328213759734e-05, 'epoch': 0.86}


 38%|███▊      | 2000/5253 [05:27<08:46,  6.18it/s]  

{'loss': 1.8965, 'grad_norm': 27.625, 'learning_rate': 3.42204923206396e-05, 'epoch': 1.14}


 48%|████▊     | 2500/5253 [06:48<07:26,  6.16it/s]  

{'loss': 1.2493, 'grad_norm': 17.375, 'learning_rate': 2.896065642751946e-05, 'epoch': 1.43}


 57%|█████▋    | 3000/5253 [08:12<06:08,  6.11it/s]

{'loss': 1.165, 'grad_norm': 29.0, 'learning_rate': 2.370082053439933e-05, 'epoch': 1.71}


 67%|██████▋   | 3500/5253 [09:33<04:47,  6.09it/s]

{'loss': 1.1194, 'grad_norm': 24.0, 'learning_rate': 1.8440984641279193e-05, 'epoch': 2.0}


 76%|███████▌  | 4000/5253 [10:57<03:13,  6.48it/s]

{'loss': 0.5776, 'grad_norm': 38.5, 'learning_rate': 1.3181148748159059e-05, 'epoch': 2.28}


 86%|████████▌ | 4500/5253 [12:16<02:02,  6.14it/s]

{'loss': 0.5899, 'grad_norm': 24.5, 'learning_rate': 7.921312855038922e-06, 'epoch': 2.57}


 95%|█████████▌| 5000/5253 [13:49<00:40,  6.27it/s]

{'loss': 0.6048, 'grad_norm': 21.125, 'learning_rate': 2.661476961918788e-06, 'epoch': 2.86}


100%|██████████| 5253/5253 [14:34<00:00,  6.01it/s]

{'train_runtime': 874.6621, 'train_samples_per_second': 6.006, 'train_steps_per_second': 6.006, 'train_loss': 1.5494646427859675, 'epoch': 3.0}





TrainOutput(global_step=5253, training_loss=1.5494646427859675, metrics={'train_runtime': 874.6621, 'train_samples_per_second': 6.006, 'train_steps_per_second': 6.006, 'total_flos': 1443869784834048.0, 'train_loss': 1.5494646427859675, 'epoch': 3.0})

In [11]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load local model
model = AutoModelForCausalLM.from_pretrained("./best-checkpoints/quwen-checkpoint-5000", device_map="cuda")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")

predict("I am a student", "I am a teacher")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


3

In [14]:
predict("I am a student", "I am currently studying")

3

In [10]:
correct = 0
total = len(test_data)

index = 0

for idx, row in test_data.iterrows():
    premise = row['premise']
    hypothesis = row['hypothesis']
    label = row['label']

    prediction = predict(premise, hypothesis)

    if prediction == label:
        correct += 1

    index += 1

    if index % 1 == 0:
        print(f"Accuracy: {correct / index} | {index} / {total}")

print(f"Accuracy: {correct / total}")


Accuracy: 0.0 | 1 / 1374


KeyboardInterrupt: 