In [47]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

import sys
import logging
logging.getLogger().setLevel(logging.ERROR)
logging.disable(sys.maxsize)

from torch.utils.data import *
from transformers import *
sys.path.insert(0, "..")

from models import *
from logic import *
from my_datasets import *

from utils import *
import numpy as np
from tqdm import tqdm
import evaluate

from datasets import Dataset


In [48]:
# Problem size. Judging by the sample item printed further down (8 rule rows
# of width 10 and a 5-entry theorem), r is the number of rules and n is the
# number of variables.
r = 8
n = 5

# Float settings forwarded positionally to the dataset configs below
# (presumably sampling probabilities -- TODO confirm against my_datasets).
ap = 0.2
bp = 0.2
tp = 0.4
sp = 0.1

# Step count handed to the AutoRegFixedSteps dataset config.
num_arsteps = 3

# Split sizes and number of training epochs.
train_len, test_len = 2500, 500
num_epochs = 10

In [49]:
# Every task gets a train split (seed 1234, train_len items) and a test split
# (seed 2345, test_len items). Statement order is kept as-is in case dataset
# construction consumes random state.

# --- One-shot QED task ---
qed_train_dataset_config = OneShotQedDatasetConfig(
    r, n, ap, bp, tp, dataset_len=train_len, seed=1234
)
qed_test_dataset_config = OneShotQedDatasetConfig(
    r, n, ap, bp, tp, dataset_len=test_len, seed=2345
)
qed_train_dataset = OneShotQedDataset(qed_train_dataset_config)
qed_test_dataset = OneShotQedDataset(qed_test_dataset_config)

# --- Successor-prediction task ---
succ_train_dataset_config = PredictSuccDatasetConfig(
    r, n, ap, bp, tp, dataset_len=train_len, seed=1234
)
succ_test_dataset_config = PredictSuccDatasetConfig(
    r, n, ap, bp, tp, dataset_len=test_len, seed=2345
)
succ_train_dataset = PredictSuccDataset(succ_train_dataset_config)
succ_test_dataset = PredictSuccDataset(succ_test_dataset_config)

# --- Autoregressive fixed-steps task ---
# Note: this config takes sp (not tp) followed by num_arsteps.
arsteps_train_dataset_config = AutoRegFixedStepsDatasetConfig(
    r, n, ap, bp, sp, num_arsteps, dataset_len=train_len, seed=1234
)
arsteps_test_dataset_config = AutoRegFixedStepsDatasetConfig(
    r, n, ap, bp, sp, num_arsteps, dataset_len=test_len, seed=2345
)
arsteps_train_dataset = AutoRegFixedStepsDataset(arsteps_train_dataset_config)
arsteps_test_dataset = AutoRegFixedStepsDataset(arsteps_test_dataset_config)

In [50]:
def stringify_rule(rule, var_sep_token):
    """
    Render a 0/1 rule vector laid out as [<ants>, <cons>] as the text
    "xi<sep>xj<sep>... -> xa<sep>...".

    The first len(rule) // 2 entries flag antecedent variables and the next
    len(rule) // 2 entries flag consequent variables; a trailing element of
    an odd-length vector is never read. A side with no flagged variable is
    written as the literal word "empty". Names on each side are joined with
    `var_sep_token`.
    """
    half = len(rule) // 2

    def _side(offset):
        # Names of the variables flagged in rule[offset : offset + half].
        picked = [f"x{idx}" for idx in range(half) if rule[offset + idx]]
        return picked if picked else ["empty"]

    lhs = var_sep_token.join(_side(0))
    rhs = var_sep_token.join(_side(half))
    return f"{lhs} -> {rhs}"

def get_string_rep(dataset_item):
    """
    Serialize one dataset item into a single tagged line of the form:
    [RULES_START] [RULE_START] ... [RULE_END] ... [RULES_END]
    [THEOREM_START] ... [THEOREM_END]
    [QED]
    with every piece separated by one space.

    Only the "rules" and "theorem" entries of `dataset_item` are read. Each
    rule is rendered by stringify_rule; the theorem lists the names xi of
    its truthy positions joined by " , ". A theorem with no truthy position
    leaves the theorem body empty.
    """
    sep = " , "

    def _tagged(opening, body, closing):
        # A tag and its body are always separated by exactly one space.
        return f"{opening} {body} {closing}"

    rules = dataset_item["rules"]
    theorem = dataset_item["theorem"]
    theorem_len = len(theorem)

    rule_chunks = []
    for rule in rules:
        rule_chunks.append(
            _tagged("[RULE_START]", stringify_rule(rule, sep), "[RULE_END]")
        )

    theorem_vars = [f"x{idx}" for idx in range(theorem_len) if theorem[idx]]
    theorem_section = _tagged("[THEOREM_START]", sep.join(theorem_vars), "[THEOREM_END]")
    rules_section = _tagged("[RULES_START]", " ".join(rule_chunks), "[RULES_END]")

    return " ".join((rules_section, theorem_section, "[QED]"))


In [51]:
# Sanity check: show the first training item as a raw dict and, on the next
# line, its serialized string form.
print(qed_train_dataset[0], get_string_rep(qed_train_dataset[0]), sep="\n")

{'rules': tensor([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1, 0, 1, 0, 0, 1]]), 'theorem': tensor([1, 0, 1, 0, 1]), 'labels': tensor(0)}
[RULES_START] [RULE_START] empty -> x1 [RULE_END] [RULE_START] empty -> x2 [RULE_END] [RULE_START] empty -> empty [RULE_END] [RULE_START] empty -> x1 [RULE_END] [RULE_START] empty -> x3 [RULE_END] [RULE_START] x4 -> empty [RULE_END] [RULE_START] empty -> x2 [RULE_END] [RULE_START] x4 -> x1 , x4 [RULE_END] [RULES_END] [THEOREM_START] x0 , x2 , x4 [THEOREM_END] [QED]


In [52]:
# Create HuggingFace datasets for the QED task

def _texts_and_labels(torch_dataset):
    """
    Return (texts, labels) for every item of `torch_dataset`.

    Each item is fetched exactly once, so the serialized text and the label
    are guaranteed to come from the same sample (and the dataset is indexed
    only once per item rather than twice).

    texts  -- list of strings produced by get_string_rep
    labels -- list of Python scalars taken from item["labels"].item()
    """
    texts, labels = [], []
    for idx in tqdm(range(len(torch_dataset))):
        item = torch_dataset[idx]
        texts.append(get_string_rep(item))
        labels.append(item["labels"].item())
    return texts, labels


train_data, train_labels = _texts_and_labels(qed_train_dataset)

print("Creating train dataset")
qed_train_hf_dataset = Dataset.from_dict({
    "data": train_data,
    "label": train_labels
}).with_format("torch")

test_data, test_labels = _texts_and_labels(qed_test_dataset)

print("Creating test dataset")
qed_test_hf_dataset = Dataset.from_dict({
    "data": test_data,
    "label": test_labels
}).with_format("torch")

100%|██████████| 2500/2500 [00:01<00:00, 1324.45it/s]
100%|██████████| 2500/2500 [00:01<00:00, 2166.57it/s]


Creating train dataset


100%|██████████| 500/500 [00:00<00:00, 1386.90it/s]
100%|██████████| 500/500 [00:00<00:00, 2186.84it/s]

Creating test dataset





In [53]:
# Get the GPT-2 tokenizer and reuse its end-of-sequence token as the padding
# token, so that tokenizer.pad_token / pad_token_id are defined for the
# padding collator and the model config set up in later cells.

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

In [54]:
def tokenize_function(item):
    """
    Tokenize the "data" field of `item` with the notebook-level `tokenizer`.

    Truncation is switched on and no padding is requested here. Because the
    function is applied through Dataset.map(..., batched=True), `item` is a
    batch of rows rather than a single row. Returns the tokenizer's output
    unchanged.
    """
    texts = item["data"]
    encoded = tokenizer(texts, truncation=True)
    return encoded

# Tokenize both splits in batched mode (tokenize_function receives a batch of
# rows per call).
qed_train_tokenized_dataset = qed_train_hf_dataset.map(
    tokenize_function,
    batched=True,
)
qed_test_tokenized_dataset = qed_test_hf_dataset.map(
    tokenize_function,
    batched=True,
)

# Tokenization above requests no padding, so padding is left to this collator,
# which is handed to the Trainer further down.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

Map: 100%|██████████| 2500/2500 [00:00<00:00, 7907.81 examples/s]
Map: 100%|██████████| 500/500 [00:00<00:00, 7213.72 examples/s]


In [55]:
# Pretrained GPT-2 checkpoint loaded for sequence classification with two
# output classes (the QED labels appear to be 0/1).
model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=2)

# Keep the model's notion of the padding id in sync with the tokenizer's.
model.config.pad_token_id = tokenizer.pad_token_id

In [56]:
# Accuracy metric loaded through the `evaluate` library; compute_metrics
# below calls its .compute(predictions=..., references=...).
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """
    Metrics callback passed to the Trainer.

    `eval_pred` unpacks into (predictions, labels). The predicted class is
    the argmax of the predictions along axis 1. Returns a dict with:
      "Accuracy" -- the "accuracy" entry computed by the notebook-level
                    `accuracy` metric against the labels
      "Avg Ones" -- mean of the predicted class ids (for 0/1 classes this is
                    the fraction of examples predicted as class 1)
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    accuracy_result = accuracy.compute(
        predictions=predicted_classes,
        references=references,
    )
    return {
        "Accuracy": accuracy_result["accuracy"],
        "Avg Ones": np.mean(predicted_classes),
    }

In [57]:
training_args = TrainingArguments(
    # Output directory for this run.
    output_dir="gpt2_string_results",
    # Optimisation settings.
    num_train_epochs=num_epochs,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=8,
    # Evaluate and save once per epoch, and load the best model at the end.
    # NOTE(review): no metric_for_best_model is given, so "best" falls back
    # to the library default -- confirm that is the intended criterion.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Training-loss logging interval, in steps.
    logging_steps=50,
)

# NOTE(review): eval_dataset is the *test* split, so per-epoch evaluation and
# best-model selection are both driven by test data; there is no separate
# validation split in this notebook.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=qed_train_tokenized_dataset,
    eval_dataset=qed_test_tokenized_dataset,
    compute_metrics=compute_metrics,
)

In [58]:
# Run fine-tuning with the settings in training_args (evaluation and saving
# are both configured per epoch). Left as the bare last expression so the
# returned TrainOutput is displayed as the cell result.
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,Avg ones
1,No log,0.514845,0.74,0.68
2,0.655200,0.475655,0.752,0.56
3,0.533400,0.424052,0.804,0.516
4,0.464300,0.295055,0.882,0.662
5,0.342900,0.161476,0.936,0.6
6,0.342900,0.137821,0.944,0.596
7,0.280900,0.153797,0.942,0.522
8,0.237400,0.103142,0.952,0.552
9,0.211000,0.090331,0.97,0.554
10,0.188800,0.085676,0.976,0.56




TrainOutput(global_step=400, training_loss=0.3642372441291809, metrics={'train_runtime': 319.4135, 'train_samples_per_second': 78.268, 'train_steps_per_second': 1.252, 'total_flos': 2698854523011072.0, 'train_loss': 0.3642372441291809, 'epoch': 10.0})