In [None]:
import copy
import torch
from torch.utils.data import *
from transformers import *
import inspect
import sys
sys.path.insert(0, "..")

from models import *
from logic import *
from my_datasets import *

from utils import *

import numpy as np

In [2]:
# Shared problem dimensions; r and n are also passed to the task-model configs in a later cell.
r = 8
n = 5
# Generation parameters handed positionally to every dataset config below.
# NOTE(review): presumably sampling probabilities — confirm against my_datasets.
ap = 0.2
bp = 0.2
tp = 0.4

# OneShotQed task: the train and test configs differ only in base_seed.
qed_train_dataset_config = OneShotQedDatasetConfig(
    r, n, ap, bp, tp,
    num_items=640,
    base_seed=1234,
)
qed_test_dataset_config = OneShotQedDatasetConfig(
    r, n, ap, bp, tp,
    num_items=640,
    base_seed=2345,
)
qed_train_dataset = OneShotQedDataset(qed_train_dataset_config)
qed_test_dataset = OneShotQedDataset(qed_test_dataset_config)

# PredictSucc task: 1000 training items and 500 test items, again with distinct seeds.
succ_train_dataset_config = PredictSuccDatasetConfig(
    r, n, ap, bp, tp,
    num_items=1000,
    base_seed=1234,
)
succ_test_dataset_config = PredictSuccDatasetConfig(
    r, n, ap, bp, tp,
    num_items=500,
    base_seed=2345,
)
succ_train_dataset = PredictSuccDataset(succ_train_dataset_config)
succ_test_dataset = PredictSuccDataset(succ_test_dataset_config)

In [3]:
# Backbone requested under the name "mytf", configured explicitly.
mytf_config = MyTfConfig(
    embed_dim=768,
    ffwd_width=1024,
    ffwd_depth=4,
    num_heads=2,
    num_layers=8,
)
mytf_model = get_seq2seq_model("mytf", config=mytf_config)

# Each task model wraps its own deep copy of the backbone, so the QED and SUCC
# wrappers never hold the same backbone object as each other or as mytf_model.
mytf_qed_model = OneShotQedTaskModel(
    OneShotQedTaskConfig(r, n, copy.deepcopy(mytf_model))
)
mytf_succ_model = PredictSuccTaskModel(
    PredictSuccTaskConfig(r, n, copy.deepcopy(mytf_model))
)

# Backbone requested under the name "gpt2" with use_pretrained=True, wrapped the same way.
mygpt2_model = get_seq2seq_model("gpt2", use_pretrained=True)
mygpt2_qed_model = OneShotQedTaskModel(
    OneShotQedTaskConfig(r, n, copy.deepcopy(mygpt2_model))
)
mygpt2_succ_model = PredictSuccTaskModel(
    PredictSuccTaskConfig(r, n, copy.deepcopy(mygpt2_model))
)

loading configuration file config.json from cache at /home/antonxue/.cache/huggingface/hub/models--gpt2/snapshots/11c5a3d5811f50298f278a704980280950aedb10/config.json
Model config GPT2Config {
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.34.1",
  

In [4]:
# Hyperparameters used by every Trainer built in this notebook.
# Evaluation runs once per epoch; the training loss is logged every 5 steps.
qed_training_args = TrainingArguments(
    output_dir="test-trainer",
    num_train_epochs=100,
    per_device_train_batch_size=24,
    per_device_eval_batch_size=24,
    evaluation_strategy="epoch",
    logging_steps=5,
)

# NOTE: this is an alias, not a copy — the SUCC trainer receives the very same
# TrainingArguments object (and therefore the same output directory) as the QED trainers.
succ_training_args = qed_training_args

Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [5]:
### QED GPT2
# Trainer for the GPT-2 based QED model, evaluated on the QED test split.
# The training call is intentionally left disabled.
mygpt2_qed_trainer = Trainer(
    model=mygpt2_qed_model,
    args=qed_training_args,
    train_dataset=qed_train_dataset,
    eval_dataset=qed_test_dataset,
    compute_metrics=qed_compute_metrics,
)
# mygpt2_qed_trainer.train()

In [6]:
### QED MyTf
# Trainer for the "mytf" based QED model, evaluated on the QED test split.
# The training call is intentionally left disabled.
mytf_qed_trainer = Trainer(
    model=mytf_qed_model,
    args=qed_training_args,
    train_dataset=qed_train_dataset,
    eval_dataset=qed_test_dataset,
    compute_metrics=qed_compute_metrics,
)
# mytf_qed_trainer.train()

In [None]:
### SUCC GPT2
# Trainer for the GPT-2 based PredictSucc model; unlike the QED cells, this one
# actually launches training, and the result of train() is the cell's output.
mygpt2_succ_trainer = Trainer(
    model=mygpt2_succ_model,
    args=succ_training_args,
    train_dataset=succ_train_dataset,
    eval_dataset=succ_test_dataset,
    compute_metrics=succ_compute_metrics,
)
mygpt2_succ_trainer.train()

***** Running training *****
  Num examples = 1,000
  Num Epochs = 100
  Instantaneous batch size per device = 24
  Total train batch size (w. parallel, distributed & accumulation) = 24
  Gradient Accumulation steps = 1
  Total optimization steps = 4,200
  Number of trainable parameters = 126,424,325


Epoch,Training Loss,Validation Loss,Accuracy,Avg ones
1,0.5331,0.53816,0.7792,1.0
2,0.5474,0.531928,0.7792,1.0
3,0.5281,0.540472,0.7792,1.0
4,0.523,0.535523,0.7792,1.0
5,0.4592,0.562002,0.7792,1.0
6,0.4449,0.567684,0.7792,1.0
7,0.3731,0.550001,0.7792,1.0
8,0.325,0.719572,0.7792,1.0
9,0.2564,0.817121,0.7792,1.0
10,0.2978,1.107274,0.7792,1.0


***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
Saving model checkpoint to test-trainer/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Nu

In [None]:
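# Disabled debugging snippet: push one batch of 8 QED training items through the
# GPT-2 QED model on CPU in eval mode.
# batch = next(iter(DataLoader(qed_train_dataset, batch_size=8)))
# mygpt2_qed_model.cpu().eval()
# print(f"training? {mygpt2_qed_model.training}")
# out = mygpt2_qed_model(**batch)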

In [None]:
# Train the "mytf" based QED model.
# Every argument refers to an object created in the earlier QED cells of this
# notebook (training arguments, train/test datasets) or to the QED metric function
# the other QED trainers use, so the cell runs on a freshly restarted kernel.
mytf_trainer = Trainer(
    mytf_qed_model,
    qed_training_args,
    train_dataset = qed_train_dataset,
    eval_dataset = qed_test_dataset,
    compute_metrics = qed_compute_metrics)

# Last expression of the cell: launches training and displays its result.
mytf_trainer.train()

In [None]:
# Show which source file implements the `predict` method of the trainers built above.
# Uses the GPT-2 QED trainer, which is created without needing a training run.
inspect.getfile(mygpt2_qed_trainer.predict)

In [None]:
# Binary cross-entropy loss module. Referenced through the explicitly imported
# `torch` package, because no import in this notebook binds a bare `nn` name directly.
bce_loss = torch.nn.BCELoss()

In [None]:
# Smoke-test the loss on two independently drawn random tensors of shape (3, 4, 5).
# torch.rand samples from [0, 1), which is the value range BCELoss requires.
bce_loss(
    input=torch.rand(3, 4, 5),
    target=torch.rand(3, 4, 5),
)