In [1]:
import copy
import torch
from torch.utils.data import *
from transformers import *
import inspect
import sys
sys.path.insert(0, "..")

from models import *
from logic import *
from my_datasets import *

from utils import *

import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hyper-parameters handed positionally to every dataset config below.
r = 8
n = 5
ap, bp, tp, sp = 0.2, 0.2, 0.4, 0.1
num_arsteps = 3

# Size and RNG seed of each split, spelled out once so the three tasks stay in sync.
train_split = dict(dataset_len=1000, seed=1234)
test_split = dict(dataset_len=500, seed=2345)

# --- One-shot QED ---
qed_train_dataset_config = OneShotQedDatasetConfig(r, n, ap, bp, tp, **train_split)
qed_test_dataset_config = OneShotQedDatasetConfig(r, n, ap, bp, tp, **test_split)
qed_train_dataset = OneShotQedDataset(qed_train_dataset_config)
qed_test_dataset = OneShotQedDataset(qed_test_dataset_config)

# --- Successor prediction ---
succ_train_dataset_config = PredictSuccDatasetConfig(r, n, ap, bp, tp, **train_split)
succ_test_dataset_config = PredictSuccDatasetConfig(r, n, ap, bp, tp, **test_split)
succ_train_dataset = PredictSuccDataset(succ_train_dataset_config)
succ_test_dataset = PredictSuccDataset(succ_test_dataset_config)

# --- Autoregressive, fixed number of steps ---
# Note: this config receives `sp` and `num_arsteps` where the two above receive `tp`.
arsteps_train_dataset_config = AutoRegFixedStepsDatasetConfig(
    r, n, ap, bp, sp, num_arsteps, **train_split
)
arsteps_test_dataset_config = AutoRegFixedStepsDatasetConfig(
    r, n, ap, bp, sp, num_arsteps, **test_split
)
arsteps_train_dataset = AutoRegFixedStepsDataset(arsteps_train_dataset_config)
arsteps_test_dataset = AutoRegFixedStepsDataset(arsteps_test_dataset_config)

In [3]:
# Each task wraps its own freshly built "mytf" sequence-classification base model;
# the three bases come from separate get_seqcls_model calls and are not shared objects.

# One-shot QED: single-label classification head.
mytf_qed_base = get_seqcls_model(
    "mytf",
    num_labels=2,
    problem_type="single_label_classification",
)
mytf_qed_task_config = OneShotQedTaskConfig(n, mytf_qed_base)
mytf_qed = OneShotQedTaskModel(mytf_qed_task_config)

# Successor prediction: multi-label classification head.
mytf_succ_base = get_seqcls_model(
    "mytf",
    num_labels=2,
    problem_type="multi_label_classification",
)
mytf_succ_task_config = PredictSuccTaskConfig(n, mytf_succ_base)
mytf_succ = PredictSuccTaskModel(mytf_succ_task_config)

# Autoregressive fixed-steps: multi-label head, unrolled for `num_arsteps` steps.
mytf_arsteps_base = get_seqcls_model(
    "mytf",
    num_labels=2,
    problem_type="multi_label_classification",
)
mytf_arsteps_task_config = AutoRegFixedStepsTaskConfig(n, num_arsteps, mytf_arsteps_base)
mytf_arsteps = AutoRegFixedStepsTaskModel(mytf_arsteps_task_config)

# mygpt2_model = get_seqcls_model("gpt2", num_labels=2, problem_type="single_label_classification")
# mygpt2_qed = OneShotQedTaskModel(OneShotQedTaskConfig(n, copy.deepcopy(mygpt2_model)))
# mygpt2_succ = PredictSuccTaskModel(PredictSuccTaskConfig(n, copy.deepcopy(mygpt2_model)))
# mygpt2_arsteps = AutoRegFixedStepsTaskModel(AutoRegFixedStepsTaskConfig(n, num_arsteps, copy.deepcopy(mygpt2_model)))

In [4]:
def _make_training_args(task_name):
    """Build the TrainingArguments for one task.

    Every task uses the same schedule (50 epochs, batch size 24, evaluation
    once per epoch, logging every 5 steps) but gets its own output directory.
    Previously all three names were aliases of a single object pointing at
    "test-trainer", so each training run saved its checkpoints
    (e.g. "test-trainer/checkpoint-500") on top of the previous run's.

    Args:
        task_name: Short task identifier used as the output sub-directory.

    Returns:
        A new, independent TrainingArguments instance.
    """
    return TrainingArguments(
        f"test-trainer/{task_name}",
        evaluation_strategy = "epoch",
        num_train_epochs = 50,
        per_device_train_batch_size = 24,
        per_device_eval_batch_size = 24,
        logging_steps = 5
    )


# One independent arguments object per task, so checkpoints do not collide.
qed_training_args = _make_training_args("qed")
succ_training_args = _make_training_args("succ")
arsteps_training_args = _make_training_args("arsteps")

Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [5]:
# ### QED GPT2
# mygpt2_qed_trainer = Trainer(mygpt2_qed, qed_training_args,
#     train_dataset = qed_train_dataset,
#     eval_dataset = qed_test_dataset,
#     compute_metrics = qed_compute_metrics)
# mygpt2_qed_trainer.train()

In [6]:
### QED MyTf
# Train the one-shot QED model on the QED train split, evaluating on the test split
# with `qed_compute_metrics`.
# NOTE(review): the recorded run shows loss hovering near 0.69 and accuracy 0.0 for
# every logged epoch — the model does not appear to be learning; worth investigating.
mytf_qed_trainer = Trainer(
    model=mytf_qed,
    args=qed_training_args,
    train_dataset=qed_train_dataset,
    eval_dataset=qed_test_dataset,
    compute_metrics=qed_compute_metrics,
)
mytf_qed_trainer.train()

***** Running training *****
  Num examples = 1,000
  Num Epochs = 50
  Instantaneous batch size per device = 24
  Total train batch size (w. parallel, distributed & accumulation) = 24
  Gradient Accumulation steps = 1
  Total optimization steps = 2,100
  Number of trainable parameters = 42,545,666


Epoch,Training Loss,Validation Loss,Accuracy,Avg ones
1,0.6864,0.685276,0.0,0.5
2,0.7249,0.689485,0.0,0.5
3,0.6897,0.718415,0.0,0.5
4,0.7512,0.694397,0.0,0.5
5,0.6862,0.688938,0.0,0.5
6,0.7072,0.686328,0.0,0.5
7,0.7065,0.68528,0.0,0.5
8,0.6894,0.684936,0.0,0.5
9,0.6933,0.684934,0.0,0.0
10,0.6872,0.684973,0.0,0.5


***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
Saving model checkpoint to test-trainer/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Nu

TrainOutput(global_step=2100, training_loss=0.6912903459866842, metrics={'train_runtime': 123.1958, 'train_samples_per_second': 405.858, 'train_steps_per_second': 17.046, 'total_flos': 0.0, 'train_loss': 0.6912903459866842, 'epoch': 50.0})

In [7]:
### SUCC GPT2
# mygpt2_succ_trainer = Trainer(mygpt2_succ, succ_training_args,
#     train_dataset = succ_train_dataset,
#     eval_dataset = succ_test_dataset,
#     compute_metrics = succ_compute_metrics)
# mygpt2_succ_trainer.train()

In [8]:
### SUCC MyTF
# Train the successor-prediction model on the succ train split, evaluating on the
# test split with `succ_compute_metrics`.
# NOTE(review): in the recorded run accuracy stays at 0.7792 and "Avg ones" at 1.0
# for every logged epoch — the metrics do not move during training; confirm this is expected.
mytf_succ_trainer = Trainer(
    model=mytf_succ,
    args=succ_training_args,
    train_dataset=succ_train_dataset,
    eval_dataset=succ_test_dataset,
    compute_metrics=succ_compute_metrics,
)
mytf_succ_trainer.train()

***** Running training *****
  Num examples = 1,000
  Num Epochs = 50
  Instantaneous batch size per device = 24
  Total train batch size (w. parallel, distributed & accumulation) = 24
  Gradient Accumulation steps = 1
  Total optimization steps = 2,100
  Number of trainable parameters = 43,074,567


Epoch,Training Loss,Validation Loss,Accuracy,Avg ones
1,0.5301,0.531797,0.7792,1.0
2,0.549,0.532635,0.7792,1.0
3,0.5314,0.533265,0.7792,1.0
4,0.5402,0.531925,0.7792,1.0
5,0.5082,0.531243,0.7792,1.0
6,0.5525,0.530575,0.7792,1.0
7,0.5455,0.530424,0.7792,1.0
8,0.5278,0.53,0.7792,1.0
9,0.5396,0.52968,0.7792,1.0
10,0.5075,0.529459,0.7792,1.0


***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
Saving model checkpoint to test-trainer/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 24
***** Running Evaluation *****
  Nu

TrainOutput(global_step=2100, training_loss=0.5195948993592036, metrics={'train_runtime': 108.4933, 'train_samples_per_second': 460.858, 'train_steps_per_second': 19.356, 'total_flos': 0.0, 'train_loss': 0.5195948993592036, 'epoch': 50.0})

In [9]:
### ARSteps GPT2
# mygpt2_arsteps_trainer = Trainer(mygpt2_arsteps, arsteps_training_args,
#     train_dataset = arsteps_train_dataset,
#     eval_dataset = arsteps_test_dataset,
#     compute_metrics = arsteps_compute_metrics)
# mygpt2_arsteps_trainer.train()

NameError: name 'mygpt2_arsteps_model' is not defined

In [18]:
# Sanity-check one forward pass of the QED model on a batch of 7 training examples.
batch = next(iter(DataLoader(qed_train_dataset, batch_size=7)))
# Move the model to CPU before feeding it the batch
# (presumably training left it on an accelerator while the batch is on CPU — TODO confirm).
mytf_qed.cpu()
# The output is only inspected, never back-propagated, so skip building the autograd graph.
with torch.no_grad():
    out = mytf_qed(**batch)

In [21]:
# Shape of the logits for the 7-example batch run through mytf_qed (built with num_labels=2);
# the recorded output is torch.Size([7, 2]).
out.logits.shape

torch.Size([7, 2])