In [None]:
# Install the third-party packages into the current runtime via the shell.
# Only pytorch-lightning is version-pinned (1.9.2); transformers, torchmetrics,
# deepspeed, nltk and wandb resolve to whatever pip selects at install time,
# so results may differ between runs of this cell.
!pip install pytorch-lightning==1.9.2 transformers torchmetrics deepspeed nltk wandb

In [None]:
# Clone the CQA_RLHF repository into a directory named `cqa`, relative to the
# current working directory.
# NOTE(review): the training cell in this notebook invokes a script under
# /kaggle/working/CQA_RLHF/..., which is not the `cqa` directory created
# here -- confirm which checkout location is intended.
!git clone https://github.com/Myashka/CQA_RLHF.git cqa

In [None]:
# Colab-only setup: the branch runs when the string form of the active IPython
# shell contains 'google.colab'. On other platforms this cell does nothing.
if 'google.colab' in str(get_ipython()):
    # Pinned TPU stack: cloud-tpu-client 0.10, torch 1.13.1 and the torch_xla
    # 1.13 wheel. The wheel filename is tagged cp38 (CPython 3.8) --
    # NOTE(review): confirm the runtime's Python version matches that tag.
    !pip install cloud-tpu-client==0.10 torch==1.13.1 https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-1.13-cp38-cp38-linux_x86_64.whl tensorboard-plugin-profile
    !pip install google-api-python-client==1.12.1
    from google.colab import drive
    # Mount Google Drive at /content/drive.
    drive.mount('/content/drive')

In [1]:
import os

import yaml

## Run training

In [2]:
# Training configuration for the reward model. The nested dict is serialised
# to trainer_config.yaml in the current working directory.
#
# Key spellings such as `truncate_promt` and `log_obg` are reproduced verbatim:
# they are runtime keys of the emitted YAML (presumably looked up by the
# training script -- verify before renaming).

# The Weights & Biases API key is a credential and must not be stored in the
# notebook source. Supply it through the WANDB_API_KEY environment variable
# (for example, populated from the platform's secret store) before running
# this cell.
# NOTE: the key is still written in plaintext into trainer_config.yaml below,
# so do not commit or share that file.
wandb_api_key = os.environ.get("WANDB_API_KEY")
if not wandb_api_key:
    raise RuntimeError(
        "WANDB_API_KEY is not set; export your Weights & Biases API key as an "
        "environment variable before building the trainer config."
    )

trainer_config = dict(
    seed=42,
    model_name="Myashka/125M_GPTneo_reward_base",
    wandb=dict(
        api=wandb_api_key,
        project_name="CQA_RLHF",
        args=dict(
            job_type="train",
            group="reward",
            name="125M-lr_2e_5-1_ws-api_usage-freezed_3",
        ),
    ),
    data=dict(
        data_dir=r"/kaggle/input/python-qa-api-usage/1.0-data-div-ans-sep-api-usage.json",
        max_length=512,
        batch_size=8,
        truncate_promt=True,
        # NOTE(review): on_tpu is False while trainer.params.accelerator is
        # "tpu" -- confirm these two settings are meant to disagree.
        on_tpu=False,
    ),
    model_params=dict(
        learning_rate=2e-5,
        use_cache=False,
        warmup_steps_per_cent=0.01,
        adam_betas=[0.9, 0.95],
        weight_decay=0.001,
        do_compute_metrics=True,
        nr_frozen_epochs=3,
    ),
    trainer=dict(
        checkpoint=dict(
            every_n_train_steps=1000,
            dirpath=r"/kaggle/working/Checkpoints",
            log_obg="val_accuracy",
            mode="max",
        ),
        ckpt_path=None,
        params=dict(
            accelerator="tpu",
            max_epochs=50,
            accumulate_grad_batches=1,
            gradient_clip_val=1,
            precision="16",
            val_check_interval=104,
            overfit_batches=0,  # 0 for train
            num_sanity_val_steps=2,
            log_every_n_steps=20,
            # Optional settings, intentionally left disabled:
            #   limit_train_batches=100
            #   strategy="deepspeed_stage_2_offload"
        ),
    ),
)

# Persist the config in block style (one key per line).
with open("trainer_config.yaml", "w") as outfile:
    yaml.dump(trainer_config, outfile, default_flow_style=False)

In [None]:
# Launch the training script in a subprocess, passing the YAML config by
# absolute path. Both paths assume the working directory is /kaggle/working,
# which is where trainer_config.yaml must have been written.
# NOTE(review): the clone cell creates a directory named `cqa`, not
# `CQA_RLHF` -- verify that this script path exists before running.
!python /kaggle/working/CQA_RLHF/reward_model/mse_regression/train.py --config_file /kaggle/working/trainer_config.yaml

## Run testing

In [4]:
# Test/evaluation configuration, serialised to test_config.yaml in the current
# working directory.
#
# Key spellings such as `truncate_promt` are reproduced verbatim: they are
# runtime keys of the emitted YAML (presumably looked up by the test script --
# verify before renaming).

# The Weights & Biases API key is a credential and must not be stored in the
# notebook source. Supply it through the WANDB_API_KEY environment variable
# (for example, populated from the platform's secret store) before running
# this cell.
# NOTE: the key is still written in plaintext into test_config.yaml below, so
# do not commit or share that file.
wandb_api_key = os.environ.get("WANDB_API_KEY")
if not wandb_api_key:
    raise RuntimeError(
        "WANDB_API_KEY is not set; export your Weights & Biases API key as an "
        "environment variable before building the test config."
    )

test_config = dict(
    seed=42,
    cuda=True,
    # NOTE(review): log_file, save_steps and test_model_path appear both at
    # the top level and inside test_params, with different values for
    # log_file and test_model_path -- confirm which copy the script reads.
    log_file="/content/test-base-125M.csv",
    save_steps=100,
    test_params=dict(
        save_steps=100,
        test_model_path=r"/content/artifacts/model-34980p14:v4/model.ckpt",
        log_file="/content/test-tuned-api_usage-answer_loss-bredogenerated-125M.csv",
        use_cache=True,
    ),
    wandb=dict(
        api=wandb_api_key,
        args=dict(group="reward", job_type="test", name="125M-base-test"),
        project_name="CQA_RLHF",
        table_name="125M-reward",
    ),
    model_name="EleutherAI/gpt-neo-125m",
    data=dict(
        # NOTE(review): this path is under /kaggle while the other paths in
        # this config are under /content -- confirm the target platform.
        data_dir=r"/kaggle/input/python-qa-api-usage/1.0-data-div-ans-sep-api-usage.json",
        max_length=512,
        split="test",
        padding_side="right",
        padding=False,
        truncate_promt=True,
    ),
    test_model_path=None,
)

# Persist the config in block style (one key per line).
with open("test_config.yaml", "w") as outfile:
    yaml.dump(test_config, outfile, default_flow_style=False)

In [None]:
# Launch the test script in a subprocess, passing the YAML config by absolute
# path. Both paths assume the working directory is /content: the repository
# must be checked out at /content/cqa and test_config.yaml written to /content.
# NOTE(review): this uses `reward/mse_regression`, whereas the training cell
# uses `reward_model/mse_regression` -- confirm the repository's directory name.
!python /content/cqa/reward/mse_regression/test.py --config_file /content/test_config.yaml