In [7]:
import torch, argparse, pandas, numpy

from sklearn.metrics import accuracy_score, mean_squared_error


from hahadataset import HahaDataset

from transformers import GPT2ForSequenceClassification, GPT2Tokenizer
from transformers import TrainingArguments, Trainer

def metrics_acc(eval_pred):
    """Compute classification accuracy for an evaluation pass.

    Args:
        eval_pred: Object exposing ``predictions`` (per-class scores; the
            arg-max over the last axis is taken as the predicted class) and
            ``label_ids`` (the reference labels).

    Returns:
        A dict with the single key ``"accuracy"`` holding the value returned
        by ``accuracy_score`` for the labels and predicted classes.
    """
    # Collapse the score axis into a single predicted class per example.
    predicted_classes = eval_pred.predictions.argmax(-1)
    return {"accuracy": accuracy_score(eval_pred.label_ids, predicted_classes)}

def metrics_rmse(eval_pred):
    """Compute the root-mean-squared error (RMSE) for an evaluation pass.

    The previous implementation returned the plain mean squared error under
    the key ``"rmse"``; this version takes the square root so the reported
    value really is an RMSE.

    Args:
        eval_pred: Object exposing ``label_ids`` (reference values) and
            ``predictions`` (model outputs). Both are flattened before the
            comparison, so ``(N,)`` labels and ``(N, 1)`` predictions are
            compared element-wise rather than broadcast against each other.

    Returns:
        A dict with the single key ``"rmse"`` holding a ``numpy.float64``.

    Raises:
        ValueError: If labels and predictions do not contain the same number
            of elements.
    """
    labels = numpy.asarray(eval_pred.label_ids, dtype=numpy.float64).reshape(-1)
    preds = numpy.asarray(eval_pred.predictions, dtype=numpy.float64).reshape(-1)

    if labels.shape != preds.shape:
        raise ValueError(
            "labels and predictions must have the same number of elements, "
            f"got {labels.size} and {preds.size}"
        )

    # sqrt(mean(squared error)) -- the square root is what makes this an RMSE.
    rmse = numpy.sqrt(numpy.mean((labels - preds) ** 2))

    return {"rmse": numpy.float64(rmse)}


if __name__ == '__main__':
    # Fine-tune GPT-2 with a single-output head on the 'humor_rating' task,
    # then write test-set predictions to 'submission_humor_rating.csv'.

    parser = argparse.ArgumentParser()

    parser.add_argument('--load_from_checkpoint', type=str)
    parser.add_argument('--continue_training', type=str)
    parser.add_argument('--output_directory', type=str, default="output_dir")
    parser.add_argument('--tokenizer_path', type=str)
    parser.add_argument('--max_len', type=int, default=256)
    parser.add_argument('--max_steps', type=int, default=500)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--num_gpus', type=int, default=4)
    parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")
    args = parser.parse_args()
    task = 'humor_rating'

    # GPT-2 ships without a padding token, so the EOS token is reused for
    # padding and sequences are padded on the left.
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    tokenizer.padding_side = "left"
    tokenizer.pad_token = tokenizer.eos_token

    # Honour --max_len (default 256) instead of a hard-coded length so the
    # command-line option actually takes effect.
    train_dataset = HahaDataset(input_file='./../Data/train_data/train.csv', tokenizer=tokenizer,
                                max_len=args.max_len, task=task, split='train')
    eval_dataset = HahaDataset(input_file='./../Data/train_data/train.csv', tokenizer=tokenizer,
                               max_len=args.max_len, task=task, split='eval')
    test_dataset = HahaDataset(input_file='./../Data/test_data/public_test.csv', tokenizer=tokenizer,
                               max_len=args.max_len, task=task, split='test')

    # num_labels=1 gives the classification head a single output per example.
    model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=1)

    model.resize_token_embeddings(len(tokenizer))

    # fix model padding token id
    model.config.pad_token_id = model.config.eos_token_id

    # Warm up the learning rate over the first 1% of training steps.
    warmup_steps = int(args.max_steps * .01)

    # NOTE(review): no evaluation schedule is configured here (the old
    # `evaluate_during_training` flag had been commented out), so `eval_steps`
    # and best-model selection only matter if the installed transformers
    # version evaluates during training -- confirm against that version.
    training_args = TrainingArguments(
        output_dir=args.output_directory,
        max_steps=args.max_steps,
        per_device_train_batch_size=args.batch_size,
        logging_steps=25,
        save_total_limit=1,
        eval_steps=50,
        learning_rate=2e-5,
        warmup_steps=warmup_steps,
        load_best_model_at_end=True,
        metric_for_best_model='eval_rmse',
        # RMSE is an error: lower is better. Without this, a non-loss metric
        # defaults to "greater is better" and the worst checkpoint would win.
        greater_is_better=False,
        disable_tqdm=True
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=metrics_rmse,
    )

    trainer.train()

    predictions = trainer.predict(test_dataset)
    # Each prediction row holds one value; take its first element as the rating.
    humor_rating_preds = [pred[0] for pred in predictions.predictions]

    # One 'humor_rating' column; the default integer index is written as 'id'.
    out_df = pandas.DataFrame({'humor_rating': humor_rating_preds})
    out_df.to_csv('submission_humor_rating.csv', index_label='id')


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 4.955144958496094, 'learning_rate': 1.9191919191919194e-05, 'epoch': 0.05060728744939271}
{'loss': 0.43979358673095703, 'learning_rate': 1.8181818181818182e-05, 'epoch': 0.10121457489878542}
{'loss': 0.346490364074707, 'learning_rate': 1.7171717171717173e-05, 'epoch': 0.15182186234817813}
{'loss': 0.34578815460205076, 'learning_rate': 1.616161616161616e-05, 'epoch': 0.20242914979757085}
{'loss': 0.3592367935180664, 'learning_rate': 1.5151515151515153e-05, 'epoch': 0.25303643724696356}
{'loss': 0.30040239334106444, 'learning_rate': 1.4141414141414143e-05, 'epoch': 0.30364372469635625}
{'loss': 0.356728515625, 'learning_rate': 1.3131313131313132e-05, 'epoch': 0.354251012145749}
{'loss': 0.4129073333740234, 'learning_rate': 1.2121212121212122e-05, 'epoch': 0.4048582995951417}
{'loss': 0.37961257934570314, 'learning_rate': 1.1111111111111113e-05, 'epoch': 0.45546558704453444}
{'loss': 0.3658058547973633, 'learning_rate': 1.0101010101010103e-05, 'epoch': 0.5060728744939271}
{'loss'