In [None]:
!pip install datasets transformers[sentencepiece]

In [None]:
import torch

In [None]:
# Use the GPU when PyTorch reports CUDA as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# One name for the checkpoint id so the tokenizer and the model are always
# loaded from the same pretrained checkpoint.
checkpoint_name = "facebook/blenderbot-400M-distill"

tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

# Load the seq2seq model, then move it to the device chosen in the earlier cell.
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_name)
model = model.to(device)

In [None]:
from datasets import load_dataset

# Load the dialogue dataset by name. Later cells read its "train" and
# "validation" splits and its 'chat_history' / 'sys_response' columns.
dataset = load_dataset(
    path="Adapting/empathetic_dialogues_with_special_tokens",
)

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of dialogue examples for seq2seq fine-tuning.

    Args:
        examples: A batch from the dataset, indexable by column name. The
            ``'chat_history'`` column supplies the model inputs and the
            ``'sys_response'`` column supplies the targets.

    Returns:
        The tokenizer output for the inputs, with a ``"labels"`` entry that
        holds the token ids of the tokenized targets. Inputs and targets are
        both tokenized with ``truncation=True``.
    """
    # `text_target=` tokenizes the targets in the same call and stores their
    # ids under "labels". It replaces the deprecated
    # `tokenizer.as_target_tokenizer()` context manager.
    return tokenizer(
        examples['chat_history'],
        text_target=examples['sys_response'],
        truncation=True,
    )

In [None]:
# Tokenize the dataset in batches and drop the raw columns so that only the
# tokenizer outputs remain. The column list is taken from the "train" split.
raw_column_names = dataset["train"].column_names
tokenized_datasets = dataset.map(
    function=preprocess_function,
    batched=True,
    remove_columns=raw_column_names,
)

In [None]:
from transformers import DataCollatorForSeq2Seq

# Collator used by the trainer to assemble batches; it is given both the
# tokenizer and the model.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)


In [None]:
import dataclasses

from transformers import Seq2SeqTrainingArguments

# Mixed precision (fp16) is only requested when CUDA is available.
GPU = torch.cuda.is_available()

# Newer transformers releases call this option `eval_strategy`; older ones only
# know `evaluation_strategy`. Use whichever field the installed version defines
# so this cell runs on both.
_training_arg_names = {f.name for f in dataclasses.fields(Seq2SeqTrainingArguments)}
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

# Training configuration: evaluate and save a checkpoint once per epoch and
# keep at most three checkpoints in the "dialog_nlp2022" output directory.
args = Seq2SeqTrainingArguments(
    "dialog_nlp2022",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=10,
    # predict_with_generate=True,
    fp16=GPU,  # speeds up training on modern GPUs.
    eval_accumulation_steps=10,
    **{_eval_strategy_key: "epoch"},
)

In [None]:
from transformers import Seq2SeqTrainer

# Bundle the model, training arguments, tokenized splits, collator and
# tokenizer into a single trainer object.
# NOTE(review): newer transformers releases appear to prefer
# `processing_class=` over `tokenizer=` — confirm against the installed version.
trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)

In [None]:
# Run fine-tuning with the trainer built in the previous cell. The call is left
# as the cell's last expression so its return value is shown as the cell output.
trainer.train()

In [None]:
'''
Code for pushing the fine-tuned model to the Hugging Face Hub
'''
# from huggingface_hub import notebook_login
# notebook_login()
# checkpoint = 'blenderbot-400M-distill'
# push = input("push to huggingface? ")
# if push == 'yes' or push == 'y':
#     num_epochs = input("Number of epochs: ")
#     commit_msg = f'{checkpoint}_{num_epochs}'
#     tokenizer.push_to_hub(commit_message=commit_msg, repo_path_or_name=f"YOUR REPO NAME" )
#     model.push_to_hub(commit_message=commit_msg, repo_path_or_name=f"YOUR REPO NAME")