## Colab Setup

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the project's helper.py) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [3]:
# Mount Google Drive at /content/drive; the project files used below live there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
import sys
import time

# Project folder inside "MyDrive". The path is anchored at the absolute mount
# point passed to drive.mount('/content/drive'), so it keeps resolving even if
# the notebook's working directory is changed later.
GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = 'CS546_proj'
GOOGLE_DRIVE_PATH = os.path.join('/content', 'drive', 'MyDrive', GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)
print(os.listdir(GOOGLE_DRIVE_PATH))

# Make the project's modules on Drive (helper.py) importable. The membership
# check keeps the cell idempotent: re-running it does not add a duplicate entry.
if GOOGLE_DRIVE_PATH not in sys.path:
    sys.path.append(GOOGLE_DRIVE_PATH)

# Switch this process to US Central time.
os.environ["TZ"] = "US/Central"
time.tzset()

['OpenPrompt', '.ipynb_checkpoints', '__pycache__', 'woz', 'multiwoz', 'checkpoint_best.pt', 'checkpoint_woz_best_91_17.pt', 'checkpoint_multiwoz_best.pt', 'helper.py', 'dialogue state tracking.ipynb']


In [5]:
!pip install transformers==4.21.3
!pip install openprompt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.21.3
  Downloading transformers-4.21.3-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 14.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 86.3 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 64.1 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.12.1 transformers-4.21.3
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openprompt
  Downloading openprompt-1.0.1-py3-none-any.whl (146 kB)
[K     |████████████████████████████████| 146

## Woz

In [6]:
import json

from helper import process_data

# Paths of the preprocessed WOZ splits on Drive, produced in train/dev/test order.
train_data_path, valid_data_path, test_data_path = (
    os.path.join(GOOGLE_DRIVE_PATH, 'woz', 'processed', split_file)
    for split_file in ('train_data_withneg.json', 'dev_data_withneg.json', 'test_data.json')
)
restaurant_db_path = os.path.join(GOOGLE_DRIVE_PATH, 'woz', 'db', 'restaurant_db.json')

# Run every split through the project helper and key the results by split name.
dataset = {
    split_name: process_data(split_path)
    for split_name, split_path in (
        ("train", train_data_path),
        ("valid", valid_data_path),
        ("test", test_data_path),
    )
}


In [7]:
# Report how many examples each split contains, one line per split.
print(
    *(
        f"{len(dataset[split_name])} dialogues in {split_label} set"
        for split_name, split_label in (
            ("train", "training"),
            ("valid", "validation"),
            ("test", "test"),
        )
    ),
    sep="\n",
)

2531 dialogues in training set
830 dialogues in validation set
1642 dialogues in test set


In [8]:
# Hyperparameters for the WOZ run. BATCH_SIZE and MAX_SEQ_LENGTH are passed to
# every PromptDataLoader, LEARNING_RATE to the optimizer, and EPOCH is the
# number of passes over the training dataloader.
BATCH_SIZE = 2
# MAX_SEQ_LENGTH = 250
MAX_SEQ_LENGTH = 300
LEARNING_RATE = 1e-4
EPOCH = 25


# File the training loop writes whenever the validation EM improves.
# Note: the test-evaluation cell loads "checkpoint_woz_best_91_17.pt", not this file.
model_save_dir = os.path.join(GOOGLE_DRIVE_PATH,"checkpoint_woz_best.pt")

In [11]:
from openprompt import PromptDataLoader
from openprompt.plms import load_plm, T5LMTokenizerWrapper
from openprompt.prompts import ManualTemplate, PrefixTuningTemplate

# Pretrained T5 backbone together with its tokenizer, config and the matching
# OpenPrompt tokenizer wrapper class.
plm, tokenizer, model_config, WrapperClass = load_plm("t5-lm", "t5-base")

# Prefix-tuning template: the example's "sentence" meta field followed by the
# mask slot that the model has to generate.
# NOTE(review): "shortenable" is given the string "False", not a boolean —
# confirm how OpenPrompt interprets it before relying on either meaning.
promptTemplate = PrefixTuningTemplate(
    tokenizer = tokenizer,
    model = plm,
    num_token = 8, # for 91_17 t5-base
    prefix_dropout = 0.5,
    text = '{"meta": "sentence", "shortenable": "False"} {"mask"}',
)

# The three loaders share every setting except the split they read and whether
# it is shuffled (training only). They are built in train, valid, test order.
train_dataloader, valid_dataloader, test_dataloader = (
    PromptDataLoader(
        dataset = dataset[split_name],
        template = promptTemplate,
        tokenizer = tokenizer,
        tokenizer_wrapper_class = WrapperClass,
        max_seq_length = MAX_SEQ_LENGTH,
        decoder_max_length = 25,
        batch_size = BATCH_SIZE,
        shuffle = (split_name == "train"),
        teacher_forcing = True,
        predict_eos_token = True,
        truncate_method = "head",
    )
    for split_name in ("train", "valid", "test")
)



Downloading config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/850M [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/773k [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
tokenizing: 2531it [00:02, 1112.13it/s]
tokenizing: 830it [00:00, 1063.51it/s]
tokenizing: 1642it [00:01, 1050.45it/s]


### Train

In [None]:
from openprompt import PromptForGeneration
from transformers import  AdamW
import tqdm
from helper import evaluation, to_device
import torch

# Wrap the prefix-tuning template and the pretrained LM into one generation
# model. freeze_plm=False: the LM weights are trained along with the prefix.
promptModel = PromptForGeneration(
    template = promptTemplate,
    plm = plm,
    freeze_plm = False
)

promptModel.cuda()

# To resume from an earlier run, load its weights here:
# promptModel.load_state_dict(torch.load(os.path.join(GOOGLE_DRIVE_PATH,"checkpoint_woz_best_91_17.pt")))

# Parameters whose name contains one of these substrings are excluded from
# weight decay (biases and layer-norm weights). T5 names its normalisation
# layers "layer_norm" / "final_layer_norm", so the BERT-style "LayerNorm.weight"
# pattern alone would never match them; "layer_norm.weight" covers T5.
no_decay = ['bias', 'LayerNorm.weight', 'layer_norm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in promptModel.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.025},
    {'params': [p for n, p in promptModel.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
]

optimizer = AdamW(optimizer_grouped_parameters, lr=LEARNING_RATE)

# Best validation exact-match seen so far, plus patience bookkeeping.
# Early stopping itself is currently disabled (see the commented lines below).
max_em = 0.0
max_patience, current_patience, patience_break = 7, 0, False
train_em_history = []
valid_em_history = []
train_loss_history = []
valid_loss_history = []
label_loss_history = []   # (epoch, training step) of every logged evaluation

# `plm` is shared with other cells, which may have left it in eval mode.
# Make sure optimisation starts in training mode (dropout enabled).
promptModel.train()

for epoch in range(EPOCH):
    tot_loss = 0.0
    # The context manager closes the bar even if an iteration raises.
    with tqdm.tqdm(total=len(train_dataloader), ncols=150, position=0, leave=True, desc="Epoch: "+str(epoch)) as progress:
        for step, inputs in enumerate(train_dataloader):
            inputs = to_device(inputs, "cuda")
            loss = promptModel(inputs)
            loss.backward()
            tot_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()

            # Every 500 training steps: log the running loss, the EM on the
            # current training batch, and run a full validation pass.
            if step % 500 == 499:
                avg_train_loss = tot_loss / (step + 1)
                train_loss_history.append(avg_train_loss)

                # Generate in eval mode so dropout does not perturb the reported EM.
                promptModel.eval()
                with torch.no_grad():
                    train_pred_text = promptModel.generate(batch=inputs, num_beams=15, max_new_tokens=25)[-1]
                train_em = evaluation(train_pred_text, inputs.tgt_text)
                train_em_history.append(train_em)
                print("\nStep {}, average loss: {} Train EM: {}".format(step, avg_train_loss, train_em), flush=True)

                # Validation. Dedicated variable names are used so the training
                # loop's `step`, `inputs` and `loss` are not overwritten.
                allpreds, alllabels = [], []
                valid_tot_loss = 0.0
                with torch.no_grad():
                    for valid_inputs in valid_dataloader:
                        valid_inputs = to_device(valid_inputs, "cuda")
                        # Compute the loss before generate().
                        # NOTE(review): generate() may modify the batch in place — confirm.
                        valid_loss = promptModel(valid_inputs)
                        valid_tot_loss += valid_loss.item()
                        valid_pred_text = promptModel.generate(batch=valid_inputs, num_beams=15, max_new_tokens=25)[-1]
                        alllabels.extend(valid_inputs.tgt_text)
                        allpreds.extend(valid_pred_text)
                promptModel.train()

                valid_loss_history.append(valid_tot_loss/len(valid_dataloader))
                # `step` is still the training step here.
                label_loss_history.append((epoch, step))

                valid_em = evaluation(allpreds, alllabels)
                print(f"\nEM Score: {valid_em}")
                valid_em_history.append(valid_em)
                if valid_em > max_em:
                    # New best validation score: checkpoint and reset patience.
                    max_em = valid_em
                    print("\nsaving model to ",model_save_dir)
                    torch.save(promptModel.state_dict(), model_save_dir)
                    current_patience = 0
                else:
                    current_patience += 1
                    # Early stopping (disabled):
                    # if current_patience > max_patience:
                    #     patience_break = True
                    #     break
            progress.update(1)
    # if patience_break:
    #     break

print("EM score", max_em)


### Evaluation on Test

In [12]:
from openprompt import PromptForGeneration
from transformers import  AdamW
import tqdm
from helper import evaluation, to_device
import torch

# Rebuild the generation model around the shared template and LM, then restore
# the fine-tuned weights from the saved WOZ checkpoint.
promptModel = PromptForGeneration(
    template = promptTemplate,
    plm = plm,
    freeze_plm = False
)

promptModel.cuda()

promptModel.load_state_dict(torch.load(os.path.join(GOOGLE_DRIVE_PATH,"checkpoint_woz_best_91_17.pt")))
promptModel.eval()

# Collect gold targets and decoded predictions over the whole test set.
# Test-specific names are used so the training loop's validation variables
# (valid_pred_text, valid_em) are not silently overwritten with test results.
alllabels = []
allpreds = []
with torch.no_grad():
    for test_inputs in test_dataloader:
        test_inputs = to_device(test_inputs, "cuda")
        # Generation budget matches decoder_max_length and the validation
        # loop (25), so long targets are not cut short only at test time.
        test_pred_text = promptModel.generate(batch=test_inputs, num_beams=15, max_new_tokens=25, skip_special_tokens=True)[-1]
        alllabels.extend(test_inputs.tgt_text)
        allpreds.extend(test_pred_text)
# Switch back to training mode: `plm` is shared with the training cell, which
# would otherwise inherit the eval-mode setting from here.
promptModel.train()

test_em = evaluation(allpreds, alllabels)
print(f"\nEM Score: {test_em}")

['area east, pricerange cheap', 'area east, pricerange cheap', 'area east, pricerange cheap', 'pricerange moderate', 'area north, pricerange moderate', 'area north, pricerange moderate', 'area north, pricerange moderate', 'area north, pricerange moderate', 'area north, pricerange moderate', 'area north, pricerange moderate', 'food vegetarian', 'food turkish', 'food turkish, pricerange dontcare', 'food turkish, pricerange dontcare', 'food turkish, pricerange dontcare', 'area south, food danish', 'area south, food portuguese', 'area south, food portuguese', 'food swedish, pricerange cheap', 'food indian, pricerange cheap', 'food indian, pricerange cheap', 'food indian, pricerange cheap', 'area west, pricerange moderate', 'area west, pricerange moderate', 'area dontcare, food spanish', 'area dontcare, food spanish', 'area dontcare, food spanish', 'food australian, pricerange cheap', 'food italian, pricerange cheap', 'food italian, pricerange cheap', 'food italian, pricerange cheap', 'food

In [None]:
# Show the target texts of the first validation batch (nothing is printed if
# the loader yields no batches).
first_valid_batch = next(iter(valid_dataloader), None)
if first_valid_batch is not None:
    print(first_valid_batch["tgt_text"])

['<pad> area south, pricerange expensive', '<pad> area south, pricerange expensive', '<pad> area south, pricerange expensive', '<pad> area west, pricerange cheap', '<pad> area west, pricerange cheap', '<pad> area west, pricerange cheap']


## MultiWoz

In [None]:
import json

from helper import process_data

# Paths of the preprocessed MultiWOZ splits on Drive, in train/dev/test order.
train_data_path, valid_data_path, test_data_path = (
    os.path.join(GOOGLE_DRIVE_PATH, 'multiwoz', 'processed', split_file)
    for split_file in ('train_data_withneg.json', 'dev_data_withneg.json', 'test_data.json')
)
restaurant_db_path = os.path.join(GOOGLE_DRIVE_PATH, 'multiwoz', 'db', 'restaurant_db.json')
model_save_dir = os.path.join(GOOGLE_DRIVE_PATH, "checkpoint_best.pt")

# Run every split through the project helper and key the results by split name.
dataset = {
    split_name: process_data(split_path)
    for split_name, split_path in (
        ("train", train_data_path),
        ("valid", valid_data_path),
        ("test", test_data_path),
    )
}

# Train on the first 20000 examples only.
dataset["train"] = dataset["train"][:20000]

In [None]:
# Whitespace-token lengths of every input sentence and every target text,
# gathered across all splits in dictionary order.
sentence_info = [
    len(example.meta["sentence"].split())
    for split_name in dataset
    for example in dataset[split_name]
]
tgt_info = [
    len(example.tgt_text.split())
    for split_name in dataset
    for example in dataset[split_name]
]

# Longest input and longest target, one per line.
print(max(sentence_info), max(tgt_info), sep="\n")


863
72


In [None]:
import numpy as np

# Turn the per-example length lists into arrays (keeping their names), then
# print the 99th percentile of the input lengths and of the target lengths.
sentence_info, tgt_info = np.array(sentence_info), np.array(tgt_info)

print(np.percentile(sentence_info, 99), np.percentile(tgt_info, 99), sep="\n")

418.6399999999994
50.0


In [None]:
# Mean target length, in whitespace-separated words, over all examples of all splits.
sum(tgt_info)/len(tgt_info)

17.228200477876616

In [None]:
# Report how many examples each split contains, one line per split.
print(
    *(
        f"{len(dataset[split_name])} dialogues in {split_label} set"
        for split_name, split_label in (
            ("train", "training"),
            ("valid", "validation"),
            ("test", "test"),
        )
    ),
    sep="\n",
)

20000 dialogues in training set
7365 dialogues in validation set
7372 dialogues in test set


In [None]:
# Hyperparameters for the MultiWOZ run. BATCH_SIZE and MAX_SEQ_LENGTH are passed
# to every PromptDataLoader, LEARNING_RATE to the optimizer, and EPOCH is the
# number of passes over the training dataloader.
BATCH_SIZE = 1
# MAX_SEQ_LENGTH = 250
# NOTE(review): the length statistics cell reports a 99th-percentile input length of
# ~419 whitespace words, so a 360 limit presumably truncates some inputs — confirm
# this is intended.
MAX_SEQ_LENGTH = 360
LEARNING_RATE = 1e-4
EPOCH = 25

# File the training loop writes whenever the validation EM improves. This replaces
# the "checkpoint_best.pt" path assigned in the MultiWOZ data-loading cell.
model_save_dir = os.path.join(GOOGLE_DRIVE_PATH,"checkpoint_multiwoz_best.pt")

In [None]:
from openprompt import PromptDataLoader
from openprompt.plms import load_plm, T5LMTokenizerWrapper
from openprompt.prompts import ManualTemplate, PrefixTuningTemplate

# Pretrained T5 backbone together with its tokenizer, config and the matching
# OpenPrompt tokenizer wrapper class.
plm, tokenizer, model_config, WrapperClass = load_plm("t5-lm", "t5-large")

# Prefix-tuning template: the example's "sentence" meta field followed by the
# mask slot that the model has to generate.
# NOTE(review): "shortenable" is given the string "False", not a boolean —
# confirm how OpenPrompt interprets it before relying on either meaning.
promptTemplate = PrefixTuningTemplate(
    tokenizer = tokenizer,
    model = plm,
    num_token = 6,
    prefix_dropout = 0.2,
    text = '{"meta": "sentence", "shortenable": "False"} {"mask"}',
)

# The three loaders share every setting except the split they read and whether
# it is shuffled (training only). They are built in train, valid, test order.
train_dataloader, valid_dataloader, test_dataloader = (
    PromptDataLoader(
        dataset = dataset[split_name],
        template = promptTemplate,
        tokenizer = tokenizer,
        tokenizer_wrapper_class = WrapperClass,
        max_seq_length = MAX_SEQ_LENGTH,
        decoder_max_length = 50,
        batch_size = BATCH_SIZE,
        shuffle = (split_name == "train"),
        teacher_forcing = True,
        predict_eos_token = True,
        truncate_method = "head",
    )
    for split_name in ("train", "valid", "test")
)



Downloading config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.75G [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/773k [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-large automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
tokenizing: 0it [00:00, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (536 > 512). Running this sequence through the model will result in indexing errors
tokenizing: 20000it [00:29, 675.04it/s]
tokenizing: 7365it [00:10, 715.26it/s]
tokenizing: 7372it [00:10, 724.44it/s]


In [None]:
from openprompt import PromptForGeneration
from transformers import  AdamW
import tqdm
from helper import evaluation, to_device
import torch

# Wrap the prefix-tuning template and the pretrained LM into one generation
# model. freeze_plm=False: the LM weights are trained along with the prefix.
promptModel = PromptForGeneration(
    template = promptTemplate,
    plm = plm,
    freeze_plm = False
)

promptModel.cuda()

# To resume an interrupted MultiWOZ run, load the checkpoint this loop writes:
# promptModel.load_state_dict(torch.load(model_save_dir))

# Parameters whose name contains one of these substrings are excluded from
# weight decay (biases and layer-norm weights). T5 names its normalisation
# layers "layer_norm" / "final_layer_norm", so the BERT-style "LayerNorm.weight"
# pattern alone would never match them; "layer_norm.weight" covers T5.
no_decay = ['bias', 'LayerNorm.weight', 'layer_norm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in promptModel.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.025},
    {'params': [p for n, p in promptModel.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
]

optimizer = AdamW(optimizer_grouped_parameters, lr=LEARNING_RATE)

# Best validation exact-match seen so far, plus patience bookkeeping.
# Early stopping itself is currently disabled (see the commented lines below).
max_em = 0.0
max_patience, current_patience, patience_break = 7, 0, False
train_em_history = []
valid_em_history = []
train_loss_history = []
valid_loss_history = []
label_loss_history = []   # (epoch, training step) of every logged evaluation

# `plm` is shared with other cells, which may have left it in eval mode.
# Make sure optimisation starts in training mode (dropout enabled).
promptModel.train()

for epoch in range(EPOCH):
    tot_loss = 0.0
    # The context manager closes the bar even if an iteration raises.
    with tqdm.tqdm(total=len(train_dataloader), ncols=150, position=0, leave=True, desc="Epoch: "+str(epoch)) as progress:
        for step, inputs in enumerate(train_dataloader):
            inputs = to_device(inputs, "cuda")
            loss = promptModel(inputs)
            loss.backward()
            tot_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()

            # Every 1000 training steps: log the running loss, the EM on the
            # current training batch, and run a full validation pass.
            if step % 1000 == 999:
                avg_train_loss = tot_loss / (step + 1)
                train_loss_history.append(avg_train_loss)

                # Generate in eval mode so dropout does not perturb the reported
                # EM; the token budget matches the validation pass below (70).
                promptModel.eval()
                with torch.no_grad():
                    train_pred_text = promptModel.generate(batch=inputs, num_beams=15, max_new_tokens=70)[-1]
                train_em = evaluation(train_pred_text, inputs.tgt_text)
                train_em_history.append(train_em)
                print("\nStep {}, average loss: {} Train EM: {}".format(step, avg_train_loss, train_em), flush=True)

                # Validation. Dedicated variable names are used so the training
                # loop's `step`, `inputs` and `loss` are not overwritten.
                allpreds, alllabels = [], []
                valid_tot_loss = 0.0
                with torch.no_grad():
                    for valid_inputs in valid_dataloader:
                        valid_inputs = to_device(valid_inputs, "cuda")
                        # Compute the loss before generate().
                        # NOTE(review): generate() may modify the batch in place — confirm.
                        valid_loss = promptModel(valid_inputs)
                        valid_tot_loss += valid_loss.item()
                        valid_pred_text = promptModel.generate(batch=valid_inputs, num_beams=15, max_new_tokens=70)[-1]
                        alllabels.extend(valid_inputs.tgt_text)
                        allpreds.extend(valid_pred_text)
                promptModel.train()

                valid_loss_history.append(valid_tot_loss/len(valid_dataloader))
                # `step` is still the training step here.
                label_loss_history.append((epoch, step))

                valid_em = evaluation(allpreds, alllabels)
                print(f"\nEM Score: {valid_em}")
                valid_em_history.append(valid_em)
                if valid_em > max_em:
                    # New best validation score: checkpoint and reset patience.
                    max_em = valid_em
                    print("\nsaving model to ",model_save_dir)
                    torch.save(promptModel.state_dict(), model_save_dir)
                    current_patience = 0
                else:
                    current_patience += 1
                    # Early stopping (disabled):
                    # if current_patience > max_patience:
                    #     patience_break = True
                    #     break
            progress.update(1)
    # if patience_break:
    #     break

print("EM score", max_em)
