In [1]:
import pandas as pd
val_df = pd.read_csv('validation_data.csv')
val_df.head()

Unnamed: 0,in,out
0,What are you do with these leads with us?,What do you do with this lead?
1,"The feathers settled, and his shoulders become...","Her feathers settled, and her shoulders became..."
2,Buy for $2.10 USD on eBay.,Buy for $2.10 USD on eBay 2 watchers.
3,Sitting down stairly or upstairs on balcony.,Seating downstairs or upstairs on the balcony.
4,The counter 42 will count up however the revol...,The counter 42 will however count the revoluti...


In [1]:
from transformers import T5ForConditionalGeneration, T5Tokenizer


model_name = 't5-base'
tokenizer = T5Tokenizer.from_pretrained(model_name)
token_model = T5ForConditionalGeneration.from_pretrained(model_name)

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [2]:
from torch.utils.data import Dataset, DataLoader

class GrammarDataset(Dataset):
    def __init__(self, dataset, tokenizer, print_text=False):         
        self.dataset = dataset
        self.pad_to_max_length = False
        self.tokenizer = tokenizer
        self.print_text = print_text
        self.max_len = 128
    
    def __len__(self):
        return len(self.dataset)
    
    def tokenize_data(self, in_out_pair):
        input_, target_ = in_out_pair['in'], in_out_pair['out']

        tokenized_inputs = self.tokenizer(input_, pad_to_max_length=self.pad_to_max_length, 
                                            max_length=self.max_len,
                                            return_attention_mask=True,
                                            truncation=True)
    
        tokenized_targets = self.tokenizer(target_, pad_to_max_length=self.pad_to_max_length, 
                                            max_length=self.max_len,
                                            return_attention_mask=True,
                                            truncation=True)

        inputs={"input_ids": tokenized_inputs['input_ids'],
            "attention_mask": tokenized_inputs['attention_mask'],
            "labels": tokenized_targets['input_ids']
        }
        
        return inputs

    def __getitem__(self, index):
        inputs = self.tokenize_data(self.dataset[index])
        
        if self.print_text:
            for k in inputs.keys():
                print(k, len(inputs[k]))

        return inputs


In [3]:
!pip install rouge_score
from datasets import load_metric
rouge_metric = load_metric("rouge")

from nltk import sent_tokenize

def eval_model(model, eval_dataset, eval_df):
    corrections = model.predict(eval_dataset, num_return_sequences=1, early_stopping=True).predictions
    decoded_preds = tokenizer.batch_decode(corrections, skip_special_tokens=True)
    
    decoded_preds = ["\n".join(sent_tokenize(pred.strip())) for pred in decoded_preds]
    
    rouge_data = rouge_metric.compute(predictions=decoded_preds, references=list(eval_df["out"]), use_stemmer=True)
    return rouge_data



  rouge_metric = load_metric("rouge")


In [4]:
from transformers import Seq2SeqTrainingArguments
import copy
from transformers import Seq2SeqTrainer
from datasets import Dataset
from tqdm import tqdm
from collections import defaultdict
import threading
import pandas as pd

from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer, model=token_model, padding='longest', return_tensors='pt')

def grid_search_cv(grid, folds=5, num_threads=4):
    val_df = pd.read_csv('validation_data.csv').iloc[:500]
    hyperparam_configs = [dict()]
    for param_name, param_vals in grid.items():
        config_len = len(hyperparam_configs)
        for i in range(config_len):
            old_config = hyperparam_configs.pop(0)
            for val in param_vals:
                config_copy = copy.deepcopy(old_config)
                config_copy[param_name] = val
                hyperparam_configs.append(config_copy)

    val_df_len = len(val_df)
    config_result_dicts = [{} for _ in range(len(hyperparam_configs))]
    
    train_dfs = []
    fold_dfs = []
    
    for i in range(len(hyperparam_configs)):
        train_dfs.append([])
        fold_dfs.append([])
        for j in range(folds):
            fold_start_idx = int(j / folds * val_df_len)
            fold_end_idx = int((j + 1) / folds * val_df_len)
            fold_dfs[-1].append(val_df.iloc[fold_start_idx : fold_end_idx])
            train_dfs[-1].append(val_df.iloc[lambda row: (0 <= row.index) & (row.index < fold_start_idx) | (fold_end_idx <= row.index) & (row.index < val_df_len)])
    
    
    def worker_process(worker_idx):
        print(f"Started Thread {worker_idx}")
        for i in range(worker_idx, len(hyperparam_configs), num_threads):
            config = hyperparam_configs[i]
            print(f"On config {config}")
            args = Seq2SeqTrainingArguments(output_dir="hyperparam_out", 
                                            evaluation_strategy="epoch", 
                                            num_train_epochs=1,
                                            predict_with_generate=True,
                                            **config)

            avg_rouges = defaultdict(int)

            for fold in tqdm(range(folds)):
                fold_start_idx = int(i / folds * val_df_len)
                fold_end_idx = int((i + 1) / folds * val_df_len)
                fold_df = fold_dfs[i][fold]
                train_df = train_dfs[i][fold]
                
                train_dataset = GrammarDataset(Dataset.from_pandas(train_df), tokenizer)
                eval_dataset = GrammarDataset(Dataset.from_pandas(fold_df), tokenizer)

                gec_model = Seq2SeqTrainer(model=token_model, 
                                args=args,
                                train_dataset=train_dataset,
                                eval_dataset=eval_dataset,
                                tokenizer=tokenizer,
                                data_collator=data_collator)

                gec_model.train()
                for key, val in eval_model(gec_model, eval_dataset, fold_df).items():
                    avg_rouges[key] += val.mid.fmeasure

            for key, val in avg_rouges.items():
                config_result_dicts[i][key] = val / folds
    
    worker_threads = [threading.Thread(target=worker_process, args=(idx,)) for idx in range(num_threads)]
    
    for thread in worker_threads:
        thread.start()
    
    for thread in worker_threads:
        thread.join()

    return config_result_dicts
        

In [5]:
grid = {
    "learning_rate": [1e-5, 1e-4, 1e-3],
    "weight_decay": [1e-3, 0.01, 0.1]
}

result_dicts = grid_search_cv(grid)

Started Thread 0
On config {'learning_rate': 1e-05, 'weight_decay': 0.001}
Started Thread 1
On config {'learning_rate': 1e-05, 'weight_decay': 0.01}
Started Thread 2
On config {'learning_rate': 1e-05, 'weight_decay': 0.1}
Started Thread 3
On config {'learning_rate': 0.0001, 'weight_decay': 0.001}


  0%|                                                                                                             | 0/5 [00:00<?, ?it/s]

  0%|                                                                                                             | 0/5 [00:00<?, ?it/s][A[A


  0%|                                                                                                             | 0/5 [00:00<?, ?it/s][A[A[A


Epoch,Training Loss,Validation Loss
1,No log,0.778953


Epoch,Training Loss,Validation Loss
1,No log,0.778953


Epoch,Training Loss,Validation Loss
1,No log,0.778953


Epoch,Training Loss,Validation Loss
1,No log,0.778953







  0%|                                                                                                             | 0/5 [55:12<?, ?it/s][A[A[A
Exception in thread Thread-7 (worker_process):
Traceback (most recent call last):
  File "/Users/archit/miniconda3/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/archit/miniconda3/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/_s/t0dxy3t91bn2pn1_261pdm680000gn/T/ipykernel_5687/1673379990.py", line 72, in worker_process
  File "/var/folders/_s/t0dxy3t91bn2pn1_261pdm680000gn/T/ipykernel_5687/3145317039.py", line 13, in eval_model
  File "/Users/archit/miniconda3/lib/python3.10/site-packages/datasets/metric.py", line 467, in compute
    os.remove(file_path)
FileNotFoundError: [Errno 2] No such file or directory: '/Users/archit/.cache/huggingface/metrics/rouge/default/default_experiment-67239e53-0b2b-41f5-af90-305a98899c5e-1-0.arrow

Epoch,Training Loss,Validation Loss
1,No log,0.709532


Epoch,Training Loss,Validation Loss
1,No log,0.709532


Epoch,Training Loss,Validation Loss
1,No log,0.709529




 40%|██████████████████████████████████████                                                         | 2/5 [1:15:06<1:43:18, 2066.26s/it][A[A


 40%|██████████████████████████████████████                                                         | 2/5 [1:15:06<1:43:19, 2066.46s/it][A[A[A

Epoch,Training Loss,Validation Loss
1,No log,0.438199


 40%|██████████████████████████████████████                                                         | 2/5 [1:15:10<1:43:25, 2068.56s/it]

Epoch,Training Loss,Validation Loss
1,No log,0.425048


Epoch,Training Loss,Validation Loss
1,No log,0.414328




 60%|██████████████████████████████████████████████████████████▏                                      | 3/5 [1:34:02<54:42, 1641.47s/it][A[A

Epoch,Training Loss,Validation Loss





 60%|██████████████████████████████████████████████████████████▏                                      | 3/5 [1:34:05<54:46, 1643.02s/it][A[A[A

Epoch,Training Loss,Validation Loss
1,No log,0.488976


 60%|██████████████████████████████████████████████████████████▏                                      | 3/5 [1:34:10<54:49, 1644.74s/it]

Epoch,Training Loss,Validation Loss
1,No log,0.439894


 60%|█████████████████████████████████████████████████████████                                      | 3/5 [1:51:31<1:14:20, 2230.39s/it]
Exception in thread Thread-8 (worker_process):
Traceback (most recent call last):
  File "/Users/archit/miniconda3/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/archit/miniconda3/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/_s/t0dxy3t91bn2pn1_261pdm680000gn/T/ipykernel_5687/1673379990.py", line 71, in worker_process
  File "/Users/archit/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 1662, in train
    return inner_training_loop(
  File "/Users/archit/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 1929, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/Users/archit/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 2717, in training_step




 80%|█████████████████████████████████████████████████████████████████████████████▌                   | 4/5 [1:53:10<24:06, 1446.37s/it][A[A[A

Epoch,Training Loss,Validation Loss
1,No log,0.422664


 80%|█████████████████████████████████████████████████████████████████████████████▌                   | 4/5 [2:01:18<27:17, 1637.95s/it]

Epoch,Training Loss,Validation Loss
1,No log,0.398721





100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [2:03:27<00:00, 1481.50s/it][A[A[A


On config {'learning_rate': 0.0001, 'weight_decay': 0.1}



  0%|                                                                                                             | 0/5 [00:00<?, ?it/s][A

Epoch,Training Loss,Validation Loss
1,No log,0.493116


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [2:03:32<00:00, 1482.55s/it]


On config {'learning_rate': 0.0001, 'weight_decay': 0.01}


  0%|                                                                                                             | 0/5 [00:00<?, ?it/s]

Epoch,Training Loss,Validation Loss
1,No log,0.435215



 20%|███████████████████▍                                                                             | 1/5 [19:21<1:17:27, 1161.75s/it][A

Epoch,Training Loss,Validation Loss
1,No log,0.391945


 20%|███████████████████▍                                                                             | 1/5 [19:20<1:17:23, 1160.86s/it]

Epoch,Training Loss,Validation Loss
1,No log,0.348271



 40%|██████████████████████████████████████▊                                                          | 2/5 [54:04<1:25:10, 1703.44s/it][A

Epoch,Training Loss,Validation Loss
1,No log,0.250912


Epoch,Training Loss,Validation Loss
1,No log,0.226232



 60%|███████████████████████████████████████████████████████████▍                                       | 3/5 [56:20<37:35, 1127.74s/it][A

Epoch,Training Loss,Validation Loss


 60%|███████████████████████████████████████████████████████████▍                                       | 3/5 [58:44<35:07, 1053.82s/it]

Epoch,Training Loss,Validation Loss
1,No log,


 60%|██████████████████████████████████████████████████████████▏                                      | 3/5 [1:00:10<40:06, 1203.40s/it]
Exception in thread Thread-6 (worker_process):
Traceback (most recent call last):
  File "/Users/archit/miniconda3/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/archit/miniconda3/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/_s/t0dxy3t91bn2pn1_261pdm680000gn/T/ipykernel_5687/1673379990.py", line 71, in worker_process
  File "/Users/archit/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 1662, in train
    return inner_training_loop(
  File "/Users/archit/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 1929, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/Users/archit/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 2717, in training_step

 80%|██████████████████████████████████████████████████████████████████████████████▍                   | 4/5 [1:00:53<11:28, 688.46s/it]

Epoch,Training Loss,Validation Loss
1,No log,


100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [1:19:52<00:00, 958.40s/it]


On config {'learning_rate': 0.001, 'weight_decay': 0.1}


  0%|                                                                                                             | 0/5 [00:00<?, ?it/s]

Epoch,Training Loss,Validation Loss
1,No log,


 20%|███████████████████▍                                                                             | 1/5 [16:53<1:07:33, 1013.37s/it]

Epoch,Training Loss,Validation Loss
1,No log,


 40%|███████████████████████████████████████▌                                                           | 2/5 [34:30<51:56, 1038.87s/it]

Epoch,Training Loss,Validation Loss
1,No log,


 60%|███████████████████████████████████████████████████████████▍                                       | 3/5 [51:52<34:40, 1040.42s/it]

Epoch,Training Loss,Validation Loss
1,No log,


 80%|█████████████████████████████████████████████████████████████████████████████▌                   | 4/5 [1:10:27<17:50, 1070.04s/it]

Epoch,Training Loss,Validation Loss
1,No log,


100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [1:12:17<00:00, 867.51s/it]


In [9]:
import pickle
pickle.dump(result_dicts, open("grid_search_results.pickle", "wb"))

In [10]:
result_dicts = pickle.load(open("grid_search_results.pickle", "rb"))
result_dicts

[{'rouge1': 0.6993681412384894,
  'rouge2': 0.6003006717988871,
  'rougeL': 0.6912311801326643,
  'rougeLsum': 0.6907267407676037},
 {'rouge1': 0.6994653994066248,
  'rouge2': 0.6005671722940525,
  'rougeL': 0.6914039437404257,
  'rougeLsum': 0.6912665059679248},
 {'rouge1': 0.521336141228944,
  'rouge2': 0.4713261813328993,
  'rougeL': 0.519306421213279,
  'rougeLsum': 0.5207922431122379},
 {'rouge1': 0.4937158793668424,
  'rouge2': 0.42398969790402946,
  'rougeL': 0.4970772036913645,
  'rougeLsum': 0.4912636414865781},
 {'rouge1': 0.4340816016477408,
  'rouge2': 0.3831328095112644,
  'rougeL': 0.42989356991229244,
  'rougeLsum': 0.4297700130370391},
 {'rouge1': 0.4718702068373914,
  'rouge2': 0.4168448640175759,
  'rougeL': 0.47032384427064805,
  'rougeLsum': 0.4702965765983274},
 {'rouge1': 0.40836077644433366,
  'rouge2': 0.346551216961293,
  'rougeL': 0.40361057737051953,
  'rougeLsum': 0.40285153394211354},
 {'rouge1': 0.36526506245354445,
  'rouge2': 0.3121983939282684,
  'rouge

In [16]:
optimal_scores = {
    "rouge1": float("-inf"),
    "rouge2": float("-inf"),
    "rougeL": float("-inf"),
    "rougeLsum": float("-inf")
}

optimal_score_idx = {
    "rouge1": float("-inf"),
    "rouge2": float("-inf"),
    "rougeL": float("-inf"),
    "rougeLsum": float("-inf")
}

for i, score_dict in enumerate(result_dicts):
    for metric, score in score_dict.items():
        if score > optimal_scores[metric]:
            optimal_scores[metric] = score
            optimal_score_idx[metric] = i

print(optimal_score_idx)

{'rouge1': 1, 'rouge2': 1, 'rougeL': 1, 'rougeLsum': 1}


In [18]:
import copy

grid = {
    "learning_rate": [1e-5, 1e-4, 1e-3],
    "weight_decay": [1e-3, 0.01, 0.1]
}


hyperparam_configs = [dict()]
for param_name, param_vals in grid.items():
    config_len = len(hyperparam_configs)
    for i in range(config_len):
        old_config = hyperparam_configs.pop(0)
        for val in param_vals:
            config_copy = copy.deepcopy(old_config)
            config_copy[param_name] = val
            hyperparam_configs.append(config_copy)

print(f"Opimal Config: {hyperparam_configs[1]}")

Opimal Config: {'learning_rate': 1e-05, 'weight_decay': 0.01}
