In [1]:
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from torch.utils.data import Dataset
import torch
import os
import json
import re
from tqdm import tqdm
tqdm.pandas()
from transformers import Trainer, TrainingArguments
import numpy as np
from datasets import load_metric
from sklearn.model_selection import train_test_split
import ast

In [2]:
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

In [3]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained("ai4bharat/indic-bert", num_labels=2)

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [5]:
train_df = pd.read_csv("/scratch/username/textrank_summaries/train_split_44_districts.csv")
test_df = pd.read_csv("/scratch/username/textrank_summaries/validation_split_10_districts.csv")
#train_df = train_df.head(500)
#test_df = test_df.head(500)
hp_train_df = train_df.sample(frac = 0.1, random_state=42).reset_index()
hp_test_df = test_df.sample(frac = 0.1, random_state=42).reset_index()

In [6]:
class LegalDataset(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df.reset_index(drop=True)
        self.df["text"] = self.df["ranked-sentences"].progress_apply(lambda x:" ".join([i[1] for i in eval(x)[:10]]))
        self.df["label"] = self.df["decision"].progress_apply(lambda x:1 if x=="granted" else 0)
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        model_input = self.df['text'][idx]            
        encoded_sent = self.tokenizer.encode_plus(
            text=model_input, 
            add_special_tokens=True,       
            max_length=512,                  
            padding='max_length',          
            return_attention_mask=True, 
            truncation=True
            )
        
        input_ids = encoded_sent.get('input_ids')
        attention_mask = encoded_sent.get('attention_mask')
        input_ids = torch.tensor(input_ids)
        attention_mask = torch.tensor(attention_mask)        

        label = torch.tensor(self.df['label'][idx])
        
        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'label': label}

In [7]:
train_dataset = LegalDataset(train_df, tokenizer)
test_dataset = LegalDataset(test_df, tokenizer)
hp_train_dataset = LegalDataset(hp_train_df, tokenizer)
hp_test_dataset = LegalDataset(hp_test_df, tokenizer)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 14310.44it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 326049.75it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 13401.10it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 538836.59it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 13640.90it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████

In [8]:
metric1 = load_metric("accuracy")
metric2 = load_metric("f1")

In [9]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = metric1.compute(predictions=predictions, references=labels)
    f1 = metric2.compute(predictions=predictions, references=labels, average="micro")
    return {'accuracy': accuracy["accuracy"], 'f1-score': f1["f1"]}

In [10]:
def my_hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "weight_decay":trial.suggest_float("weight_decay", 0.005, 0.05),
        "adam_beta1":trial.suggest_float("adam_beta1", 0.75, 0.95),
        "adam_beta2":trial.suggest_float("adam_beta2", 0.99, 0.9999),
        "adam_epsilon":trial.suggest_float("adam_epsilon", 1e-9, 1e-7, log=True)
    }

In [11]:
training_args = TrainingArguments(
    output_dir='/scratch/username/htr1_results',          # output directory
    num_train_epochs=5,            # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,               # number of warmup steps for learning rate scheduler
    weight_decay=0.01,              # strength of weight decay
    logging_dir='/scratch/username/htr1_logs',           # directory for storing logs
    evaluation_strategy="epoch",
    logging_steps=250,
    save_strategy='epoch',
    save_total_limit = 1,
    learning_rate = 0.00001,
    load_best_model_at_end=True,
    metric_for_best_model ="eval_f1-score",
)

In [12]:
trainer = Trainer(
    model_init=model_init,                        # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=hp_train_dataset,         # training dataset
    eval_dataset=hp_test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

loading configuration file https://huggingface.co/ai4bharat/indic-bert/resolve/main/config.json from cache at /home2/arjunth2001/.cache/huggingface/transformers/2d290a1a22a5f80e173def8b2f31f12d68a957542e6769ab06bfc3de06bc49f4.06ba3893e888d6ff1388c45cdbee1fb785542ae22b70ff159f55da323230a159
Model config AlbertConfig {
  "attention_probs_dropout_prob": 0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.11.3",
  "type_vocab_size": 2,


In [13]:
best_run = trainer.hyperparameter_search(n_trials=10,direction="maximize",hp_space=my_hp_space)

[32m[I 2021-10-25 21:50:47,045][0m A new study created in memory with name: no-name-31f17326-7d63-4447-8a3f-2e7394e19e00[0m
Trial:
loading configuration file https://huggingface.co/ai4bharat/indic-bert/resolve/main/config.json from cache at /home2/arjunth2001/.cache/huggingface/transformers/2d290a1a22a5f80e173def8b2f31f12d68a957542e6769ab06bfc3de06bc49f4.06ba3893e888d6ff1388c45cdbee1fb785542ae22b70ff159f55da323230a159
Model config AlbertConfig {
  "attention_probs_dropout_prob": 0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694485,0.3,0.3
2,No log,0.694353,0.3,0.3
3,No log,0.694094,0.32,0.32
4,No log,0.69371,0.38,0.38
5,No log,0.693323,0.5,0.5


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-0/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-0/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-0/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-0/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-0/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-0/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-0/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-0/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-0/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694271,0.32,0.32
2,No log,0.693534,0.38,0.38
3,No log,0.692208,0.62,0.62
4,No log,0.690582,0.72,0.72
5,No log,0.688655,0.72,0.72


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-1/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-1/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-1/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-1/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-1/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-1/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-1/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-1/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-1/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694482,0.3,0.3
2,No log,0.69436,0.3,0.3
3,No log,0.694128,0.32,0.32
4,No log,0.693793,0.34,0.34
5,No log,0.693421,0.5,0.5


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-2/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-2/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-2/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-2/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-2/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-2/checkpoint-39] due to args.save_total_limit
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-2/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-2/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-2/checkpoint-14/config.jso

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694395,0.3,0.3
2,No log,0.693969,0.3,0.3
3,No log,0.693298,0.5,0.5
4,No log,0.692166,0.62,0.62
5,No log,0.691179,0.72,0.72


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-3/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-3/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-3/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-3/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-3/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-3/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-3/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-3/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-3/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.693971,0.32,0.32
2,No log,0.69254,0.6,0.6
3,No log,0.690976,0.72,0.72
4,No log,0.687419,0.72,0.72
5,No log,0.684166,0.72,0.72


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-4/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-4/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-4/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-4/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-4/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-4/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-4/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-4/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-4/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.693912,0.32,0.32
2,No log,0.692334,0.6,0.6
3,No log,0.690517,0.72,0.72
4,No log,0.686452,0.72,0.72
5,No log,0.682204,0.72,0.72


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-5/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-5/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-5/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-5/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-5/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-5/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-5/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-5/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-5/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694173,0.34,0.34
2,No log,0.693069,0.48,0.48
3,No log,0.691211,0.72,0.72
4,No log,0.688838,0.72,0.72
5,No log,0.686189,0.72,0.72


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-6/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-6/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-6/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-6/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-6/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-6/checkpoint-26] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-6/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-6/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-6/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694151,0.32,0.32
2,No log,0.693051,0.48,0.48
3,No log,0.691199,0.72,0.72
4,No log,0.688771,0.72,0.72
5,No log,0.686056,0.72,0.72


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-7/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-7/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-7/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-7/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-7/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-7/checkpoint-13] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-7/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-7/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-7/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694162,0.34,0.34
2,No log,0.693084,0.48,0.48
3,No log,0.691242,0.72,0.72
4,No log,0.688903,0.72,0.72
5,No log,0.686309,0.72,0.72


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-8/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-8/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-8/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-8/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-8/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-8/checkpoint-13] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-8/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-8/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-8/checkpoint-14/pytorch_model.bin
tokenizer conf

Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,No log,0.694152,0.34,0.34
2,No log,0.693174,0.5,0.5
3,No log,0.69146,0.7,0.7


***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-9/checkpoint-7
Configuration saved in /scratch/arjunth2001/htf2_results/run-9/checkpoint-7/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-9/checkpoint-7/pytorch_model.bin
tokenizer config file saved in /scratch/arjunth2001/htf2_results/run-9/checkpoint-7/tokenizer_config.json
Special tokens file saved in /scratch/arjunth2001/htf2_results/run-9/checkpoint-7/special_tokens_map.json
Deleting older checkpoint [/scratch/arjunth2001/htf2_results/run-9/checkpoint-26] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8
Saving model checkpoint to /scratch/arjunth2001/htf2_results/run-9/checkpoint-14
Configuration saved in /scratch/arjunth2001/htf2_results/run-9/checkpoint-14/config.json
Model weights saved in /scratch/arjunth2001/htf2_results/run-9/checkpoint-14/pytorch_model.bin
tokenizer conf

In [14]:
print("Best HyperParameters")

Best HyperParameters


In [15]:
print(best_run)

BestRun(run_id='1', objective=1.44, hyperparameters={'learning_rate': 2.8897392720600686e-05, 'weight_decay': 0.026866483483660025, 'adam_beta1': 0.8331169535494759, 'adam_beta2': 0.9902932231531537, 'adam_epsilon': 2.681133488124786e-09})


In [16]:
del trainer
del training_args
import gc
gc.collect()

187

In [17]:
print("Starting Training...")

Starting Training...


In [18]:
training_args = TrainingArguments(
    output_dir='/scratch/username/tr1_results',          # output directory
    num_train_epochs=15,            # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,               # number of warmup steps for learning rate scheduler
    weight_decay=0.01,              # strength of weight decay
    logging_dir='/scratch/username/tr1_logs',           # directory for storing logs
    evaluation_strategy="epoch",
    logging_steps=250,
    save_strategy='epoch',
    save_total_limit = 1,
    learning_rate = 0.00001,
    load_best_model_at_end=True,
    metric_for_best_model ="eval_f1-score",
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [19]:
trainer = Trainer(
    model_init=model_init,                        # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

loading configuration file https://huggingface.co/ai4bharat/indic-bert/resolve/main/config.json from cache at /home2/arjunth2001/.cache/huggingface/transformers/2d290a1a22a5f80e173def8b2f31f12d68a957542e6769ab06bfc3de06bc49f4.06ba3893e888d6ff1388c45cdbee1fb785542ae22b70ff159f55da323230a159
Model config AlbertConfig {
  "attention_probs_dropout_prob": 0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.11.3",
  "type_vocab_size": 2,


In [20]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)
print(trainer.args)
trainer.train()

TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.8331169535494759,
adam_beta2=0.9902932231531537,
adam_epsilon=2.681133488124786e-09,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_steps=None,
evaluation_strategy=IntervalStrategy.EPOCH,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=True,
group_by_length=False,
hub_model_id=None,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=2.8897392720600686e-05,
length_column_name=length,
load_best_model_at_end=True,
local_rank=-1,
log_level=-1,
log_level_replica=-1,
log_on_each_node=True,
logging_dir=/scratch/arjunth2001/tf2_logs,
logging_first_st

loading configuration file https://huggingface.co/ai4bharat/indic-bert/resolve/main/config.json from cache at /home2/arjunth2001/.cache/huggingface/transformers/2d290a1a22a5f80e173def8b2f31f12d68a957542e6769ab06bfc3de06bc49f4.06ba3893e888d6ff1388c45cdbee1fb785542ae22b70ff159f55da323230a159
Model config AlbertConfig {
  "attention_probs_dropout_prob": 0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.11.3",
  "type_vocab_size": 2,


Epoch,Training Loss,Validation Loss


***** Running Evaluation *****
  Num examples = 500
  Batch size = 8


KeyboardInterrupt: 

In [None]:
trainer.save_model("/home2/username/legal-tech/textrank_sum+indic-dw")