In [1]:
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
from torch.utils.data import Dataset
import torch
import os
import json
import re
from tqdm import tqdm
tqdm.pandas()
from transformers import Trainer, TrainingArguments
import numpy as np
import evaluate

from sklearn.model_selection import train_test_split
import ast

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
!hf auth login

In [2]:
from transformers import AlbertTokenizer

tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/indic-bert")


In [3]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained("ai4bharat/indic-bert", num_labels=2)

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [5]:
train_df = pd.read_csv("train_all_ranked.csv")
test_df = pd.read_csv("dev_all_ranked.csv")
#train_df = train_df.head(500)
#test_df = test_df.head(500)
hp_train_df = train_df.sample(frac = 0.1, random_state=42).reset_index()
hp_test_df = test_df.sample(frac = 0.1, random_state=42).reset_index()

In [None]:
train_df.head(1)#['text']
#test_df = test_df.head(500)

In [6]:
class LegalDataset(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df.reset_index(drop=True)
        self.df["text"] = self.df["ranked-sentences"].progress_apply(lambda x:" ".join(eval(x)[:10]))
        #self.df["label"] = self.df["decision"].progress_apply(lambda x:1 if x=="granted" else 0)
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        model_input = self.df['text'][idx]
        encoded_sent = self.tokenizer.encode_plus(
            text=model_input,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            return_attention_mask=True,
            truncation=True
            )

        input_ids = encoded_sent.get('input_ids')
        attention_mask = encoded_sent.get('attention_mask')
        input_ids = torch.tensor(input_ids)
        attention_mask = torch.tensor(attention_mask)

        label = torch.tensor(self.df['label'][idx])

        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'label': label}

In [7]:
train_dataset = LegalDataset(train_df, tokenizer)
test_dataset = LegalDataset(test_df, tokenizer)
hp_train_dataset = LegalDataset(hp_train_df, tokenizer)
hp_test_dataset = LegalDataset(hp_test_df, tokenizer)

100%|██████████| 123742/123742 [00:04<00:00, 30605.57it/s]
100%|██████████| 17707/17707 [00:00<00:00, 32533.07it/s]
100%|██████████| 12374/12374 [00:00<00:00, 37680.77it/s]
100%|██████████| 1771/1771 [00:00<00:00, 34966.85it/s]


In [8]:
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("f1")

In [17]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = metric1.compute(predictions=predictions, references=labels)
    f1 = metric2.compute(predictions=predictions, references=labels, average="macro")
    return {'accuracy': accuracy["accuracy"], 'f1-score': f1["f1"]}

In [10]:
def my_hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "weight_decay":trial.suggest_float("weight_decay", 0.005, 0.05),
        "adam_beta1":trial.suggest_float("adam_beta1", 0.75, 0.95),
        "adam_beta2":trial.suggest_float("adam_beta2", 0.99, 0.9999),
        "adam_epsilon":trial.suggest_float("adam_epsilon", 1e-9, 1e-7, log=True)
    }

In [11]:
training_args = TrainingArguments(
    output_dir='htf3_results',          # output directory
    num_train_epochs=5,            # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,               # number of warmup steps for learning rate scheduler
    weight_decay=0.01,              # strength of weight decay
    logging_dir='htf3_logs',           # directory for storing logs
    eval_strategy="epoch",
    logging_steps=250,
    save_strategy='epoch',
    save_total_limit = 1,
    learning_rate = 0.00001,
    load_best_model_at_end=True,
    metric_for_best_model ="eval_f1-score",
)

In [12]:
trainer = Trainer(
    model_init=model_init,                        # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=hp_train_dataset,         # training dataset
    eval_dataset=hp_test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

  trainer = Trainer(
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
best_run = trainer.hyperparameter_search(n_trials=10,direction="maximize",hp_space=my_hp_space)

[I 2025-12-16 23:30:35,336] A new study created in memory with name: no-name-52bddbda-5b7e-4897-849e-97c591b6eb61
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6388,0.607652,0.659514,0.659514
2,0.5809,0.620935,0.652739,0.652739
3,0.522,0.54412,0.727837,0.727837
4,0.5053,0.52578,0.739695,0.739695
5,0.486,0.515317,0.762281,0.762281


[I 2025-12-17 00:40:17,419] Trial 0 finished with value: 1.5245623941276114 and parameters: {'learning_rate': 1.073446762791212e-05, 'weight_decay': 0.013795256179277411, 'adam_beta1': 0.7738309599946543, 'adam_beta2': 0.9975279662623506, 'adam_epsilon': 8.982944128984163e-09}. Best is trial 0 with value: 1.5245623941276114.
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6399,0.611689,0.629588,0.629588
2,0.6186,0.605548,0.685488,0.685488
3,0.5872,0.599063,0.696217,0.696217
4,0.5938,0.587872,0.704122,0.704122
5,0.5636,0.583,0.702993,0.702993


[I 2025-12-17 01:49:56,348] Trial 1 finished with value: 1.4059853190287972 and parameters: {'learning_rate': 1.4301355697080908e-06, 'weight_decay': 0.010381038116824762, 'adam_beta1': 0.8680002602350502, 'adam_beta2': 0.9972046863141097, 'adam_epsilon': 2.851091486451478e-09}. Best is trial 0 with value: 1.5245623941276114.
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6714,0.66081,0.627894,0.627894
2,0.674,0.65935,0.627894,0.627894
3,0.6645,0.660049,0.627894,0.627894
4,0.6671,0.660079,0.627894,0.627894
5,0.6615,0.660627,0.627894,0.627894


[I 2025-12-17 02:58:42,655] Trial 2 finished with value: 1.2557876905702994 and parameters: {'learning_rate': 4.8487858527449394e-05, 'weight_decay': 0.024920666818435015, 'adam_beta1': 0.8852178466691856, 'adam_beta2': 0.9983194186538753, 'adam_epsilon': 1.1118485615749815e-09}. Best is trial 0 with value: 1.5245623941276114.
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6171,0.599269,0.661773,0.661773
2,0.5518,0.546224,0.728402,0.728402
3,0.5034,0.502397,0.770751,0.770751
4,0.4711,0.511952,0.769622,0.769622
5,0.4466,0.51684,0.776398,0.776398


[I 2025-12-17 04:08:07,410] Trial 3 finished with value: 1.5527950310559007 and parameters: {'learning_rate': 1.0375657886671487e-05, 'weight_decay': 0.04627390717704294, 'adam_beta1': 0.8107180784008142, 'adam_beta2': 0.9988716037246189, 'adam_epsilon': 1.253765545756664e-08}. Best is trial 3 with value: 1.5527950310559007.
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6757,0.660067,0.627894,0.627894
2,0.6754,0.660345,0.627894,0.627894
3,0.6668,0.660274,0.627894,0.627894
4,0.665,0.660082,0.627894,0.627894
5,0.6613,0.660066,0.627894,0.627894


[I 2025-12-17 05:16:44,361] Trial 4 finished with value: 1.2557876905702994 and parameters: {'learning_rate': 7.493435578036118e-05, 'weight_decay': 0.03032717254551403, 'adam_beta1': 0.8479593508676107, 'adam_beta2': 0.9991721937186858, 'adam_epsilon': 2.0711984126169957e-08}. Best is trial 3 with value: 1.5527950310559007.
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6625,0.65945,0.628458,0.628458


[I 2025-12-17 05:30:36,253] Trial 5 pruned. 
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.637,0.612885,0.627894,0.627894


[I 2025-12-17 05:44:29,544] Trial 6 pruned. 
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6704,0.664296,0.627894,0.627894


[I 2025-12-17 05:58:22,184] Trial 7 pruned. 
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6716,0.661912,0.627894,0.627894


[I 2025-12-17 06:12:08,243] Trial 8 pruned. 
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6762,0.659881,0.627894,0.627894


[I 2025-12-17 06:25:55,846] Trial 9 pruned. 


In [14]:
print("Best HyperParameters")

Best HyperParameters


In [15]:
print(best_run)

BestRun(run_id='3', objective=1.5527950310559007, hyperparameters={'learning_rate': 1.0375657886671487e-05, 'weight_decay': 0.04627390717704294, 'adam_beta1': 0.8107180784008142, 'adam_beta2': 0.9988716037246189, 'adam_epsilon': 1.253765545756664e-08}, run_summary=None)


In [18]:
del trainer
del training_args
import gc
gc.collect()

79

In [19]:
print("Starting Training...")

Starting Training...


In [21]:
training_args = TrainingArguments(
    output_dir='tf3_results',          # output directory
    num_train_epochs=15,            # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,               # number of warmup steps for learning rate scheduler
    weight_decay=0.01,              # strength of weight decay
    logging_dir='tf3_logs',           # directory for storing logs
    eval_strategy="epoch",
    logging_steps=250,
    save_strategy='epoch',
    save_total_limit = 1,
    learning_rate = 0.00001,
    load_best_model_at_end=True,
    metric_for_best_model ="eval_f1-score",
)

In [24]:
trainer = Trainer(
    model_init=model_init,                        # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

  trainer = Trainer(
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [23]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)
print(trainer.args)
trainer.train()

TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.8107180784008142,
adam_beta2=0.9988716037246189,
adam_epsilon=1.253765545756664e-08,
auto_find_batch_size=False,
average_tokens_across_devices=True,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=epoch,
ev

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
trainer.save_model("/home2/username/legal-tech/tfidf_sum+indic-ad")