In [1]:
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
from torch.utils.data import Dataset
import torch
import os
import json
import re
from tqdm import tqdm
tqdm.pandas()
from transformers import Trainer, TrainingArguments
import numpy as np
import evaluate

from sklearn.model_selection import train_test_split
import ast

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!hf auth login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): Traceback (most recent call last):
  File "/home/amlan/legal/joshi/bail/.venv/bin/hf", line 10, in <module>
    sys.exit(main())
  File "/home/amlan/legal/joshi/bail/.venv/lib/python3.10/site-packages/huggingface_hub/cli/hf.py", line 59, in main
    service.run()
  File "/ho

In [2]:
from transformers import AlbertTokenizer

tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/indic-bert")


In [3]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained("ai4bharat/indic-bert", num_labels=2)

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [26]:
train_df = pd.read_csv("train_all_ranked.csv")
test_df = pd.read_csv("dev_all_ranked.csv")
#train_df = train_df.head(500)
#test_df = test_df.head(500)
hp_train_df = train_df.sample(frac = 0.1, random_state=42).reset_index()
hp_test_df = test_df.sample(frac = 0.1, random_state=42).reset_index()

In [25]:
train_df.head(1)['text']
#test_df = test_df.head(500)

0    {'facts-and-arguments': ['अग्रिम जमानत प्रार्थ...
Name: text, dtype: object

In [6]:
class LegalDataset(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df.reset_index(drop=True)
        self.df["text"] = self.df["ranked-sentences"].progress_apply(lambda x:" ".join(eval(x)[:10]))
        #self.df["label"] = self.df["decision"].progress_apply(lambda x:1 if x=="granted" else 0)
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        model_input = self.df['text'][idx]
        encoded_sent = self.tokenizer.encode_plus(
            text=model_input,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            return_attention_mask=True,
            truncation=True
            )

        input_ids = encoded_sent.get('input_ids')
        attention_mask = encoded_sent.get('attention_mask')
        input_ids = torch.tensor(input_ids)
        attention_mask = torch.tensor(attention_mask)

        label = torch.tensor(self.df['label'][idx])

        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'label': label}

In [7]:
train_dataset = LegalDataset(train_df, tokenizer)
test_dataset = LegalDataset(test_df, tokenizer)
hp_train_dataset = LegalDataset(hp_train_df, tokenizer)
hp_test_dataset = LegalDataset(hp_test_df, tokenizer)

  0%|          | 0/123742 [00:00<?, ?it/s]

100%|██████████| 123742/123742 [00:09<00:00, 12660.57it/s]
100%|██████████| 17707/17707 [00:00<00:00, 31838.64it/s]
100%|██████████| 12374/12374 [00:00<00:00, 36785.65it/s]
100%|██████████| 1771/1771 [00:00<00:00, 33772.74it/s]


In [8]:
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("f1")

Downloading builder script: 4.20kB [00:00, 3.62MB/s]
Downloading builder script: 6.79kB [00:00, 4.45MB/s]


In [9]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = metric1.compute(predictions=predictions, references=labels)
    f1 = metric2.compute(predictions=predictions, references=labels, average="micro")
    return {'accuracy': accuracy["accuracy"], 'f1-score': f1["f1"]}

In [10]:
def my_hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "weight_decay":trial.suggest_float("weight_decay", 0.005, 0.05),
        "adam_beta1":trial.suggest_float("adam_beta1", 0.75, 0.95),
        "adam_beta2":trial.suggest_float("adam_beta2", 0.99, 0.9999),
        "adam_epsilon":trial.suggest_float("adam_epsilon", 1e-9, 1e-7, log=True)
    }

In [11]:
training_args = TrainingArguments(
    output_dir='htf2_results',          # output directory
    num_train_epochs=5,            # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,               # number of warmup steps for learning rate scheduler
    weight_decay=0.01,              # strength of weight decay
    logging_dir='htf2_logs',           # directory for storing logs
    eval_strategy="epoch",
    logging_steps=250,
    save_strategy='epoch',
    save_total_limit = 1,
    learning_rate = 0.00001,
    load_best_model_at_end=True,
    metric_for_best_model ="eval_f1-score",
)

In [12]:
trainer = Trainer(
    model_init=model_init,                        # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=hp_train_dataset,         # training dataset
    eval_dataset=hp_test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

  trainer = Trainer(
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
best_run = trainer.hyperparameter_search(n_trials=10,direction="maximize",hp_space=my_hp_space)

[I 2025-12-15 12:12:52,738] A new study created in memory with name: no-name-12b64b59-26db-492a-af9e-3fb6c1e68041
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6759,0.660075,0.627894,0.627894
2,0.6754,0.659552,0.627894,0.627894
3,0.5932,0.588971,0.696217,0.696217
4,0.5451,0.528042,0.748165,0.748165
5,0.4981,0.516058,0.757199,0.757199


[I 2025-12-15 13:24:51,752] Trial 0 finished with value: 1.5143986448334275 and parameters: {'learning_rate': 4.643981250325776e-05, 'weight_decay': 0.03809725020772307, 'adam_beta1': 0.8178722055669438, 'adam_beta2': 0.9970260800287583, 'adam_epsilon': 7.458155533648117e-09}. Best is trial 0 with value: 1.5143986448334275.
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6309,0.629781,0.645963,0.645963
2,0.6031,0.58994,0.669113,0.669113
3,0.55,0.560965,0.712592,0.712592
4,0.541,0.557136,0.710898,0.710898
5,0.5233,0.559705,0.715415,0.715415


[I 2025-12-15 14:47:35,658] Trial 1 finished with value: 1.4308300395256917 and parameters: {'learning_rate': 3.366363048523265e-06, 'weight_decay': 0.013682001577772673, 'adam_beta1': 0.7667682000952083, 'adam_beta2': 0.9947310374831695, 'adam_epsilon': 1.4881366673767326e-08}. Best is trial 0 with value: 1.5143986448334275.
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1-score
1,0.6316,0.609807,0.661773,0.661773
2,0.5951,0.596238,0.676454,0.676454
3,0.5502,0.577847,0.694523,0.694523
4,0.5573,0.571915,0.702428,0.702428


[W 2025-12-15 15:53:08,086] Trial 2 failed with parameters: {'learning_rate': 2.663993729992551e-06, 'weight_decay': 0.03363998169624114, 'adam_beta1': 0.813870886698232, 'adam_beta2': 0.992291282263385, 'adam_epsilon': 7.216750511119966e-09} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/amlan/legal/joshi/bail/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
  File "/home/amlan/legal/joshi/bail/.venv/lib/python3.10/site-packages/transformers/integrations/integration_utils.py", line 277, in _objective
    trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
  File "/home/amlan/legal/joshi/bail/.venv/lib/python3.10/site-packages/transformers/trainer.py", line 2325, in train
    return inner_training_loop(
  File "/home/amlan/legal/joshi/bail/.venv/lib/python3.10/site-packages/transformers/trainer.py", line 2679, in _inner_training_loop
    and (torch.is

KeyboardInterrupt: 

In [None]:
print("Best HyperParameters")

Best HyperParameters


In [18]:
print(best_run)

NameError: name 'best_run' is not defined

In [None]:
del trainer
del training_args
import gc
gc.collect()

In [None]:
print("Starting Training...")

In [None]:
training_args = TrainingArguments(
    output_dir='/scratch/username/tf2_results',          # output directory
    num_train_epochs=15,            # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,               # number of warmup steps for learning rate scheduler
    weight_decay=0.01,              # strength of weight decay
    logging_dir='/scratch/username/tf2_logs',           # directory for storing logs
    evaluation_strategy="epoch",
    logging_steps=250,
    save_strategy='epoch',
    save_total_limit = 1,
    learning_rate = 0.00001,
    load_best_model_at_end=True,
    metric_for_best_model ="eval_f1-score",
)

In [None]:
trainer = Trainer(
    model_init=model_init,                        # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

In [None]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)
print(trainer.args)
trainer.train()

In [None]:
trainer.save_model("/home2/username/legal-tech/tfidf_sum+indic-ad")