In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
import os

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from datasets import Dataset
import json

def load_dataset(jsonl_file, max_samples=500):
    with open(jsonl_file, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f][:max_samples]

    prompt_template = "### Instruction: Summarize the following legal text.\n\n### Input:\n{input}\n\n### Response:\n{output}"
    samples = []

    for item in data:
        input_text = item['judgement'].strip()[:10000]
        output_text = item['summary'].strip()
        full_prompt = prompt_template.format(input=input_text, output=output_text)
        samples.append(full_prompt)

    return Dataset.from_dict({"text": samples})

train_path =r"processed-IN-Abs\train-data\full_summaries.jsonl"
train_dataset = load_dataset(train_path, max_samples=500)


In [4]:
len(train_dataset)

500

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)


In [6]:
from peft import LoraConfig, get_peft_model, TaskType

peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)


In [13]:
training_args = TrainingArguments(
    output_dir="output-lora-latest",
    num_train_epochs=5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    save_strategy="steps",
    save_steps=500,
    logging_steps=50,
    report_to="tensorboard",
    remove_unused_columns=False
)

In [14]:
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    peft_config=peft_config
)

Converting train dataset to ChatML: 100%|██████████| 500/500 [00:00<00:00, 45452.91 examples/s]
Adding EOS to train dataset: 100%|██████████| 500/500 [00:00<00:00, 26311.42 examples/s]
Tokenizing train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2763 > 2048). Running this sequence through the model will result in indexing errors
Tokenizing train dataset: 100%|██████████| 500/500 [00:02<00:00, 222.04 examples/s]
Truncating train dataset: 100%|██████████| 500/500 [00:00<00:00, 100035.87 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [15]:
trainer.train()

Step,Training Loss
50,1.7815
100,1.6555
150,1.6113
200,1.6035
250,1.5785
300,1.5807


TrainOutput(global_step=310, training_loss=1.6333228541958718, metrics={'train_runtime': 6316.388, 'train_samples_per_second': 0.396, 'train_steps_per_second': 0.049, 'total_flos': 1.5669894180962304e+16, 'train_loss': 1.6333228541958718})

In [16]:
model.save_pretrained("output-lora-latest")
tokenizer.save_pretrained("output-lora-latest")

('output-lora-latest\\tokenizer_config.json',
 'output-lora-latest\\special_tokens_map.json',
 'output-lora-latest\\tokenizer.json')

In [1]:
import os
import json
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import evaluate
from tqdm import tqdm

# Load your fine-tuned model and tokenizer
model_dir = "output-lora-latest"  # <-- update with your saved model path
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Load ROUGE and BLEU metrics from 'evaluate' library
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

# Load and preprocess test dataset from jsonl, with optional max_samples limit
def load_test_dataset(jsonl_file, max_input_length=1024, max_samples=None):
    system_prompt = "Summarize the following legal text."
    inputs = []
    references = []
    with open(jsonl_file, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if max_samples and i >= max_samples:
                break
            item = json.loads(line)
            input_text = f"""### Instruction: {system_prompt}

### Input:
{item['judgement'].strip()[:max_input_length]}

### Response:"""
            inputs.append(input_text)
            references.append(item['summary'].strip())
    return inputs, references

# Path to your test set jsonl file
test_jsonl_path = r"processed-IN-Abs\test-data\full_summaries.jsonl"  # <-- replace with your path

# Load only first 10 examples (change or set None for full test set)
test_inputs, test_references = load_test_dataset(test_jsonl_path, max_samples=10)

# Function to generate summary from input text
def generate_summary(text, max_new_tokens=256):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048).to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # generate tokens beyond input length
            do_sample=False,  # greedy decoding
            pad_token_id=tokenizer.eos_token_id
        )
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

# Generate predictions and collect references for metrics
predictions = []
references_for_bleu = []

start_time = time.time()

for inp, ref in tqdm(zip(test_inputs, test_references), total=len(test_inputs), desc="Inference"):
    pred = generate_summary(inp)
    predictions.append(pred)
    references_for_bleu.append([ref.split()])  # BLEU expects list of tokenized references

inference_time = time.time() - start_time

# Compute ROUGE (expects raw strings)
rouge_result = rouge.compute(predictions=predictions, references=test_references)

# Compute BLEU (expects tokenized predictions and references)
tokenized_preds = [pred.split() for pred in predictions]
bleu_result = bleu.compute(predictions=tokenized_preds, references=references_for_bleu)

print(f"Inference time for {len(test_inputs)} samples: {inference_time:.2f} seconds")
print("\nROUGE scores:")
for key, value in rouge_result.items():
    print(f"  {key}: {value.mid.fmeasure:.4f}")

print(f"\nBLEU score: {bleu_result['bleu']:.4f}")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at output-lora-latest were not used when initializing LlamaForCausalLM: ['model.layers.0.self_attn.q_proj.base_layer.weight', 'model.layers.0.self_attn.q_proj.lora_A.default.weight', 'model.layers.0.self_attn.q_proj.lora_B.default.weight', 'model.layers.0.self_attn.v_proj.base_layer.weight', 'model.layers.0.self_attn.v_proj.lora_A.default.weight', 'model.layers.0.self_attn.v_proj.lora_B.default.weight', 'model.layers.1.self_attn.q_proj.base_layer.weight', 'model.layers.1.self_attn.q_proj.lora_A.default.weight', 'model.layers.1.self_attn.q_proj.lora_B.default.weight', 'model.layers.1.self_attn.v_proj.base_layer.weight', 'model.layers.1.self_attn.v_proj.lora_A.default.weight', 'model.layers.1.self_attn.v_proj.lora_B.default.weight', 'model.layers.10.self_attn.q_proj.base_layer.weight', 'model.layers.10.self_attn.q_proj.lora_A.default.weight', 'model.layers.10.self_attn.q_proj.lora_B.default.weight', 'm

ValueError: Predictions and/or references don't match the expected format.
Expected format:
Feature option 0: {'predictions': Value(dtype='string', id='sequence'), 'references': Sequence(feature=Value(dtype='string', id='sequence'), length=-1, id='references')}
Feature option 1: {'predictions': Value(dtype='string', id='sequence'), 'references': Value(dtype='string', id='sequence')},
Input predictions: ['###', 'Instruction:', 'Summarize', ..., 'appel', '###', 'Response:blattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblattblatt'],
Input references: [['The', 'appellants', 'who', 'are', 'displaced', 'persons', 'from', 'West', 'Pakistan,', 'were', 'granted', 'quasi', 'permanent', 'allotment', 'of', 'some', 'lands', 'in', 'village', 'Raikot', 'in', '1949.', 'On', 'October', '31,', '1952,', 'the', 'Assistant', 'Custodian', 'cancelled', 'the', 'allotment', 'of', '14', 'allottees', 'in', 'village', 'Karodian,', 'and', 'also', 'cancelled', 'the', 'allotment', 'of', 'the', 'Appellants', 'in', 'Raikot', 'but', 'allotted', 'lands', 'to', 'them', 'in', 'village', 'Karodian,', 'and', 'allotted', 'the', 'lands', 'of', 'Raikot', 'to', 'other', 'persons.', 'The', '14', 'allottees', 'of', 'village', 'Karodian', 'as', 'well', 'as', 'the', 'appellants', 'applied', 'for', 'review', 'of', 'the', 'orders', 'of', 'cancellation', 'of', 'their', 'allotment.', 'The', 'application', 'of', 'the', '14', 'allottees', 'was', 'dismissed.', 'They', 'preferred', 'a', 'revision', 'to', 'the', 'Custodian', 'General', 'who', 'cancelled', 'the', 'appellant', "'s", 'allotment', '(1)', 'Cal.', '926.', '329', 'in', 'Karodian', 'and', 'restored', 'the', 'allotment', 'of', 'the', '14', 'allottees', 'on', 'December', '17,', '1954', 'Thereupon,,', 'on', 'January', '6,', '1955,', 'the', 'appellants', 'moved', 'the', 'Custodian', 'General', 'for', 'calling', 'up', 'their', 'review', 'application', 'and', 'for', 'revising', 'the', 'order', 'of', 'October', '31,', '1952,', 'cancelling', 'their', 'allotment', 'in', 'Raikot.', 'The', 'Custodian', 'General', 'refused', 'to', 'revise', 'the', 'order', 'on', 'the', 'ground', 'that', 'his', 'power', 'to', 'revise', 'had', 'been', 'taken', 'away', 'by', 'the', '.', 'The', 'appellants', 'contended', 'that', 'the,', 'Custodian', 'General', 'had', 'the', 'power', 'to', 'revise', 'the', 'order.', 'Held,', 'that', 'after', 'the', 'enactment', 'of', 'the,', ',', 'the', 'Custodian', 'General', 'ceased', 'to', 'have', 'the', 'power', 'to', 'cancel', 'allotments.', 'By,', 'the', 'issuing', 'of', 'a', 'notification', 'under,', 'section', '12(1)', 'of', 'this', 'Act,', 'the', 'Fight,', 'title', 'or', 'interest', 'of', 'the', 'evacuee', 'in', 'the', 'property', 'specified', 'in', 'the', 'notification', 'was', 'extinguished', 'and', 'the', 'property', 'vested', 'absolutely', 'in', 'the', 'Central.', 'Government.', 'The', 'right', 'of', 'the', 'Custodian', 'manage', 'the', 'property', 'under', 'the', ',', 'came', 'to', 'an', 'end', 'and', 'the', 'management', 'vested', 'in', 'a', 'new', 'set', 'of', 'officers.', 'Even', 'though', 'no', 'managing', 'officer', 'was', 'appointed', 'or', 'a', 'managing', 'corporation,', 'constituted', 'under', 'the', 'new', 'Act', 'to', 'manage', 'the', 'property', 'no', 'one', 'else', 'could', "'exercise", 'the', 'power', 'of', 'cancellation', 'of', 'allotment.', 'Bal', 'Mukund', 'vs', 'The', 'State', 'of', 'Punjab,', 'I.L.R.', '1957', 'Punj.', '712,', 'approved.']]

In [5]:
import os
import json
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import evaluate
from tqdm import tqdm

In [7]:
# Step 1: Load LoRA configuration to get base model name
peft_model_path = "output-lora-latest\checkpoint-310"  # <-- change to your LoRA output directory
config = PeftConfig.from_pretrained(peft_model_path)

In [8]:
# Step 2: Load base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)


In [9]:
# Step 3: Merge LoRA weights into the base model
model = PeftModel.from_pretrained(base_model, peft_model_path)
model = model.merge_and_unload()  # Important for correct weights
model.eval()


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb): 

In [10]:
# Step 4: Move model to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb): 

In [11]:
# Step 5: Load metrics
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

In [12]:
def load_test_dataset(jsonl_file, max_input_length=1024, max_samples=None):
    system_prompt = "Summarize the following legal text."
    inputs, references = [], []

    with open(jsonl_file, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if max_samples and i >= max_samples:
                break
            item = json.loads(line)
            judgement = item["judgement"].strip()[:max_input_length]
            summary = item["summary"].strip()
            prompt = f"""### Instruction: {system_prompt}

### Input:
{judgement}

### Response:"""
            inputs.append(prompt)
            references.append(summary)
    return inputs, references

In [13]:
# Step 7: Set test file path
test_jsonl_path = r"processed-IN-Abs/test-data/full_summaries.jsonl"  # <-- update path

In [14]:
# Load only first 10 examples for quick evaluation (set to None for full test)
test_inputs, test_references = load_test_dataset(test_jsonl_path, max_samples=10)

In [15]:
# Step 8: Generate summary
def generate_summary(text, max_new_tokens=256):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048).to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return output_text


In [16]:
# Step 9: Inference loop
predictions = []
references_for_bleu = []

In [17]:
start_time = time.time()
for inp, ref in tqdm(zip(test_inputs, test_references), total=len(test_inputs), desc="Running Inference"):
    pred = generate_summary(inp)
    predictions.append(pred)
    references_for_bleu.append([ref.split()])  # BLEU expects tokenized reference list

inference_time = time.time() - start_time


Running Inference: 100%|██████████| 10/10 [02:13<00:00, 13.37s/it]


In [18]:
pred

'### Instruction: Summarize the following legal text.\n\n### Input:\nAppeal No. 251 of 1963.\nAppeal by special leave from the judgment and order dated March 20, 1957, of the Patna High Court in Civil Revision No. 40 of 1956.\nM. C. Setalvad, and R. C. Prasad, for the appellants.\nThe respondent did not appear.\nMarch 24, 1964.\nThe short question which arises in this appeal is whether the term "wages" as defined by section 2(vi) of the (No. 4 of 1936) (hereinafter called \'the Act \') includes wages fixed by an award in an industrial dispute between the employer and his employees.\nThis question has to be answered in the light of the definition prescribed by section 2(vi) before it was amended in 1958.\nThe subsequent amendment expressly provides by section 2(vi) (a) that any remuneration payable under any award or settlement between the parties or order of a Court, would be included in the main definition under section 2(vi).\nThe point which we have to decide in the present appeal i

In [19]:
# Step 10: Evaluate with ROUGE
rouge_result = rouge.compute(predictions=predictions, references=test_references)

In [21]:
print(f"\n🕒 Inference time for {len(test_inputs)} samples: {inference_time:.2f} seconds")
print("\n📊 ROUGE scores:")
for key, value in rouge_result.items():
    print(f"  {key}: {value:.4f}")


🕒 Inference time for 10 samples: 133.75 seconds

📊 ROUGE scores:
  rouge1: 0.3469
  rouge2: 0.0858
  rougeL: 0.1819
  rougeLsum: 0.3182
