In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from transformers import Trainer, TrainingArguments
import time

In [3]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Only the tokenizer is loaded in this cell. The previous version also loaded a
# full float16 copy of the model here, but that object was never used: `model`
# is re-assigned by the 4-bit quantized `from_pretrained` call further down
# before anything reads it. Dropping the extra load avoids holding two copies
# of the weights on the device at the same time.
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [5]:
# Quantization settings for QLoRA: 4-bit NF4 weights with double quantization
# enabled and float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

In [6]:
# Load the base checkpoint with the 4-bit settings from `bnb_config` applied;
# device placement is delegated to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,  # if model needs it
    quantization_config=bnb_config,
)

In [7]:
# Get the quantized base model ready for adapter training.
# NOTE: this cell rebinds `model`, so re-running it applies the preparation
# to an already-prepared model.
model = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=True,  # library default, spelled out for readers
)

In [8]:
# LoRA adapter settings: rank-16 adapters (alpha 32, dropout 0.05) attached to
# the modules named `q_proj` and `v_proj`; no bias parameters are trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],  # common for LLaMA-like models
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [9]:
# Wrap the prepared base model with the adapters described by `lora_config`.
# From here on `model` refers to the PEFT-wrapped model.
model = get_peft_model(model=model, peft_config=lora_config)

In [10]:
def preprocess(example):
    """Tokenize one judgement/summary pair for causal-LM fine-tuning.

    The prompt and the reference summary are joined into a single token
    sequence. ``labels`` has the same length as ``input_ids``: prompt positions
    are set to ``-100`` (the index ignored by the loss) and summary positions
    carry the summary token ids, so only the summary contributes to the loss.

    The previous version returned the prompt as ``input_ids`` and the summary
    alone as ``labels``; those two sequences have different lengths and cannot
    be used together for a decoder-only model. It also relied on the deprecated
    ``tokenizer.as_target_tokenizer()`` context manager.

    Args:
        example: Mapping with string fields ``"judgement"`` and ``"summary"``.

    Returns:
        The tokenizer output for the prompt, extended in place with the summary
        tokens and a ``"labels"`` entry.
    """
    max_context = 2048  # overall token budget (the cap previously applied to the prompt)
    prompt = f"Summarize the following legal text:\n\n{example['judgement'][:4000]}\n\nSummary:"

    # Tokenize the target without special tokens so nothing like a second
    # beginning-of-sequence marker lands in the middle of the joined sequence.
    target_ids = tokenizer(
        example["summary"],
        truncation=True,
        max_length=512,
        add_special_tokens=False,
    )["input_ids"]
    # Terminate the target so the model can learn where a summary ends.
    if tokenizer.eos_token_id is not None:
        target_ids = target_ids + [tokenizer.eos_token_id]

    # Give the prompt whatever room is left so prompt + target fits the budget.
    inputs = tokenizer(prompt, truncation=True, max_length=max_context - len(target_ids))
    prompt_len = len(inputs["input_ids"])

    inputs["input_ids"] = inputs["input_ids"] + target_ids
    inputs["attention_mask"] = inputs["attention_mask"] + [1] * len(target_ids)
    inputs["labels"] = [-100] * prompt_len + target_ids
    return inputs

In [11]:
from datasets import Dataset
import json

def load_dataset(jsonl_file, max_samples=500, max_input_chars=10000):
    """Build an instruction-formatted text dataset from a JSONL file.

    Each non-blank line of ``jsonl_file`` must be a JSON object with string
    fields ``"judgement"`` and ``"summary"``. Every record is rendered into a
    single training string (instruction, input, response) and the strings are
    returned as the ``"text"`` column of a ``Dataset``.

    NOTE: this function shadows ``datasets.load_dataset`` imported at the top
    of the notebook; after this cell runs, the Hugging Face loader is no longer
    reachable under that name.

    NOTE(review): the tokenizer warning emitted while building the trainer
    reports a sample of 2763 tokens (> 2048). Because the response comes last
    in the template, truncation may remove part or all of the summary for long
    judgements -- consider lowering ``max_input_chars``; confirm against the
    trainer's effective maximum length.

    Args:
        jsonl_file: Path to the UTF-8 encoded JSONL file.
        max_samples: Maximum number of records to use. ``None`` uses all of
            them; a negative value keeps list-slice semantics (drops records
            from the end).
        max_input_chars: Character cap applied to the stripped judgement text.

    Returns:
        ``Dataset`` with one column, ``"text"``.
    """
    from itertools import islice  # local import keeps the cell self-contained

    prompt_template = "### Instruction: Summarize the following legal text.\n\n### Input:\n{input}\n\n### Response:\n{output}"

    with open(jsonl_file, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing newline at end of file) are skipped
        # instead of crashing json.loads.
        records = (json.loads(line) for line in f if line.strip())
        if max_samples is None or max_samples >= 0:
            # Stop reading as soon as enough records were parsed.
            data = list(islice(records, max_samples))
        else:
            data = list(records)[:max_samples]

    samples = [
        prompt_template.format(
            input=item["judgement"].strip()[:max_input_chars],
            output=item["summary"].strip(),
        )
        for item in data
    ]

    return Dataset.from_dict({"text": samples})

# Absolute path of the JSONL file with the judgement/summary records.
train_path = "/content/full_summaries.jsonl"

# `load_dataset` is the JSONL helper defined earlier in this cell, not the
# Hugging Face loader of the same name.
train_dataset = load_dataset(jsonl_file=train_path, max_samples=500)

In [12]:
# Sanity check: number of training examples that were loaded.
len(train_dataset)

500

In [18]:
!pip install trl

Collecting trl
  Downloading trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Collecting datasets>=3.0.0 (from trl)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=3.0.0->trl)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.17.0-py3-none-any.whl (348 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m348.0/348.0 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m33.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets, trl
  Attempting uninstall: fsspec
    Found existing installation: fsspe

In [1]:
from trl import SFTTrainer, SFTConfig

In [13]:
# Hyper-parameters for the fine-tuning run, grouped by concern.
# NOTE(review): the logged run finished at global_step=310, which is below
# save_steps=500, so no intermediate checkpoint would be written during
# training -- confirm whether that is intended.
training_args = TrainingArguments(
    # --- output / logging / checkpoints ---
    output_dir="output-lora-latest",
    report_to="tensorboard",
    logging_steps=50,
    save_strategy="steps",
    save_steps=500,
    # --- batching (1 sample per device, gradients accumulated over 8 steps) ---
    num_train_epochs=5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    group_by_length=True,
    remove_unused_columns=False,
    # --- optimizer and schedule ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # --- numerics ---
    fp16=True,
)

In [15]:
# `model` was already wrapped with the LoRA adapters by `get_peft_model`
# earlier in the notebook (the trainer log refers to it as
# `PeftModelForCausalLM`), so the adapter configuration is not passed a second
# time here. Supplying `peft_config` together with an already-wrapped model is
# redundant and makes it unclear which of the two places defines the adapters.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

Converting train dataset to ChatML:   0%|          | 0/500 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2763 > 2048). Running this sequence through the model will result in indexing errors


Truncating train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [16]:
# Run fine-tuning; the returned TrainOutput is shown as the cell result.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
50,1.7879
100,1.6559
150,1.6029
200,1.5908
250,1.5617
300,1.5614



Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in TinyLlama/TinyLlama-1.1B-Chat-v1.0.


TrainOutput(global_step=310, training_loss=1.6246154508283062, metrics={'train_runtime': 1836.163, 'train_samples_per_second': 1.362, 'train_steps_per_second': 0.169, 'total_flos': 1.5686937351880704e+16, 'train_loss': 1.6246154508283062})

In [17]:
# Persist the fine-tuned (PEFT-wrapped) model and the tokenizer into the same
# directory that was used as the trainer's `output_dir`, so both can be
# reloaded from one path. The tokenizer call is last so its list of written
# files is displayed as the cell output.
model.save_pretrained("output-lora-latest")
tokenizer.save_pretrained("output-lora-latest")


Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in TinyLlama/TinyLlama-1.1B-Chat-v1.0.


('output-lora-latest/tokenizer_config.json',
 'output-lora-latest/special_tokens_map.json',
 'output-lora-latest/tokenizer.model',
 'output-lora-latest/added_tokens.json',
 'output-lora-latest/tokenizer.json')

In [18]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_model_and_tokenizer(model_path):
    """Load a causal language model and its tokenizer from ``model_path``.

    Args:
        model_path: Directory or hub identifier understood by
            ``from_pretrained``.

    Returns:
        Tuple ``(model, tokenizer)``.
    """
    tok = AutoTokenizer.from_pretrained(model_path)
    model_kwargs = {"device_map": "auto", "trust_remote_code": True}
    lm = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)
    return lm, tok

def generate_summary(model, tokenizer, judgement_text, max_input_tokens=2048, max_new_tokens=512,
                     max_input_chars=4000):
    """Generate a summary for one judgement with beam search.

    Two defects of the previous version are fixed:

    * The prompt now uses the same ``### Instruction / ### Input /
      ### Response`` layout that the training data is rendered with, instead
      of a different "Summarize the following legal text ... Summary:" prompt
      the fine-tuned model never saw.
    * Only the newly generated tokens are decoded. ``generate`` returns the
      prompt tokens followed by the continuation, so decoding the whole
      sequence put the full prompt (including the judgement) into the
      "summary" and therefore into any metric computed on it.

    Args:
        model: Causal LM exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching ``model``.
        judgement_text: Raw judgement text to summarize.
        max_input_tokens: Token cap applied when tokenizing the prompt.
        max_new_tokens: Maximum number of tokens to generate.
        max_input_chars: Character cap applied to the stripped judgement text
            before it is placed in the prompt.

    Returns:
        The generated summary text, without the prompt, stripped of
        surrounding whitespace.
    """
    prompt = (
        "### Instruction: Summarize the following legal text.\n\n"
        f"### Input:\n{judgement_text.strip()[:max_input_chars]}\n\n"
        "### Response:\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(model.device)
    # Remember where the prompt ends so it can be cut from the output.
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            num_beams=4,
            early_stopping=True
        )

    generated_ids = outputs[0][prompt_len:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


In [None]:
try:
    from datasets import load_metric
except ImportError:
    # NOTE(review): recent `datasets` releases (the pip log in this notebook
    # installs 3.6.0) appear to no longer export `load_metric`. Keep the cell
    # importable and let `evaluate_model` report the problem when it is called
    # without an explicit metric.
    load_metric = None


def evaluate_model(model, tokenizer, test_dataset, num_samples=100, metric=None):
    """Score generated summaries against reference summaries.

    For the first ``num_samples`` records of ``test_dataset`` a summary is
    generated with ``generate_summary`` and collected together with the
    reference; the metric is then computed once over all pairs.

    Args:
        model: Model passed through to ``generate_summary``.
        tokenizer: Tokenizer passed through to ``generate_summary``.
        test_dataset: Indexable collection whose items provide ``"judgement"``
            and ``"summary"`` fields.
        num_samples: Upper bound on the number of records evaluated.
        metric: Optional metric object exposing
            ``compute(predictions=..., references=..., use_stemmer=...)``.
            When omitted, ``load_metric("rouge")`` is used if it is available.

    Returns:
        Whatever ``metric.compute`` returns for the collected pairs.

    Raises:
        ImportError: If no ``metric`` is given and ``load_metric`` could not
            be imported.
    """
    if metric is None:
        if load_metric is None:
            raise ImportError(
                "datasets.load_metric is unavailable in this environment; pass a "
                "ROUGE metric object via `metric=` (for example one created with "
                "evaluate.load('rouge'))."
            )
        metric = load_metric("rouge")

    predictions = []
    references = []

    sample_count = min(num_samples, len(test_dataset))
    for i in range(sample_count):
        record = test_dataset[i]
        predictions.append(generate_summary(model, tokenizer, record["judgement"]))
        references.append(record["summary"])

    return metric.compute(predictions=predictions, references=references, use_stemmer=True)
