### Fine tuning model

In [None]:
from huggingface_hub import login
login()

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, DatasetDict

# Enable 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

# Load TinyLlama model with 4-bit quantization
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Ensure padding token is set

In [None]:
# Load dataset
dataset = load_dataset("json", data_files="/kaggle/input/legal/train.jsonl")

# Split into train & validation sets
dataset_split = dataset["train"].train_test_split(test_size=0.1)
dataset = DatasetDict({"train": dataset_split["train"], "validation": dataset_split["test"]})

# Clean text
import re

def clean_text(text):
    """Removes special characters and extra whitespace from text."""
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
    text = re.sub(r"\s+", " ", text).strip()  # Remove extra whitespace
    return text

# Tokenization function
def tokenize_function(examples):
    """Tokenizes, cleans, and processes dataset."""
    cleaned_instructions = [clean_text(instr) for instr in examples["Instruction"]]
    cleaned_responses = [clean_text(resp) for resp in examples["Response"]]
    texts = [instr + "\n" + resp for instr, resp in zip(cleaned_instructions, cleaned_responses)]

    # Tokenize text
    inputs = tokenizer(texts, truncation=True, padding="max_length", max_length=512)
    inputs["labels"] = inputs["input_ids"].copy()

    # Replace padding token ID with -100 (ignored in loss computation)
    pad_token_id = tokenizer.pad_token_id
    inputs["labels"] = [
        [label if label != pad_token_id else -100 for label in label_seq]
        for label_seq in inputs["labels"]
    ]

    return inputs

# Apply tokenization
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Remove original columns & set format for PyTorch
tokenized_datasets = tokenized_datasets.remove_columns(["Instruction", "Response"])
tokenized_datasets.set_format("torch")

In [None]:
# Define LoRA configuration
lora_config = LoraConfig(
    r=8,  # LoRA rank
    lora_alpha=16,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Apply LoRA to attention layers
    lora_dropout=0.05,  # Dropout for regularization
    bias="none"
)

# Wrap TinyLlama with LoRA
from peft import get_peft_model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
training_args = TrainingArguments(
    output_dir="./tinyllama-legal-finetuned",
    per_device_train_batch_size=1,  # Reduce batch size to fit in GPU
    gradient_accumulation_steps=8,  # Simulates larger batch size
    learning_rate=2e-5,
    num_train_epochs=3,
    fp16=True,  # Mixed precision for speed
    save_total_limit=2,
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    report_to="none",
    remove_unused_columns=False,
    gradient_checkpointing=False  # Disable to avoid slowdown
)

In [None]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer
)

# Empty CUDA cache to free memory
torch.cuda.empty_cache()

# Start training
trainer.train()

In [None]:
model.save_pretrained("/kaggle/working/finetuned-legal-tinyllama")
tokenizer.save_pretrained("/kaggle/working/finetuned-legal-tinyllama")

## Inferencing the fine tuned model

In [29]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Path where you extracted the model
model_path = "/kaggle/input/tinyllama_finetuned_legal/transformers/default/1/kaggle/working/finetuned-legal-tinyllama"

# Load fine-tuned model
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token  # Set padding token if missing

In [None]:
def generate_response(instruction, max_length):
    """Generates a response for a given legal question."""
    
    # Tokenize input
    inputs = tokenizer(instruction, return_tensors="pt").to("cuda")

    # Generate output
    output = model.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,
        temperature=0.5,
        top_p=0.9
    )

    # Decode and return response
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example inference
query = "What are the legal rights of tenants in India?"
response = generate_response(query,max_length=200)
print("AI Response:", response)

In [None]:
query = "What is the punishment for theft and assault during theft?"
response = generate_response(query,max_length=150)
print("AI Response:", response)

In [None]:
query = "Explain the concept of locus standi in Indian courts."
response = generate_response(query,max_length=200)
print("AI Response:", response)

In [None]:
query = "What is the difference between a petition and a plaint in Indian law?"
response = generate_response(query,max_length=300)
print("AI Response:", response)

### Pushing to hugging face hub

In [2]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
from huggingface_hub import HfApi

api = HfApi()

# Push the model
api.upload_folder(
    folder_path="/kaggle/input/tinyllama_finetuned_legal/transformers/default/1/kaggle/working/finetuned-legal-tinyllama",  # Path to fine-tuned model
    repo_id="28p07/tinyllama-legal-finetuned",  # Change to your Hugging Face model repo
    repo_type="model"
)


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/4.52M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/28p07/tinyllama-legal-finetuned/commit/077755c0f3464fbe200460db958a3eaf622dfebe', commit_message='Upload folder using huggingface_hub', commit_description='', oid='077755c0f3464fbe200460db958a3eaf622dfebe', pr_url=None, repo_url=RepoUrl('https://huggingface.co/28p07/tinyllama-legal-finetuned', endpoint='https://huggingface.co', repo_type='model', repo_id='28p07/tinyllama-legal-finetuned'), pr_revision=None, pr_num=None)

### Inferencing from huggingface hub

In [1]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Define model IDs
base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Base model
adapter_model_id = "28p07/tinyllama-legal-finetuned"   # LoRA fine-tuned model

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Load base model
model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16, device_map="auto")

# Load LoRA adapter and merge with base model
model = PeftModel.from_pretrained(model, adapter_model_id)
model = model.merge_and_unload()  # Merging LoRA adapters into the base model

# Move model to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): 

In [19]:
def generate_response(instruction, max_length=200):
    """Generates a response for a given legal question."""
    
    # Tokenize input
    inputs = tokenizer(instruction, return_tensors="pt").to(device)

    # Generate output
    output = model.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,
        temperature=0.5,
        top_p=0.9
    )

    # Decode and return response
    return tokenizer.decode(output[0], skip_special_tokens=True)
    
# Example Legal Question
query = "What is the punishment for theft under IPC Section 378?"
response = generate_response(query)
print(response)

What is the punishment for theft under IPC Section 378?
IPC Section 378 provides for punishment for theft The punishment for theft under this section is imprisonment for life or with imprisonment for a term which may extend to seven years or with fine or with both The punishment is determined based on the value of the stolen property and the intention of the person who committed the offence If the value of the stolen property is less than Rs 500000 then the punishment is imprisonment for life or with imprisonment for a term which may extend to seven years or with fine or with both The punishment for theft under this section is a fine of Rs 50000 or imprisonment for a term which may extend to seven years or with fine or with both The punishment for theft under this section is a fine of Rs 50000 or


In [21]:
from deepmultilingualpunctuation import PunctuationModel
punctuation = PunctuationModel()

model.safetensors:   7%|7         | 157M/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/406 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



In [22]:
from nltk.tokenize import sent_tokenize

In [25]:
def generate_response(instruction, max_new_tokens=300):
    """Generates a response and restores punctuation."""
    
    # Tokenize input
    inputs = tokenizer(instruction, return_tensors="pt").to(device)

    # Generate output
    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.5,
        top_p=0.9
    )

    # Decode the generated text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    return generated_text

def final_response(instruction):
    text = generate_response(instruction)
    punctuated_text = punctuation.restore_punctuation(text)

    sentences = sent_tokenize(punctuated_text)
    
    final_text = ""
    for sentence in sentences[:5]:
        final_text+=sentence
    return final_text

In [26]:
# Example usage
instruction = "What is the difference between a petition and plaint?"
response = final_response(instruction)
print(response)

What is the difference between a petition and plaint?A petition is a written request for a court to take action on a matter that has been brought to the court s attention.A plaint is a written demand for a court to take action on a matter that has not been brought to the courts attention.2.The petition and plaint are different forms of legal documents that can be used to bring a legal action in a court of law.


In [41]:
instruction = "What is the condition for writing a writ petition?"
response = final_response(instruction)
print(response)

What is the condition for writing a writ petition?The condition for writing a writ petition is that it must be in writing and signed by the petitioner or their legal representative.This means that the petitioner cannot simply make a statement in writing and submit it as a written petition to the court.It requires a formal and legal process to be followed for filing a writ petition in court.Writ petitions are filed with the court by the petitioner or their legal representative to request a decision or action from the court on a specific issue or matter.


In [44]:
instruction = "Explain the concept of locus standi in Indian courts"
response = final_response(instruction)
print(response)

Explain the concept of locus standi in Indian courts.The concept of locus standi in Indian courts refers to the legal standing or authority of a person or an organization to participate in the judicial proceedings of the court.The locus standi is a legal term that means the right or ability of a person or an organization to participate in the judicial proceedings of the court.It is essential to have a locus standi to be able to file a case in a court of law or to participate in the proceedings of the court as a witness or a litigant.The concept of locus standi is important in ensuring that all individuals have equal access to the judicial system and that their voices are heard in the courtroom.
