In [None]:
# ✅ Install bitsandbytes (8-bit quantization support)
!pip install -U bitsandbytes
!pip install -U transformers accelerate peft


Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

Collecting transformers
  Downloading transformers-4.54.1-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.54.1-py3-none-any.whl (11.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m116.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.54.0
    Uninstalling transformers-4.54.0:
      Successfully uninstalled transformers-4.54.0
Successfully installed transformers-4.54.1


## ✅ Step 1: Load Model & Tokenizer with 8-bit Quantization

In [None]:

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Hub id of the checkpoint that the rest of the notebook fine-tunes.
base_model = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Request 8-bit weight loading through bitsandbytes.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code=True,
)

# `device_map="auto"` delegates device placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quant_config,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

## ✅ Step 2: Apply LoRA

In [None]:

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# The base model was loaded in 8-bit, so run PEFT's k-bit preparation step
# before attaching adapters: it freezes the base weights and readies the
# quantized model for training (by default this also turns on gradient
# checkpointing, trading some speed for memory).
model = prepare_model_for_kbit_training(model)

# Low-rank adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the model so that only the LoRA parameters are trainable, then report
# the trainable/total parameter counts as a sanity check.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 1,572,864 || all params: 1,348,044,800 || trainable%: 0.1167


## ✅ Step 3: Load Dataset

In [None]:

from datasets import load_dataset

# Read the local JSON Lines file with the `json` loader. Each record is expected
# to carry the `instruction` and `output` fields that the following cells read.
dataset = load_dataset("json", data_files="lawyer_data.jsonl")

def format_example(example):
    """Render one record as a single instruction/response string.

    Args:
        example: Mapping that provides ``instruction`` and ``output`` entries.

    Returns:
        A one-key dict whose ``text`` value is the instruction header, the
        instruction, the response header and the output, in that order.
    """
    template = "### Instruction:\n{instruction}\n\n### Response:\n{output}"
    rendered = template.format(
        instruction=example["instruction"],
        output=example["output"],
    )
    return {"text": rendered}

# Run format_example over every record; the dict it returns contributes a
# `text` field alongside the original columns.
dataset = dataset.map(format_example)


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/501 [00:00<?, ? examples/s]

## ✅ Step 4: Tokenize

In [None]:

def tokenize(example, max_length=512):
    """Tokenize one record into aligned ``input_ids``/``attention_mask``/``labels``.

    The prompt and the response are encoded together as one sequence, and the
    labels are derived from that same sequence so that every label sits at the
    position of the token it supervises. Prompt tokens and padding are set to
    ``-100`` so the loss is computed on the response only. The tokenizer's EOS
    token (when it has one) is appended to the response so the model also
    learns where an answer ends.

    Args:
        example: Mapping with ``instruction`` and ``output`` entries.
        max_length: Fixed sequence length to truncate/pad to (default 512).

    Returns:
        Dict with ``input_ids``, ``attention_mask`` and ``labels``, each a list
        of ``max_length`` integers.
    """
    prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n"
    response = example["output"]

    # Append EOS as text; fall back to nothing if the tokenizer defines none.
    eos = getattr(tokenizer, "eos_token", None) or ""
    encoded = tokenizer(
        prompt + response + eos,
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
    input_ids = list(encoded["input_ids"])
    attention_mask = list(encoded["attention_mask"])

    # Number of tokens the prompt occupies, measured without padding but with
    # the same special-token handling as the full encoding.
    # NOTE(review): assumes the prompt tokenizes to the same prefix on its own
    # as it does inside the joined text (it ends in a newline, which normally
    # prevents merges across the boundary) — confirm for this tokenizer.
    prompt_len = len(
        tokenizer(prompt, truncation=True, max_length=max_length)["input_ids"]
    )

    # Padding can sit on either side; locate the first real token through the
    # attention mask instead of comparing ids against pad_token_id (the pad id
    # may coincide with a token that legitimately appears in the text).
    first_real = attention_mask.index(1) if 1 in attention_mask else len(attention_mask)
    response_start = first_real + prompt_len

    labels = [
        token if (mask == 1 and position >= response_start) else -100
        for position, (token, mask) in enumerate(zip(input_ids, attention_mask))
    ]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

# Tokenize every record and drop all pre-existing columns of the train split,
# leaving only the fields that tokenize returns.
dataset = dataset.map(tokenize, remove_columns=dataset["train"].column_names)


Map:   0%|          | 0/501 [00:00<?, ? examples/s]

## ✅ Step 5: Setup Trainer

In [None]:

from transformers import TrainingArguments, Trainer

# Training hyper-parameters. `label_names` is set explicitly because Trainer
# cannot infer it for a PEFT-wrapped model (it warns and falls back to an
# empty list otherwise).
training_args = TrainingArguments(
    output_dir="deepseek_lawyer",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
    label_names=["labels"],
)

# Recent transformers releases deprecate the `tokenizer=` argument of Trainer
# in favour of `processing_class=`; pass the tokenizer through the new name.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    processing_class=tokenizer,
)


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


## ✅ Step 6: Train the Model

In [None]:
# Run fine-tuning with the Trainer configured in the previous step; the
# returned TrainOutput is displayed as the cell result.
trainer.train()

Step,Training Loss
10,3.3018
20,3.1797
30,2.6601
40,2.2169
50,1.841
60,1.4494
70,1.1594
80,0.8842
90,0.4871
100,0.1853


TrainOutput(global_step=753, training_loss=0.23294025037040658, metrics={'train_runtime': 418.8351, 'train_samples_per_second': 3.589, 'train_steps_per_second': 1.798, 'total_flos': 5919199400558592.0, 'train_loss': 0.23294025037040658, 'epoch': 3.0})

## ✅ Step 7: Save the Model

In [None]:

# Write the fine-tuned model and the tokenizer into the local `deepseek_lawyer`
# directory.
# NOTE(review): `model` is the PEFT-wrapped model at this point, so this
# presumably stores only the LoRA adapter files rather than full weights — confirm.
model.save_pretrained("deepseek_lawyer")
tokenizer.save_pretrained("deepseek_lawyer")


('deepseek_lawyer/tokenizer_config.json',
 'deepseek_lawyer/special_tokens_map.json',
 'deepseek_lawyer/chat_template.jinja',
 'deepseek_lawyer/tokenizer.json')

## ✅ Step 8: Inference Function for Chatbot

In [None]:

from transformers import AutoTokenizer, AutoModelForCausalLM

# Reload for inference: the tokenizer comes from the base checkpoint, the model
# from the local directory written by the save step.
tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/deepseek-coder-1.3b-instruct",
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "deepseek_lawyer",
    trust_remote_code=True,
)

def ask(question):
    """Generate an answer to ``question`` and print it.

    The question is wrapped in the instruction/response template, passed to
    ``model.generate`` (at most 200 new tokens), and only the newly generated
    tokens are decoded and printed.

    Args:
        question: The user's question as plain text.

    Returns:
        None. The answer is written to stdout.
    """
    prompt = f"### Instruction:\n{question}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # length instead of string-replacing the prompt in the decoded text: the
    # decoded prompt is not guaranteed to match the original string exactly,
    # and a textual replace would also delete any repeat of it in the answer.
    prompt_token_count = len(inputs["input_ids"][0])
    new_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    print("🤖 Lawyer Assistant:\n", response)

# Example: smoke-test the helper with a general question.
ask("what is law?")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


🤖 Lawyer Assistant:
 Law is a legal procedure or course that ensures the exercise of rights under a law. It includes procedures such as trial, pre-trial, post-trial, and custody. Laws can be legalized by various methods, including trial, pre-trial, post-trial, and custody.


In [None]:
# Second probe: a narrower legal topic.
ask("what is criminal law?")

🤖 Lawyer Assistant:
 Criminal law is a legal procedure that ensures the protection of a person from being criminalized. It includes procedures for detection, sentence preparation, and custody.


In [None]:
# Third probe: a domain-specific abbreviation.
ask("what is FIR?")

🤖 Lawyer Assistant:
 FIR stands for First Information Report. It is a document prepared by police when they receive information about a cognizable offense.


In [None]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m97.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [None]:
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM

# Location of the fine-tuned model; point this at "checkpoint-xxx" instead if
# that is your best checkpoint.
model_path = "deepseek_lawyer"

# Tokenizer is taken from the base checkpoint, the model from `model_path`.
tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/deepseek-coder-1.3b-instruct",
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# Chat function
def generate_response(user_input):
    """Return the model's answer to ``user_input``.

    The input is wrapped in the instruction/response template, passed to
    ``model.generate`` (at most 200 new tokens), and only the newly generated
    tokens are decoded.

    Args:
        user_input: The user's question as plain text.

    Returns:
        The decoded answer with surrounding whitespace stripped.
    """
    prompt = f"### Instruction:\n{user_input}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

    # The returned sequence starts with the prompt tokens. Drop them by length
    # rather than string-replacing the prompt in the decoded text: the decoded
    # prompt may not match the original string exactly, and a textual replace
    # would also remove any repeat of it inside the answer.
    prompt_token_count = len(inputs["input_ids"][0])
    new_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return response

# Streamlit UI: page chrome, a single text box, and the answer panel.
# NOTE: this is Streamlit app code. When executed as a notebook cell it only
# emits warnings (Streamlit asks to be launched with `streamlit run <script>`),
# so it needs to live in a script file to actually serve the page.
st.set_page_config(page_title="📜 Legal Chatbot - FIR Assistant", layout="centered")
st.title("🧑‍⚖️ FIR & Law Chatbot")
st.markdown("Ask me anything about FIRs, criminal law, or general legal procedure.")

# User input: free-text legal question.
user_query = st.text_input("📝 Enter your legal question here:")

# Only call the model once the user has typed something.
if user_query:
    # Show a spinner for the duration of generation.
    with st.spinner("Thinking like a lawyer..."):
        answer = generate_response(user_query)
    st.success("🤖 " + answer)


2025-07-29 18:59:43.207 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-07-29 18:59:43.243 Session state does not function when running a script without `streamlit run`


In [None]:
# Save the model and tokenizer under the Google Drive folder.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount
# cell is visible in this notebook; confirm before running on a fresh runtime.
model.save_pretrained("/content/drive/MyDrive/deepseek_lawyer")
tokenizer.save_pretrained("/content/drive/MyDrive/deepseek_lawyer")


('/content/drive/MyDrive/deepseek_lawyer/tokenizer_config.json',
 '/content/drive/MyDrive/deepseek_lawyer/special_tokens_map.json',
 '/content/drive/MyDrive/deepseek_lawyer/chat_template.jinja',
 '/content/drive/MyDrive/deepseek_lawyer/tokenizer.json')

In [None]:
from transformers import AutoConfig

# Fetch the configuration of the base checkpoint from the Hub.
base_model = "deepseek-ai/deepseek-coder-1.3b-instruct"
config = AutoConfig.from_pretrained(base_model)

# Write that config next to the fine-tuned files so the folder carries a
# config.json of its own.
# NOTE(review): this targets /content/deepseek_lawyer, presumably the same
# folder as the relative "deepseek_lawyer" used earlier — confirm the working
# directory is /content.
config.save_pretrained("/content/deepseek_lawyer")  # or your path


In [None]:
# Recursively pack the Drive copy of the model folder into a zip archive stored
# alongside it.
!zip -r /content/drive/MyDrive/deepseek_lawyer.zip /content/drive/MyDrive/deepseek_lawyer


  adding: content/drive/MyDrive/deepseek_lawyer/ (stored 0%)
  adding: content/drive/MyDrive/deepseek_lawyer/generation_config.json (deflated 21%)
  adding: content/drive/MyDrive/deepseek_lawyer/special_tokens_map.json (deflated 69%)
  adding: content/drive/MyDrive/deepseek_lawyer/config.json (deflated 50%)
  adding: content/drive/MyDrive/deepseek_lawyer/adapter_model.safetensors (deflated 8%)
  adding: content/drive/MyDrive/deepseek_lawyer/tokenizer_config.json (deflated 88%)
  adding: content/drive/MyDrive/deepseek_lawyer/tokenizer.json (deflated 81%)
  adding: content/drive/MyDrive/deepseek_lawyer/adapter_config.json (deflated 55%)
  adding: content/drive/MyDrive/deepseek_lawyer/chat_template.jinja (deflated 57%)


In [None]:
# Save model, tokenizer and the base-model config together into the Drive
# folder, so the Drive copy also contains the config file.
# NOTE(review): relies on `config` from the AutoConfig cell and on Drive being
# mounted at /content/drive — neither is established in this cell.
model.save_pretrained("/content/drive/MyDrive/deepseek_lawyer")
tokenizer.save_pretrained("/content/drive/MyDrive/deepseek_lawyer")
config.save_pretrained("/content/drive/MyDrive/deepseek_lawyer")