Phase 6 — Deploy Your LawBot


In [1]:
# 🧰 Step 1 — Install runtime dependencies
!pip install -U gradio==4.37.2 langchain==1.0.4 langchain-community==0.4.1 \
               langchain-huggingface==1.0.1 faiss-cpu sentence-transformers \
               transformers accelerate --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.7/93.7 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.9/469.9 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.8/156.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Step 2 : Load Model + FAISS Retriever

In [8]:
!ls -lh /content/drive/MyDrive/LawBot_Project


ls: cannot access '/content/drive/MyDrive/LawBot_Project': No such file or directory


In [9]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [11]:
!ls -lh /content/drive/MyDrive/LawBot_Project


total 4.0M
drwx------ 2 root root 4.0K Nov  7 03:38 LawBot_Adapter
drwx------ 2 root root 4.0K Nov  7 05:00 LawBot_Adapter_Converted
drwx------ 2 root root 4.0K Nov  7 03:59 LawBot_Adapter_Final
drwx------ 2 root root 4.0K Nov  7 04:34 LawBot_FAISS_Index
-rw------- 1 root root 3.2M Nov  7 03:28 lawbot_train.jsonl
-rw------- 1 root root 791K Nov  7 03:28 lawbot_val.jsonl


In [12]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch, os

# ✅ Use your merged model folder
model_dir = "/content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted"

# Verify the folder exists
if not os.path.exists(model_dir):
    raise FileNotFoundError(f"❌ Model path not found: {model_dir}")
else:
    print("✅ Local model folder verified!")

# Load the model locally (not from Hugging Face Hub)
print("🧠 Loading fully merged LawBot model (local mode)...")
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_dir,
    local_files_only=True,
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_dir,
    local_files_only=True,
    trust_remote_code=True,
)

# Move model to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()

print(f"\n✅ LawBot model loaded successfully on {device}!")


✅ Local model folder verified!
🧠 Loading fully merged LawBot model (local mode)...


OSError: /content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted does not appear to have a file named configuration_phi3.py. Checkout 'https://huggingface.co//content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted/tree/main' for available files.

In [16]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from safetensors.torch import load_file
import torch, os

base_model = "microsoft/Phi-3-mini-4k-instruct"
adapter_path = "/content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted"

print("🧠 Loading Phi-3 base model...")
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.bfloat16,
    device_map="auto",          # ✅ Let accelerate manage placement
)

print("🔗 Loading fine-tuned LawBot adapter weights...")
safetensor_files = [f for f in os.listdir(adapter_path) if f.endswith(".safetensors")]
for file in safetensor_files:
    path = os.path.join(adapter_path, file)
    print(f"Loading weights from: {file}")
    state_dict = load_file(path)
    model.load_state_dict(state_dict, strict=False)

print("✅ All safetensor weights loaded successfully!")

tokenizer = AutoTokenizer.from_pretrained(base_model)

# ⚡ DO NOT call model.to(device); accelerate already manages this
model.eval()

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"\n✅ LawBot model (Phi-3 + fine-tuned weights) loaded successfully using Accelerate on {device}!")


🧠 Loading Phi-3 base model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



🔗 Loading fine-tuned LawBot adapter weights...
Loading weights from: model-00001-of-00004.safetensors




Loading weights from: model-00002-of-00004.safetensors




Loading weights from: model-00004-of-00004.safetensors
✅ All safetensor weights loaded successfully!

✅ LawBot model (Phi-3 + fine-tuned weights) loaded successfully using Accelerate on cpu!


Deploy LawBot using Gradio

In [1]:
import torch
print("Device:", "GPU" if torch.cuda.is_available() else "CPU")
!nvidia-smi


Device: GPU
Fri Nov  7 06:05:29 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   35C    P0             43W /  400W |       5MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                    

In [7]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [9]:
!ls /content/drive/MyDrive/LawBot_Project


LawBot_Adapter		  LawBot_Adapter_Final	lawbot_train.jsonl
LawBot_Adapter_Converted  LawBot_FAISS_Index	lawbot_val.jsonl


In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch, os

# Paths
base_model = "microsoft/Phi-3-mini-4k-instruct"
adapter_path = "/content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted"

# Step 1: Load the base Phi-3 model
print("🧠 Loading base Phi-3 model...")
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

# Step 2: Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Step 3: Load your adapter weights (fine-tuned)
print("🔗 Loading fine-tuned LawBot adapter weights...")
from safetensors.torch import load_file
import glob

safetensors_files = glob.glob(os.path.join(adapter_path, "*.safetensors"))
if not safetensors_files:
    raise FileNotFoundError("❌ No safetensor files found in your adapter folder!")

for safetensor_file in safetensors_files:
    print(f"Loading: {safetensor_file}")
    state_dict = load_file(safetensor_file)
    missing, unexpected = model.load_state_dict(state_dict, strict=False)
    print(f"✅ Loaded {len(state_dict)} parameters from {os.path.basename(safetensor_file)}")

print("✅ All adapter weights loaded successfully!")

# Step 4: Move model to device
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

print(f"\n✅ LawBot fine-tuned model (Phi-3 + adapter) ready on {device}!")


🧠 Loading base Phi-3 model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

configuration_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`torch_dtype` is deprecated! Use `dtype` instead!


modeling_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

🔗 Loading fine-tuned LawBot adapter weights...
Loading: /content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted/model-00001-of-00004.safetensors
✅ Loaded 62 parameters from model-00001-of-00004.safetensors
Loading: /content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted/model-00002-of-00004.safetensors
✅ Loaded 66 parameters from model-00002-of-00004.safetensors
Loading: /content/drive/MyDrive/LawBot_Project/LawBot_Adapter_Converted/model-00004-of-00004.safetensors
✅ Loaded 1 parameters from model-00004-of-00004.safetensors
✅ All adapter weights loaded successfully!

✅ LawBot fine-tuned model (Phi-3 + adapter) ready on cuda!


In [15]:
import gradio as gr
import torch

def lawbot_chat(question, history):
    try:
        prompt = f"LawBot (Indian Legal Assistant)\nQuestion: {question}\nAnswer:"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=300,
                temperature=0.3,
                top_p=0.9,
                repetition_penalty=1.1,
                do_sample=True,
                use_cache=False  # ✅ FIX for DynamicCache issue
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if "Answer:" in response:
            response = response.split("Answer:")[-1].strip()
        return response or "⚖️ No relevant legal section found."

    except Exception as e:
        return f"⚠️ Internal error: {str(e)}"

# Launch Gradio interface again
chatbot = gr.ChatInterface(
    fn=lawbot_chat,
    title="⚖️ LawBot – Indian Legal Q&A Assistant",
    description="Ask questions about IPC, CrPC, and the Indian Constitution.",
    theme="soft",
)

chatbot.launch(share=True)


  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0a3284f57b02ae9dae.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


