## Fine tuning Process

In [47]:
!pip install transformers datasets torch evaluate peft langchain faiss-cpu pypdf sentence-transformers gradio torch langchain_community


Collecting langchain_community
  Downloading langchain_community-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.21.3-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.2.12-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (

In [27]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict
from peft import get_peft_model, LoraConfig, TaskType

In [25]:
# Load the dataset from Hugging Face
original_dataset = load_dataset("dzunggg/legal-qa-v1")

In [28]:
# Check if the dataset has a validation split
if "validation" not in original_dataset.keys():
    # If not, split the train set
    train_valid = original_dataset["train"].train_test_split(test_size=0.2, seed=42)
    valid_test = train_valid["test"].train_test_split(test_size=0.5, seed=42)

    dataset = DatasetDict({
        "train": train_valid["train"],
        "validation": valid_test["train"],
        "test": valid_test["test"]
    })
else:
    dataset = original_dataset

In [29]:
# Load TinyLlama model and tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [30]:
# Add special tokens if needed
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

In [31]:

# Configure LoRA
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1
)

In [32]:

# Wrap the model with LoRA
model = get_peft_model(model, peft_config)

In [33]:
# Preprocess the dataset
def preprocess_function(examples):
    questions = [q.strip() for q in examples["question"]]
    answers = examples["answer"]

    prompts = [f"Human: {q}\nAssistant: {a}" for q, a in zip(questions, answers)]

    inputs = tokenizer(prompts, truncation=True, padding="max_length", max_length=512, return_tensors="pt")
    inputs["labels"] = inputs["input_ids"].clone()

    return inputs

In [34]:
# Apply preprocessing to the dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)

Map:   0%|          | 0/2993 [00:00<?, ? examples/s]

Map:   0%|          | 0/374 [00:00<?, ? examples/s]

Map:   0%|          | 0/375 [00:00<?, ? examples/s]

In [35]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,  # Adjust based on your GPU memory
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=False,
    fp16=True,  # Enable mixed precision training
)



In [36]:
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)

In [37]:
# Fine-tune the model
trainer.train()

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


Epoch,Training Loss,Validation Loss
1,1.7786,1.393502
2,1.354,1.385906
3,1.3458,1.384064


TrainOutput(global_step=2247, training_loss=1.4454966376609786, metrics={'train_runtime': 2803.9753, 'train_samples_per_second': 3.202, 'train_steps_per_second': 0.801, 'total_flos': 2.856652997079859e+16, 'train_loss': 1.4454966376609786, 'epoch': 3.0})

In [38]:
# Save the fine-tuned model
model.save_pretrained("./legal_qa_tinyllama_model")
tokenizer.save_pretrained("./legal_qa_tinyllama_model")

('./legal_qa_tinyllama_model/tokenizer_config.json',
 './legal_qa_tinyllama_model/special_tokens_map.json',
 './legal_qa_tinyllama_model/tokenizer.model',
 './legal_qa_tinyllama_model/added_tokens.json',
 './legal_qa_tinyllama_model/tokenizer.json')

In [39]:
# Evaluate the model
eval_results = trainer.evaluate()
print(eval_results)

{'eval_loss': 1.3840643167495728, 'eval_runtime': 32.2267, 'eval_samples_per_second': 11.605, 'eval_steps_per_second': 2.917, 'epoch': 3.0}


In [43]:
def answer_question(question, model, tokenizer):
    # Determine the device
    device = next(model.parameters()).device

    prompt = f"Human: {question}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    try:
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=100, num_return_sequences=1)

        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return answer.split("Assistant:")[-1].strip()
    except RuntimeError as e:
        print(f"An error occurred during generation: {e}")
        return "I'm sorry, but I encountered an error while trying to answer your question."



In [44]:
# Example usage
question = "I was wondering if a pain management office is acting illegally/did an illegal action."
print(answer_question(question, model, tokenizer))

1. In the United States, the legal system is based on the concept of individual rights and freedoms. This means that the government is not allowed to interfere with the rights of individuals to practice their religion, to own property, to engage in business, to have a family, to have a job, to have a medical treatment, to have a medical insurance, to have a medical treatment, to have a medical insurance, to have a medical treatment, to have


# Pdf Chatbot Interface

In [48]:
import os
from langchain import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline
import gradio as gr

In [51]:
# Load the fine-tuned TinyLlama model and tokenizer
model_path = "/content/legal_qa_tinyllama_model"
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Use the base model name

In [52]:
try:
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
except Exception as e:
    print(f"Error loading fine-tuned model: {e}")
    print("Falling back to base model...")
    model = AutoModelForCausalLM.from_pretrained(base_model_name)
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

Error loading fine-tuned model: /content/legal_qa_tinyllama_model does not appear to have a file named config.json. Checkout 'https://huggingface.co//content/legal_qa_tinyllama_model/tree/None' for available files.
Falling back to base model...


In [53]:
# Move model to GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

In [54]:
# Create a HuggingFacePipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
    device=device
)

In [55]:
# Create a LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)

  warn_deprecated(


In [56]:
# Create embeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  warn_deprecated(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [57]:
# Initialize an empty vector store
vector_store = FAISS.from_texts([""], embeddings)

In [58]:
# Function to process PDF and add to vector store
def process_pdf(file_path):
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)
    vector_store.add_documents(docs)
    return f"Processed {len(docs)} chunks from {file_path}"

In [59]:
# Create a prompt template
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}

Answer:"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [60]:
# Create a chain
chain = LLMChain(llm=llm, prompt=prompt)

  warn_deprecated(


In [61]:
# Chatbot function
def chatbot(question, history):
    docs = vector_store.similarity_search(question, k=3)
    context = " ".join([doc.page_content for doc in docs])
    response = chain.run(context=context, question=question)
    return response

In [62]:
# Gradio interface
iface = gr.Interface(
    fn=chatbot,
    inputs=["text", "state"],
    outputs=["text", "state"],
    title="Legal Chatbot",
    description="Ask legal questions based on uploaded documents."
)

In [63]:
# File upload function
def upload_file(file):
    file_path = file.name
    return process_pdf(file_path)

In [64]:
# Create the file upload interface
upload_interface = gr.Interface(
    fn=upload_file,
    inputs=gr.File(label="Upload PDF"),
    outputs="text"
)

In [65]:
# Combine the interfaces
combined_interface = gr.TabbedInterface([upload_interface, iface], ["Upload Document", "Chat"])

In [66]:
# Launch the interface
combined_interface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://d751e0664c1c7f1ee4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


