In [5]:
!pip install -q \
    transformers \
    peft \
    huggingface_hub \
    accelerate \
    bitsandbytes \
    sentence-transformers \
    langchain \
    langchain-community \
    PyPDF2


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [10]:
import os
import torch
import gc

# Free unreferenced Python objects, then ask PyTorch to release its cached CUDA memory,
# so a re-run of this cell starts from as much free GPU memory as possible.
gc.collect()
torch.cuda.empty_cache()
# Allocator option read by PyTorch's CUDA caching allocator.
# NOTE(review): this is set after `import torch` and after a CUDA call; it presumably only
# takes effect if the allocator has not been initialized yet in this kernel — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from huggingface_hub import login
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Authenticate with the Hugging Face Hub using a token supplied via the environment.
# Never hardcode access tokens in a notebook: anyone who can read the file (or its
# history) can use them. Any token that was previously committed here must be revoked.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set; export a Hugging Face access token in the environment "
        "before running this cell."
    )
login(token=hf_token)

def load_model_and_tokenizer(
    base_model_id="mistralai/Mistral-7B-Instruct-v0.2",
    adapter_id="NishKook/legal-qa-lora",
):
    """Load the 4-bit quantized base model with the LoRA adapter attached.

    Args:
        base_model_id: Hub id of the causal language model to load.
        adapter_id: Hub id of the PEFT adapter. The tokenizer is loaded from
            this same repository.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been switched to eval mode.
    """
    # NF4 4-bit weights with double quantization; matmuls are computed in fp16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )

    # `token=True` reuses the credential stored by `huggingface_hub.login`; it replaces
    # the deprecated `use_auth_token=True` argument.
    tokenizer = AutoTokenizer.from_pretrained(adapter_id, token=True)

    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        device_map="auto",
        torch_dtype=torch.float16,
        quantization_config=bnb_config,
        token=True,
        # Cap GPU 0 and CPU usage so anything that does not fit on the GPU is offloaded.
        max_memory={0: "13GiB", "cpu": "12GiB"},
    )

    model = PeftModel.from_pretrained(
        base_model,
        adapter_id,
        device_map="auto",
        torch_dtype=torch.float16,
        token=True,
    )

    model.eval()
    return model, tokenizer

# Load the model once for the whole notebook; the functions in this cell read
# `model`, `tokenizer` and `device` as module-level globals.
model, tokenizer = load_model_and_tokenizer()

# Prompt tensors are placed on the GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Pages for which ``extract_text()`` returns an empty or falsy value are skipped.

    Args:
        pdf_path: Path (or file-like object) passed straight to ``PdfReader``.

    Returns:
        A single string with one newline between the text of consecutive pages.
    """
    reader = PdfReader(pdf_path)
    # Extract each page exactly once; text extraction is the expensive step and was
    # previously run twice per page (once for the filter, once for the value).
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text)

def build_vector_index(
    text,
    chunk_size=384,
    chunk_overlap=32,
    embedding_model="all-MiniLM-L6-v2",
):
    """Split ``text`` into overlapping chunks and index them in a FAISS vector store.

    Args:
        text: Full document text to index.
        chunk_size: Maximum chunk length passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If ``text`` yields no chunks (for example, a PDF with no
            extractable text), since an index cannot be built from nothing.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
    if not docs:
        # Fail early with a clear message instead of an obscure error inside FAISS.
        raise ValueError("Cannot build a vector index: no text chunks were produced.")
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    return FAISS.from_documents(docs, embeddings)

def get_context(question, vectordb, k=7):
    """Retrieve the ``k`` chunks most similar to ``question`` and join them with newlines.

    Args:
        question: Query string handed to the vector store.
        vectordb: Object exposing ``similarity_search(query, k=...)`` whose results
            carry a ``page_content`` attribute.
        k: Number of chunks to retrieve.

    Returns:
        The retrieved chunks' text, newline-separated, in the order returned.
    """
    hits = vectordb.similarity_search(question, k=k)
    passages = (hit.page_content for hit in hits)
    return "\n".join(passages)

def generate_answer(question, context):
    """Generate an answer to ``question`` grounded in ``context`` with greedy decoding.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        question: The user question inserted into the prompt.
        context: Retrieved passages inserted into the prompt.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is stripped).
    """
    prompt = f"### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False,
            # Pass the padding id explicitly: without it, generate() falls back to the
            # EOS id anyway but emits a warning on every call.
            pad_token_id=tokenizer.eos_token_id,
        )
    # Drop the echoed prompt tokens so only the completion is decoded.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

def answer_from_pdf(pdf_path, question):
    """Answer ``question`` from the contents of a PDF, printing progress along the way.

    Extracts the PDF text, builds a fresh vector index from it, retrieves the
    passages closest to the question, prints a preview of them, and returns the
    generated answer.

    Args:
        pdf_path: Path to the PDF to read.
        question: Question to answer.

    Returns:
        The generated answer string.
    """
    print("Extracting text from PDF...")
    document_text = extract_text_from_pdf(pdf_path)

    print("Building FAISS index...")
    index = build_vector_index(document_text)

    print(f"Question: {question}")
    retrieved = get_context(question, index)

    # Show only the first 500 characters; the full context still goes to the model.
    preview = retrieved[:500]
    print("\n Retrieved Context Snippet:\n")
    print(preview, "...\n")

    print("Generating answer...\n")
    answer = generate_answer(question, retrieved)
    return answer


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [11]:
# Driver cell: run the full PDF question-answering pipeline for one question.
# The path points at a Kaggle input dataset, so it only resolves inside that environment.
pdf_path = "/kaggle/input/case-file/Case File.pdf"
question = "What did the Dobbs Court decide about Roe v. Wade?"
# answer_from_pdf prints its progress and the retrieved context preview as it runs.
answer = answer_from_pdf(pdf_path, question)
print("\n Final Answer:\n", answer)


Extracting text from PDF...
Building FAISS index...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Question: What did the Dobbs Court decide about Roe v. Wade?

 Retrieved Context Snippet:

APPENDIX 
This Appendix analyzes in full each of the 28 cases the
majority says support today’ s decision to overrule Roe v. 
Wade , 410 U. S. 113 (1973), and Planned Parenthood of 
Southeastern Pa. v. Casey , 505 U. S. 833 (1992).  As ex-
plained herein, the Court in each case relied on traditional 
stare decisis  factors in overruling.
6 DOBBS v. JACKSON WOMEN’S HEALTH ORGANIZATION 
Opinion of the Court 
opinion was based, does not compel unending adherence to 
Roe’s abuse of judicial authorit ...

Generating answer...


 Final Answer:
 The Dobbs Court decided to overrule Roe v. Wade and Planned Parenthood of Southeastern Pennsylvania v. Casey. The Court held that the Constitution does not confer a right to abortion on demand, and that the power to regulate abortion rests with the States.
