In [1]:
!pip install -q \
    transformers \
    peft \
    huggingface_hub \
    accelerate \
    bitsandbytes \
    sentence-transformers \
    langchain \
    langchain-community \
    PyPDF2


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m437.9/437.9 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m


In [6]:
!pip install faiss-cpu --quiet


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h

In [3]:
import os
import torch
import gc

# Housekeeping before the model is loaded further down: run a garbage-collection pass
# and ask PyTorch to release its cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()
# Configure the PyTorch CUDA allocator via its environment variable.
# NOTE(review): this is set after `import torch`; presumably it has to be in place before
# the first CUDA allocation to have any effect — confirm against the PyTorch docs.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from huggingface_hub import login
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Authenticate with the Hugging Face Hub using a token supplied through the HF_TOKEN
# environment variable (e.g. a Kaggle/Colab secret) instead of a literal in the notebook.
# SECURITY: the token that used to be hardcoded on this line has been exposed and must be revoked.
# When HF_TOKEN is unset, `token=None` lets `login()` fall back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

def load_model_and_tokenizer(
    base_model_id="mistralai/Mistral-7B-Instruct-v0.2",
    adapter_id="NishKook/legal-qa-lora",
):
    """Load the 4-bit quantized base model, attach the LoRA adapter, and load the tokenizer.

    Args:
        base_model_id: Hub id of the causal-LM checkpoint to quantize and load.
        adapter_id: Hub id of the PEFT adapter; the tokenizer is loaded from this
            repository as well.

    Returns:
        A ``(model, tokenizer, device)`` tuple where ``model`` is the adapter-wrapped
        model in eval mode and ``device`` is ``cuda`` when available, otherwise ``cpu``.
    """
    # NF4 4-bit weights with double quantization; matmuls are computed in float16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )

    # `token=True` uses the credential stored by `login()`. It replaces the
    # deprecated `use_auth_token=True` keyword.
    tokenizer = AutoTokenizer.from_pretrained(adapter_id, token=True)

    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        device_map="auto",
        torch_dtype=torch.float16,
        quantization_config=bnb_config,
        token=True,
        # Upper bounds on what may be placed on GPU 0 and on the CPU.
        max_memory={0: "13GiB", "cpu": "12GiB"},
    )

    model = PeftModel.from_pretrained(
        base_model,
        adapter_id,
        device_map="auto",
        torch_dtype=torch.float16,
        token=True,
    )

    # Inference only: switch off dropout and other training-time behaviour.
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return model, tokenizer, device

# Load everything once at module level; `generate_answer` reads these three globals.
model, tokenizer, device = load_model_and_tokenizer()

def extract_text_from_multiple_pdfs(pdf_paths):
    """Concatenate the extractable text of every page of every PDF.

    Args:
        pdf_paths: Iterable of PDF file paths, read in the given order.

    Returns:
        A single string containing each non-empty page text followed by a
        newline; pages with no extractable text are skipped. Returns ``""``
        when ``pdf_paths`` is empty.
    """
    page_texts = []
    for path in pdf_paths:
        reader = PdfReader(path)
        for page in reader.pages:
            # Extract once per page: extraction re-parses the page every time it is called.
            text = page.extract_text()
            if text:
                page_texts.append(text + "\n")
    # Join once at the end instead of growing a string with repeated `+=`.
    return "".join(page_texts)

def build_vector_index(text, chunk_size=384, chunk_overlap=32, embedding_model="all-MiniLM-L6-v2"):
    """Split ``text`` into overlapping chunks and index them in a FAISS vector store.

    Args:
        text: The full text to index.
        chunk_size: Maximum chunk length passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the text splitter.
        embedding_model: Model name handed to ``HuggingFaceEmbeddings``.

    Returns:
        The FAISS vector store built from the chunk documents.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or splitting yields no chunks,
            rather than handing an empty document list to the vector store.
    """
    if not text or not text.strip():
        raise ValueError("Cannot build a vector index: the input text is empty.")

    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
    if not docs:
        raise ValueError("Cannot build a vector index: splitting produced no chunks.")

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    return FAISS.from_documents(docs, embeddings)

def get_context(question, vectordb, k=7):
    """Return the text of the ``k`` chunks most similar to ``question``.

    The chunks come from ``vectordb.similarity_search`` and are joined with
    single newlines, in the order the search returned them.
    """
    passages = []
    for hit in vectordb.similarity_search(question, k=k):
        passages.append(hit.page_content)
    return "\n".join(passages)

def generate_answer(question, context, max_new_tokens=256):
    """Generate an answer to ``question`` grounded in ``context`` with greedy decoding.

    Uses the module-level ``model``, ``tokenizer`` and ``device``.

    Args:
        question: The user question inserted into the prompt.
        context: Retrieved text inserted into the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded continuation only (prompt tokens are sliced off), with
        special tokens removed.
    """
    prompt = f"### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs.input_ids.shape[1]
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            # Pass the padding id explicitly so `generate` does not have to fall back
            # to the EOS id (and warn about it) on every call.
            pad_token_id=tokenizer.eos_token_id,
        )
    # Drop the echoed prompt so only the newly generated tokens are decoded.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

def answer_from_pdfs(pdf_paths, question):
    """Run the whole pipeline: read the PDFs, index them, retrieve context, answer.

    Progress messages and the first 500 characters of the retrieved context
    are printed along the way; the generated answer is returned.
    """
    print(" Reading multiple PDFs...")
    corpus = extract_text_from_multiple_pdfs(pdf_paths)

    print(" Building FAISS index...")
    index = build_vector_index(corpus)

    print(f" Question: {question}")
    retrieved = get_context(question, index)

    # Show only a short preview of what retrieval returned.
    preview = retrieved[:500]
    print("\n Retrieved Context Snippet:\n")
    print(preview, "...\n")

    print(" Generating answer...\n")
    result = generate_answer(question, retrieved)
    return result




tokenizer_config.json:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.51M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/643 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/557 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]



adapter_model.safetensors:   0%|          | 0.00/27.3M [00:00<?, ?B/s]

In [7]:
# Input documents for this run, one path per line.
pdfs = [
    "/kaggle/input/obergefell-v-hodges/Department of Justice Brief.pdf",
    "/kaggle/input/obergefell-v-hodges/Majority Opinion.pdf",
    # NOTE(review): "Sumary" is kept as written; presumably it matches the dataset's filename — confirm.
    "/kaggle/input/obergefell-v-hodges/Sumary.pdf",
]
question = (
    "What were the main disagreements between the majority opinion "
    "and the dissenting justices in Obergefell v. Hodges?"
)
# Run the full retrieve-then-generate pipeline and show the result.
answer = answer_from_pdfs(pdfs, question)
print(" Final Answer:\n", answer)


 Reading multiple PDFs...
 Building FAISS index...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 Question: What were the main disagreements between the majority opinion and the dissenting justices in Obergefell v. Hodges?

 Retrieved Context Snippet:

*Bold sentences give the big idea of the ex cerpt and are not a part of the primary source.  
 
 
 
 
Obergefell  v. Hodges  
U.S. Supreme Court Opinion and Dissents Summary  
 
The Supreme Court (Justice Kennedy writing for himself and Justices Ginsburg, Breyer, 
Sotomayor, and Kagan) held that the right to marry is a fundamental right that is inherent in the
ROBERT A. KOCH  
ABBY C. WRIGHT  
JEFFREY E. SANDBERG  
Attorneys  
MARCH  2015 
 
CONSTITUTION 101  
Module 14: Battles for Freedom and  ...

 Generating answer...

 Final Answer:
 The main disagreements between the majority opinion and the dissenting justices in Obergefell v. Hodges were over the interpretation of the Fourteenth Amendment's Due Process Clause and Equal Protection Clause. The majority held that the right to marry is a fundamental right under the Due Process C