cell 1

In [None]:
!pip install -q transformers sentence-transformers pdfminer.six requests

cell 2

In [None]:
import warnings

# The filter is installed before the model libraries are imported (same
# ordering as before) so it is already active when they start emitting warnings.
warnings.filterwarnings("ignore", message=".*encoder_attention_mask.*")

import gc
import getpass
import json
import os
import re
import textwrap

import numpy as np
import requests
import torch
from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer, util

cell 3

In [None]:
# Credentials must never be stored in the notebook itself: the key is taken
# from the TOGETHER_API_KEY environment variable when it is set, otherwise it
# is requested interactively (getpass hides the input and nothing is echoed
# into the cell output).
# NOTE(review): a literal key used to live in this cell; treat it as leaked and
# revoke it with the provider.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY") or getpass.getpass("Together API key: ")
API_URL = "https://api.together.xyz/v1/completions"
headers = {
    "Authorization": f"Bearer {TOGETHER_API_KEY}",
    "Content-Type": "application/json"
}

def query_llama(prompt, max_tokens=500, temperature=0.1, timeout=60):
    """Send ``prompt`` to the completions endpoint and return the generated text.

    Args:
        prompt: Fully formatted prompt string, sent as-is.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the API.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook cell if the service stalls.

    Returns:
        The stripped completion text on success. On any failure the function
        does not raise; it prints the problem and returns a string of the form
        ``"API Error: <details>"`` which callers use as an error sentinel.
    """
    payload = {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": ["<|eot_id|>", "<|end_of_text|>"],
        "top_p": 0.7
    }
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()['choices'][0]['text'].strip()
    except Exception as e:
        # Deliberately broad: network failures, HTTP errors and unexpected
        # response shapes are all reported through the same string sentinel.
        print(f"API Error: {str(e)}")
        return f"API Error: {str(e)}"

cell 4

In [None]:
# text extraction and chunking
def extract_text_from_pdf(pdf_path):
    """Return the text pdfminer's ``extract_text`` produces for ``pdf_path``."""
    pdf_text = extract_text(pdf_path)
    return pdf_text

def chunk_text(text, max_words=400):
    """Split ``text`` into chunks of at most ``max_words`` words.

    The text is first split into sentences (after ``.`` or ``?`` followed by
    whitespace, with look-behinds that skip patterns such as ``e.g.`` and
    ``Mr.``). Consecutive sentences are packed into a chunk until adding the
    next one would exceed ``max_words``.

    Args:
        text: Text to split. Empty or whitespace-only text yields ``[]``.
        max_words: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of non-empty chunk strings, in document order.

    Raises:
        ValueError: If ``max_words`` is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")
    if not text:
        return []

    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
    chunks = []
    current_chunk = []
    word_count = 0

    for sentence in sentences:
        words = sentence.split()
        if not words:
            # Whitespace-only fragment: nothing worth storing.
            continue

        if len(words) > max_words:
            # A single sentence that cannot fit in one chunk (common for
            # tables or lists without punctuation). Close the pending chunk,
            # then cut the sentence into max_words-sized windows; the last
            # window stays open so following sentences can still join it.
            if current_chunk:
                chunks.append(" ".join(current_chunk))
            pieces = [
                " ".join(words[start:start + max_words])
                for start in range(0, len(words), max_words)
            ]
            chunks.extend(pieces[:-1])
            current_chunk = [pieces[-1]]
            word_count = len(words) - max_words * (len(pieces) - 1)
            continue

        if word_count + len(words) > max_words:
            # current_chunk is never empty here because this sentence fits on
            # its own, so an overflow implies something is already pending.
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            word_count = 0

        current_chunk.append(sentence)
        word_count += len(words)

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

cell 5

In [None]:
# document processing with GPU acceleration when available
# Fall back to the CPU on runtimes without CUDA instead of failing at load time.
embedding_model = SentenceTransformer(
    'all-MiniLM-L6-v2',
    device='cuda' if torch.cuda.is_available() else 'cpu'
)

def create_document_db(pdf_paths):
    """Extract, chunk and embed each PDF in ``pdf_paths``.

    Args:
        pdf_paths: Iterable of paths to PDF files.

    Returns:
        A list with one dict per successfully processed document, holding
        ``"path"``, ``"filename"``, ``"chunks"`` (list of str) and
        ``"embeddings"`` (a CPU tensor with one row per chunk). Documents that
        fail to process or contain no extractable text are reported on stdout
        and left out, so the list may be empty.
    """
    document_db = []
    for path in pdf_paths:
        filename = os.path.basename(path)
        print(f"Processing {filename}...")
        try:
            text = extract_text_from_pdf(path)
            # Blank chunks carry nothing to retrieve, so they are not embedded.
            chunks = [chunk for chunk in chunk_text(text) if chunk.strip()]
            if not chunks:
                # Report this explicitly; otherwise torch.cat() below would
                # fail on an empty list with an unhelpful message.
                print(f"Skipping {filename}: no extractable text found")
                continue

            # batch processing for memory efficiency
            batch_size = 64
            embeddings = []
            for i in range(0, len(chunks), batch_size):
                batch = chunks[i:i+batch_size]
                batch_embeddings = embedding_model.encode(
                    batch,
                    convert_to_tensor=True,
                    show_progress_bar=False,
                    batch_size=32
                )
                # Keep the stored embeddings on the CPU and release the
                # device copy before the next batch.
                embeddings.append(batch_embeddings.cpu())
                del batch_embeddings
                torch.cuda.empty_cache()

            document_db.append({
                "path": path,
                "filename": filename,
                "chunks": chunks,
                "embeddings": torch.cat(embeddings)
            })
            print(f"Processed {len(chunks)} chunks from {filename}")
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print(f"Error processing {filename}: {str(e)}")
        finally:
            gc.collect()
    return document_db

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


cell 6

In [None]:
# semantic search with GPU optimization
def find_relevant_clauses(query, document_db, top_k=5):
    """Return the ``top_k`` chunks most similar to ``query`` across all documents.

    Args:
        query: Free-text query to embed and compare against the chunks.
        document_db: List of document dicts providing ``"filename"``,
            ``"chunks"`` and ``"embeddings"`` (row ``i`` of the embeddings is
            expected to belong to chunk ``i``).
        top_k: Maximum number of results to return.

    Returns:
        A list of dicts with ``"document"``, ``"text"`` and ``"score"``
        (cosine similarity), sorted by descending score. Empty when
        ``top_k`` is not positive or nothing is indexed.
    """
    if top_k <= 0:
        return []

    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    # Score on whichever device the model placed the query on, so the search
    # also works on CPU-only runtimes.
    device = query_embedding.device
    results = []
    # process in batches to save memory
    batch_size = 100

    for doc in document_db:
        chunks = doc["chunks"]
        for i in range(0, len(chunks), batch_size):
            chunk_batch = chunks[i:i+batch_size]
            embedding_batch = doc["embeddings"][i:i+batch_size].to(device)

            similarities = util.pytorch_cos_sim(query_embedding, embedding_batch)[0]
            # Only the best top_k of each batch can reach the global top_k.
            top = similarities.topk(min(top_k, len(similarities)))

            # Convert to plain Python numbers once instead of indexing the
            # chunk list with 0-dim tensors.
            for score, idx in zip(top.values.tolist(), top.indices.tolist()):
                results.append({
                    "document": doc["filename"],
                    "text": chunk_batch[idx],
                    "score": score
                })

            # cleanup
            del embedding_batch, similarities, top
            if device.type == 'cuda':
                torch.cuda.empty_cache()

    # sort and return top results
    results.sort(key=lambda x: x["score"], reverse=True)
    return results[:top_k]

cell 7

In [None]:
# decision generation with JSON output
def generate_decision_response(query, relevant_clauses):
    """Ask the LLM for a claim decision and parse its JSON answer.

    Args:
        query: Claim description entered by the user.
        relevant_clauses: List of dicts with ``"document"`` and ``"text"``
            keys; these are rendered into the prompt as policy clauses.

    Returns:
        On success, the parsed JSON object, which is guaranteed to contain
        ``"decision"``, ``"justification"`` and ``"clause_references"``.
        Otherwise a dict with an ``"error"`` message and, when the model did
        answer, the unparsed text under ``"raw_response"``.
    """
    if not relevant_clauses:
        return {"error": "No relevant clauses found"}

    clauses_text = "\n\n".join([f"Document: {clause['document']}\nClause: {clause['text']}"
                              for clause in relevant_clauses])

    # Llama-4 Maverick prompt
    prompt = f"""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an insurance claim analyst. Evaluate this claim based EXCLUSIVELY on the provided policy clauses.
Return ONLY VALID JSON with these keys:
- "decision" (string: "approved", "rejected", or "needs_review")
- "amount" (number or null)
- "justification" (string)
- "clause_references" (list of document filenames)

IMPORTANT:
1. If information is missing, return "needs_review"
2. Reference EXACT document filenames
3. Amount must be null if not applicable
4. Output ONLY JSON with no additional text<|eot_id|>
<|start_header_id|>user<|end_header_id|>
## CLAIM DETAILS:
{query}

## RELEVANT POLICY CLAUSES:
{clauses_text}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""

    print("Sending request to LLM...")
    response = query_llama(prompt)

    # handle API errors
    # query_llama reports failures as a string that *starts* with
    # "API Error:". A substring test would also reject valid model output
    # that merely mentions that phrase (e.g. inside the justification).
    if response.startswith("API Error:"):
        return {"error": response}

    # extract JSON from response: take the span from the first '{' to the
    # last '}' so surrounding chatter or code fences are ignored
    json_start = response.find('{')
    json_end = response.rfind('}') + 1

    if json_start == -1 or json_end == 0:
        return {"error": "No JSON found in response", "raw_response": response}

    try:
        result = json.loads(response[json_start:json_end])
    except (ValueError, RecursionError) as e:
        # json.JSONDecodeError is a ValueError subclass
        return {"error": f"JSON parsing failed: {str(e)}", "raw_response": response}

    # validate required fields
    required_keys = ["decision", "justification", "clause_references"]
    if not all(key in result for key in required_keys):
        return {"error": "Missing required JSON fields", "raw_response": response}

    return result

cell 8

In [None]:
# processing pipeline
def process_claim(query, pdf_paths):
    """Run the whole pipeline for one claim.

    Indexes the PDFs in ``pdf_paths``, retrieves the clauses most relevant to
    ``query`` and asks the LLM for a decision.

    Returns:
        The dict produced by ``generate_decision_response``, or
        ``{"error": "No valid documents processed"}`` when no document could
        be indexed.
    """
    print("Building document database...")
    indexed_docs = create_document_db(pdf_paths)

    # Nothing to search: bail out before touching the model or the API.
    if not indexed_docs:
        return {"error": "No valid documents processed"}

    print("Finding relevant clauses...")
    matches = find_relevant_clauses(query, indexed_docs)
    print(f"Found {len(matches)} relevant clauses")

    print("Generating decision...")
    decision = generate_decision_response(query, matches)

    # Drop the index and release cached GPU memory before handing back the result.
    del indexed_docs
    torch.cuda.empty_cache()
    gc.collect()

    return decision

cell 9

In [None]:
# upload
from google.colab import files

print("Please upload your policy documents (PDFs):")
# The keys of the returned mapping are used as the on-disk paths of the
# uploaded files.
uploaded = files.upload()
pdf_paths = [name for name in uploaded.keys()]

# Echo what was received, with sizes, so the user can verify the upload.
print("\nUploaded documents:")
for path in pdf_paths:
    size_bytes = os.path.getsize(path)
    size_mb = size_bytes / (1024 * 1024)
    print(f"- {os.path.basename(path)} ({size_mb:.2f} MB)")

Please upload your policy documents (PDFs):


Saving doc1.pdf to doc1 (5).pdf

Uploaded documents:
- doc1 (5).pdf (1.33 MB)


cell 10

In [None]:
# prompt
user_query = input("\nEnter your query (eg : '46M, knee surgery, Pune, 3-month policy'):\n")

print("\nProcessing claim...")
result = process_claim(user_query, pdf_paths)


print("\n" + "="*60)
print("CLAIM PROCESSING RESULT")
print("="*60)

if "error" in result:
    print(f"ERROR: {result['error']}")
    if "raw_response" in result:
        raw_response = result["raw_response"]
        print("\nLLM Raw Response:")
        # Cap very long model output so it does not flood the cell.
        print(raw_response[:1000] + "..." if len(raw_response) > 1000 else raw_response)
else:
    # The fields come from model-generated JSON, so their types are not
    # guaranteed; everything is coerced to text before formatting.
    amount = result.get('amount', 'N/A')
    if amount is None:
        amount = "Not Applicable"

    justification = str(result.get('justification', 'N/A'))
    # Wrap on word boundaries (a fixed 80-character slice cuts words in half);
    # each original line is wrapped separately so existing line breaks survive.
    wrapped_justification = '\n'.join(
        textwrap.fill(line, width=80) for line in justification.splitlines()
    )

    clause_refs = result.get('clause_references') or []
    if isinstance(clause_refs, str):
        # A bare string would otherwise be joined character by character.
        clause_refs = [clause_refs]
    references = ', '.join(str(ref) for ref in clause_refs)

    print(f"DECISION: {str(result.get('decision', 'N/A')).upper()}")
    print(f"\nAMOUNT: {amount}")
    print("\nJUSTIFICATION:")
    print(wrapped_justification)
    print(f"\nCLAUSE REFERENCES: {references}")

print("="*60)


Enter your query (eg : '46M, knee surgery, Pune, 3-month policy'):
46M, knee surgery, Pune, 3-month policy

Processing claim...
Building document database...
Processing doc1 (5).pdf...
Processed 79 chunks from doc1 (5).pdf
Finding relevant clauses...
Found 5 relevant clauses
Generating decision...
Sending request to LLM...

CLAIM PROCESSING RESULT
DECISION: APPROVED

AMOUNT: Not Applicable

JUSTIFICATION:
Knee surgery is covered under the policy as per clause 312 'Revision/Removal of 
Knee cap' listed in doc1 (5).pdf.

CLAUSE REFERENCES: doc1 (5).pdf
