In [None]:
!pip install -U transformers

## Local Inference on GPU
Model page: https://huggingface.co/ibm-granite/granite-3.3-2b-instruct

⚠️ If the generated code snippets do not work, please open an issue on the [model repo](https://huggingface.co/ibm-granite/granite-3.3-2b-instruct) and/or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏

In [1]:
# ===============================
# 1. Install dependencies
# ===============================
!pip install transformers accelerate torch pypdf gradio

# ===============================
# 2. Import libraries
# ===============================
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from pypdf import PdfReader
import torch
import gradio as gr

# ===============================
# 3. Load Granite Model
# ===============================
# Hugging Face Hub id of the checkpoint used by every step below.
model_name = "ibm-granite/granite-3.3-2b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in bfloat16 and let `device_map="auto"` decide where the
# model lives (GPU when one is available).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# The model instance above has already been placed on its device(s), so the
# pipeline is built from it directly. `device_map` is only relevant when the
# pipeline loads a checkpoint itself, so it is not repeated here.
nlp_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# ===============================
# 4. Function to read PDF or TXT
# ===============================
def read_document(file_path):
    """Return the text content of a PDF or plain-text document.

    Args:
        file_path: Location of the document, as a ``str`` or path-like
            object. A path ending in ``.pdf`` (any letter case) is parsed
            with ``PdfReader``; anything else is read as UTF-8 text.

    Returns:
        The document text. For PDFs, the extracted text of every page is
        followed by a newline.

    Raises:
        UnicodeDecodeError: If a non-PDF file is not valid UTF-8.
    """
    # Compare the extension case-insensitively so "CONTRACT.PDF" is not
    # mistakenly opened as a UTF-8 text file; str() also admits Path objects.
    if str(file_path).lower().endswith(".pdf"):
        reader = PdfReader(file_path)
        # `or ""` keeps the join safe should a page yield no text at all.
        return "".join(
            (page.extract_text() or "") + "\n" for page in reader.pages
        )

    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

# ===============================
# 5. Analyzer Function
# ===============================
def analyze_legal_doc(doc, query, *, max_chars=4000, max_new_tokens=500):
    """Ask the Granite pipeline to perform *query* on a legal document.

    Args:
        doc: Full document text.
        query: Instruction for the model (e.g. "Summarize this legal document").
        max_chars: Number of leading characters of *doc* placed in the
            prompt. This is a character cut-off, not a token count; it only
            roughly bounds the prompt length.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The model's answer as a string, without the prompt echoed back.
    """
    # Keep the prompt bounded so long documents do not overflow the context.
    text = doc[:max_chars]
    prompt = f"""You are a legal assistant AI.
Document: {text}
Task: {query}"""

    # Greedy decoding (do_sample=False) ignores sampling knobs such as
    # temperature, so none is passed. return_full_text=False stops the
    # pipeline from prepending the prompt (i.e. the whole document) to the
    # answer shown to the user.
    response = nlp_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()

# ===============================
# 6. Gradio Web App
# ===============================
def process_file(file, query):
    """Gradio callback: read the uploaded document and analyze it.

    Args:
        file: Value delivered by the ``gr.File`` input, or ``None`` when
            nothing has been uploaded.
        query: Instruction text from the query textbox.

    Returns:
        The analysis text, or a short hint when no file was uploaded.
    """
    # Clicking "Analyze" before uploading passes None; answer with a hint
    # instead of failing on `None.name`.
    if file is None:
        return "Please upload a PDF or TXT document first."

    # NOTE(review): depending on the Gradio version/config the upload may be
    # a plain path string or an object exposing `.name` - accept both.
    file_path = getattr(file, "name", file)
    doc_text = read_document(file_path)
    return analyze_legal_doc(doc_text, query)

# Build the page top to bottom: title, upload widget, query box, result box,
# and the button that triggers the analysis.
with gr.Blocks() as demo:
    gr.Markdown("## 📑 Legal Document Analyzer (Granite 3.3 2B Instruct)")
    file = gr.File(label="Upload Legal Document (PDF/TXT)")
    query = gr.Textbox(
        label="Enter your query",
        value="Summarize this legal document",
    )
    output = gr.Textbox(label="Analysis Result")
    btn = gr.Button("Analyze")
    # On click, send the upload and the query to process_file and show the
    # returned text in the result box.
    btn.click(
        fn=process_file,
        inputs=[file, query],
        outputs=output,
    )

# Start the web app.
demo.launch()

Collecting pypdf
  Downloading pypdf-6.0.0-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-6.0.0-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-6.0.0


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Device set to use cuda:0


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c3a3fd9443cd25b37c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


