<a href="https://colab.research.google.com/github/SamratChakraborty99/Python/blob/main/Document_from_Samrat_Chakraborty.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell command: install the Gemini SDK (google-genai), Pillow (PIL)
# and PyMuPDF (imported as `fitz`), which the cells below import.
!pip install google-genai pillow PyMuPDF

In [None]:
import os

# Prefer a key supplied through the GEMINI_API_KEY environment variable so the
# real secret never has to be saved inside the notebook. When the variable is
# unset or empty, fall back to the placeholder, which must be replaced by hand
# before the API can be called.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or 'your api key here'

# System instruction sent with every request: the model acts as a translator
# that renders the source text into English.
BASE_SYSTEM_PROMPT = """You are a translator with knowledge in translating text from one language to another. Your task is to translate the provided text from the source language to English while maintaining the original meaning and context. Ensure that the translation is accurate, fluent, and culturally appropriate."""
import openai
import os
import json

In [None]:
import fitz  # PyMuPDF
from PIL import Image
import io
from typing import List
from IPython.display import display

def pdf_binary_to_images(pdf_binary_data: bytes) -> List[Image.Image]:
    """Render every page of an in-memory PDF to a PIL image.

    Each page is rasterised with PyMuPDF's ``get_pixmap()`` using its default
    settings, serialised to PNG bytes, and opened with Pillow.

    Args:
        pdf_binary_data: The raw bytes of a PDF document.

    Returns:
        One ``PIL.Image.Image`` per page, in page order.

    Raises:
        Exception: If the PDF cannot be opened or a page cannot be rendered.
            The underlying error is attached as ``__cause__``.
    """
    try:
        # The context manager closes the document even when rendering a page
        # fails part-way through, so the handle is never leaked.
        with fitz.open(stream=pdf_binary_data, filetype="pdf") as doc:
            return [
                Image.open(io.BytesIO(page.get_pixmap().tobytes("png")))
                for page in doc
            ]
    except Exception as e:
        # Keep the original exception type and message for existing callers,
        # but chain the cause so the real traceback is preserved.
        raise Exception(f"Error processing PDF: {str(e)}") from e

# Read the sample PDF and render its pages; the `with` block guarantees the
# file handle is closed once the bytes have been read.
with open("3bfbcb39-20df-41d4-b129-850889f85635.pdf", "rb") as pdf_file:
    image_list = pdf_binary_to_images(pdf_file.read())

In [None]:
# To run this code you need to install the following dependencies:
# pip install google-genai pillow

import base64
import os
import json
from typing import Union, Optional
from PIL import Image
import io
from google import genai
from google.genai import types


def analyze_document_with_gemini(
    question: str,
    system_prompt: str,
    document: Union[str, Image.Image, bytes],
    return_json: bool = False,
    model: str = "gemini-2.0-flash-exp",
    max_tokens: Optional[int] = None,
    api_key: Optional[str] = None
) -> Union[str, dict]:
    """
    Analyze a document (image, PDF, ...) with Google Gemini.

    The document and the question are sent as a single user turn, with
    ``system_prompt`` as the system instruction. The system instruction and
    the question are also printed after the request completes.

    Args:
        question: The question or prompt to ask about the document.
        system_prompt: System instructions for the model. When ``return_json``
            is true, a request to answer in valid JSON is appended.
        document: A file path (str), a PIL Image, or raw bytes. File paths get
            their MIME type from the extension; PIL images are sent as PNG;
            raw bytes are identified by their leading signature (PDF, PNG,
            JPEG, GIF, WebP) and otherwise treated as PDF.
        return_json: Whether to request and parse a JSON response.
        model: Gemini model name to use.
        max_tokens: Maximum tokens in the response. Defaults to 64000 when
            omitted (or 0).
        api_key: Gemini API key. When omitted, the ``GEMINI_API_KEY``
            environment variable is used, then the module-level
            ``GEMINI_API_KEY`` variable if one is defined.

    Returns:
        The response text, or the parsed JSON value when ``return_json`` is
        true. If the reply is not valid JSON, a dict with ``"answer"`` (the raw
        text) and ``"error"`` keys is returned. This function does not raise:
        any failure is reported as an ``"Error analyzing document: ..."``
        string, or as ``{"error": ...}`` when ``return_json`` is true.
    """

    # File-extension -> MIME type for path inputs.
    extension_mime_types = {
        ".pdf": "application/pdf",
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".bmp": "image/bmp",
        ".webp": "image/webp",
    }

    # Leading byte signature -> MIME type for raw-bytes inputs.
    signature_mime_types = (
        (b"%PDF", "application/pdf"),
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
    )

    def load_document(doc_data):
        """Return ``(raw_bytes, mime_type)`` for a supported document input."""
        if isinstance(doc_data, str):
            # A string is treated as a file path.
            with open(doc_data, "rb") as file:
                data = file.read()
            lowered = doc_data.lower()
            for extension, mime in extension_mime_types.items():
                if lowered.endswith(extension):
                    return data, mime
            return data, "application/octet-stream"

        if isinstance(doc_data, Image.Image):
            # Serialise the in-memory image as PNG.
            buffer = io.BytesIO()
            doc_data.save(buffer, format='PNG')
            return buffer.getvalue(), "image/png"

        if isinstance(doc_data, bytes):
            for signature, mime in signature_mime_types:
                if doc_data.startswith(signature):
                    return doc_data, mime
            # WebP is a RIFF container with "WEBP" at byte offset 8.
            if doc_data[:4] == b"RIFF" and doc_data[8:12] == b"WEBP":
                return doc_data, "image/webp"
            # Unrecognised bytes keep the historical default of PDF.
            return doc_data, "application/pdf"

        raise ValueError("Unsupported document format. Use file path, PIL Image, or bytes.")

    try:
        # Resolve the API key: explicit argument, then environment variable,
        # then a module-level GEMINI_API_KEY if the notebook defined one.
        if api_key is None:
            api_key = os.environ.get("GEMINI_API_KEY") or globals().get("GEMINI_API_KEY")

        if not api_key:
            raise ValueError("API key not provided. Set GEMINI_API_KEY environment variable or pass api_key parameter.")

        # Initialize client
        client = genai.Client(api_key=api_key)

        # Load the document as raw bytes (no base64 round trip is needed,
        # because Part.from_bytes takes the bytes directly).
        document_bytes, mime_type = load_document(document)

        # Prepare system instruction
        system_instruction = system_prompt
        if return_json:
            system_instruction += "\n\nPlease format your response as valid JSON."

        # Prepare content: the document first, then the question about it.
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_bytes(
                        mime_type=mime_type,
                        data=document_bytes,
                    ),
                    types.Part.from_text(text=question),
                ],
            ),
        ]

        # Configure generation
        generate_content_config = types.GenerateContentConfig(
            max_output_tokens=max_tokens or 64000,
            response_mime_type="application/json" if return_json else "text/plain",
            system_instruction=[
                types.Part.from_text(text=system_instruction),
            ],
        )

        # Generate response
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        print(system_instruction)
        print(question)

        # Extract the response content; it is None when the reply carries no
        # text, which is reported as an explicit error instead of leaking None.
        answer = response.text
        if answer is None:
            raise ValueError("Model returned no text content.")

        if not return_json:
            return answer

        try:
            return json.loads(answer)
        except json.JSONDecodeError as e:
            # If response isn't valid JSON, wrap it in a JSON structure
            return {"answer": answer, "error": "Response was not valid JSON: " + str(e)}

    except Exception as e:
        # Deliberate best-effort boundary: callers receive an error value
        # rather than an exception.
        error_msg = f"Error analyzing document: {str(e)}"
        if return_json:
            return {"error": error_msg}
        return error_msg


# Example usage
if __name__ == "__main__":
    # Ask the model to translate the first rendered PDF page and return the
    # questions it contains as a JSON list.
    translation_request = 'Translate the text in the attached image into English. exactly as it is line by line. in this json format [{"question_no": int, "question": str, "options": list[str], "answer": str}, ...]'
    try:
        first_page = image_list[0]  # first image produced from the PDF
        result = analyze_document_with_gemini(
            translation_request,
            BASE_SYSTEM_PROMPT,
            first_page,
            return_json=True,
        )
        print("JSON Result:", result)
    except Exception as e:
        print(f"Error: {e}")

You are a Translatr with knowledge in translating text from one language to another.Your task is to translate the provided text from the source language to englih while maintaining the original meaning and context. Ensure that the translation is accurate, fluent, and culturally appropriate.

Please format your response as valid JSON.
Translate the text in the attached image into English. exactly as it is line by line. in this json format [{"question_no": int, "question": str, "options": list[str], "answer": str}, ...]
JSON Result: [{'question_no': 1, 'question': 'What is the characteristic of acoelomata?', 'options': ['Absence of mesoderm', 'Presence of mesoderm', 'Coelom which is incompletely surrounded by mesoderm', 'Solid body without coelom around internal organs'], 'answer': 'Absence of mesoderm'}, {'question_no': 2, 'question': 'Salamander belongs to which class?', 'options': ['Pisces', 'Birds', 'Reptiles', 'Amphibian'], 'answer': 'Amphibian'}, {'question_no': 3, 'question': 'Fla

In [None]:
# Show the parsed result; the notebook renders the value of the cell's last
# expression. The disabled line below only applies to the fallback dict that
# analyze_document_with_gemini returns when the reply was not valid JSON.
# print(result['answer'])
result

[{'question_no': 1,
  'question': 'What is the characteristic of acoelomata?',
  'options': ['Absence of mesoderm',
   'Presence of mesoderm',
   'Coelom which is incompletely surrounded by mesoderm',
   'Solid body without coelom around internal organs'],
  'answer': 'Absence of mesoderm'},
 {'question_no': 2,
  'question': 'Salamander belongs to which class?',
  'options': ['Pisces', 'Birds', 'Reptiles', 'Amphibian'],
  'answer': 'Amphibian'},
 {'question_no': 3,
  'question': 'Flame cells are excretory structures for which of the following?',
  'options': ['Annelida', 'Coelenterata', 'Platyhelminthes', 'Echinodermata'],
  'answer': 'Platyhelminthes'},
 {'question_no': 4,
  'question': 'Phylum Porifera has been classified based on this:',
  'options': ['In branches', 'Symmetry', 'Skeleton', 'Reproduction'],
  'answer': 'Skeleton'},
 {'question_no': 5,
  'question': 'Spongocoel canal system develops due to whom?',
  'options': ['Choanocytes',
   'Gastrovascular system',
   'Reproducti