In [None]:
#  Install necessary packages
!pip install -q google-generativeai python-magic
!sudo apt-get install -y libmagic1
!pip install openpyxl
!pip install PyMuPDF


In [None]:
# Set your API Key securely
import os
from getpass import getpass

# Get a Gemini API key from https://makersuite.google.com/app/apikey
# Never paste the key into this cell: a saved or shared notebook would leak it.
# Reuse a key that is already present in the environment; otherwise prompt for it
# without echoing the input.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "").strip()
if not GOOGLE_API_KEY:
    GOOGLE_API_KEY = getpass("Enter your Gemini API key: ").strip()
if not GOOGLE_API_KEY:
    # Fail here rather than later with an opaque authentication error.
    raise ValueError("A Gemini API key is required to run this notebook.")

# Later cells read the key back from the environment.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY


In [5]:
# Authenticate with Gemini and import required libraries
import google.generativeai as genai
from google.generativeai.types import content_types
import mimetypes
from pathlib import Path

# A KeyError here means the API-key cell was not run in this kernel.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Load the multimodal Gemini model. Use the same model name as the generation and
# summary cells further down so the whole notebook talks to one model.
model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")




In [None]:
from google.colab import files
from pathlib import Path
import mimetypes
import google.generativeai as genai
import time # For checking file upload status

# Configure your Gemini API key (replace with your actual API key or environment variable)
# genai.configure(api_key="YOUR_API_KEY")

#  Upload your document file (PDF, JPG, PNG, etc.)
uploaded = files.upload()
if not uploaded:
    # An empty result (e.g. the dialog was dismissed) would otherwise surface as
    # a bare StopIteration from next(iter(...)) below.
    raise ValueError("No file was uploaded; re-run this cell and select a file.")
file_name = next(iter(uploaded))  # only the first uploaded file is used
file_path = Path(file_name)

#  Determine MIME type (None when the extension is not recognised)
mime_type, _ = mimetypes.guess_type(file_path)

# Upload the file to Gemini and get its URI
print(f"Uploading {file_name} as {mime_type}...")
try:
    # The display name is only a human-readable label. To keep repeated uploads
    # distinguishable, a timestamp could be appended, e.g.
    # f"{file_path.stem}-{int(time.time())}{file_path.suffix}"
    uploaded_file = genai.upload_file(
        path=str(file_path),
        display_name=f"{file_path.stem}_uploaded",
        mime_type=mime_type,
    )
    print(f"File uploaded: {uploaded_file.uri}")

    # Poll until the service has finished processing the file.
    while uploaded_file.state.name == "PROCESSING":
        print("Waiting for file to be processed...", flush=True)
        time.sleep(5)
        uploaded_file = genai.get_file(uploaded_file.name)

    if uploaded_file.state.name == "FAILED":
        raise ValueError(
            f"File processing failed for {file_name} "
            f"(state: {uploaded_file.state.name})"
        )

    # The uploaded file object can be placed directly in a prompt's parts list,
    # e.g. model.generate_content([file_part, "What is in this document?"])
    file_part = uploaded_file
    print("File ready for use in prompt.")

except Exception as e:
    print(f"An error occurred during file upload or processing: {e}")
    # Re-raise so later cells do not silently run against a missing or stale
    # `uploaded_file` left over from an earlier execution.
    raise



In [None]:
import google.generativeai as genai
# Prerequisite: `uploaded_file` must already exist from a successful run of the
# upload cell.

# Step 1: choose a model that accepts multimodal input.
#   'gemini-1.5-flash-latest' - generally good for speed and cost-effectiveness
#   'gemini-1.5-pro-latest'   - more advanced reasoning
model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')

# Step 2: build the prompt as a list of parts (file reference + instruction text).
# Alternative instructions, depending on what was uploaded:
#   image: "Describe this image in detail."
#   PDF:   "Extract the main headings and their content from this PDF."
#   video: "What are the main events in this video and at what timestamps?"
summary_request = "What is this document about? Summarize its key points in bullet points."
prompt_parts = [uploaded_file, summary_request]

# Step 3: send the request and show the answer.
print("\nGenerating content with the Gemini model...")
try:
    response = model.generate_content(prompt_parts)

    print("\n--- Gemini's Response ---")
    print(response.text)

    # Safety ratings are also available on the response if needed:
    # print("\n--- Safety Ratings ---")
    # for rating in response.prompt_feedback.safety_ratings:
    #     print(f"Category: {rating.category.name}, Probability: {rating.probability.name}")

except Exception as e:
    print(f"An error occurred during content generation: {e}")

# Optional cleanup. Files uploaded via genai.upload_file are automatically deleted
# after 48 hours, but they can be removed right away once they are no longer needed:
# print(f"\nDeleting uploaded file: {uploaded_file.display_name}...")
# genai.delete_file(uploaded_file.name)
# print("File deleted.")



In [13]:
#  Function to Extract Text from PDF
import fitz  # PyMuPDF

def extract_text_from_pdf(file_path):
    """Return the text of every page of a document, concatenated in page order.

    Args:
        file_path: Location of the document; passed unchanged to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output joined with no
        separator. Empty when the document has no pages or no extractable text.
    """
    # The context manager closes the document even if a page fails to extract,
    # so the underlying file handle is not leaked.
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)


In [14]:
def generate_summary_with_gemini(text):
    """Placeholder summarizer: echo the first 1000 characters of ``text``.

    No model is called. Note that a later cell in this notebook defines a
    function with the same name, which replaces this one once that cell runs.

    Returns:
        ``"Summary: "`` followed by the leading 1000 characters and ``"..."``.
    """
    print(" Generating summary using dummy Gemini")
    preview = text[:1000]
    return "".join(("Summary: ", preview, "..."))

In [None]:
#  Full Process Execution
from datetime import datetime
import pandas as pd

# Gemini summary function
def generate_summary_with_gemini(text, max_chars=3000, model_name='gemini-1.5-flash-latest'):
    """Ask Gemini for a bullet-point summary of ``text``.

    Args:
        text: Document text to summarize. ``None``, empty, or whitespace-only
            input is answered locally without calling the model.
        max_chars: Only the first ``max_chars`` characters are sent to the
            model. Pass ``None`` to send the whole text.
        model_name: Name handed to ``genai.GenerativeModel``.

    Returns:
        The stripped response text. If the request raises, the error is
        returned as a string of the form ``"Error from Gemini: ..."`` instead
        of propagating, so the caller always receives a string.
    """
    # Guard against None as well as blank text; None used to crash on .strip().
    if not text or not text.strip():
        return "No text extracted from document to summarize."
    try:
        print(" Generating summary from Gemini...")
        model = genai.GenerativeModel(model_name)
        prompt = [
            text[:max_chars],  # keep the request size bounded
            "Please summarize the key points of this document in bullet points."
        ]
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        # Best effort: report the failure in place of the summary.
        return f"Error from Gemini: {e}"

#  Pipeline: extract the text, summarize it, and show the result as a one-row table
try:
    extracted_text = extract_text_from_pdf(file_path)
    summary = generate_summary_with_gemini(extracted_text)
    upload_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # One single-element list per column, so the DataFrame has exactly one row.
    data = {
        column: [value]
        for column, value in (
            ("File Name", file_path.name),
            ("Upload Time", upload_time),
            ("Extracted Text (first 1000 chars)", extracted_text[:1000]),
            ("Gemini Summary", summary),
        )
    }

    df = pd.DataFrame(data)
    print(" Final Table:")
    display(df)

except Exception as err:
    # Report the failure in the cell output instead of raising.
    print(" Error occurred:", err)

