In [None]:
!pip install boto3

In [36]:
import google.generativeai as genai
from google.colab import userdata
import os
import cv2
import numpy as np
from PIL import Image
import io


# Read the Gemini API key from the Colab secret named 'API_KEY2' so the key
# itself never appears in the notebook source; it is passed to
# genai.configure() in the next cell.
API_KEY = userdata.get('API_KEY2')

In [38]:
# --- Configuration ---
# API_KEY is loaded from the Colab secret store in the previous cell.
genai.configure(api_key=API_KEY)
image_path = "/content/53e2c7ad-b789-4ab9-be1e-466a8cdaa1a0 (1).jpg"  # Path to your image

# --- Step 1: Load the image using OpenCV ---
# Note: OpenCV loads images in BGR format and returns None (no exception)
# when the file is missing or cannot be decoded.
original_image = cv2.imread(image_path)
if original_image is None:
    raise FileNotFoundError(f"Image file not found at path: {image_path}")

# --- Step 2: Convert to Grayscale ---
gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)

# --- Step 3: Binarization using Adaptive Thresholding ---
# With THRESH_BINARY, pixels darker than their local neighbourhood (the ink)
# become 0 and everything else (the page) becomes 255.
binarized = cv2.adaptiveThreshold(
    gray,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY,
    blockSize=11,
    C=2
)

# --- Step 4: Noise Removal using Median Blur ---
# Blurring helps remove small artifacts.
denoised = cv2.medianBlur(binarized, ksize=3)

# --- Step 5: Skew Correction ---
# Fit the minimum-area rectangle around the INK pixels (value 0). Fitting it
# around the non-zero pixels would measure the page background, whose bounding
# rectangle is essentially the image frame and carries no skew information.
ink_rows, ink_cols = np.where(denoised == 0)
if ink_rows.size:
    # minAreaRect expects (x, y) points, i.e. (column, row).
    coords = np.column_stack((ink_cols, ink_rows)).astype(np.float32)
    angle = cv2.minAreaRect(coords)[-1]
    # The reported angle range differs between OpenCV releases ([-90, 0) in
    # older versions, (0, 90] in newer ones). A rectangle is unchanged by a
    # 90-degree turn, so fold the value into [-45, 45) to get the smallest
    # correction regardless of version.
    angle = float((angle + 45.0) % 90.0 - 45.0)
else:
    # Nothing to measure (blank image): leave the image unrotated.
    coords = np.empty((0, 2), dtype=np.float32)
    angle = 0.0

# Get image center and compute rotation matrix. The rectangle angle is
# clockwise and getRotationMatrix2D treats positive angles as
# counter-clockwise, so passing the angle unchanged undoes the tilt.
(h, w) = denoised.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(denoised, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

# --- Step 6: Contrast Enhancement using Histogram Equalization ---
enhanced = cv2.equalizeHist(rotated)

# --- Step 7: Morphological Operations (Dilation/Erosion) ---
# NOTE: a 1x1 structuring element makes the closing an identity operation;
# use a larger kernel, e.g. (2, 2), if gaps in strokes need to be bridged.
kernel = np.ones((1, 1), np.uint8)
processed = cv2.morphologyEx(enhanced, cv2.MORPH_CLOSE, kernel)

# --- Step 8: Convert the preprocessed image to a format for Gemini API ---
# Convert the OpenCV image (numpy array) to a PIL Image.
processed_pil = Image.fromarray(processed)

# Save the PIL image to a bytes buffer.
buffered = io.BytesIO()
# Use JPEG format; you can adjust this as needed (keep mime_type below in sync).
processed_pil.save(buffered, format="JPEG")
img_bytes = buffered.getvalue()

new_prompt = (
    "You are an advanced text extraction and summarization engine with a strategic mindset. "
    "I will provide you with an image containing a news article. Your task is to first extract all the text from the image exactly as it appears, preserving line breaks, punctuation, and spacing. Then create a concise and precise summary that captures the core information of the news. Next, compose an intense and motivational military dialogue that amplifies the summary and inspires determination, valor and strategic resolve in a manner akin to an elite military briefing. "
    "The final output must be a single coherent paragraph that seamlessly integrates the summarized news with the motivational military dialogue. Do not include the full extracted text and do not add any extra formatting characters such as numbers or asterisks; only use regular characters, punctuation and spaces."
)


# --- Step 9: Prepare the Gemini API Request ---
contents = [
    {
        "parts": [
            new_prompt,
            {"mime_type": "image/jpeg", "data": img_bytes},
        ]
    },
]

# --- Step 10: Generate Content (Extract Text via OCR) ---
model = genai.GenerativeModel('gemini-1.5-flash')
# `response` is kept under this name because the text-to-speech cell reads
# `response.text`.
response = model.generate_content(contents)

# --- Step 11: Handle the Response ---
if response.prompt_feedback and response.prompt_feedback.block_reason:
    print(f"Error: Prompt was blocked due to: {response.prompt_feedback.block_reason}")
    print(f"Safety ratings: {response.prompt_feedback.safety_ratings}")
else:
    try:
        # The `.text` accessor raises ValueError when no candidate contains a
        # text part (for example when the reply itself was filtered).
        generated_text = response.text
    except ValueError as exc:
        print(f"Error: Gemini returned no usable text: {exc}")
    else:
        print("Extracted Text (OCR):\n")
        print(generated_text)


Extracted Text (OCR):

The Indian government extended two crop insurance schemes until FY26, increasing their allocation to ₹69,515 crore.  An additional ₹3,850 crore subsidy for DAP fertilizer was approved to maintain retail prices.  Prime Minister Modi emphasized these decisions' dedication to enhancing farmers' prosperity.  Soldiers, the fate of our farmers rests on our shoulders! This is not a drill; this is a fight for the economic well-being of our nation. We will secure the supply chain and ensure affordable fertilizer for every farmer. We will adapt to global market volatility, we will overcome logistical challenges, and we will deliver!  Our resolve will ensure the prosperity of our farmers, bolstering our nation's strength and securing our future. Our mission is clear, our commitment unwavering. Execute the plan flawlessly!



In [39]:
import boto3

from google.colab import userdata


from contextlib import closing

# Set AWS credentials (read from Colab secrets, never hardcoded).
aws_access_key = userdata.get('aws1')
aws_secret_key = userdata.get('aws2')
aws_region = "us-east-1"  # Change region if needed
output_path = "RAH.mp3"   # Single source of truth for the file name

# Initialize Polly client
polly = boto3.client(
    "polly",
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
    region_name=aws_region
)

# Text produced by the Gemini cell. The Polly result is stored under its own
# name below so `response` keeps pointing at the Gemini reply and this cell
# can be re-run without re-running the Gemini request.
narration_text = response.text

# Plain text example (Neural TTS)
# NOTE: SynthesizeSpeech limits the input length; very long text must be
# split into several requests.
polly_response = polly.synthesize_speech(
    Engine="neural",              # 'neural' or 'standard' are valid
    LanguageCode="en-US",         # optional but good to specify
    VoiceId="Gregory",            # or any other supported neural voice
    OutputFormat="mp3",
    Text=narration_text
)

# Save the audio file. AudioStream is a streaming body backed by the HTTP
# connection, so close it once it has been read.
with closing(polly_response["AudioStream"]) as audio_stream, open(output_path, "wb") as file:
    file.write(audio_stream.read())

# Report the path that was actually written.
print(f"Audio saved as {output_path}")


Audio saved as Gregory.mp3
