In [None]:
import os
import io
from google.cloud import vision
from PIL import Image, ImageEnhance, ImageFilter
import numpy as np
from vertexai.preview.generative_models import GenerativeModel, Part
from time import time

# Configure Google Cloud authentication by pointing the
# GOOGLE_APPLICATION_CREDENTIALS environment variable at a key file.
# NOTE(review): the value below looks like a placeholder — replace it with the
# real path of the service-account JSON file before running.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'gcp.json檔'

def detect_text_orientation(path):
    """Detect the text in an image file and return its rotation angle.

    The file is sent to Cloud Vision document text detection and the angle of
    the first detected block's bounding box is measured with
    ``get_rotation_angle``.

    Args:
        path: Path of the image file to analyse.

    Returns:
        The angle in degrees computed by ``get_rotation_angle`` for the first
        text block, or ``0.0`` when the response contains no text block (there
        is nothing to measure, so no rotation is requested).

    Raises:
        RuntimeError: If the Vision API response carries an error message.
    """
    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.document_text_detection(image=image)

    # Per-image failures are reported inside the response rather than raised,
    # so surface them explicitly instead of failing later with an IndexError.
    if response.error.message:
        raise RuntimeError(f"Vision API error: {response.error.message}")

    pages = response.full_text_annotation.pages
    if not pages or not pages[0].blocks:
        # No text was detected: leave the image unrotated.
        return 0.0

    # Assume the first block carries the main text orientation.
    vertices = pages[0].blocks[0].bounding_box.vertices
    return get_rotation_angle(vertices)

def get_rotation_angle(vertices):
    """Return the angle, in degrees, of the edge from vertex 0 to vertex 1.

    Args:
        vertices: Indexable sequence whose first two items expose numeric
            ``x`` and ``y`` attributes.

    Returns:
        ``atan2(dy, dx)`` of the vector ``vertices[0] -> vertices[1]``,
        converted to degrees.
    """
    start_point, end_point = vertices[0], vertices[1]
    run = end_point.x - start_point.x
    rise = end_point.y - start_point.y
    return np.degrees(np.arctan2(rise, run))

def rotate_crop_and_enlarge_image(path, angle, scale_factor=4):
    """Deskew an image, keep its left half, enlarge it and enhance it.

    Args:
        path: Path of the image file to open.
        angle: Tilt of the text in degrees, measured in image coordinates
            (y grows downward), e.g. the value from ``get_rotation_angle``.
        scale_factor: Multiplier applied to both dimensions of the cropped
            image before enhancement.

    Returns:
        The enhanced PIL image produced by ``enhance_image``.
    """
    # The context manager closes the file handle deterministically; rotate()
    # returns a separate copy, so the result stays usable after the block.
    with Image.open(path) as image:
        # With y pointing down, a positive angle means the text is tilted
        # clockwise on screen. Image.rotate() turns counter-clockwise for
        # positive values, so rotating by +angle brings the text back to
        # horizontal (rotating by -angle would double the tilt).
        rotated_image = image.rotate(angle, expand=True)

    # Keep the full-height left half of the rotated image.
    width, height = rotated_image.size
    cropped_image = rotated_image.crop((0, 0, width // 2, height))

    # Debug output
    print(f"Original cropped size: {cropped_image.size}")

    # Enlarge the crop
    new_size = (
        int(cropped_image.width * scale_factor),
        int(cropped_image.height * scale_factor),
    )
    enlarged_image = cropped_image.resize(new_size, Image.LANCZOS)

    # Debug output
    print(f"New size: {enlarged_image.size}")

    # Enhance sharpness, contrast and brightness
    return enhance_image(enlarged_image)

def enhance_image(image):
    """Return the image after boosting sharpness, contrast and brightness.

    The three adjustments are applied in that order, each one operating on the
    output of the previous step.

    Args:
        image: PIL image to enhance.

    Returns:
        The enhanced PIL image.
    """
    # (enhancer class, factor) pairs, applied top to bottom.
    adjustments = (
        (ImageEnhance.Sharpness, 2.0),
        (ImageEnhance.Contrast, 1.5),
        (ImageEnhance.Brightness, 1.2),
    )
    for enhancer_cls, factor in adjustments:
        image = enhancer_cls(image).enhance(factor)
    return image

def process_image(image_path):
    """Detect the text angle of an image and return the processed image.

    Args:
        image_path: Path of the image file to process.

    Returns:
        The image returned by ``rotate_crop_and_enlarge_image``; it is handed
        back directly and is not saved by this function.
    """
    text_angle = detect_text_orientation(image_path)
    return rotate_crop_and_enlarge_image(image_path, text_angle)

# Path of the image to read.
# NOTE(review): the value below looks like a placeholder — set the real path.
image_path = '讀取圖片路徑'

# Process the image
processed_image = process_image(image_path)

# JPEG cannot store every PIL mode (e.g. RGBA from a PNG with transparency
# makes save() raise OSError), so normalise unsupported modes to RGB first.
# Modes the JPEG writer already accepts are left untouched.
if processed_image.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
    processed_image = processed_image.convert('RGB')

# Encode the image as JPEG bytes held in memory
image_byte_array = io.BytesIO()
processed_image.save(image_byte_array, format='JPEG')
image_data = image_byte_array.getvalue()

# Gemini model section
model = GenerativeModel("gemini-1.5-pro-preview-0409")  # model name may vary

# Sampling settings passed with the request.
generation_config = dict(
    max_output_tokens=1000,
    temperature=0,
    # Restrict candidate tokens to the top_k most probable ones.
    top_k=1,
    # Restrict candidate tokens to those whose cumulative probability
    # (starting from the most probable) reaches top_p.
    top_p=0.75,
)

# Instruction text sent alongside the image.
prompt = "只要辨識圖片數值，遵循規則: 1.去雜質化 2.檢驗項目換行並對齊 3.不要取範圍值"

# Wrap the in-memory JPEG bytes as a request part.
data = Part.from_data(data=image_data, mime_type='image/jpeg')

# Time the request.
start = time()
r = model.generate_content([prompt, data], generation_config=generation_config)

print(f'{time()-start:.3f} secs elapsed')
print(r.text.strip())
print(r.usage_metadata)
