# **Install Library**

In [1]:
from IPython.display import clear_output


In [2]:
!pip install torch
!pip install tiktoken
!pip install verovio
!pip install transformers
!pip install accelerate
!pip install python-docx

clear_output()

# **Import Library**

In [3]:
import cv2
import re
from google.colab.patches import cv2_imshow
import os
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
from docx import Document

# **Load Model and Run OCR**

In [None]:
from transformers import AutoModel, AutoTokenizer

# Load the tokenizer and model of the 'ucaslcl/GOT-OCR2_0' checkpoint.
# trust_remote_code=True lets code shipped with that repository run on this
# machine, so only use it with a checkpoint you trust.
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
# device_map='cuda' asks for the weights to be placed on the GPU; the tokenizer's
# EOS token id is reused as the padding token id.
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
# Put the model in evaluation mode; .cuda() requests GPU placement once more,
# so this cell needs a CUDA-capable runtime.
model = model.eval().cuda()


# Path of the image to run OCR on; replace it with your own test image.
image_file = '/content/ktp.jpg'

# Formatted-text OCR: this is the call that actually runs (ocr_type='format').
res = model.chat(tokenizer, image_file, ocr_type='format')

# Plain-text OCR (presumably ocr_type='ocr', as in the variants below -- confirm against the model card):
# res = model.chat(tokenizer, image_file, ocr_type='ocr')

# fine-grained OCR (restricted by a box or a color; fill in the empty argument):
# res = model.chat(tokenizer, image_file, ocr_type='ocr', ocr_box='')
# res = model.chat(tokenizer, image_file, ocr_type='format', ocr_box='')
# res = model.chat(tokenizer, image_file, ocr_type='ocr', ocr_color='')
# res = model.chat(tokenizer, image_file, ocr_type='format', ocr_color='')

# multi-crop OCR:
# res = model.chat_crop(tokenizer, image_file, ocr_type='ocr')
# res = model.chat_crop(tokenizer, image_file, ocr_type='format')

# render the formatted OCR results to an HTML file:
# res = model.chat(tokenizer, image_file, ocr_type='format', render=True, save_render_file = './demo.html')

# Show the OCR result returned by the active call above.
print(res)


# **Tri-Fold Brochure**

In [None]:
import cv2
import os
from docx import Document
from google.colab import files


# Folder that receives the cropped brochure panels; created on first use and
# left untouched when it already exists.
output_dir = '/content/split_images'
os.makedirs(output_dir, exist_ok=True)

def split_image(image_path):
    """Load an image and cut it into three side-by-side vertical panels.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A list of three array slices (left, middle, right), each spanning the
        full image height. The first two are ``width // 3`` columns wide; the
        last one also receives the one or two leftover columns when the width
        is not a multiple of three, so no pixels are dropped.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None``, which it does instead
            of raising when the file is missing or cannot be decoded.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not read image: {image_path!r}")

    width = img.shape[1]
    third_width = width // 3
    # Column boundaries; the last one is the true width, not 3 * third_width.
    bounds = [0, third_width, 2 * third_width, width]
    return [img[:, start:stop] for start, stop in zip(bounds, bounds[1:])]

def save_image_parts(img_parts, output_dir='/content/split_images'):
    """Write each panel to ``<output_dir>/part_<n>.jpg`` and return the paths.

    Args:
        img_parts: Iterable of image arrays accepted by ``cv2.imwrite``.
        output_dir: Destination folder, created if it does not exist. The
            default is the folder this notebook has always written to.

    Returns:
        List of the written file paths in the order of ``img_parts``; the
        numbering in the file names starts at 1.

    Raises:
        OSError: If ``cv2.imwrite`` reports failure for a part (it signals
            a failed write through a false return value).
    """
    os.makedirs(output_dir, exist_ok=True)
    part_paths = []
    for number, img_part in enumerate(img_parts, start=1):
        part_path = os.path.join(output_dir, f'part_{number}.jpg')
        if not cv2.imwrite(part_path, img_part):
            raise OSError(f"Failed to write image part to {part_path}")
        part_paths.append(part_path)
    return part_paths

def perform_ocr_on_parts(part_paths):
    """Run formatted OCR on every image path and concatenate the results.

    Uses the notebook-level ``model`` and ``tokenizer``. The same
    ``save_render_file`` path ('./result.html') is passed for every part, so
    later renders presumably overwrite earlier ones.

    Args:
        part_paths: Iterable of image file paths.

    Returns:
        The OCR strings in input order, each followed by two newline
        characters; an empty string when ``part_paths`` is empty.
    """
    chunks = []
    for path in part_paths:
        text = model.chat(
            tokenizer,
            path,
            ocr_type='format',
            render=True,
            save_render_file='./result.html',
        )
        chunks.append(text + "\n\n")
    return "".join(chunks)



def process_brochure(image_path):
    """Split a brochure image into panels, save them, and OCR each panel.

    Args:
        image_path: Path of the brochure image.

    Returns:
        Whatever ``perform_ocr_on_parts`` returns for the saved panel files.
    """
    saved_paths = save_image_parts(split_image(image_path))
    return perform_ocr_on_parts(saved_paths)

def save_to_txt(file_path, content):
    """Write ``content`` to a text file, replacing any existing file.

    The file is always encoded as UTF-8 so non-ASCII OCR output is stored the
    same way regardless of the runtime's default encoding.

    Args:
        file_path: Destination path.
        content: Text to write.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Results have been saved to {file_path}")

def save_to_word(file_path, content):
    """Save ``content`` as a Word document holding a single paragraph.

    Args:
        file_path: Destination path of the .docx file.
        content: Text passed to ``add_paragraph`` in one piece.
    """
    word_doc = Document()
    word_doc.add_paragraph(content)
    word_doc.save(file_path)
    print(f"Results have been saved to {file_path}")


# Ask the user for the brochure image; the result is a mapping keyed by the
# uploaded file names.
uploaded = files.upload()
if not uploaded:
    # An empty result (e.g. the upload dialog was cancelled) would otherwise
    # surface as an unexplained IndexError on the lookup below.
    raise RuntimeError("No file was uploaded; re-run this cell and choose an image.")
# Only the first uploaded file is processed.
image_file = next(iter(uploaded))

# Split the brochure into three panels and OCR each of them.
ocr_result = process_brochure(image_file)

txt_file_path = '/content/ocr_result.txt'
save_to_txt(txt_file_path, ocr_result)

word_file_path = '/content/ocr_result.docx'
save_to_word(word_file_path, ocr_result)

print(ocr_result)


# **Single Brochure**

In [None]:
from google.colab import files
# Ask the user for the image; the result is a mapping keyed by the uploaded
# file names.
uploaded = files.upload()
if not uploaded:
    # An empty result (e.g. the upload dialog was cancelled) would otherwise
    # surface as an unexplained IndexError on the lookup below.
    raise RuntimeError("No file was uploaded; re-run this cell and choose an image.")
# Only the first uploaded file is processed.
image_file = next(iter(uploaded))


def perform_ocr(image_file):
    """Run formatted OCR with rendering on a single image.

    Uses the notebook-level ``model`` and ``tokenizer`` and asks for the render
    to be saved to './result.html'.

    Args:
        image_file: Path of the image to process.

    Returns:
        The value returned by ``model.chat``.
    """
    ocr_options = {
        'ocr_type': 'format',
        'render': True,
        'save_render_file': './result.html',
    }
    return model.chat(tokenizer, image_file, **ocr_options)


ocr_result = perform_ocr(image_file)

# Text copy. UTF-8 is set explicitly so non-ASCII OCR output is written the
# same way regardless of the runtime's default encoding.
with open('/content/ocr_result.txt', 'w', encoding='utf-8') as f:
    f.write(ocr_result)

# Word copy: the whole OCR text is added as a single paragraph.
doc = Document()
doc.add_paragraph(ocr_result)
doc.save('/content/ocr_result.docx')

print(ocr_result)


In [None]:
# Folder that receives the cropped brochure panels; created on first use and
# left untouched when it already exists.
output_dir = '/content/split_images'
os.makedirs(output_dir, exist_ok=True)

def split_image(image_path):
    """Load an image and cut it into three side-by-side vertical panels.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A list of three array slices (left, middle, right), each spanning the
        full image height. The first two are ``width // 3`` columns wide; the
        last one also receives the one or two leftover columns when the width
        is not a multiple of three, so no pixels are dropped.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None``, which it does instead
            of raising when the file is missing or cannot be decoded.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not read image: {image_path!r}")

    width = img.shape[1]
    third_width = width // 3
    # Column boundaries; the last one is the true width, not 3 * third_width.
    bounds = [0, third_width, 2 * third_width, width]
    return [img[:, start:stop] for start, stop in zip(bounds, bounds[1:])]

def save_image_parts(img_parts, output_dir='/content/split_images'):
    """Write each panel to ``<output_dir>/part_<n>.jpg`` and return the paths.

    Args:
        img_parts: Iterable of image arrays accepted by ``cv2.imwrite``.
        output_dir: Destination folder, created if it does not exist. The
            default is the folder this notebook has always written to.

    Returns:
        List of the written file paths in the order of ``img_parts``; the
        numbering in the file names starts at 1.

    Raises:
        OSError: If ``cv2.imwrite`` reports failure for a part (it signals
            a failed write through a false return value).
    """
    os.makedirs(output_dir, exist_ok=True)
    part_paths = []
    for number, img_part in enumerate(img_parts, start=1):
        part_path = os.path.join(output_dir, f'part_{number}.jpg')
        if not cv2.imwrite(part_path, img_part):
            raise OSError(f"Failed to write image part to {part_path}")
        part_paths.append(part_path)
    return part_paths

def perform_ocr_on_parts(part_paths, output_dir='/content'):
    """OCR each image part with rendering enabled and merge the rendered HTML.

    For every path, ``model.chat`` is called with ``render=True`` and a
    per-part ``save_render_file`` of ``<output_dir>/result_<basename>.html``.
    If that file exists afterwards, the inner content of its ``<body>`` element
    is appended to a combined document, followed by ``<br><br>``. The combined
    document is written to ``<output_dir>/combined_result.html``.

    Uses the notebook-level ``model`` and ``tokenizer``.

    Args:
        part_paths: Iterable of image file paths.
        output_dir: Folder for the per-part and combined HTML files. Defaults
            to '/content', the location used before this parameter existed.

    Returns:
        The combined HTML document as a string.
    """
    sections = []
    for part_path in part_paths:
        # Built once and used for both the render target and the read-back.
        html_path = os.path.join(output_dir, f'result_{os.path.basename(part_path)}.html')

        # Part file names repeat between runs; remove a leftover render so a
        # failed render cannot silently reuse the previous brochure's output.
        if os.path.exists(html_path):
            os.remove(html_path)

        # The return value is not needed here; only the rendered file is read.
        model.chat(tokenizer, part_path, ocr_type='format', render=True, save_render_file=html_path)
        print("OCR Result for part:", part_path)

        if not os.path.exists(html_path):
            print(f"Warning: no rendered HTML found for {part_path}; skipping.")
            continue

        # NOTE(review): assumes the renderer writes UTF-8 (the default text
        # encoding on Colab) -- confirm if this runs elsewhere.
        with open(html_path, 'r', encoding='utf-8') as html_file:
            html_content = html_file.read()

        body_content_match = re.search(r"<body[^>]*>(.*?)</body>", html_content, re.DOTALL)
        if body_content_match is None:
            print(f"Warning: no <body> element in {html_path}; skipping.")
            continue
        sections.append(body_content_match.group(1).strip() + "<br><br>")

    combined_html_content = (
        "<html><head><title>Combined OCR Result</title></head><body>"
        + "".join(sections)
        + "</body></html>"
    )

    with open(os.path.join(output_dir, 'combined_result.html'), 'w', encoding='utf-8') as f:
        f.write(combined_html_content)

    return combined_html_content

def process_brochure(image_path):
    """Split a brochure image into panels, save them, and OCR each panel.

    Args:
        image_path: Path of the brochure image.

    Returns:
        Whatever ``perform_ocr_on_parts`` returns for the saved panel files.
    """
    saved_paths = save_image_parts(split_image(image_path))
    return perform_ocr_on_parts(saved_paths)

def save_to_txt(file_path, content):
    """Write ``content`` to a text file, replacing any existing file.

    The file is always encoded as UTF-8 so non-ASCII OCR output is stored the
    same way regardless of the runtime's default encoding.

    Args:
        file_path: Destination path.
        content: Text to write.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Results have been saved to {file_path}")

def save_to_word(file_path, content):
    """Save ``content`` as a Word document holding a single paragraph.

    Args:
        file_path: Destination path of the .docx file.
        content: Text passed to ``add_paragraph`` in one piece.
    """
    word_doc = Document()
    word_doc.add_paragraph(content)
    word_doc.save(file_path)
    print(f"Results have been saved to {file_path}")

# Ask the user for the brochure image; the result is a mapping keyed by the
# uploaded file names.
uploaded = files.upload()
if not uploaded:
    # An empty result (e.g. the upload dialog was cancelled) would otherwise
    # surface as an unexplained IndexError on the lookup below.
    raise RuntimeError("No file was uploaded; re-run this cell and choose an image.")
# Only the first uploaded file is processed.
image_file = next(iter(uploaded))

# NOTE(review): in this cell process_brochure() returns the combined HTML
# markup, so the .txt and .docx files below receive raw HTML tags -- confirm
# that this is intended.
ocr_result = process_brochure(image_file)
txt_file_path = '/content/ocr_result.txt'
save_to_txt(txt_file_path, ocr_result)

word_file_path = '/content/ocr_result.docx'
save_to_word(word_file_path, ocr_result)

print(ocr_result)

# Offer the merged HTML render as a browser download.
files.download('/content/combined_result.html')
