### Text Extraction code for a single textbook page (a png)

In [None]:

import surya
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os

def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(0, len(out_preds['img6'][0]['text_lines'])):
        #     x_axis = out_preds['img6'][0]['text_lines'][idx]['polygon'][0][0]
            
        #     if prev != -1:
        #         if x_axis > (prev + 5):
        #             indices.append(idx)
            
        #     prev = x_axis
            
        # str = ''
        # for idx in range(0, len(out_preds['img6'][0]['text_lines'])):
        #     if idx not in indices:
        #         str = str + out_preds['img6'][0]['text_lines'][idx]['text'] + ' '
        # print(str)
        


if __name__ == "__main__":
    input_path = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/FED2/page_4.png'
    results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1'
    main(input_path, results_dir)

### Text Extraction code for Grade 1 federal book (all pages)

In [1]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/FED1'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:24<00:00, 24.96s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:16<00:00, 16.05s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.82s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.23s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.70s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:33<00:00, 33.85s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.42s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:31<00:00, 31.87s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.73s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:47<00:00, 47.25s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_5
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.77s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [01:59<00:00, 119.20s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_7
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.41s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:03<00:00, 63.21s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_6
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.92s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [04:09<00:00, 249.33s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_10
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.14s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:08<00:00, 128.23s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_11
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.99s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:19<00:00, 79.10s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_13
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.92s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:33<00:00, 93.82s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_12
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.67s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:33<00:00, 93.34s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_14
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.55s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:54<00:00, 54.29s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_8
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.59s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:07<00:00, 127.54s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED1/page_9


### Text Extraction code for Grade 2 federal book (all pages)

In [2]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/FED2'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:24<00:00, 24.60s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:33<00:00, 33.62s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.49s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:43<00:00, 43.33s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.70s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [01:41<00:00, 101.68s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.83s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:50<00:00, 170.65s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.84s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [05:03<00:00, 303.55s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_5
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.05s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:10<00:00, 70.93s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_7
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.44s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:53<00:00, 53.20s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_6
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.80s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.61s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_10
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.74s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:26<00:00, 146.48s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_11
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.43s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:27<00:00, 87.83s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_13
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.31s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:36<00:00, 156.24s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_12
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.38s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:44<00:00, 44.55s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_8
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.90s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:24<00:00, 24.35s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED2/page_9


### Text Extraction code for Grade 3 federal book (all pages)

In [3]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/FED3'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.23s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:46<00:00, 226.78s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.65s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:17<00:00, 77.31s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.24s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:41<00:00, 41.54s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.90s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:54<00:00, 174.03s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.43s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:36<00:00, 36.96s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_5
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.19s/it]
Recognizing Text: 100%|█████████████████████████| 1/1 [35:27<00:00, 2127.24s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_7
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.82s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [04:23<00:00, 263.42s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_6
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.51s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:04<00:00, 64.74s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_10
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.49s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:34<00:00, 34.82s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_11
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.76s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [10:17<00:00, 617.91s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_12
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.77s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [09:53<00:00, 593.01s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_8
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.39s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:07<00:00, 67.18s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/FED3/page_9


### Text Extraction code for Grade1 PTB book (all pages)

In [4]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/PTB1'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.18s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:38<00:00, 98.61s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.12s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:28<00:00, 88.93s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.71s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.72s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.03s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:32<00:00, 152.13s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.35s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:15<00:00, 15.49s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_5
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.05s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:50<00:00, 50.57s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_7
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.11s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:04<00:00, 64.74s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_6
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.76s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:44<00:00, 44.84s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_19
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.73s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:07<00:00, 127.43s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_25
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.54s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [04:28<00:00, 268.38s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_31
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.98s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:38<00:00, 218.10s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_30
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.89s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:00<00:00, 120.65s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_24
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.74s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:55<00:00, 55.42s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_18
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.62s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:18<00:00, 78.54s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_32
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.35s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.81s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_26
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.41s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:23<00:00, 144.00s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_27
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.22s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:30<00:00, 150.87s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_33
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.20s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:54<00:00, 54.74s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_23
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.84s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:31<00:00, 211.16s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_22
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.90s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.07s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_20
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:36<00:00, 36.45s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:39<00:00, 39.72s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_34
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:39<00:00, 39.21s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:58<00:00, 178.42s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_21
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:36<00:00, 36.06s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:38<00:00, 38.83s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_10
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:39<00:00, 39.42s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:20<00:00, 20.46s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_11
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:43<00:00, 43.41s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:29<00:00, 89.30s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_13
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:33<00:00, 33.60s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:39<00:00, 39.30s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_12
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:31<00:00, 31.54s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:29<00:00, 89.94s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_16
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.58s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:59<00:00, 59.71s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_17
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.53s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:14<00:00, 14.42s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_15
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.55s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:24<00:00, 24.85s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_29
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:31<00:00, 32.00s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:50<00:00, 170.88s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_28
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.07s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:08<00:00, 68.35s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_14
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.67s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:14<00:00, 194.15s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_8
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.58s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:47<00:00, 47.48s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB1/page_9


In [2]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/PTB2'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.55s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:13<00:00, 13.07s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.73s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.88s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.48s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:13<00:00, 73.25s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.40s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:36<00:00, 96.19s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.57s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:46<00:00, 46.45s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_5
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.63s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:04<00:00, 64.51s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_7
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.05s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:29<00:00, 89.51s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_6
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.15s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:24<00:00, 84.26s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_19
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.26s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:07<00:00, 187.97s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_25
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.96s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [01:46<00:00, 106.65s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_31
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.08s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:02<00:00, 62.35s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_30
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.35s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:24<00:00, 84.22s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_24
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.11s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:34<00:00, 214.96s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_18
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:33<00:00, 33.57s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [01:59<00:00, 119.21s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_32
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.06s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:20<00:00, 80.82s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_26
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:31<00:00, 31.34s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:43<00:00, 43.10s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_27
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:33<00:00, 33.78s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:35<00:00, 95.24s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_33
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:33<00:00, 33.15s/it]
Recognizing Text: 100%|█████████████████████████| 1/1 [17:01<00:00, 1021.14s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_37
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.96s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:53<00:00, 173.09s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_23
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.75s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:14<00:00, 134.24s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_22
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.54s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:08<00:00, 128.59s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_36
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.83s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:59<00:00, 59.05s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_20
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:31<00:00, 31.31s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:53<00:00, 53.14s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_34
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:34<00:00, 34.54s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:44<00:00, 44.24s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_35
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:36<00:00, 36.43s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [01:47<00:00, 107.25s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_21
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:39<00:00, 39.69s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:51<00:00, 51.64s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_10
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:44<00:00, 44.50s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:01<00:00, 121.59s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_11
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:42<00:00, 42.84s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:41<00:00, 41.02s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_13
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:46<00:00, 46.28s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:56<00:00, 176.84s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_12
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:38<00:00, 38.78s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.28s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_16
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:40<00:00, 40.21s/it]
Recognizing Text: 100%|█████████████████████████| 1/1 [51:52<00:00, 3112.91s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_17
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.45s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:53<00:00, 233.35s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_15
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.93s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:37<00:00, 37.70s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_29
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.70s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:35<00:00, 35.78s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_28
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.65s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:12<00:00, 12.71s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_14
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.28s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:19<00:00, 19.05s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_8
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.75s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [12:44<00:00, 764.34s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB2/page_9


In [1]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/PTB3'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.14s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:14<00:00, 14.73s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.53s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:38<00:00, 38.67s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.10s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:37<00:00, 157.76s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.59s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [01:54<00:00, 114.05s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.23s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:46<00:00, 46.15s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_5
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.95s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:35<00:00, 95.66s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_7
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.30s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:22<00:00, 142.94s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_6
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.91s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:53<00:00, 173.19s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_19
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.46s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:48<00:00, 168.17s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_25
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.90s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:37<00:00, 97.01s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_31
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.19s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [01:43<00:00, 103.65s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/PTB3/page_30
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.29s/it]
Recognizing Text:   0%|                                   | 0/1 [00:09<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 7.18 GB, other allocations: 1.58 GB, max allowed: 9.07 GB). Tried to allocate 332.28 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [2]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/SINDH1'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH1'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.53s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:15<00:00, 75.00s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH1/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.23s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:58<00:00, 58.96s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH1/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.51s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:44<00:00, 44.09s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH1/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.10s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:34<00:00, 94.44s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH1/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.36s/it]
Recognizing Text:   0%|                                   | 0/1 [00:08<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 7.22 GB, other allocations: 1.58 GB, max allowed: 9.07 GB). Tried to allocate 332.28 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [3]:
import argparse
import json
from collections import defaultdict

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
from surya.model.recognition.tokenizer import _tokenize
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
import os
import csv 



def process_folder(input_folder, results_dir):
    # Get a list of all PNG files in the folder
    png_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    for png_file in png_files:
        # Construct the full path to the PNG file
        png_path = os.path.join(input_folder, png_file)

        # Call the main function for each PNG file
        main(png_path, results_dir)


def main(input_path,
    results_dir,
    lang_file=None,
    langs='hi,en',
    max=None,
    start_page=None,
    images=False):

    if os.path.isdir(input_path):
        images, names = load_from_folder(input_path, max, start_page)
        folder_name = os.path.basename(input_path)
    else:
        images, names = load_from_file(input_path, max, start_page)
        folder_name = os.path.basename(input_path).split(".")[0]

    if lang_file:
        # We got all of our language settings from a file
        langs = load_lang_file(lang_file, names) 
        for lang in langs:
            replace_lang_with_code(lang)
        image_langs = langs
    else:
        # We got our language settings from the input
        langs = langs.split(",")
        replace_lang_with_code(langs)
        image_langs = [langs] * len(images)

    det_processor = load_detection_processor()
    det_model = load_detection_model()

    _, lang_tokens = _tokenize("", get_unique_langs(image_langs))
    rec_model = load_recognition_model(langs=lang_tokens) # Prune model moe layer to only include languages we need
    rec_processor = load_recognition_processor()

    result_path = os.path.join(results_dir, folder_name)
    os.makedirs(result_path, exist_ok=True)

    predictions_by_image = run_ocr(images, image_langs, det_model, det_processor, rec_model, rec_processor)

    if images:
        for idx, (name, image, pred, langs) in enumerate(zip(names, images, predictions_by_image, image_langs)):
            bboxes = [l.bbox for l in pred.text_lines]
            pred_text = [l.text for l in pred.text_lines]
            page_image = draw_text_on_image(bboxes, pred_text, image.size, langs, has_math="_math" in langs)
            page_image.save(f'./temp{idx}.jpg')
            page_image.save(os.path.join(result_path, f"{name}_{idx}_text.png"))

             # Save text to CSV file
            csv_file_path = os.path.join(result_path, f"{name}_{idx}_text.csv")
            with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Text'])
                for line in pred_text:
                    writer.writerow([line])


        out_preds = defaultdict(list)
        for name, pred, image in zip(names, predictions_by_image, images):
            out_pred = pred.model_dump()
            out_pred["page"] = len(out_preds[name]) + 1
            out_preds[name].append(out_pred)

        with open(os.path.join(result_path, "results.json"), "w+", encoding="utf-8") as f:
            json.dump(out_preds, f, ensure_ascii=False)

        print(f"Wrote results to {result_path}")

        # from PIL import Image, ImageDraw
        # indices = []
        # prev = -1
        # for idx in range(len(preds['text_lines'])):
        #     x_axis = preds['text_lines'][idx]['polygon'][0][0]
        #     if prev != -1 and x_axis > (prev + 5):
        #         indices.append(idx)
        #     prev = x_axis
            
        # text = ''
        # for idx in range(len(preds['text_lines'])):
        #     if idx not in indices:
        #         text += preds['text_lines'][idx]['text'] + ' '

        # print(f"Text for page {page_idx + 1}: {text}")
        


if __name__ == "__main__":
     input_folder = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Books PNGS/SINDH2'
     results_dir = '/Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2'
     process_folder(input_folder, results_dir)

Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.38s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:32<00:00, 32.21s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_2
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.82s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:10<00:00, 70.22s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_3
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:26<00:00, 26.55s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [03:47<00:00, 227.90s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_1
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.39s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:11<00:00, 131.92s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_4
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:25<00:00, 25.58s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:42<00:00, 42.87s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_5
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:28<00:00, 28.75s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [01:25<00:00, 85.51s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_7
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.74s/it]
Recognizing Text: 100%|██████████████████████████| 1/1 [02:07<00:00, 127.33s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_6
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:27<00:00, 27.71s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:11<00:00, 11.19s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_19
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.21s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:42<00:00, 42.11s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_18
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.74s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:35<00:00, 35.63s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_20
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:30<00:00, 30.59s/it]
Recognizing Text: 100%|███████████████████████████| 1/1 [00:23<00:00, 23.31s/it]


Wrote results to /Users/fajrzafar/Documents/Semester 8 /FYP 1/Extracted_Text/SINDH2/page_10
Loading detection model vikp/surya_det2 on device cpu with dtype torch.float32
Loading recognition model vikp/surya_rec on device mps with dtype torch.float16


Detecting bboxes: 100%|███████████████████████████| 1/1 [00:33<00:00, 33.71s/it]
Recognizing Text:   0%|                                   | 0/1 [00:12<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 7.27 GB, other allocations: 1.76 GB, max allowed: 9.07 GB). Tried to allocate 58.19 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).