In [1]:
import cv2
import numpy as np
import pytesseract
from matplotlib import pyplot as plt
from skimage.filters import threshold_otsu
from scipy.ndimage import interpolation as inter
from pytesseract import Output


In [2]:
#loading and displayng image
image = cv2.imread('/home/kkay/IIT-H/OCR/ILP-OCR/images/san/sansk.jpg', cv2.IMREAD_GRAYSCALE)


In [3]:
#denoising
if image is None:
    print("Error: Image not loaded correctly.")
else:
    denoised_image = cv2.medianBlur(image, 3)


In [4]:
#binarisation
_, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)


In [5]:
#skew correction
def deskew(image):
    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    coords = np.column_stack(np.where(thresh > 0))
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

deskewed_image = deskew(binary_image)


In [6]:
#image resizing
resized_image = cv2.resize(deskewed_image, (1024, 1024), interpolation=cv2.INTER_AREA)

In [7]:
#segmentation
contours, _ = cv2.findContours(255 - resized_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

segmented_image = cv2.cvtColor(resized_image, cv2.COLOR_GRAY2BGR)
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(segmented_image, (x, y), (x + w, y + h), (0, 255, 0), 2)


In [8]:
#boundary box maipulation

image_path = '/home/kkay/IIT-H/OCR/ILP-OCR/images/san/sansk.jpg'
image = cv2.imread(image_path)

gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

_, binary_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY_INV)

contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    if w > 30 and h > 30:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

output_path = 'sanskbbb.png'
cv2.imwrite(output_path, image)

print(f"Image saved with bounding boxes as {output_path}")

Image saved with bounding boxes as sanskbbb.png


In [9]:
#normalisation
normalized_image = cv2.normalize(resized_image, None, 0, 255, cv2.NORM_MINMAX)


In [10]:
#ocr
custom_config = r'--psm 6'
ocr_result = pytesseract.image_to_string(normalized_image, config=custom_config, lang='san')

print("OCR Result:")
print(ocr_result)


OCR Result:
!
¦
प
|]
{9
शरधाते
दः #
|
थ शं ०] 1077 प्क वी
11
। थ
| ^ ।
स व
च) 11 0
[भण ~
(९ ९1१३8 |
(। ग
॥ ह, भगे नि
ग | 1, द
॥ शि व
॥ म
( न ध प.
(सि 1 ध
| । 1
गा ध
[र स व
- 1 4.
1 1 1
ध ८
1 3 (0
[4
1 (4
व, ८ ः (अ 1: न
1
(व ए फ व ,
ध क १. ध ४ ) म न ल धि
॥ [र न ~; स
., ॥ हि 4. 2 . शि
॥: 2 (द व
॥ नि 0. (१ (६ त -
1 `. ध ध. ८ ८ न .
`` 4 ध (1 द द.
ध < ४ < 4 ८2 . .
` , न 1
` "४4 वि (42 प ध ।
~>“. . "क्यक्‌ [र ॥ [क 1: ॐ [त ध

