In [94]:
import os 
from pathlib import Path
from random import choice, sample, seed
from typing import List, Tuple, cast

import numpy as np
import pytesseract
from cv2 import cvtColor,threshold,COLOR_RGB2GRAY,THRESH_BINARY,THRESH_OTSU
from matplotlib.pyplot import subplots, Figure ,imshow,title,axis,show # type: ignore
from numpy import ndarray,array
from PIL import Image
from PIL.Image import open as open_image
from pytesseract import image_to_string
from torch import Generator
from torch.cuda import is_available
from torch.utils.data import DataLoader,random_split
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder # type: ignore
from torchvision.transforms import Grayscale, Resize, ToTensor, Normalize, ToPILImage, Compose

In [95]:
# Device name: "cuda" when torch reports a usable CUDA runtime, otherwise "cpu".
DEVICE = "cuda" if is_available() else "cpu"
# Location of the Tesseract executable used by pytesseract. The TESSERACT_CMD
# environment variable takes precedence so the notebook is not tied to a single
# machine; the original Windows install path remains the default.
pytesseract.pytesseract.tesseract_cmd = os.environ.get("TESSERACT_CMD", r'D:\tesseract\tesseract.exe')

In [52]:
# Dataset root: the "data" directory inside the current working directory.
path = Path.cwd().joinpath("data")

In [None]:
# Collect every file located exactly one directory below `path`
# (path/<class>/<file>). Directories matched by the pattern are skipped because
# they cannot be opened as images, and the result is sorted because glob order
# is not guaranteed -- otherwise seeded sampling from this list would not be
# repeatable across runs or machines.
image_list = sorted(p for p in path.glob("*/*") if p.is_file())
if not image_list:
    # choice() on an empty list would raise an opaque IndexError.
    raise FileNotFoundError(f"No image files found under {path}")
image_path = choice(image_list)
image = open_image(image_path)

print(f"Random Image Path : {image_path}")
# `.name` rather than `.stem`: a class folder such as "v1.5" must keep its suffix.
print(f"Image Class : {image_path.parent.name}")
print(f"Image Height : {image.height}")
print(f"Image Width : {image.width}")
image

In [None]:
# Preprocessing pipeline: single-channel grayscale -> 128x128 resize -> tensor ->
# normalisation with mean 0.5 and std 0.5.
# NOTE(review): this assignment rebinds `transforms`, which was imported above as
# the `torchvision.transforms` module; after this cell the name refers to the
# Compose object, not the module.
transforms = Compose(
    [
        Grayscale(num_output_channels=1),
        Resize(size=(128, 128)),
        ToTensor(),
        Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

transforms

In [62]:
def plot_transformed_image(image_paths: List[Path], transform: Compose, n: int = 3, r_seed: int = 42):
    """Plot randomly sampled images next to their transformed versions.

    One two-panel figure is created per sampled image: the left panel shows the
    image as opened from disk, the right panel shows the output of ``transform``
    after ``permute(1, 2, 0)``.

    Args:
        image_paths: Candidate image files to sample from.
        transform: Callable applied to the opened image. Its result must support
            ``.permute(1, 2, 0)`` (presumably a channel-first tensor -- confirm
            against the pipeline in use).
        n: Number of images to plot; capped at ``len(image_paths)``.
        r_seed: Seed passed to ``random.seed``. This reseeds the module-level
            generator, so later ``choice``/``sample`` calls are affected too.
    """
    seed(r_seed)
    # sample() raises ValueError when k exceeds the population size, so cap it.
    k = min(n, len(image_paths))
    for image_path in sample(image_paths, k=k):
        with open_image(image_path) as f:
            fig, ax = cast(Tuple[Figure, ndarray], subplots(1, 2)) # type: ignore
            ax[0].imshow(f)
            ax[0].set_title(f"Original Size : {f.size}")
            ax[0].axis(False)
            transformed_image = transform(f).permute(1,2,0) # type: ignore
            ax[1].imshow(transformed_image)
            ax[1].set_title(f"Transformed \nsize : {transformed_image.shape}") # type: ignore
            ax[1].axis(False)

            # `.name` rather than `.stem` so a class folder containing a dot is
            # shown in full.
            fig.suptitle(f"Class : {image_path.parent.name}", fontsize=16) # type: ignore

In [None]:
# Preview the preprocessing pipeline on a few sampled images (default n and seed).
plot_transformed_image(image_paths=image_list, transform=transforms)

In [None]:
# ImageFolder dataset rooted at `path`; every image goes through the `transforms`
# pipeline and the targets are left untransformed.
dataset_root = path.as_posix()
train_data = ImageFolder(root=dataset_root, transform=transforms, target_transform=None)
train_data

In [58]:
# Hold out 20% of the dataset for validation; the remainder is used for training.
val_size = int(0.2 * len(train_data))
train_size = len(train_data) - val_size
# A seeded generator makes the split identical on every run; without it the
# train/validation membership changes each time this cell is executed.
train_subset, val_subset = random_split(
    train_data,
    [train_size, val_size],
    generator=Generator().manual_seed(42),
)

In [None]:
BATCH_SIZE = 32
# os.cpu_count() returns None when the CPU count cannot be determined, and
# DataLoader requires an integer; fall back to 0 (load in the main process).
NUM_WORKERS = os.cpu_count() or 0

# Training batches are reshuffled every epoch; validation order stays fixed.
train_dataloader = DataLoader(dataset = train_subset,
                             batch_size = BATCH_SIZE,
                             shuffle=True,
                             num_workers = NUM_WORKERS)
val_dataloader = DataLoader(dataset = val_subset,
                           batch_size = BATCH_SIZE,
                           shuffle = False,
                           num_workers = NUM_WORKERS)

train_dataloader , val_dataloader

In [None]:
image,label = train_data[0]
# `transforms` was rebound to the Compose pipeline in an earlier cell, so
# `transforms.ToPILImage` raises AttributeError; use the ToPILImage class
# imported at the top of the notebook instead.
# The pipeline normalises with mean 0.5 / std 0.5, so the tensor holds values in
# [-1, 1]. Map it back to [0, 1] before the float -> 8-bit conversion, which does
# not expect negative inputs. Keep these constants in sync with Normalize above.
denormalised = (image * 0.5 + 0.5).clamp(0, 1)
imshow(ToPILImage()(denormalised),cmap="gray")
title(f"No : {label}")
axis(False)
show()

In [116]:
def preprocess_img(img):
    """Binarise an image with Otsu thresholding.

    Args:
        img: A PIL image, or a path (``str`` or ``os.PathLike``, e.g.
            ``pathlib.Path``) to an image file.

    Returns:
        The thresholded single-channel array returned by ``cv2.threshold`` when
        called with ``THRESH_BINARY + THRESH_OTSU`` and a maximum value of 255.
    """
    if isinstance(img, (str, os.PathLike)):
        # Open inside a context manager so the file handle is released;
        # convert() loads the pixels and returns a separate image object that
        # stays usable after the file is closed.
        with Image.open(img) as opened:
            img = opened.convert('RGB')
    elif img.mode != 'RGB':
        img = img.convert('RGB')
    img_arr = np.array(img)
    gray_img = cvtColor(img_arr, COLOR_RGB2GRAY)
    # With THRESH_OTSU the threshold is chosen automatically, so the 0 passed
    # here is only a placeholder; the chosen threshold (first return value) is
    # discarded.
    _, thresh_image = threshold(gray_img, 0, 255, THRESH_BINARY + THRESH_OTSU)
    return thresh_image

def extract_text(img, config='--psm 11'):
    """Run Tesseract OCR on an image after binarising it.

    Args:
        img: Anything accepted by ``preprocess_img``.
        config: Extra command-line configuration forwarded to
            ``pytesseract.image_to_string``. Defaults to ``'--psm 11'``, the
            value this function previously hard-coded.

    Returns:
        The recognised text with leading and trailing whitespace removed.
    """
    preprocessed_img = preprocess_img(img)
    pil_image = Image.fromarray(preprocessed_img)
    text = image_to_string(pil_image, config=config)
    return text.strip()


In [None]:
# Pick a random image file from the dataset and run OCR on it.
# Only regular files are kept (the pattern can also match directories, which
# cannot be opened as images), sorted so the candidate order is stable.
img_path_list = sorted(p for p in path.glob("*/*") if p.is_file())
random_img_path = choice(img_path_list)
img = Image.open(random_img_path)
extracted_txt = extract_text(img)
# The value printed is the OCR text, so label it as text rather than as an image.
print(f"Extracted Text : {extracted_txt}")


In [None]:
# Display the image that was passed to extract_text in the previous cell.
img