Library

In [52]:
import pytesseract
import glob
from sklearn.cluster import KMeans
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import re

In [None]:
def textProcessing(text):
    NIK= re.findall(r'\b\w*35\w*\b', text)
    

In [53]:
def pathProcessing (path):
    image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.gif']  # Add more extensions if needed
    image_paths = []

    for ext in image_extensions:
        image_paths.extend(glob.glob(os.path.join(path, ext)))

    return image_paths

In [54]:
def gaussianBlur(images):
    blurredImages=[]
    for image in images:
        kernel_size = (5, 5)
        sigma_x = 0
        blurredImage = cv2.GaussianBlur(image, kernel_size, sigma_x)
        blurredImages.append(blurredImage)
    
    return blurredImages   

In [55]:
def readImage (paths):
    images=[]

    for path in paths:
        image= cv2.imread(path,cv2.IMREAD_GRAYSCALE)
        if image is not None:
            images.append(image)
        else:
            print(f"Failed to read image at path: {path}")

    return images

In [56]:
def kmeans(images):
    newImages=[]
    for image in images : 
        k = 2
        newImage = image.reshape((-1, 1))
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(newImage)
        cluster_centers = kmeans.cluster_centers_.astype(np.uint8)
        segmented_image = cluster_centers[kmeans.labels_].reshape(image.shape)
    newImages.append(segmented_image)
    
    return segmented_image

In [57]:
from PIL import Image
import cv2
import numpy as np

def imageToText(images):
    texts = []
    for image in images:
        # Ubah gambar dari cv2 (numpy array) menjadi gambar PIL
        pil_image = Image.fromarray(image)

        # Ubah gambar menjadi mode grayscale ('L')
        image_for_ocr = pil_image.convert('L')

        pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
        text = pytesseract.image_to_string(image_for_ocr)
        texts.append(text)

    return texts

In [58]:
# konvolusi pada gambar
def convolution2d(images,kernel,stride,padding):
    
    convolvedImages=[]
    for image in images:
      image_height, image_width = image.shape
      kernel_height, kernel_width = kernel.shape

      padded_image = np.zeros((image_height + kernel_height - 1, image_width + kernel_width - 1))
      padded_image[kernel_height // padding:image_height + kernel_height // padding, kernel_width // padding:image_width + kernel_width // padding] = image

      convolved_image = np.zeros((image_height, image_width))
      for i in range(image_height):
        for j in range(image_width):
          convolved_image[i, j] = np.sum(padded_image[i*stride:i*stride + kernel_height, j*stride:j*stride + kernel_width] * kernel)
      convolvedImages.append(convolved_image)

    return convolvedImages

In [59]:
def showImages(images, num_cols=3):
    num_images = len(images)
    num_rows = (num_images + num_cols - 1) // num_cols

    plt.figure(figsize=(10, 8))  # Atur ukuran figur sesuai kebutuhan

    for i, image in enumerate(images, 1):
        plt.subplot(num_rows, num_cols, i)
        plt.imshow(image, cmap='gray')  # Menggunakan cmap='gray' jika gambar dalam mode grayscale
        plt.axis('off')

    plt.tight_layout()
    plt.show()

In [60]:
def main ():
    path= 'DataSet'
    imagesPath=pathProcessing(path)
    imageReady=readImage(imagesPath)
    blurredImages=gaussianBlur(imageReady)
    kernel = np.array([[0, -1, 0],
                      [-1, 5, -1],
                      [0, -1, 0]])
    convolved_image = convolution2d(blurredImages, kernel, 1, 2)
    texts=imageToText(convolved_image)
    print (texts[2])
    return {
        "images":convolved_image,
        "text": texts
    }
    
    

In [61]:
main()

PROVINS| JAWA TIMUR

KABUPATEN PASURUAN
NIK > 3514101803030001
Nama - AGUNG AFRIZAL DAFFA
Tempat/Tgi Lahir : PASURUAN, 18-03-2003
Jenis kelamin - LAKI-LAKI Gol. Darah ;:-
Alamat ; DUSUN COWEK

RT/RW : 013/006
Kel/Desa : JATIARJO
Kecamatan : PRIGEN

Agama - ISLAM
Status Perkawinan: BELUM KAWIN
Pekerjaan : PELAJAR/MAHASISWA

Kewarganegaraan : WNI
Berlaku Hingga =: SEUMUR HIDUP

PASURUAN
13-07-2020

\




{'images': [array([[703., 464., 466., ..., 462., 462., 693.],
         [467., 236., 231., ..., 232., 231., 461.],
         [472., 232., 236., ..., 229., 232., 467.],
         ...,
         [474., 236., 238., ..., 235., 234., 469.],
         [472., 240., 238., ..., 238., 237., 473.],
         [715., 475., 480., ..., 472., 472., 708.]]),
  array([[298., 200., 198., ..., 451., 450., 675.],
         [196.,  99.,  98., ..., 227., 226., 451.],
         [194.,  98.,  97., ..., 224., 224., 448.],
         ...,
         [334., 167., 167., ..., 181., 183., 363.],
         [334., 167., 167., ..., 181., 182., 368.],
         [501., 334., 334., ..., 362., 364., 550.]]),
  array([[762., 508., 508., ..., 217., 220., 327.],
         [508., 254., 255., ..., 108., 108., 220.],
         [508., 256., 250., ..., 105., 109., 217.],
         ...,
         [510., 255., 255., ...,  87.,  88., 180.],
         [510., 255., 255., ...,  91.,  88., 181.],
         [765., 510., 510., ..., 178., 177., 271.]])],
 'tex