In [1]:
import numpy as np
import pandas as pd
import cv2 as cv
import pytesseract as ts

import os
from glob import glob
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

# glob() makes no promise about ordering, so sort the matches: this keeps
# imgPaths[0] and the row order of the exported CSV stable between runs.
imgPaths = sorted(glob('images/*.png'))

In [2]:
# Work through a single image first; the batch loop further down repeats the
# same steps for every path in imgPaths.
imgPath = imgPaths[0]

# basename() yields the same file name as os.path.split(...)[1] without
# binding the directory part to `_`, which IPython reserves for the previous
# cell output.
filename = os.path.basename(imgPath)

In [3]:
# Load the sample image. cv.imread reports an unreadable or missing file by
# returning None instead of raising, so fail here with the offending path
# rather than deep inside pytesseract.
image = cv.imread(imgPath)
if image is None:
    raise FileNotFoundError(f"OpenCV could not read image: {imgPath}")

# OpenCV decodes images in BGR channel order, whereas pytesseract assumes RGB;
# convert before running OCR.
data = ts.image_to_data(cv.cvtColor(image, cv.COLOR_BGR2RGB))

# The OCR result is tab-separated text: the first line holds the column names
# and each following line is one row. Empty lines (e.g. after a trailing
# newline) are skipped so they do not become all-None rows.
dataList = [line.split('\t') for line in data.split('\n') if line]
df = pd.DataFrame(dataList[1:], columns=dataList[0])

In [5]:
# Clean the raw OCR table in one pass:
#   1. discard rows that have any missing field,
#   2. turn 'conf' into a number (unparseable values become NaN),
#   3. discard the rows whose 'conf' could not be parsed,
#   4. store 'conf' as an integer.
df = (
    df.dropna()
      .assign(conf=lambda frame: pd.to_numeric(frame['conf'], errors='coerce'))
      .dropna(subset=['conf'])
      .astype({'conf': int})
)

# Keep only the rows recognised with a confidence of at least 30.
textData = df.loc[df['conf'] >= 30]

# Two-column result: the recognised text plus the image it came from.
# `filename` is set in the earlier cell that picks the sample image.
IdentityCard = textData[['text']].assign(id=filename)

In [10]:
import tensorflow as tf

# Diagnostic only: report how many GPUs TensorFlow can see. None of the other
# cells shown in this notebook use TensorFlow.
gpuDevices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpuDevices))


Num GPUs Available:  0


In [11]:
# Lowest 'conf' value a recognised row may have and still be kept.
MIN_CONFIDENCE = 30


def readIdentityCard(imgPath, minConf=MIN_CONFIDENCE):
    """Run OCR on one image and return its confident text rows.

    Parameters
    ----------
    imgPath : str
        Path of the image file to read.
    minConf : int, optional
        Rows whose 'conf' value is below this threshold are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` (file name of the image) and ``text`` (recognised
        text), one row per kept OCR row. May be empty.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``imgPath``.
    """
    filename = os.path.basename(imgPath)

    # cv.imread returns None instead of raising when the file is unreadable.
    image = cv.imread(imgPath)
    if image is None:
        raise FileNotFoundError(f"OpenCV could not read image: {imgPath}")

    # OpenCV decodes to BGR, pytesseract assumes RGB.
    data = ts.image_to_data(cv.cvtColor(image, cv.COLOR_BGR2RGB))

    # Tab-separated output: header line first, then one row per line.
    # Empty lines are skipped so they do not turn into all-None rows.
    rows = [line.split('\t') for line in data.split('\n') if line]
    ocr = pd.DataFrame(rows[1:], columns=rows[0])

    # Drop incomplete rows, make 'conf' an integer, discard unparseable ones.
    ocr = (
        ocr.dropna()
           .assign(conf=lambda frame: pd.to_numeric(frame['conf'], errors='coerce'))
           .dropna(subset=['conf'])
           .astype({'conf': int})
    )

    confident = ocr.loc[ocr['conf'] >= minConf]
    return confident[['text']].assign(id=filename)[['id', 'text']]


# Process every image, then concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all earlier rows on each iteration.
cardFrames = [
    readIdentityCard(path) for path in tqdm(imgPaths, desc="Identity Card")
]

# pd.concat rejects an empty list, so fall back to an empty frame with the
# expected columns when no images were found.
if cardFrames:
    allIdentityCards = pd.concat(cardFrames, ignore_index=True)
else:
    allIdentityCards = pd.DataFrame(columns=['id', 'text'])

Identity Card: 100%|██████████| 68/68 [01:14<00:00,  1.10s/it]


In [12]:
# Persist the combined table next to the notebook; the DataFrame index is
# left out of the file.
csvPath = 'IdentityCards.csv'
allIdentityCards.to_csv(csvPath, index=False)