In [2]:
from PIL import Image, ImageEnhance
import pytesseract
import re
import numpy as np

# Single-image OCR experiment: crop the centre of one training image,
# normalise brightness/contrast, binarise it, and ask Tesseract for digits.

# Load image
image_path = '../train/img_000101.jpg'
image = Image.open(image_path)

# Get image dimensions
width, height = image.size

# Define the crop box: a centred rectangle spanning 50% of the width and
# 40% of the height of the original image.
crop_width = int(width * 0.5)
crop_height = int(height * 0.4)

left = (width - crop_width) // 2
top = (height - crop_height) // 2
right = left + crop_width
bottom = top + crop_height

# Crop first so the brightness statistic below describes only the region
# that is actually passed to OCR.
cropped = image.crop((left, top, right, bottom))

# Convert cropped image to grayscale
gray = cropped.convert('L')

# Convert to NumPy array for brightness analysis
gray_np = np.array(gray)
mean_brightness = gray_np.mean()
print(f"Mean brightness: {mean_brightness:.2f}")

# Dynamically choose brightness factor: the darker the crop, the stronger
# the boost. This must be a single if/elif chain -- with two separate `if`
# statements the "< 60" branch would overwrite the factor chosen for
# "< 20", so very dark images would get 5 instead of 15.
if mean_brightness < 20:
    factor = 15
elif mean_brightness < 60:
    factor = 5
elif mean_brightness < 100:
    factor = 1.5
elif mean_brightness < 150:
    factor = 1.2
else:
    factor = 1.0
print(f"Brightness enhance factor: {factor}")

# Brighten grayscale image
enhancer = ImageEnhance.Brightness(gray)
brightened = enhancer.enhance(factor)

# Enhance contrast. A factor of 5 is deliberately aggressive so that the
# fixed threshold below separates foreground from background cleanly.
contrast_enhancer = ImageEnhance.Contrast(brightened)
brightened = contrast_enhancer.enhance(5)

# Apply binary thresholding: pixels above `threshold` become white,
# everything else black (1-bit image).
threshold = 128
bw = brightened.point(lambda x: 255 if x > threshold else 0, mode='1')

# Optional: show preprocessed image (opens the system image viewer)
bw.show()

# Resize to help OCR (2x upscale).
# NOTE(review): Pillow documents that mode "1" images are always resized
# with NEAREST, so the LANCZOS filter requested here is presumably ignored;
# upscale the grayscale image before thresholding if smooth edges matter.
bw = bw.resize((bw.width * 2, bw.height * 2), Image.LANCZOS)

# OCR config: default engine (--oem 3), treat the image as a single text
# line (--psm 7), and restrict recognised characters to digits.
custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789'

# Run OCR
raw_text = pytesseract.image_to_string(bw, config=custom_config)

# Extract runs of consecutive digits from the OCR output
numbers_only = re.findall(r'\d+', raw_text)

if numbers_only:
    print("✅ Detected numbers:", ' '.join(numbers_only))
else:
    print("❌ No digits detected.")

Mean brightness: 169.07
Brightness enhance factor: 1.0
❌ No digits detected.


# Batch OCR on the test images: building a digit-presence DataFrame

In [3]:
import os
import re
import numpy as np
import pandas as pd
from PIL import Image, ImageEnhance
import pytesseract
from tqdm import tqdm

# Batch OCR: run the same crop / brighten / threshold / OCR pipeline over
# every image in `folder_path` and record, per image, which of the digits
# 0-9 Tesseract reported at least once.

# Directory with images
folder_path = '../test/'

# File extensions treated as images (matched case-insensitively)
image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

# One dict per successfully processed image
data = []

# os.listdir returns entries in arbitrary order; sorting makes the row
# order of the resulting DataFrame reproducible across runs and machines.
for filename in tqdm(sorted(os.listdir(folder_path))):
    if not filename.lower().endswith(image_extensions):
        continue

    image_path = os.path.join(folder_path, filename)

    try:
        # The context manager releases the file handle even if decoding
        # fails; crop() returns an independent image that stays usable
        # after the source file has been closed.
        with Image.open(image_path) as image:
            # Crop a centred box: 50% of the width, 40% of the height
            width, height = image.size
            crop_width = int(width * 0.5)
            crop_height = int(height * 0.4)
            left = (width - crop_width) // 2
            top = (height - crop_height) // 2
            right = left + crop_width
            bottom = top + crop_height
            cropped = image.crop((left, top, right, bottom))

        # Grayscale + mean brightness of the cropped region
        gray = cropped.convert('L')
        gray_np = np.array(gray)
        mean_brightness = gray_np.mean()

        # Dynamic brightness factor: darker crops get a stronger boost
        if mean_brightness < 20:
            factor = 15
        elif mean_brightness < 60:
            factor = 5
        elif mean_brightness < 100:
            factor = 1.5
        elif mean_brightness < 150:
            factor = 1.2
        else:
            factor = 1.0

        # Brighten
        enhancer = ImageEnhance.Brightness(gray)
        brightened = enhancer.enhance(factor)

        # Contrast boost (aggressive, to suit the fixed threshold below)
        contrast_enhancer = ImageEnhance.Contrast(brightened)
        brightened = contrast_enhancer.enhance(5)

        # Threshold to a 1-bit black/white image
        threshold = 128
        bw = brightened.point(lambda x: 255 if x > threshold else 0, mode='1')

        # Resize (2x upscale).
        # NOTE(review): Pillow documents that mode "1" images are always
        # resized with NEAREST, so LANCZOS is presumably ignored here.
        bw = bw.resize((bw.width * 2, bw.height * 2), Image.LANCZOS)

        # OCR: single text line, digits only
        custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789'
        raw_text = pytesseract.image_to_string(bw, config=custom_config)

        # Distinct digit characters present anywhere in the OCR output
        digits_found = set(re.findall(r'\d', raw_text))

        # Record presence (1) or absence (0) of each digit
        row = {'image_path': image_path}
        for digit in '0123456789':
            row[digit] = 1 if digit in digits_found else 0
        data.append(row)

    except Exception as e:
        # Best effort: report the failure and continue with the next file.
        # Failed images get no row in the DataFrame.
        print(f"⚠️ Error processing {filename}: {e}")

# Create DataFrame. Passing the columns explicitly keeps the schema stable
# even when no image was processed, so downstream cells that select the
# digit columns do not fail on an empty result.
df = pd.DataFrame(data, columns=['image_path', *'0123456789'])

# Quick look at the first rows
print(df.head())

100%|██████████| 2355/2355 [06:19<00:00,  6.20it/s]

               image_path  0  1  2  3  4  5  6  7  8  9
0  ../test/img_005489.jpg  0  0  0  0  0  0  0  0  0  0
1  ../test/img_005490.jpg  0  0  0  0  0  0  0  0  0  0
2  ../test/img_005491.jpg  0  0  0  0  1  0  0  0  0  0
3  ../test/img_005492.jpg  0  0  0  0  0  0  0  0  0  0
4  ../test/img_005493.jpg  0  0  0  0  0  0  0  0  0  0





In [4]:
# Labels of the ten digit-indicator columns ('0' ... '9'); 'image_path' is
# deliberately left out.
digit_columns = list("0123456789")

# Boolean mask that is True for every row where at least one digit
# indicator is set.
has_any_digit = df[digit_columns].any(axis=1)

# Keep only the matching rows and renumber them from zero.
filtered_df = df.loc[has_any_digit].reset_index(drop=True)

# Preview the first 20 matches (the frame can be exported with
# filtered_df.to_csv if a file is needed).
filtered_df.head(20)

Unnamed: 0,image_path,0,1,2,3,4,5,6,7,8,9
0,../test/img_005491.jpg,0,0,0,0,1,0,0,0,0,0
1,../test/img_005501.jpg,1,0,0,0,0,0,0,1,0,0
2,../test/img_005510.jpg,1,0,0,0,0,0,0,0,0,0
3,../test/img_005512.jpg,0,0,0,0,0,1,0,0,0,0
4,../test/img_005520.jpg,1,0,0,1,0,0,0,0,0,0
5,../test/img_005524.jpg,0,0,0,0,1,0,0,0,0,0
6,../test/img_005540.jpg,0,0,0,0,1,0,0,0,0,0
7,../test/img_005548.jpg,0,0,1,0,0,0,0,0,0,0
8,../test/img_005551.jpg,1,0,0,0,1,0,0,0,0,0
9,../test/img_005554.jpg,0,0,0,0,1,0,0,0,0,0


In [6]:
# Remove the literal "../test/" directory prefix (plain-text match, not a
# regex) so that 'image_path' holds only the bare file name.
bare_file_names = df["image_path"].str.replace("../test/", "", regex=False)
df["image_path"] = bare_file_names
df

Unnamed: 0,image_path,0,1,2,3,4,5,6,7,8,9
0,img_005489.jpg,0,0,0,0,0,0,0,0,0,0
1,img_005490.jpg,0,0,0,0,0,0,0,0,0,0
2,img_005491.jpg,0,0,0,0,1,0,0,0,0,0
3,img_005492.jpg,0,0,0,0,0,0,0,0,0,0
4,img_005493.jpg,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2348,img_007837.jpg,0,0,0,0,0,0,0,0,0,0
2349,img_007838.jpg,0,0,0,0,0,0,0,0,0,0
2350,img_007839.jpg,0,0,0,0,0,0,0,0,0,0
2351,img_007840.jpg,0,0,1,0,0,0,0,0,0,0


In [7]:
# Write the per-image digit-presence table to disk, without the row index.
df.to_csv(
    "ocr_digit_detection_test.csv",
    index=False,
)