### preparation

In [1]:
import cv2
from tqdm import tqdm
import numpy as np
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Input
from keras.models import Sequential, load_model
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

In [2]:
base_X_size = 32
base_Y_size = 32

### Load and show images

In [3]:
def load_image(image_path):
    img = cv2.imread(image_path, 0)
    resized_img = cv2.resize(img, (base_X_size, base_Y_size))
    return np.expand_dims(resized_img, axis=2)

def show_image(caption, img, show=True, destroy=True, waite_ms=1000):
    if not show:
        return
    cv2.imshow(caption, img)
    cv2.waitKey(waite_ms)
    if destroy:
        cv2.destroyAllWindows()


### Upload Dataset images

In [4]:
ds_images = np.empty((0, base_X_size, base_Y_size, 1), dtype=int)
ds_temp = np.empty((0, base_X_size, base_Y_size, 1), dtype=int)
ds_target = np.empty((0), dtype=int)

dataset_dir = os.listdir("dataset")
pbar = tqdm(total=80000, desc="Uploading")

for file_numbers_dir in dataset_dir:
    file_index = 0
    file_dirs = os.listdir("dataset/" + file_numbers_dir)
    target_number = int(file_numbers_dir)

    for image_path in (file_dirs):
        file_index += 1
        img = load_image("dataset/" + file_numbers_dir + "/" + image_path)
        ds_target = np.append(ds_target, target_number)
        ds_temp = np.append(ds_temp, [img], axis=0)
        pbar.update(1)

        if file_index % 100 == 0:
            ds_images = np.append(ds_images, ds_temp, axis=0)
            ds_temp = np.empty((0, base_X_size, base_Y_size, 1), dtype=int)
pbar.close()

Uploading: 100%|██████████| 80000/80000 [01:22<00:00, 968.20it/s] 


In [5]:
print("ds_images shape: ", ds_images.shape)
print("ds_target shape: ", ds_target.shape)

ds_images shape:  (80000, 32, 32, 1)
ds_target shape:  (80000,)


### Normalizing images

In [6]:
ds_images = ds_images.astype(float) / 255.0

In [7]:
show_image("test image", ds_images[33333])

### split train and test data

In [8]:
train_images, test_images, train_target, test_target = train_test_split(
    ds_images, ds_target, test_size=0.25, random_state=42
)

### Build model

In [9]:
input_shape = train_images.shape[1:]
print("input shape: ", input_shape)

model = Sequential()
model.add(Input(shape=input_shape))
model.add(Conv2D(10, (3,3), activation='relu'))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))


optimizer = Adam(learning_rate=0.001)
model.compile(
        optimizer = optimizer, 
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
        metrics=["accuracy"])

model.summary()

input shape:  (32, 32, 1)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 10)        100       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 15, 15, 10)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2250)              0         
                                                                 
 dense (Dense)               (None, 32)                72032     
                                                                 
 dense_1 (Dense)             (None, 10)                330       
                                                                 
Total params: 72,462
Trainable params: 72,462
Non-trainable params: 0
__________________________

Train model

In [None]:
early_stopping = EarlyStopping(
    monitor="val_accuracy", 
    patience=3
)

history = model.fit(
    train_images, 
    train_target, 
    epochs=15, 
    validation_data=(test_images, test_target), 
    callbacks=[early_stopping], batch_size=32
)

### save and evaluate model

In [28]:
model.save("Farsi_OCR_Model.h5")

In [11]:
model = load_model("Farsi_OCR_Model.h5")

In [12]:
model.evaluate(train_images, train_target)



[0.007888835854828358, 0.9971666932106018]

### Process image numbers

In [13]:
def process_image(image, show_process=True):
    if image is None:
        print("Error: could not read image")

    else:
        img_b = cv2.bitwise_not(image)
        gray_img = cv2.cvtColor(img_b, cv2.COLOR_BGR2GRAY)
        blured_img = cv2.blur(gray_img, (5, 5))
        _, threshold_img = cv2.threshold(blured_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    return threshold_img

### detect and recognize numbers

In [14]:
def split_and_predict_numbers(thresholded_image, show_process=True):
    height, width = thresholded_image.shape
    thresh_img_copy = thresholded_image.copy()

    numbers = ""

    for x in range(width):
        num_pixel = np.where(thresh_img_copy[:, x] == 255)[0]
        if num_pixel.size > 0:
            y = num_pixel[0]

            # ------ find number ------
            retval, flood_filled_region, _, rect = cv2.floodFill(thresh_img_copy.copy(), None, (x, y), 0)

            detected_num = thresh_img_copy - flood_filled_region
            thresh_img_copy = flood_filled_region
            
            x1, y1, x2, y2 = rect[0],rect[1], rect[0] + rect[2],rect[1] + rect[3]
            image_number = detected_num[y1:y2, x1:x2]

            #------ Preparing the photo for prediction -------
            resized_img = np.expand_dims(cv2.resize(image_number, (base_X_size, base_Y_size)), axis=2)
            resized_img = np.array([resized_img])

            # ------ prediction ------
            prediction = model.predict(resized_img, verbose=0)[0]
            pred_number = tf.math.argmax(prediction).numpy()
            numbers += str(pred_number)

    return numbers

### test on Handwritten numbers

In [15]:
def Farsi_OCR(image_path):
    my_phone = cv2.imread(image_path)
    processed_image = process_image(my_phone)
    numbers = split_and_predict_numbers(processed_image)
    return "Numbers: "+numbers

Farsi_OCR("Numbers 2.png")

'Numbers: 09119629874'

### GUI

In [53]:
import tkinter as tk
from tkinter import filedialog, ttk
from PIL import Image, ImageTk

def my_farsi_ocr(image_path):
    result = Farsi_OCR(image_path)
    return result

class OCRApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Farsi OCR")
        self.root.geometry("600x500")
        self.root.configure(bg="#2c2f33")

        style = ttk.Style()
        style.configure("TButton", font=("Segoe UI", 11, "bold"), padding=6)

        self.btn = ttk.Button(root, text="📂 Select File", command=self.load_file)
        self.btn.pack(pady=14)

        # جایی برای نمایش عکس
        self.image_label = tk.Label(root, bg="#23272a", fg="#99aab5",
                                    text="No image selected")
        self.image_label.pack(pady=10)

        # نمایش نتیجه OCR
        self.result_label = tk.Label(root, text="OCR Result will appear here",
                                     fg="#ffffff", bg="#2c2f33", font=("Segoe UI", 11),
                                     wraplength=520, justify="center")
        self.result_label.pack(pady=12)

        self.run_btn = ttk.Button(root, text="✅ Yes", command=self.run_ocr)
        self.run_btn.pack(pady=8)

        self.image_path = None
        self.image_label.image = None

    def load_file(self):
        path = filedialog.askopenfilename(
            filetypes=[("Image Files", "*.png;*.jpg;*.jpeg;*.bmp;*.tif;*.tiff")]
        )
        if not path:
            return
        try:
            img = Image.open(path)
            if img.mode in ("RGBA", "P"):
                img = img.convert("RGB")

            max_w, max_h = 500, 320
            img.thumbnail((max_w, max_h))  

            tk_img = ImageTk.PhotoImage(img)
            self.image_label.configure(image=tk_img, text="")
            self.image_label.image = tk_img
            self.image_path = path
            self.result_label.config(text="Ready to run OCR")

        except Exception as e:
            self.result_label.config(text=f"خطا در بارگذاری تصویر: {e}")

    def run_ocr(self):
        if not self.image_path:
            self.result_label.config(text="⚠️ هیچ عکسی انتخاب نشده!")
            return
        self.result_label.config(text=my_farsi_ocr(self.image_path))

if __name__ == "__main__":
    root = tk.Tk()
    OCRApp(root)
    root.mainloop()
