In [154]:
import os
from io import BytesIO
import openpyxl
from openpyxl.styles import PatternFill
import cv2
import joblib
import numpy as np
from utils import *
from PIL import Image
import pytesseract
import numpy as np
from functools import cmp_to_key
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Button, Label, Entry

In [138]:
# Load pre-trained SVM models for digits and symbols
# Both paths are relative, so they are resolved against the current working directory.
# NOTE(review): joblib.load unpickles the file contents, so these paths must only
# ever point at trusted model files.
DIGIT_MODEL_PATH = "./digits_model.joblib"
SYMBOL_MODEL_PATH = "./symbols_model.joblib"
digit_model = joblib.load(DIGIT_MODEL_PATH)
symbol_model = joblib.load(SYMBOL_MODEL_PATH)
# Configure Tesseract path
# NOTE(review): hard-coded location of the tesseract binary; presumably needs to be
# adjusted on machines where tesseract is installed elsewhere -- confirm.
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

In [144]:
#paper extraction
def preprocess_image_paper(input_img):
    """Build the grayscale image and a thickened edge map used to locate the paper.

    Args:
        input_img: Image converted with cv2.COLOR_BGR2GRAY (so a BGR colour image).

    Returns:
        Tuple ``(gray, thick_edges)``: the grayscale image and the Canny edge
        map after two dilation passes with a 5x5 kernel.

    Side effects:
        Writes the dilated edge map to
        ./processing/extract-paper/edges_dilated.jpg for inspection.
    """
    gray = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
    dilation_kernel = np.ones((5, 5), np.uint8)
    # Dilating closes small gaps so the paper outline forms one closed contour.
    thick_edges = cv2.dilate(cv2.Canny(gray, 100, 255), dilation_kernel, iterations=2)
    cv2.imwrite('./processing/extract-paper/edges_dilated.jpg', thick_edges)
    return gray, thick_edges

def find_largest_contour(edges):
    """Return the largest external contour that simplifies to four corners.

    Args:
        edges: Binary edge image passed to cv2.findContours.

    Returns:
        The 4-point polygon (output of cv2.approxPolyDP) of the contour with
        the greatest area, or None when no contour approximates to a
        quadrilateral.
    """
    found, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    best_quad, best_area = None, 0

    for candidate in found:
        candidate_area = cv2.contourArea(candidate)
        # Simplify with a tolerance of 2% of the closed perimeter.
        polygon = cv2.approxPolyDP(candidate, 0.02 * cv2.arcLength(candidate, True), True)
        if len(polygon) != 4 or candidate_area <= best_area:
            continue
        best_quad, best_area = polygon, candidate_area

    return best_quad

def warp_perspective(img, contour):
    """Stretch the quadrilateral ``contour`` so that it fills the whole image.

    Args:
        img: Image to warp; the output keeps its width and height.
        contour: Four corner points, in any order (they are put in order by
            reorder_points).

    Returns:
        The warped image, same size as ``img``.
    """
    height, width = img.shape[:2]
    source_corners = np.array(reorder_points(contour), dtype=np.float32)
    # Destination corners follow the same order as reorder_points:
    # top-left, top-right, bottom-left, bottom-right.
    target_corners = np.array(
        [[0, 0], [width, 0], [0, height], [width, height]], dtype=np.float32
    )
    homography = cv2.getPerspectiveTransform(source_corners, target_corners)
    return cv2.warpPerspective(img, homography, (width, height))
def reorder_points(points):
    """Put four corner points in the order TL, TR, BL, BR.

    Args:
        points: Array holding four (x, y) points; reshaped to (4, 2).

    Returns:
        int32 array of shape (4, 1, 2) ordered top-left, top-right,
        bottom-left, bottom-right.
    """
    corners = points.reshape((4, 2))
    coord_sum = corners.sum(axis=1)          # x + y
    coord_diff = np.diff(corners, axis=1)    # y - x

    # x + y is smallest at the top-left and largest at the bottom-right;
    # y - x is smallest at the top-right and largest at the bottom-left.
    picks = (
        np.argmin(coord_sum),   # top-left
        np.argmin(coord_diff),  # top-right
        np.argmax(coord_diff),  # bottom-left
        np.argmax(coord_sum),   # bottom-right
    )

    ordered = np.zeros((4, 1, 2), dtype=np.int32)
    for slot, pick in enumerate(picks):
        ordered[slot] = corners[pick]
    return ordered

def extract_paper(input_img):
    """Locate the sheet of paper in a photo and return it flattened, in grayscale.

    Pipeline: edge detection + dilation (preprocess_image_paper), pick the
    largest four-cornered contour (find_largest_contour), then warp the
    grayscale image so that contour fills the frame (warp_perspective).

    Args:
        input_img: Colour image as loaded by cv2.imread. cv2.imread returns
            None when the file cannot be read, so None is rejected here.

    Returns:
        The warped grayscale image.

    Raises:
        ValueError: If ``input_img`` is None, or if no four-cornered contour
            is found.

    Side effects:
        Writes the result to
        ./processing/extract-paper/image_after_extracting_paper.jpg.
    """
    if input_img is None:
        # Fail early with a readable message instead of a cryptic OpenCV assertion.
        raise ValueError("Input image is empty; check that the file exists and is a readable image.")

    img_gray, edges = preprocess_image_paper(input_img)
    largest_contour = find_largest_contour(edges)
    if largest_contour is None:
        raise ValueError("No paper-like region found in the image.")

    output_img = warp_perspective(img_gray, largest_contour)
    cv2.imwrite('./processing/extract-paper/image_after_extracting_paper.jpg', output_img)
    return output_img
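# Explanation of the paper-extraction steps:
# 1- Apply Canny edge detection, then dilate the edges.
# 2- Find the largest four-sided contour in the dilated edge image.
# 3- Warp the perspective so that this contour fills the image.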

In [150]:
#grid extraction
def preprocess_image_grid(img):
    """Binarise the image and invert it.

    Args:
        img: Grayscale image, or a 3-channel image which is first converted
            with cv2.COLOR_BGR2GRAY.

    Returns:
        The thresholded image subtracted from 255, i.e. with black and white
        swapped.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    # NOTE(review): with THRESH_OTSU the threshold is presumably chosen by
    # OpenCV and the 127 ignored -- confirm against the OpenCV docs.
    binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    return 255 - binary  # Invert binary image


def remove_image_borders(img, border_size=20):
    """Zero out a frame of ``border_size`` pixels along every edge of ``img``.

    Args:
        img: Array indexed as [row, column]; it is modified in place.
        border_size: Frame thickness in pixels. Values <= 0 leave the image
            untouched.

    Returns:
        The same ``img`` object, after modification.
    """
    # Guard: with border_size == 0 the slice ``-0:`` means ``0:`` and would
    # blank the entire image instead of nothing.
    if border_size <= 0:
        return img

    img[:border_size, :] = 0
    img[-border_size:, :] = 0
    img[:, :border_size] = 0
    img[:, -border_size:] = 0
    return img

def compare_contours(a, b):
    """cmp-style comparator: order contours by column, then top to bottom.

    Args:
        a, b: Contours accepted by cv2.boundingRect.

    Returns:
        Negative, zero or positive int, as expected by functools.cmp_to_key.
    """
    left_a, top_a = cv2.boundingRect(a)[:2]
    left_b, top_b = cv2.boundingRect(b)[:2]
    horizontal_gap = left_a - left_b
    # Left edges within 7 px are treated as the same column, in which case
    # the vertical position decides the order.
    return horizontal_gap if abs(horizontal_gap) > 7 else top_a - top_b

def extract_lines(img, kernel_size, orientation='vertical'):
    """Keep only long vertical or horizontal strokes of a binary image.

    Args:
        img: Binary image to filter.
        kernel_size: Length in pixels of the 1-pixel-wide structuring element.
        orientation: 'vertical' for a tall kernel; any other value selects a
            wide (horizontal) kernel.

    Returns:
        The image after three erosions followed by three dilations with the
        line-shaped kernel.
    """
    # Kernel size is given as (width, height).
    kernel_shape = (1, kernel_size) if orientation == 'vertical' else (kernel_size, 1)
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_shape)

    thinned = cv2.erode(img, line_kernel, iterations=3)
    return cv2.dilate(thinned, line_kernel, iterations=3)
    
def sort_contours(contours):
    """Return a new list of the contours ordered by compare_contours."""
    ordered = list(contours)
    ordered.sort(key=cmp_to_key(compare_contours))
    return ordered

def draw_hough_lines(img, lines, orientation='vertical'):
    """Extend each detected segment across the full image and draw it in white.

    Args:
        img: Image drawn on in place.
        lines: Iterable of groups of (x1, y1, x2, y2) segments, as returned
            by cv2.HoughLinesP.
        orientation: 'vertical' spans each segment from the top edge to the
            bottom edge; any other value spans from the left edge to the
            right edge.

    Returns:
        The same ``img`` object, after drawing.
    """
    height, width = img.shape[0], img.shape[1]
    white = (255, 255, 255)
    is_vertical = orientation == 'vertical'

    for segment_group in lines:
        for x1, y1, x2, y2 in segment_group:
            if is_vertical:
                start, end = (x1, 0), (x2, height)
            else:
                start, end = (0, y1), (width, y2)
            cv2.line(img, start, end, white, 1)
    return img


def extract_grid(img):
    """Split a table image into its individual cell images.

    The table lines are isolated morphologically, reinforced with Hough
    lines, and intersected; each intersection becomes one contour. The
    contours are sorted column by column and the cells are cropped from the
    space between neighbouring intersections.

    Args:
        img: Image of the table (grayscale or BGR).

    Returns:
        List of rows, each a list of cell images cropped from ``img``. The
        band above the second line intersection is skipped (presumably the
        table header -- confirm).

    Raises:
        ValueError: If fewer than two line intersections are detected, so no
            row can be determined.
    """
    # Contours whose x differs by at most this many pixels belong to the same
    # column. Must match the tolerance used by compare_contours, otherwise a
    # 1-2 px jitter inside a column is mistaken for the start of a new column.
    column_tolerance = 7

    # Preprocess the image
    img_bin = preprocess_image_grid(img)
    img_bin = remove_image_borders(img_bin)

    # Extract vertical and horizontal lines
    kernel_length = img_bin.shape[1] // 30
    vertical_lines = extract_lines(img_bin, kernel_length, 'vertical')
    horizontal_lines = extract_lines(img_bin, kernel_length, 'horizontal')

    # Apply Hough Transform to enhance lines
    vertical_hough = cv2.HoughLinesP(vertical_lines, 1, np.pi / 180, 127, minLineLength=20, maxLineGap=10)
    horizontal_hough = cv2.HoughLinesP(horizontal_lines, 2, np.pi / 180, 127, minLineLength=20, maxLineGap=10)

    if vertical_hough is not None:
        vertical_lines = draw_hough_lines(vertical_lines, vertical_hough, 'vertical')
    if horizontal_hough is not None:
        horizontal_lines = draw_hough_lines(horizontal_lines, horizontal_hough, 'horizontal')

    # Keep only the pixels where a vertical and a horizontal line cross, then
    # grow them into solid blobs so each crossing yields a single contour.
    grid_img = cv2.bitwise_and(vertical_lines, horizontal_lines)
    general_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    grid_img = cv2.dilate(grid_img, general_kernel, iterations=3)
    grid_img = cv2.erode(grid_img, general_kernel, iterations=1)

    # Find and sort contours (column by column, top to bottom)
    contours, _ = cv2.findContours(grid_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = sort_contours(contours)

    # Count the intersections in the first column: walk the sorted contours
    # until the x position jumps to the next column.
    rows = []
    for idx in range(len(contours) - 1):
        x1, y1, _, _ = cv2.boundingRect(contours[idx])
        x2, _, _, _ = cv2.boundingRect(contours[idx + 1])
        rows.append(y1)
        if abs(x1 - x2) > column_tolerance:  # Break if a new column starts
            break

    num_rows = len(rows)
    if num_rows == 0:
        # Previously this fell through to a ZeroDivisionError below.
        raise ValueError("No table grid found in the image.")
    num_columns = len(contours) // num_rows
    grid = []

    # Extract grid cells
    for row_idx in range(1, num_rows - 1):
        grid_row = []
        for col_idx in range(num_columns - 1):
            # Intersection above-left, below-left and below-right of the cell.
            _, y1, _, h1 = cv2.boundingRect(contours[row_idx + col_idx * num_rows])
            x2, _, w2, _ = cv2.boundingRect(contours[row_idx + col_idx * num_rows + 1])
            x3, y3, _, _ = cv2.boundingRect(contours[row_idx + (col_idx + 1) * num_rows + 1])

            # Crop the cell from the original image
            cell_img = img[y1 + h1: y3, x2 + w2: x3]
            grid_row.append(cell_img)

        grid.append(grid_row)

    return grid


In [147]:
#grid_data_extraction

# Size every crop is resized to before HOG extraction; also used as the HOG window.
HOG_IMG_SIZE = (32, 32)

def extract_hog_features(img):
    """Compute a flat HOG feature vector for an image.

    Args:
        img: Image to describe; may be None or empty.

    Returns:
        1-D array of HOG features computed on the image resized to
        HOG_IMG_SIZE, or None when ``img`` is None or has no pixels.
    """
    if img is None or img.size == 0:
        return None

    descriptor = cv2.HOGDescriptor(
        HOG_IMG_SIZE,  # window size: the whole resized image
        (8, 8),        # block size in pixels
        (4, 4),        # block stride in pixels
        (4, 4),        # cell size in pixels
        9,             # number of histogram bins
    )
    patch = cv2.resize(img, HOG_IMG_SIZE)
    return descriptor.compute(patch).flatten()

def _read_text(img, lang):
    """OCR ``img`` with Tesseract using the given language code.

    The image is first thresholded at 127 with THRESH_BINARY_INV, then read
    with ``--psm 6 --oem 3``.
    """
    _, img_bin = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    return pytesseract.image_to_string(img_bin, lang=lang, config="--psm 6 --oem 3")


def read_arabic_text(img):
    """Return the text Tesseract reads from ``img`` using the 'ara' language."""
    return _read_text(img, 'ara')


def read_english_text(img):
    """Return the text Tesseract reads from ``img`` using the 'eng' language."""
    return _read_text(img, 'eng')



def predict_digit(img, use_ocr=False):
    """Recognise the digit in ``img`` with Tesseract or with the digit model.

    Args:
        img: Image of the digit.
        use_ocr: When true, use Tesseract restricted to the characters 0-9;
            otherwise classify HOG features with ``digit_model``.

    Returns:
        The raw Tesseract string in OCR mode; otherwise the model's first
        prediction, or "" when no HOG features could be extracted.
    """
    if not use_ocr:
        features = extract_hog_features(img)
        return "" if features is None else digit_model.predict([features])[0]

    inverted = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)[1]
    ocr_config = "--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789"
    return pytesseract.image_to_string(inverted, config=ocr_config)


def predict_symbol(img):
    """Classify the symbol in ``img`` with ``symbol_model``.

    Returns:
        The model's first prediction for the image's HOG features, or ""
        when no features could be extracted.
    """
    features = extract_hog_features(img)
    return symbol_model.predict([features])[0] if features is not None else ""


def segment_image(img):
    """Find the bounding boxes of the shapes in an image.

    Args:
        img: Image to segment.

    Returns:
        List of (x, y, w, h) bounding boxes, sorted by that tuple, for every
        external contour whose area exceeds 50.
    """
    smoothed = cv2.GaussianBlur(img, (5, 5), 0)
    outlines, _ = cv2.findContours(
        cv2.Canny(smoothed, 50, 200), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    boxes = []
    for outline in sorted(outlines, key=cv2.boundingRect):
        if cv2.contourArea(outline) > 50:
            boxes.append(cv2.boundingRect(outline))
    return boxes


def get_id_from_image(img, use_ocr=True):
    """Read the ID written in a cell image.

    Args:
        img: Image of the ID cell.
        use_ocr: When true, return Tesseract's reading of the whole image.
            Otherwise the image is resized to 128x64, split into shapes with
            segment_image, and each shape is classified by predict_digit.

    Returns:
        The raw Tesseract string, or the per-shape predictions joined into
        one string.
    """
    if use_ocr:
        return pytesseract.image_to_string(img)

    img_resized = cv2.resize(img, (128, 64))
    boxes = segment_image(img_resized)
    # The boxes are expressed in the resized image's coordinates, so the
    # digits must be cropped from the resized image, not from the original.
    predictions = [
        predict_digit(img_resized[y:y + h, x:x + w]) for x, y, w, h in boxes
    ]
    return "".join(map(str, predictions))


def map_symbol_to_value(symbol):
    """Translate a symbol label into its numeric value.

    Fixed labels: "box" -> 0, "correct" -> 5, "empty" -> -1, "question" -> -2.
    "horizontal<N>" maps to 5 - N and "vertical<N>" maps to N, where <N> is
    a run of digits.

    Returns:
        The numeric value, or None for an unrecognised label.
    """
    fixed_values = {
        "box": 0,
        "correct": 5,
        "empty": -1,
        "question": -2
    }
    if symbol in fixed_values:
        return fixed_values[symbol]

    # Prefix -> how the trailing number is converted into a value.
    counted_symbols = (
        ("horizontal", lambda count: 5 - count),
        ("vertical", lambda count: count),
    )
    for prefix, convert in counted_symbols:
        if not symbol.startswith(prefix):
            continue
        suffix = symbol[len(prefix):]
        if suffix.isdigit():
            return convert(int(suffix))
    return None


def extract_data_from_grid(grid, use_ocr_id=True, use_ocr_digit=False):
    """Turn a grid of cell images into rows of values for the spreadsheet.

    Each cell is read according to its column index:
    0 -> ID (get_id_from_image), 1 -> Arabic text, 2 -> English text,
    3 -> digit (predict_digit), 4 and above -> symbol converted with
    map_symbol_to_value.

    Args:
        grid: List of rows, each a list of cell images; may be empty.
        use_ocr_id: Forwarded to get_id_from_image.
        use_ocr_digit: Forwarded to predict_digit.

    Returns:
        List of rows; the first row is the fixed header, followed by one row
        of extracted values per grid row.
    """
    # Debug output: number of cells in the first row. Guarded so that an
    # empty grid no longer raises IndexError before any data is produced.
    print(len(grid[0]) if len(grid) else 0)

    data = [["Code","اسم الطالب","Student Name In English", "1", "2", "3"]]
    for row in grid:
        row_data = []
        for idx, cell in enumerate(row):
            if idx == 0:
                row_data.append(get_id_from_image(cell, use_ocr_id))
            elif idx == 1:
                row_data.append(read_arabic_text(cell))
            elif idx == 2:
                row_data.append(read_english_text(cell))
            elif idx == 3:
                row_data.append(predict_digit(cell, use_ocr_digit))
            else:
                row_data.append(map_symbol_to_value(predict_symbol(cell)))
        data.append(row_data)
    return data

In [142]:
# generate_excel_sheet
def generate_excel_sheet(data=None, file_path='./new-output.xlsx', skip_value=-1, highlight_value=-2, highlight_color="FF0000"):
    """Write rows of values to an Excel workbook on disk and in memory.

    Args:
        data: List of rows (lists of cell values); None is treated as empty.
        file_path: Destination .xlsx path. Its parent directory is created
            when the path has one.
        skip_value: Cells equal to this value are left untouched.
        highlight_value: Cells equal to this value are written as "" and
            filled with ``highlight_color``.
        highlight_color: Fill colour passed to PatternFill.

    Returns:
        BytesIO holding the saved workbook, positioned at the start.
    """
    if data is None:
        data = []

    # os.path.dirname returns '' for a bare file name such as 'out.xlsx',
    # and os.makedirs('') raises, so only create a directory when there is one.
    output_dir = os.path.dirname(file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Create a new Excel workbook and select the active sheet
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    # Populate data into the sheet (openpyxl rows and columns are 1-based)
    for row_idx, row_data in enumerate(data, start=1):
        for col_idx, value in enumerate(row_data, start=1):
            if value == skip_value:
                continue
            cell = sheet.cell(row=row_idx, column=col_idx, value="" if value == highlight_value else value)
            if value == highlight_value:
                cell.fill = PatternFill(start_color=highlight_color, end_color=highlight_color, fill_type='solid')

    workbook.save(file_path)  # on disk

    # Save the workbook to a binary stream for in-memory usage
    excel_buffer = BytesIO()
    workbook.save(excel_buffer)
    # Writing leaves the cursor at the end; rewind so callers can read from the start.
    excel_buffer.seek(0)
    return excel_buffer

In [152]:
# Main: run the full pipeline on a sample image
# TODO:
# - explain why the values -1 and -2 are used
# - add extraction of the names
# - add a GUI
# - grid_data_extraction: test the old code

# Run the whole pipeline once on the sample photo ./1.jpg:
# read the photo -> isolate the paper -> split the table into cells ->
# read each cell -> write the result to ./updated.xlsx.
# NOTE(review): cv2.imread presumably returns None when ./1.jpg is missing or
# unreadable, which would make extract_paper fail -- confirm the file exists.
input_image = cv2.imread('./1.jpg', cv2.IMREAD_COLOR)
extracted_paper_image = extract_paper(input_image)
extracted_grid = extract_grid(extracted_paper_image)
extracted_data_from_grid = extract_data_from_grid(
extracted_grid, use_ocr_id=True, use_ocr_digit=False)
# generate_excel_sheet saves the file and also returns the workbook as an in-memory buffer.
excel_buffer = generate_excel_sheet(extracted_data_from_grid,'./updated.xlsx')

6


In [189]:
def process_image(status_label):
    """Let the user pick JPG images, process each one and save it as a spreadsheet.

    For every selected file the paper is extracted, the table is split into
    cells, the cells are read, and the user is asked where to save the
    resulting .xlsx file. Errors for a file are shown in a message box and
    processing moves on to the next file.

    Args:
        status_label: Tk label used to display the file currently being
            processed; it is cleared after each file.

    Note:
        Cancelling the "save as" dialog stops the whole batch: the remaining
        selected files are not processed.
    """
    file_types = [('Jpg Files', '*.jpg')]
    selected_files = filedialog.askopenfilenames(
        multiple=True, filetypes=file_types, title='Select Image Files'
    )

    for file_path in selected_files:
        try:
            # Update the status label with the current file name
            status_label.config(text=f"Processing: {os.path.basename(file_path)}")
            status_label.update_idletasks()

            input_image = cv2.imread(file_path, cv2.IMREAD_COLOR)
            if input_image is None:
                # cv2.imread signals failure by returning None; report the
                # path instead of letting a later OpenCV call fail obscurely.
                raise ValueError(f"Could not read image file: {file_path}")

            extracted_paper_image = extract_paper(input_image)
            extracted_grid = extract_grid(extracted_paper_image)
            extracted_data_from_grid = extract_data_from_grid(
                extracted_grid, use_ocr_id=True, use_ocr_digit=False
            )
            save_path = filedialog.asksaveasfilename(
                defaultextension=".xlsx",
                filetypes=[("Excel Files", "*.xlsx")],
            )
            if not save_path:
                return

            # The returned in-memory buffer is not needed here; the file on disk is the result.
            generate_excel_sheet(extracted_data_from_grid, save_path)
            messagebox.showinfo("Success", f"Data saved to {save_path}")
        except Exception as e:
            messagebox.showerror("Error", f"An error occurred: {e}")
        finally:
            # Clear the status label after processing
            status_label.config(text="")

def create_gui():
    """Build the main window and run the Tk event loop.

    The window shows a title, a status line and one button that starts
    process_image. The call returns only when the event loop ends.
    """
    window = tk.Tk()
    window.title("Grades Sheet Extractor")
    window.geometry("400x200")

    heading = tk.Label(window, text="Grades Sheet Extraction Tool", font=("Helvetica", 16))
    heading.pack(pady=10)

    # Shows which file is being processed; updated by process_image.
    status = tk.Label(window, text="", font=("Helvetica", 12), fg="blue")
    status.pack(pady=5)

    def on_process_clicked():
        process_image(status)

    process_button = tk.Button(
        window,
        text="Select Images and Process",
        command=on_process_clicked,
        font=("Helvetica", 12)
    )
    process_button.pack(pady=20)

    window.mainloop()

# Open the GUI when this cell/script is run directly. create_gui() ends with
# root.mainloop(), so execution stays here until the event loop finishes.
if __name__ == "__main__":
    create_gui()


2
