In [4]:
import cv2
import numpy as np
import json
import matplotlib.pyplot as plt

from google.colab import files
uploaded = files.upload()  # Upload OMR_page-001.jpg

# Get the uploaded filename
template_filename = next(iter(uploaded))

# Use the uploaded filename to load the image
template_path = template_filename # Use the actual uploaded filename here
template_img = cv2.imread(template_path)

# Check if the image was loaded successfully and is not empty
if template_img is None: # Changed from template_img.empty() as imread returns None for failure
    print(f"Error: Could not load image from {template_path}")
    # Exit the script or handle the error appropriately
    # exit() # Removed exit()
    raise FileNotFoundError(f"Image not found or is empty at {template_path}") # Raise an exception instead

gray = cv2.cvtColor(template_img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                              cv2.THRESH_BINARY_INV, 11, 2)

# Find contours (to detect the bubble grid)
# Note: cv2.findContours returns a hierarchy as a third value in some OpenCV versions.
# We are ignoring it with _
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Filter contours to find bubbles (by area and circularity)
bubble_contours = []
for cnt in contours:
    area = cv2.contourArea(cnt)
    if 150 < area < 800:  # adjust as needed for your resolution
        perimeter = cv2.arcLength(cnt, True)
        # Added a small epsilon to the denominator to avoid division by zero if perimeter is 0
        # Check if perimeter is not zero before calculating circularity
        if perimeter > 0:
            circularity = 4 * np.pi * (area / (perimeter * perimeter))
            if 0.7 < circularity < 1.2: # adjust range as needed
                bubble_contours.append(cnt)

# Get centers of all bubbles
bubble_centers = []
for cnt in bubble_contours:
    (x, y), radius = cv2.minEnclosingCircle(cnt)
    bubble_centers.append((int(x), int(y)))

# Cluster bubbles into rows and columns
bubble_centers = np.array(bubble_centers)
# Check if bubble_centers is not empty before sorting
if bubble_centers.size > 0:
    bubble_centers = bubble_centers[bubble_centers[:,1].argsort()]  # sort by y
else:
    print("No bubble centers found. Cannot cluster into rows.")
    # Initialize rows and json_template as empty to avoid errors later
    rows = []
    json_template = {
        "fields": {
            "name": {}, "exam": {}, "date": {}, "roll_no": []
        },
        "questions": []
    }
    print("No bubbles detected. Skipping JSON generation and visualization.")
    # You might want to exit or return here if no bubbles are critical for the rest of the script
    # exit() # Or return

# Group bubbles by rows (using y-coordinate proximity)
rows = []
row = []
last_y = None
# Only proceed if bubble_centers is not empty
if bubble_centers.size > 0:
    for center in bubble_centers:
        x, y = center
        # Adjust threshold as needed based on vertical distance between rows
        if last_y is None or abs(y - last_y) < 18:
            row.append((x, y))
        else:
            rows.append(row)
            row = [(x, y)]
        last_y = y
    if row: # Append the last row
        rows.append(row)

# Sort bubbles in each row by x
for i in range(len(rows)):
    rows[i] = sorted(rows[i], key=lambda k: k[0])

# Build JSON template (only if rows were populated)
if rows:
    json_template = {
        "fields": {
            "name": {}, "exam": {}, "date": {}, "roll_no": []
        },
        "questions": []
    }

    # Optionally, you can manually add the header fields if needed (use OCR or fixed boxes)
    # Here we focus on bubbles

    # Assume roll_no is in the first column, first 10 rows, 1 bubble per row
    # Ensure that the slice is valid and there are enough rows for roll_no
    num_roll_no_rows = min(10, len(rows))
    for i in range(num_roll_no_rows):
        # Ensure that the row exists and has at least one element before accessing rows[i][0]
        if len(rows[i]) > 0:
            json_template["fields"]["roll_no"].append({
                "digit": i,
                "x": rows[i][0][0],
                "y": rows[i][0][1],
                "radius": 12  # adjust as needed
            })

    # For questions: assume each row after roll_no contains bubbles for questions
    question_number = 1
    # Ensure that the slice rows[num_roll_no_rows:] is valid
    start_row_for_questions = num_roll_no_rows
    for i, row in enumerate(rows[start_row_for_questions:]):  # skip roll_no rows
        # Assuming each question row has 4 options (bubbles)
        if len(row) == 4:
            options = []
            for opt, (x, y) in enumerate(row):
                options.append({
                    "option": opt + 1,
                    "x": x,
                    "y": y,
                    "radius": 12  # adjust as needed
                })
            json_template["questions"].append({
                "q_no": question_number,
                "options": options
            })
            question_number += 1

    # Save JSON template
    with open('omr_auto_template.json', 'w') as f:
        json.dump(json_template, f, indent=2)

    print(f"JSON template created for {question_number-1} questions and saved as 'omr_auto_template.json'.")

    # (Optional) Visualize detected bubbles for verification
    vis = template_img.copy()
    for row in rows:
        for (x, y) in row:
            # Draw a circle around the bubble center
            cv2.circle(vis, (x, y), 12, (0,255,0), 2) # Green color, thickness 2
    plt.figure(figsize=(12,16))
    # Convert BGR to RGB for matplotlib display
    plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
    plt.title("Detected Bubbles")
    plt.axis('off')
    plt.show()
else:
    print("No rows of bubbles detected. Skipping JSON generation and visualization.")

Saving OMR_page-0001.jpg to OMR_page-0001.jpg


TypeError: Object of type int64 is not JSON serializable