In [1]:
# pip install opencv-python==4.5.5.64 opencv-python-headless==4.5.5.64

In [24]:
import os
import cv2
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.config import list_physical_devices

In [25]:
gpus = list_physical_devices('GPU')

for gpu in gpus:
    print(f'Name: {gpu.name}')
    print(f'Type: {gpu.device_type}')

Name: /physical_device:GPU:0
Type: GPU


In [89]:
image_path = 'images/img.png'  # Replace with your image path

print(os.path.exists(image_path))  # Should return True

True


In [90]:
# Step 1: Preprocess the image
def preprocess_image_for_segmentation(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (800, 800))  # Resize to manageable dimensions
    _, binary_image = cv2.threshold(image, 130, 255, cv2.THRESH_BINARY_INV)  # Lower value
    # _, binary_image = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)
    return image, binary_image

In [91]:
# Step 2: Detect contours
def segment_image(binary_image, min_contour_size=10):
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"Contours detected: {len(contours)}")
    bounding_boxes = [cv2.boundingRect(c) for c in contours if cv2.contourArea(c) > min_contour_size]
    return bounding_boxes

In [92]:
# Step 3: Visualize contours
def draw_contours(image, contours):
    return cv2.drawContours(image, contours, -1, (0, 255, 0), 2)  # Draw all contours in green


In [93]:
# Step 4: Extract segments
def extract_segments(image, bounding_boxes, target_size=(28, 28)):
    segments = []
    for (x, y, w, h) in bounding_boxes:
        segment = image[y:y+h, x:x+w]  # Crop the segment
        segment = cv2.resize(segment, target_size)  # Resize to target size
        segments.append(segment)
    return segments

In [94]:
# Preprocessing
original_image, binary_image = preprocess_image_for_segmentation(image_path)
cv2.imwrite('images/binary_image_output.png', binary_image)
print("Binary image saved!")

Binary image saved!


In [95]:
# Contour detection and visualization
bounding_boxes = segment_image(binary_image)
print(f"Detected {len(bounding_boxes)} segments.")

Contours detected: 5
Detected 5 segments.


In [96]:
# Visualize detected contours
contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
image_with_contours = draw_contours(original_image.copy(), contours)
cv2.imwrite('images/ContoursImage.png', image_with_contours)

True

In [97]:
# Segment extraction
segments = extract_segments(original_image, bounding_boxes)
for i, segment in enumerate(segments[:5]):  # Save the first 5 segments
    cv2.imwrite(f'images/segment_{i}.png', segment)

In [98]:
import numpy as np

def enhance_minus_sign(binary_image):
    # Create a small kernel for dilation
    kernel = np.ones((2,2), np.uint8)
    # Dilate the binary image once
    dilated = cv2.dilate(binary_image, kernel, iterations=2)
    # dilated = cv2.dilate(binary_image, kernel, iterations=1)
    return dilated

# Apply dilation after you create binary_image
dilated_image = enhance_minus_sign(binary_image)

# Save and inspect the dilated image
cv2.imwrite('images/dilated_binary_image.png', dilated_image)
print("Dilated binary image saved!")


Dilated binary image saved!


In [99]:
bounding_boxes = segment_image(dilated_image)
print(f"Detected {len(bounding_boxes)} segments after dilation.")

Contours detected: 5
Detected 5 segments after dilation.


In [100]:
contours, _ = cv2.findContours(dilated_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
image_with_contours = draw_contours(original_image.copy(), contours)
cv2.imwrite('ContoursImage_dilated.png', image_with_contours)

segments = extract_segments(original_image, bounding_boxes)
for i, segment in enumerate(segments[:5]):
    cv2.imwrite(f'images/segment_{i}.png', segment)


In [103]:
path = 'models'

num_recog_model = tf.keras.models.load_model(os.path.join(path, 'num_model.keras'))
sym_recog_model = tf.keras.models.load_model(os.path.join(path, 'sym_model_cp1.keras'))

2024-12-12 21:00:17.648111: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-12-12 21:00:17.648593: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-12-12 21:00:17.648606: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-12-12 21:00:17.649038: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-12 21:00:17.649637: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [104]:
num_recog_model.summary()
sym_recog_model.summary()

In [105]:
# Let's assume we have at least one segment
sample_segment = segments[0]

# 1. Normalize pixel values to 0-1
sample_segment = sample_segment.astype('float32') / 255.0

# 2. Add batch and channel dimensions (if the model expects (28, 28, 1))
sample_segment = np.expand_dims(sample_segment, axis=-1)    # Add channel dimension
sample_segment = np.expand_dims(sample_segment, axis=0)     # Add batch dimension

# Now sample_segment has shape (1, 28, 28, 1)

In [106]:
sample_segment.shape

(1, 28, 28, 1)

In [107]:
digit_prediction = num_recog_model.predict(sample_segment)  # Returns logits or probabilities
digit_class = np.argmax(digit_prediction)
print("Digit model prediction:", digit_class)

2024-12-12 21:03:13.747777: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Digit model prediction: 5


In [108]:
symbol_prediction = sym_recog_model.predict(sample_segment)
symbol_class = np.argmax(symbol_prediction)
print("Symbol model prediction:", symbol_class)

ValueError: Exception encountered when calling Conv2D.call().

[1mNegative dimension size caused by subtracting 2 from 1 for '{{node sequential_3_1/conv2d_9_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential_3_1/max_pooling2d_8_1/MaxPool2d, sequential_3_1/conv2d_9_1/convolution/ReadVariableOp)' with input shapes: [1,1,1,128], [2,2,128,64].[0m

Arguments received by Conv2D.call():
  • inputs=tf.Tensor(shape=(1, 1, 1, 128), dtype=float32)

17.77777777777778