In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,Flatten, Dense
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
import os
import sys
import cv2
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np


In [10]:
image_height = 28
image_width = 28
image_channels = 1
label_length = 1
n_classes= 10  # Digits 0-9

digits ="0123456789"

if getattr(sys, 'frozen', False):
    # Running as compiled .exe
    base_path = os.path.dirname(sys.executable)
else:
    try:
        # Running as script
        base_path = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # Handle interactive environments
        base_path = os.getcwd()

data_dir = os.path.join(base_path, "Single_digit_samples")
n_samples = len(os.listdir(data_dir))

def preprocess():
  X = np.zeros((n_samples,image_height,image_width,image_channels))
  y = np.zeros((n_samples,n_classes)) 

  for i, file in enumerate(os.listdir(data_dir)):
    
    img = cv2.imread(os.path.join(data_dir, file), cv2.IMREAD_GRAYSCALE) #Read image in grayscale format
    label = file[:1]
    
    if len(label) < 2: # 1 digit
      img = img / 255.0
      img = np.reshape(img, (image_height, image_width, image_channels))

      target =np.zeros(n_classes) #creates an array of size 1*10 with all entries 0
      index = digits.find(label)
      target[index] = 1

      X[i] = img 
      y[i] = target 

  return X,y

images, labels = preprocess() 

X_train, X_test, y_train, y_test = train_test_split(images, labels, shuffle=True, test_size=0.2) # shuffle the data to break groups


def train_model(input_shape):
    model = Sequential()

    # Convolutional layers
    model.add(Conv2D(16 ,(3, 3), padding='same', activation='relu', input_shape=input_shape))

    model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))

    # Flatten and fully connected layers
    model.add(Flatten())

    model.add(Dense(n_classes, activation= "softmax", kernel_regularizer=l2(0.001)))
    model.compile(optimizer = 'adam', loss='categorical_crossentropy', metrics = ['accuracy'])
    
    return model

input_shape = (28, 28, 1)
model = train_model(input_shape)
validation_data = (X_test, y_test)
model.fit(X_train, y_train, epochs= 3, validation_split= 0.2, batch_size = 16)

# Convert the model to TensorFlow Lite as it is faster.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open('model.tflite', 'wb') as f:
    f.write(tflite_model)



Epoch 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8790 - loss: 0.4719 - val_accuracy: 0.9884 - val_loss: 0.1290
Epoch 2/3
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9932 - loss: 0.0700 - val_accuracy: 0.9884 - val_loss: 0.1394
Epoch 3/3
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9929 - loss: 0.0761 - val_accuracy: 0.9884 - val_loss: 0.1142
INFO:tensorflow:Assets written to: C:\Users\Sharjeel\AppData\Local\Temp\tmpt_0nqxom\assets


INFO:tensorflow:Assets written to: C:\Users\Sharjeel\AppData\Local\Temp\tmpt_0nqxom\assets


Saved artifact at 'C:\Users\Sharjeel\AppData\Local\Temp\tmpt_0nqxom'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name='keras_tensor_11')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  2362361672912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2362361667920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2362361665232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2362361668496: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2362361671760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2362361673488: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [11]:

model_path = os.path.join(base_path, "model.tflite")
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

def pred_captcha(captcha, image_height, image_width, image_channels):
    image = cv2.imread(captcha)
    red_channel = image[:, :, 2]
    ret, clean_image = cv2.threshold(red_channel, 80, 255, cv2.THRESH_BINARY_INV) #Removing the lines in the captcha images using red color threshold.
    contours, heirarchy = cv2.findContours(clean_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0]) # Finding contours in the order they are found(left to right.)

    preprocessed_digits = []
    for c in contours:
        x,y,w,h = cv2.boundingRect(c)
        
        # Creating a rectangle around the digit in the original image (for displaying the digits fetched via contours)
        #adding or subtracting pixels to get the complete digit in the rectangle
        cv2.rectangle(clean_image, (x - 3,y - 3), (x+w+ 2, y+h + 2), color=(0, 255, 0), thickness=2)
        
        # Cropping out the digit from the image corresponding to the current contours in the for loop
        digit = clean_image[y:y+h, x:x+w]
        
        # Resizing that digit to (18, 18)
        resized_digit = cv2.resize(digit, (18,18))
        resized_digit = resized_digit.astype(np.float32)
        resized_digit = resized_digit / 255.0
        
        # Padding the digit with 5 pixels of black color in each side
        padded_digit = np.pad(resized_digit, ((5,5),(5,5)), "constant", constant_values=0)
        
        preprocessed_digits.append(padded_digit)

    preprocessed_digits = np.array(preprocessed_digits)
    preprocessed_digits = preprocessed_digits.reshape((len(preprocessed_digits), image_height, image_width, image_channels))

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    
    captcha_text = ''
    for digit in preprocessed_digits:

        # Reshaping the input to match the model's expected input shape
        input_data = np.expand_dims(digit, axis=0)
        interpreter.set_tensor(input_details[0]['index'], input_data)
        
        interpreter.invoke()
        
        output_data = interpreter.get_tensor(output_details[0]['index'])
        
        predicted_digit = np.argmax(output_data)
        captcha_text += str(predicted_digit)
    
    return captcha_text

In [14]:
# Compile the model with metrics
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', 'precision', 'recall'])

# Evaluate the model
results = model.evaluate(X_test, y_test, verbose=0)
print(f"Loss: {results[0]}")
print(f"Accuracy: {results[1]}")
print(f"Precision: {results[2]}")
print(f"Recall: {results[3]}")


Loss: 0.0896703228354454
Accuracy: 0.9860681295394897
Precision: 0.9860681295394897
Recall: 0.9860681295394897


In [28]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.7).astype("int32")
print(classification_report(y_test, y_pred))

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        55
           1       1.00      1.00      1.00        79
           2       0.91      1.00      0.95        51
           3       0.97      1.00      0.98        57
           4       1.00      1.00      1.00        64
           5       1.00      1.00      1.00        54
           6       1.00      0.99      0.99        71
           7       0.99      1.00      0.99        76
           8       1.00      0.92      0.96        74
           9       1.00      0.97      0.98        65

   micro avg       0.99      0.99      0.99       646
   macro avg       0.98      0.99      0.99       646
weighted avg       0.99      0.99      0.99       646
 samples avg       0.99      0.99      0.99       646

