In [1]:
import os
import pickle

import cv2
import numpy as np
from sklearn.svm import SVC

# Feature extraction

In [2]:
def extract_features_arabic_numbers(image):
  """Extract per-contour bounding-box and intensity features from an image.

  The image is converted to grayscale, binarised with an inverted Otsu
  threshold, and its external contours are located. For every contour two
  entries are appended to the result, in this order: the bounding box
  ``(x, y, w, h)`` and the value returned by ``cv2.mean`` for the
  thresholded pixels inside that box.

  Args:
    image: Image array such as the one returned by ``cv2.imread``; either a
      multi-channel BGR image or an already single-channel 2-D array.

  Returns:
    A list with two entries per detected contour (bounding box, then mean);
    empty when no contour is found.

  Raises:
    ValueError: If ``image`` is ``None`` (``cv2.imread`` returns ``None``
      when a file cannot be read).
  """
  # NOTE(review): the result length depends on how many contours are found;
  # a classifier that needs fixed-length vectors would have to pad or
  # aggregate these entries -- confirm against the training code.
  if image is None:
    raise ValueError('image is None; cv2.imread returns None when a file cannot be read')
  # A 2-D array is already single-channel, and COLOR_BGR2GRAY rejects it.
  if image.ndim == 2:
    grayscale = image
  else:
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  thresholded = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
  # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
  # (image, contours, hierarchy) on 3.x; index -2 is the contour list in both.
  contours = cv2.findContours(thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
  features = []
  for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    features.append((x, y, w, h))
    features.append(cv2.mean(thresholded[y:y+h, x:x+w]))
  return features

# Load dataset

In [7]:
def load_data(txt_file, image_dir):
  """Load (features, label) pairs from a label file and an image directory.

  Each non-blank line of ``txt_file`` is split on ``'/'`` into exactly two
  fields, ``image_name`` and ``label``. The image is read from
  ``image_dir`` and passed to ``extract_features_arabic_numbers``.

  Args:
    txt_file: Path of the text file listing ``image_name/label`` entries.
    image_dir: Directory that contains the referenced image files.

  Returns:
    A list of ``(features, label)`` tuples, one per non-blank line, where
    ``label`` is the raw string taken from the file.

  Raises:
    ValueError: If a non-blank line does not split into exactly two fields.
    FileNotFoundError: If an image cannot be read by ``cv2.imread``.
  """
  data = []
  with open(txt_file, 'r') as f:
    for line_number, line in enumerate(f, start=1):
      entry = line.strip()
      # Blank lines (e.g. trailing newlines) carry no sample; skip them
      # instead of failing on the tuple unpack.
      if not entry:
        continue
      fields = entry.split('/')
      if len(fields) != 2:
        raise ValueError(
            '%s:%d: expected "image_name/label", got %r' % (txt_file, line_number, entry))
      image_name, label = fields
      image_path = os.path.join(image_dir, image_name)
      image = cv2.imread(image_path)
      # cv2.imread signals failure by returning None rather than raising.
      if image is None:
        raise FileNotFoundError('could not read image: %s' % image_path)
      features = extract_features_arabic_numbers(image)
      data.append((features, label))
  return data

# training

In [8]:
def train_model(data):
  """Fit a linear-kernel SVM (C=1) on ``(features, label)`` pairs.

  Args:
    data: Iterable of ``(features, label)`` pairs.

  Returns:
    The fitted ``SVC`` instance.
  """
  samples = list(data)
  # Split the pairs into the two parallel sequences that ``fit`` expects.
  feature_rows = [row for row, _ in samples]
  targets = [target for _, target in samples]
  classifier = SVC(kernel='linear', C=1)
  classifier.fit(feature_rows, targets)
  return classifier


# prediction

In [9]:
def predict(model, image):
  """Return the model's first prediction for the features of ``image``.

  Args:
    model: Object exposing ``predict(features)``.
    image: Image passed to ``extract_features_arabic_numbers``.

  Returns:
    Element 0 of whatever ``model.predict`` returns.
  """
  # NOTE(review): the extracted feature list is handed to ``model.predict``
  # as-is, so each entry is presumably treated as a separate sample and only
  # the first result is kept -- confirm this matches how the model was trained.
  return model.predict(extract_features_arabic_numbers(image))[0]

# Main

In [None]:
if __name__ == '__main__':
  # End-to-end run: load the labelled images, train the SVM, then classify
  # a single image from the same directory.
  txt_file = 'C:/Users/hp/OCR/project/assets/dataset/label1'
  image_dir = 'C:/Users/hp/OCR/project/assets/data1'
  data = load_data(txt_file, image_dir)
  # Train on the loaded (features, label) pairs; passing the directory path
  # string here would make train_model iterate over its characters and fail.
  model = train_model(data)
  image_name = '0.jpg'
  image = cv2.imread(os.path.join(image_dir, image_name))
  prediction = predict(model, image)
  print('The text in the image is:', prediction)

# save model

In [None]:
# scikit-learn estimators have no `.save()` method (that is the Keras API),
# so the fitted SVC is serialised with pickle. The file is a pickle, not
# HDF5, hence the `.pkl` extension.
os.makedirs('model', exist_ok=True)  # open() does not create missing directories
with open('model/SVC_OCR_Model.pkl', 'wb') as model_file:
  pickle.dump(model, model_file)