In [1]:
# Setup environment
import re
import torch
import cv2 
import numpy as np
import pytesseract
from pytesseract import Output
from matplotlib import pyplot as plt

In [2]:
# get grayscale image
def get_grayscale(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# noise removal
def remove_noise(image):
    return cv2.medianBlur(image,5)
 
#thresholding
def thresholding(image):
    return cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

#dilation
def dilate(image):
    kernel = np.ones((5,5),np.uint8)
    return cv2.dilate(image, kernel, iterations = 1)
    
#erosion
def erode(image):
    kernel = np.ones((5,5),np.uint8)
    return cv2.erode(image, kernel, iterations = 1)

#opening - erosion followed by dilation
def opening(image):
    kernel = np.ones((5,5),np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

#canny edge detection
def canny(image):
    return cv2.Canny(image, 100, 200)

#skew correction
def deskew(image):
    coords = np.column_stack(np.where(image > 0))
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

#template matching
def match_template(image, template):
    return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED) 

In [3]:
# Get model
model = torch.hub.load('ultralytics/yolov5', 'custom', path='weights/best2.pt')

Using cache found in C:\Users\Asus/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-12-13 Python-3.11.2 torch-2.1.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3070 Laptop GPU, 8192MiB)



Exception: [Errno 2] No such file or directory: 'weights\\best2.pt'. Cache may be out of date, try `force_reload=True` or see https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading for help.

In [None]:
# Set up model parameters


In [6]:
#Getting images/video
im1 = cv2.imread("E:/Internship/ML_simplePython/test_images/1-122856001-OCR-AS-B01.jpg")
display(im1)
scale_factor = 0.4 # Percent of og size
width = int(im1.shape[1] * scale_factor)
height = int(im1.shape[0] * scale_factor)
im1 = cv2.resize(im1, (width, height), interpolation=cv2.INTER_AREA)

array([[[209, 201, 171],
        [207, 199, 169],
        [210, 203, 170],
        ...,
        [ 47,  41,  30],
        [ 49,  45,  34],
        [ 41,  38,  24]],

       [[208, 200, 170],
        [207, 199, 169],
        [209, 202, 169],
        ...,
        [ 46,  40,  29],
        [ 47,  43,  32],
        [ 40,  37,  23]],

       [[198, 191, 158],
        [202, 195, 162],
        [206, 199, 166],
        ...,
        [ 44,  38,  27],
        [ 45,  41,  30],
        [ 40,  37,  23]],

       ...,

       [[ 22,  43,  51],
        [ 22,  43,  51],
        [ 22,  43,  51],
        ...,
        [ 84,  75,  55],
        [ 84,  73,  51],
        [ 87,  77,  53]],

       [[ 22,  43,  51],
        [ 22,  43,  51],
        [ 22,  43,  51],
        ...,
        [ 88,  79,  59],
        [ 88,  77,  55],
        [ 89,  79,  55]],

       [[ 22,  43,  51],
        [ 22,  43,  51],
        [ 22,  43,  51],
        ...,
        [ 91,  82,  62],
        [ 89,  78,  56],
        [ 88,  78,  54]]

In [37]:
# Inference
results = model(im1)
# Extracting results
print("Resulting dataframe")
df = results.pandas().xyxy[0]
print(df, "\n")
#      xmin    ymin    xmax   ymax  confidence  class    name



Resulting dataframe
       xmin       ymin        xmax        ymax  confidence  class  \
0  2.121716  15.062055  622.489502  422.386627     0.88864      1   

              name  
0  container_front   



In [38]:
# # Visualize result
# for ind in df.index: 
#     x1, y1 = int(df['xmin'][ind]), int(df['ymin'][ind])
#     x2, y2 = int(df['xmax'][ind]),int(df['ymax'][ind])
#     cv2.rectangle(im1, (x1, y1), (x2, y2), color=(255,0,0), thickness=2)
#     cv2.putText
#     croppedImg = im1[x1:x2, y1:y2]
#     # Preprocess image 

#     # processedImg = get_grayscale(croppedImg)
#     # processedImg = thresholding(processedImg)
#     # processedImg = opening(processedImg)
#     # processedImg = canny(processedImg)

#     pytesseract.pytesseract.tesseract_cmd = r'D:/Programs/Tesseract-OCR/tesseract.exe'
#     print(pytesseract.image_to_string(croppedImg))
#     cv2.waitKey(0)





In [47]:
ind = 0
x1, y1 = int(df['xmin'][ind]), int(df['ymin'][ind])
x2, y2 = int(df['xmax'][ind]),int(df['ymax'][ind])
cv2.rectangle(im1, (x1, y1), (x2, y2), color=(255,0,0), thickness=2)
cv2.putText
croppedImg = im1[x1:x2, y1:y2]
# Preprocess image 

processedImg = get_grayscale(cv2.imread("D:/Download/digits-task.jpg"))
# processedImg = thresholding(processedImg)
processedImg = opening(processedImg)
# processedImg = canny(processedImg)
processedImg = cv2.cvtColor(processedImg, cv2.COLOR_GRAY2BGR)

# OCR
custom_config = r'--oem 3 --psm 6'
pytesseract.pytesseract.tesseract_cmd = r'D:/Programs/Tesseract-OCR/tesseract.exe'
print(pytesseract.image_to_string(croppedImg, config=custom_config))

# Display processed and unprocessed image
cv2.imshow("Img", np.hstack((processedImg, cv2.imread("D:/Download/digits-task.jpg"))))
cv2.waitKey(0)

nd a lla
DRY

c = MAX. GRO:
2 a | me, ee AT



-1

In [40]:
# # Resize image
# scale_factor = 0.3 # Percent of og size
# width = int(im1.shape[1] * scale_factor)
# height = int(im1.shape[0] * scale_factor)
# resized = cv2.resize(im1, (width, height), interpolation=cv2.INTER_AREA)

# # Display
# cv2.imshow("Image", resized)

# cv2.waitKey(0)
# cv2.destroyAllWindows()