In [None]:
%%capture
# Environment setup: Python packages, the Tesseract OCR binary and the
# pre-trained EAST text-detection weights. Output is silenced by %%capture.

# Each Python dependency is installed once (pytesseract was listed twice);
# %pip targets the environment of the running kernel.
%pip install opencv-python pytesseract

# System OCR engine that pytesseract drives. -y answers the confirmation
# prompt so the cell cannot hang waiting for input.
!sudo apt install -y tesseract-ocr

# Fetch and unpack the EAST weights archive. The URL is quoted so the shell
# never interprets the "?" in the query string as a glob character.
# The model-loading cell expects frozen_east_text_detection.pb to be present
# in the working directory after extraction.
!wget "https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1" -O frozen_east_text_detection.tar.gz
!tar -xzf frozen_east_text_detection.tar.gz

In [None]:
import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math
import re
import pytesseract

# Load the pre-trained EAST model from the .pb file in the working directory
# (relative path). `model` is the default network used by
# extract_scores_and_geometry.
model = cv2.dnn.readNet("frozen_east_text_detection.pb")


In [None]:
# Fetch an image from the web and decode it for OpenCV
def download_image(url, timeout=30):
    """Download an image over HTTP and decode it into an OpenCV array.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so a single
            unresponsive host cannot stall a long batch run.

    Returns:
        The image decoded by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (via ``raise_for_status``).
        ValueError: If the response body is empty or cannot be decoded as an
            image.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx rather than trying to decode an error page.
    response.raise_for_status()

    # Convert the image data to a numpy array of raw bytes
    image_array = np.asarray(bytearray(response.content), dtype=np.uint8)

    # Decode the byte buffer using OpenCV; an empty body is never decodable.
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR) if image_array.size else None
    if image is None:
        # Without this check the failure would only surface later as an
        # AttributeError on `None` far away from the real cause.
        raise ValueError(f"Could not decode an image from {url!r}")
    return image

In [None]:
def extract_scores_and_geometry(image,net=model):
    """Run the text detector on an image and return its two raw output maps.

    The image is resized to a fixed 320x320 network input. Callers that map
    detections back to the original image must rescale by (W / 320, H / 320),
    as image_with_text_detected does.

    Args:
        image: Image array accepted by ``cv2.resize`` (the caller's array is
            not modified).
        net: Network object exposing ``setInput`` and ``forward``; defaults to
            the module-level ``model`` bound when this function was defined.

    Returns:
        Tuple ``(scores, geometry)``: the outputs of the
        "feature_fusion/Conv_7/Sigmoid" and "feature_fusion/concat_3" layers,
        in that order.
    """
    # Fixed network input size.
    input_width = 320
    input_height = 320

    # Resize the image to 320x320
    resized = cv2.resize(image, (input_width, input_height))

    # Output layers requested from the detector: score map, then geometry map.
    layer_names = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"
    ]

    # Build the input blob: scale factor 1.0, per-channel mean subtraction,
    # and swapRB=True to swap the first and last channels.
    blob = cv2.dnn.blobFromImage(resized, 1.0, (input_width, input_height),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)

    # Forward pass restricted to the two requested layers.
    net.setInput(blob)
    (scores, geometry) = net.forward(layer_names)

    return (scores, geometry)


In [None]:
# Function to decode the detector output into candidate text boxes
def decode_predictions(link, width_expansion_ratio=0.2,
                       min_confidence=0.5, nms_threshold=0.4):
    """Detect text boxes in a web image and run non-maximum suppression.

    Args:
        link: URL of the image; it is fetched with download_image.
        width_expansion_ratio: Fraction of each box's width added to BOTH its
            left and right side (the left edge is clamped at 0).
        min_confidence: Minimum score for a map cell to yield a box; also
            passed to NMS as its score threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        Tuple ``(rects, confidences, boxes)``:
            rects: list of ``(start_x, start_y, end_x, end_y)`` integer
                corner boxes in the coordinates of the 320x320 network input.
            confidences: list of float scores, parallel to ``rects``.
            boxes: flat integer array of the indices into ``rects`` kept by
                NMS (empty when nothing was detected).
    """
    # loading the image from web
    image = download_image(link)

    # extract scores and geometry
    (scores, geometry) = extract_scores_and_geometry(image)

    num_rows, num_cols = scores.shape[2:4]
    rects = []        # corner format, returned to the caller
    nms_rects = []    # same boxes as [x, y, width, height] for NMSBoxes
    confidences = []

    for y in range(num_rows):
        scores_data = scores[0, 0, y]
        # Geometry channels as used below: 0 + 2 give the box height,
        # 1 + 3 give the box width, channel 4 is the rotation angle.
        x_data0 = geometry[0, 0, y]
        x_data1 = geometry[0, 1, y]
        x_data2 = geometry[0, 2, y]
        x_data3 = geometry[0, 3, y]
        angles_data = geometry[0, 4, y]

        for x in range(num_cols):
            if scores_data[x] < min_confidence:
                continue

            # Map cells are scaled by 4 to obtain offsets in input pixels.
            offset_x, offset_y = (x * 4.0, y * 4.0)
            angle = angles_data[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = x_data0[x] + x_data2[x]
            w = x_data1[x] + x_data3[x]

            end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
            end_y = int(offset_y - (sin * x_data1[x]) + (cos * x_data2[x]))
            start_x = int(end_x - w)
            start_y = int(end_y - h)

            # Increase the length (width) of the box
            expand_w = int(w * width_expansion_ratio)
            start_x = max(start_x - expand_w, 0)  # Expand leftwards
            end_x += expand_w  # Expand rightwards

            # Clamp the top edge like the left edge: a negative start would
            # later be read as a wrap-around index when slicing the image.
            start_y = max(start_y, 0)

            rects.append((start_x, start_y, end_x, end_y))
            nms_rects.append([start_x, start_y, end_x - start_x, end_y - start_y])
            confidences.append(float(scores_data[x]))

    # NMSBoxes expects (x, y, width, height) rectangles; feeding it corner
    # boxes would make it compute overlaps on the wrong geometry.
    boxes = cv2.dnn.NMSBoxes(nms_rects, confidences, min_confidence, nms_threshold)
    # Normalise whatever index container comes back (possibly empty, possibly
    # shaped (N, 1)) into a flat integer array so callers can index rects.
    boxes = np.asarray(boxes, dtype=int).reshape(-1)
    return (rects, confidences, boxes)

In [None]:
def image_with_text_detected(image_link,width_expansion_ratio=0.2):
    """Detect text regions in a web image, crop them and draw their boxes.

    Args:
        image_link: URL of the image.
        width_expansion_ratio: Forwarded to decode_predictions.

    Returns:
        Tuple ``(orig, croped_images)``:
            orig: copy of the downloaded image with a green rectangle drawn
                around every kept, non-empty box.
            croped_images: list of sub-arrays of the downloaded image, one
                per kept, non-empty box.
    """
    rects, confidences, boxes = decode_predictions(image_link, width_expansion_ratio)

    # NOTE(review): decode_predictions already downloaded this image; it is
    # fetched a second time here because that function does not return it.
    image = download_image(image_link)
    orig = image.copy()
    (H, W) = image.shape[:2]

    # Detections are expressed in the 320x320 network input; these ratios map
    # them back onto the original image.
    new_width = 320
    new_height = 320
    rW = W / float(new_width)
    rH = H / float(new_height)

    croped_images = []
    # Flatten the kept indices so each one is a plain integer index,
    # whatever container shape decode_predictions handed back.
    for box_index in np.asarray(boxes, dtype=int).reshape(-1):
        (start_x, start_y, end_x, end_y) = rects[box_index]

        # Scale to the original image and clamp to its bounds. Without the
        # clamp a negative coordinate is read as a wrap-around slice index
        # and yields the wrong region.
        start_x = min(max(int(start_x * rW), 0), W)
        start_y = min(max(int(start_y * rH), 0), H)
        end_x = min(max(int(end_x * rW), 0), W)
        end_y = min(max(int(end_y * rH), 0), H)

        # Skip degenerate boxes: an empty crop carries no text to recognise.
        if end_x <= start_x or end_y <= start_y:
            continue

        # cropped image
        croped_images.append(image[start_y:end_y, start_x:end_x])

        # Draw the bounding box on the copy, leaving `image` (the crop
        # source) free of rectangle pixels.
        cv2.rectangle(orig, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
    return orig, croped_images



In [None]:
# OCR every cropped region and join the results into one string
def text_prediction(image_list):
    """Run Tesseract on each image and concatenate the recognised text.

    Args:
        image_list: Iterable of images accepted by
            ``pytesseract.image_to_string``.

    Returns:
        One string in which every recognised text is preceded by a single
        space, in input order. An empty input yields ``''``.
    """
    # Tesseract options shared by every call.
    tesseract_config = "-l eng --oem 1 --psm 6"

    pieces = []
    for crop in image_list:
        recognised = pytesseract.image_to_string(crop, config=tesseract_config)
        pieces.append(" " + recognised)
    return "".join(pieces)

In [None]:
def cleaning_text(text):
    """Normalise raw OCR text before measurements are extracted from it.

    Steps, in order:
      1. Every character that is not a word character, whitespace or "." is
         replaced by a space.
      2. Newlines become spaces and form feeds ('\\x0c') are removed.
      3. A space is inserted between a digit and a directly following ASCII
         letter ("39g" -> "39 g").

    Args:
        text: Raw OCR output.

    Returns:
        The cleaned string.
    """
    cleaned_text = re.sub(r'[^\w\s.]', ' ', text)
    # Newlines must turn into spaces, not vanish: deleting them glues the last
    # token of one line to the first of the next ("Wt\n39 g" -> "Wt39 g"),
    # after which the \b-anchored pattern in extract_numbers no longer sees
    # "39 g" as a measurement.
    cleaned_text = cleaned_text.replace('\n', ' ').replace('\x0c', '')
    cleaned_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', cleaned_text)
    return cleaned_text

In [None]:
def display_product(image_link):
    """Show a web image with its detected text boxes, then a grid of the crops.

    Args:
        image_link: URL of the image; passed to image_with_text_detected.

    Returns:
        None. Figures are rendered with matplotlib.
    """
    orignal_image, image_list = image_with_text_detected(image_link)

    # The images come from OpenCV, which decodes colour images in BGR channel
    # order, while matplotlib interprets arrays as RGB. Reversing the last
    # axis restores the true colours.
    plt.imshow(orignal_image[..., ::-1])

    n = len(image_list)
    if n == 0:
        # Nothing was detected: show the image alone. Going on would divide
        # by zero when computing the grid shape.
        plt.show()
        return

    # Near-square grid large enough for n images.
    cols = math.ceil(math.sqrt(n))  # Number of columns
    rows = math.ceil(n / cols)      # Number of rows

    # squeeze=False always returns a 2-D array of axes, so flatten() is valid
    # even for a single crop (otherwise a bare Axes object is returned).
    fig, axs = plt.subplots(rows, cols, figsize=(15, 15), squeeze=False)
    axs = axs.flatten()

    for ax, img in zip(axs, image_list):
        ax.imshow(img[..., ::-1])

    # Hide the axes everywhere, including unused trailing cells of the grid.
    for ax in axs:
        ax.axis('off')

    # Adjust layout to prevent overlap
    plt.tight_layout()
    plt.show()



In [None]:
def display_boxed_text(image_list):
    """Display a list of images in a near-square grid of subplots.

    Images are handed to ``imshow`` unchanged; if they are BGR arrays coming
    from OpenCV the colours will look swapped unless converted beforehand.

    Args:
        image_list: Sequence of images accepted by ``Axes.imshow``.

    Returns:
        None. Does nothing when ``image_list`` is empty.
    """
    n = len(image_list)
    if n == 0:
        # An empty list used to crash with ZeroDivisionError (n / cols with
        # cols == 0); there is simply nothing to draw.
        return

    # Calculate the number of rows and columns required for n images
    cols = math.ceil(math.sqrt(n))  # Number of columns
    rows = math.ceil(n / cols)      # Number of rows

    # squeeze=False always returns a 2-D array of axes, so flatten() also
    # works for a single image (otherwise a bare Axes object is returned).
    fig, axs = plt.subplots(rows, cols, figsize=(15, 15), squeeze=False)
    axs = axs.flatten()

    # Draw each image in its own cell.
    for ax, img in zip(axs, image_list):
        ax.imshow(img)

    # Hide the axes everywhere, including unused trailing cells of the grid.
    for ax in axs:
        ax.axis('off')

    # Adjust layout to prevent overlap
    plt.tight_layout()
    plt.show()

In [None]:
def image_to_text(image_link,length_inc=0.2):
    """Return the cleaned OCR text found in a web image.

    Args:
        image_link: URL of the image.
        length_inc: Width expansion ratio forwarded to
            image_with_text_detected.

    Returns:
        The string produced by cleaning_text applied to the concatenated OCR
        output of all detected text crops.
    """
    # Only the crops are needed here; the annotated image is discarded.
    _, text_crops = image_with_text_detected(image_link, length_inc)

    # detect -> OCR -> clean
    return cleaning_text(text_prediction(text_crops))

In [None]:
# image_to_text("https://m.media-amazon.com/images/I/61BZ4zrjZXL.jpg")

In [None]:
# display_product("https://m.media-amazon.com/images/I/71XK5d3Oh9L.jpg")

In [None]:
def extract_numbers(text):
    """Find "<number> <word>" measurement candidates in a string.

    A candidate is an integer or decimal number, optional whitespace, then a
    run of ASCII letters, delimited by word boundaries on both sides.

    Args:
        text: String to search.

    Returns:
        List of the matched substrings in order of appearance (empty when
        nothing matches).
    """
    measurement_re = re.compile(r'\b\d+(?:\.\d+)?\s*[a-zA-Z]+\b')
    return [hit.group(0) for hit in measurement_re.finditer(text)]

In [None]:
def check_unit(lst,units):
    """Return the first measurement string that contains one of the units.

    Each string is split on whitespace and every token is compared to
    ``units`` by exact, case-sensitive membership. Because tokens are single
    words, multi-word units such as 'long ton' can never match.

    Args:
        lst: Iterable of measurement strings such as "39 g".
        units: Container of accepted unit strings.

    Returns:
        The first string in ``lst`` with a token found in ``units``, or
        ``None`` when there is no such string.
    """
    for measurement in lst:
        # Compare against the `units` argument: the previous code tested the
        # global `weight_units`, so every entity type was matched against
        # weight units regardless of what the caller asked for.
        if any(token in units for token in measurement.split()):
            return measurement
    return None


In [None]:
def unit_in_image(link,unit_list):
    """Return the first measurement in a web image that uses an accepted unit.

    Args:
        link: URL of the image.
        unit_list: Accepted unit strings, forwarded to check_unit.

    Returns:
        Whatever check_unit returns for the measurement candidates extracted
        from the image text.
    """
    # OCR the image. NOTE(review): the suffix is the literal characters
    # " /n " (slash, n), not a newline; possibly "\n" was intended - confirm.
    ocr_text = image_to_text(link) + " /n "

    # Numbers together with the word that follows them.
    candidates = extract_numbers(ocr_text)

    # First candidate whose unit is accepted.
    return check_unit(candidates, unit_list)



In [None]:
# Smoke test on a single product image.
# NOTE: `weight_units` is only defined in a later cell of this notebook, so on
# a fresh kernel this cell raises NameError unless that cell is run first.
unit_in_image("https://m.media-amazon.com/images/I/814sAvV89SL.jpg",unit_list=weight_units)

'39 g'

In [None]:
import pandas as pd

In [None]:
# Load the rows to predict from the working directory. printdf reads the
# "image_link" and "entity_name" fields of each row.
data=pd.read_csv("test.csv")

In [None]:
# top10=data.head(100)

In [None]:
# top10

In [None]:
# Accepted unit spellings per entity type. check_unit compares tokens against
# these lists with exact, case-sensitive membership.
weight_units = [
    # abbreviations
    'mg', 'cg', 'dg', 'g', 'dag', 'hg', 'kg', 't', 'oz', 'lb', 'st', 'ton',
    'long ton', 'mcg',
    # full names
    'milligram', 'centigram', 'decigram', 'gram', 'decagram', 'hectogram',
    'kilogram', 'tonne', 'ounce', 'pound', 'stone', 'short ton', 'long ton',
    'microgram',
]

volume_units = [
    # abbreviations and short forms
    'ml', 'cl', 'dl', 'l', 'dal', 'hl', 'kl', 'cubic cm', 'cubic m', 'cup',
    'pt', 'qt', 'gal', 'fluid oz', 'teaspoon', 'tablespoon',
    # full names
    'milliliter', 'centiliter', 'deciliter', 'liter', 'decaliter',
    'hectoliter', 'kiloliter', 'cubic centimeter', 'cubic meter', 'cup',
    'pint', 'quart', 'gallon', 'fluid ounce', 'tsp', 'tbsp',
]

wattage_units = [
    'μW', 'mW', 'W', 'kW', 'MW', 'GW', 'TW',
    'microWatt', 'milliWatt', 'Watt', 'kiloWatt', 'MegaWatt', 'GigaWatt',
    'TeraWatt',
]

voltage_units = [
    'μV', 'mV', 'V', 'kV',
    'megavolt', 'microvolt', 'millivolt', 'volt', 'kilovolt', 'MV',
]

maximum_weight_recommendation_units = [
    'mg', 'g', 'kg', 't', 'lb', 'st', 'ton', 'long ton', 'short ton',
    'milligram', 'gram', 'kilogram', 'tonne', 'pound', 'stone', 'metric ton',
    'long ton', 'short ton',
]

# Shared by the height, depth and width entities.
dimension_units_for_h_d_w = [
    'mm', 'cm', 'dm', 'm', 'km', 'in', 'ft', 'yd', 'mi',
    'millimeter', 'centimeter', 'decimeter', 'meter', 'kilometer', 'inch',
    'foot', 'yard', 'mile',
]


In [None]:
# Entity names handled below:
#   'item_weight', 'item_volume', 'voltage', 'wattage',
#   'maximum_weight_recommendation', 'height', 'depth', 'width'
def printdf(row):
    """Predict the measurement for one data row.

    Args:
        row: Mapping-like object with a ``get`` method providing
            "image_link" and "entity_name".

    Returns:
        The result of unit_in_image for the row's image, using the unit list
        that belongs to the row's entity name; ``None`` when the entity name
        is not one of the handled values.
    """
    link = row.get("image_link")
    entity_name = row.get("entity_name")

    # Pick the unit vocabulary for this entity type.
    if entity_name == 'item_weight':
        units = weight_units
    elif entity_name == 'item_volume':
        units = volume_units
    elif entity_name == 'voltage':
        units = voltage_units
    elif entity_name == 'wattage':
        units = wattage_units
    elif entity_name == 'maximum_weight_recommendation':
        units = maximum_weight_recommendation_units
    elif entity_name in ('height', 'depth', 'width'):
        units = dimension_units_for_h_d_w
    else:
        # Unhandled entity: no prediction.
        return None

    return unit_in_image(link, unit_list=units)




In [None]:
from tqdm import tqdm

In [None]:
# Predict one value per row of `data`. This is slow: every row costs at least
# one image download plus a detection and OCR pass.
lst=[]
failed_rows = []  # (row index, error repr) for rows whose prediction raised

for index, row in tqdm(data.iterrows(), total=data.shape[0]):
    try:
        lst.append(printdf(row))
    except Exception as exc:
        # A single unreachable or undecodable image must not abort the whole
        # batch. Record the failure and keep `lst` aligned with `data` (one
        # entry per row). KeyboardInterrupt is not an Exception subclass, so
        # interrupting the cell still works.
        failed_rows.append((index, repr(exc)))
        lst.append(None)

  0%|          | 108/131187 [01:25<28:53:36,  1.26it/s]


KeyboardInterrupt: 

In [None]:
# Collect the predictions gathered so far in `lst` into a one-column frame;
# rows for which no measurement was returned hold None.
pre=pd.DataFrame(data=lst,columns=["predict entity_value"])

In [None]:
# Number of missing predictions per column.
pre.isnull().sum()

Unnamed: 0,0
predict entity_value,108
