In [37]:
import os, sys
import numpy as np
import cv2
import time
from imutils.object_detection import non_max_suppression
import pytesseract
import csv
import enchant
import random
import pandas as pd

In [38]:
# Load the text-detection network once at module level; east_detect() reads
# this global `net` for every image, so the model file is parsed only once.
net = cv2.dnn.readNet("model/frozen_east_text_detection.pb")
# d = enchant.Dict("en_US")

# Apply white balance on input image
def white_balance(img):
    """Apply a lightness-weighted white balance to a BGR image.

    The image is converted to LAB, each of the two colour channels is shifted
    towards the neutral value 128 by an amount proportional to the pixel's
    lightness, and the result is converted back to BGR.

    Args:
        img: BGR image accepted by ``cv2.cvtColor`` (the constant 128 assumes
            the 8-bit LAB encoding -- TODO confirm callers only pass uint8).

    Returns:
        The colour-corrected image in BGR channel order.
    """
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    lightness = lab[:, :, 0] / 255.0

    for channel in (1, 2):
        plane = lab[:, :, channel]
        corrected = plane - ((np.average(plane) - 128) * lightness * 1.1)
        if np.issubdtype(lab.dtype, np.integer):
            # Writing an out-of-range float into an integer array wraps around
            # instead of saturating, which produces speckled colours on
            # strongly tinted images. Saturate explicitly first.
            limits = np.iinfo(lab.dtype)
            corrected = np.clip(corrected, limits.min, limits.max)
        lab[:, :, channel] = corrected

    return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

In [39]:
# Find the confidence of the text extracted by Tesseract - Input (bounding box image)
def find_confidence(image):
    """Run Tesseract on one cropped region and report text plus confidence.

    Args:
        image: Image crop handed straight to ``pytesseract.image_to_data``.

    Returns:
        A ``(text, confidence)`` tuple. ``text`` is the ``str()`` rendering of
        the word list of the first block (for example ``"['FOO', 'BAR']"``) and
        ``confidence`` is the mean ``conf`` value of that block. When OCR
        raises or yields no scored rows, ``([], -1)`` is returned.
    """
    try:
        ocr = pytesseract.image_to_data(image, lang='eng', config='--dpi 72 --psm 8', output_type='data.frame')
    except Exception:
        # Best effort: any OCR failure (including an unusable crop) simply
        # counts as "nothing recognised" so the caller can try another crop.
        return [], -1

    # Rows with conf == -1 carry no recognition score and are discarded.
    scored = ocr[ocr.conf != -1]
    by_block = scored.groupby('block_num')
    words = by_block['text'].apply(list)
    mean_conf = by_block['conf'].mean()
    if len(words.values) == 0:
        return [], -1

    # NOTE(review): the dtype of the `text` column is inferred by pandas, so
    # purely numeric words may arrive here as floats -- confirm and, if so,
    # force a string dtype when reading the OCR output.
    return str(words.values[0]), mean_conf.values[0]

In [40]:
# Find the best confidence out of all the bounding boxes (after applying padding and image inversion)
def find_best_confidence(orig,startY,startX,endY,endX,area,total_boxes,conf_threshold=80.00):
    """Pick the most confident OCR reading for one detected text box.

    The raw box is tried first. While the best confidence does not exceed
    ``conf_threshold``, three stages of padded crops are tried in turn
    (vertical padding, horizontal padding, then both). Whenever a padded crop
    beats the current best but is still below the threshold, its inverted
    version (``255 - crop``) is tried as well.

    Args:
        orig: Image the box coordinates refer to, indexed as ``orig[y, x]``.
        startY, startX, endY, endX: Box corners in pixels.
        area: Box area as computed by the caller; scales the padding.
        total_boxes: Number of boxes the caller is processing; padding shrinks
            as it grows. Values below 1 are treated as 1.
        conf_threshold: Confidence above which a reading is accepted without
            trying further crops.

    Returns:
        ``(text, confidence)`` where ``text`` is the string produced by
        ``find_confidence`` (or ``''`` when nothing was recognised).
    """
    height, width = orig.shape[:2]
    # The padding formula divides by total_boxes; never let it be zero.
    total_boxes = max(total_boxes, 1)

    # (vertical, horizontal) padding multipliers, grouped into stages. A stage
    # is only entered while the best confidence is not above the threshold.
    padding_stages = (
        ((3, 0), (10, 0)),
        ((0, 8), (0, 15)),
        ((5, 5 * 4), (11, 11 * 4)),
    )

    # Clamp the raw box too: detector coordinates can be negative near the
    # image border, and a negative slice start would wrap around.
    crop = orig[_clamp(startY, 0, height):_clamp(endY, 0, height),
                _clamp(startX, 0, width):_clamp(endX, 0, width)]
    lines, conf = find_confidence(crop)

    for stage in padding_stages:
        if conf > conf_threshold:
            break
        for pad_y, pad_x in stage:
            grow_y = area * (pad_y / total_boxes)
            grow_x = area * (pad_x / total_boxes)
            padded = orig[_clamp(int(startY - grow_y), 0, height):_clamp(int(endY + grow_y), 0, height),
                          _clamp(int(startX - grow_x), 0, width):_clamp(int(endX + grow_x), 0, width)]
            lines, conf = _keep_better_reading(padded, lines, conf, conf_threshold)

    return ''.join(lines), conf


def _clamp(value, low, high):
    """Limit ``value`` to the inclusive range [low, high]."""
    return max(low, min(value, high))


def _keep_better_reading(crop, lines, conf, conf_threshold):
    """OCR ``crop`` (and, if still weak, its inverse); return the better (text, conf)."""
    new_lines, new_conf = find_confidence(crop)
    if new_conf > conf:
        lines, conf = new_lines, new_conf
        if conf < conf_threshold:
            # Light-on-dark text often reads better once inverted.
            inv_lines, inv_conf = find_confidence(255 - crop)
            if inv_conf > conf:
                lines, conf = inv_lines, inv_conf
    return lines, conf

    



In [41]:
def east_detect(image, min_confidence=0.5):
    """Detect text boxes with the EAST network and OCR each of them.

    Uses the module-level ``net`` for detection and ``find_best_confidence``
    for recognition.

    Args:
        image: Input image (grayscale or 3-channel) as a NumPy array.
        min_confidence: Minimum detector score for a cell to yield a box.

    Returns:
        ``(orig, text)`` where ``orig`` is an untouched copy of the input and
        ``text`` is the space-separated alphanumeric OCR output of all boxes
        whose OCR confidence exceeds 40.
    """
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]

    min_area_pct = .3        # boxes smaller than this share of the image are ignored
    min_ocr_conf = 40.00     # OCR readings at or below this are dropped

    orig = image.copy()

    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

    (H, W) = image.shape[:2]

    # Network input size (must be a multiple of 32) and the ratio needed to
    # map detections back onto the original image.
    (newW, newH) = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    # Only resize when necessary. Scaling the boxes by rW/rH is only valid if
    # the network really saw a newW x newH image; without the resize, any
    # input that is not already 320x320 would get mis-scaled boxes.
    if (W, H) != (newW, newH):
        image = cv2.resize(image, (newW, newH))

    # The tuple is the per-channel mean that blobFromImage subtracts.
    blob = cv2.dnn.blobFromImage(image, 1.0, (newW, newH),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)

    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    rects, confidences = _decode_east_predictions(scores, geometry, min_confidence)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # Scale every surviving box to original-image coordinates once, and keep
    # only those that are large enough. The same list provides both the box
    # count and the boxes to OCR, so the two can never disagree.
    image_area = float(H * W)
    candidates = []
    for box in boxes:
        startX = int(box[0] * rW)
        startY = int(box[1] * rH)
        endX = int(box[2] * rW)
        endY = int(box[3] * rH)

        # box area as a percentage of the image area
        area = (startY-endY) * (startX-endX)
        area = (area / image_area) * 100
        if area < min_area_pct:
            continue
        candidates.append((startY, startX, endY, endX, area))

    total_boxes = len(candidates)

    out_final = ""
    for (startY, startX, endY, endX, area) in candidates:
        # get the best text and confidence for each box
        out, conf = find_best_confidence(orig, startY, startX, endY, endX, area, total_boxes)
        out_2 = ''
        if conf > min_ocr_conf:
            out_2 = _format_ocr_words(out)
        out_final = out_final + ' ' + out_2

    return orig, out_final.strip()


def _decode_east_predictions(scores, geometry, min_confidence):
    """Turn the raw EAST score/geometry maps into candidate boxes and scores."""
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(numRows):
        # scores (probabilities) followed by the geometry used to derive the
        # bounding box that surrounds the text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(numCols):
            if scoresData[x] < min_confidence:
                continue

            # the feature maps are 4x smaller than the network input
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # The angle is not needed after this point, so only the four
            # corner coordinates are passed on to non-max suppression.
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    return rects, confidences


def _format_ocr_words(raw_text):
    """Keep only alphanumeric characters of each comma-separated OCR word.

    Every word is prefixed with a single space; this spacing is kept exactly
    as before so the generated text stays comparable with earlier runs.
    """
    return ''.join(
        ' ' + ''.join(ch for ch in piece if ch.isalnum())
        for piece in raw_text.split(",")
    )


In [None]:
# Input CSV; its 'Id' column holds the image file name of each row
filename = 'data/CV/oneHot_tmdbMovies.csv'
# Output CSV: the second and third input columns plus the detected text
filename_write = 'data/CV/oneHot_tmdbMovies_withText-(extra).csv'

with open(filename, 'r') as csvfile:
    datareader = pd.read_csv(csvfile)

# `with` closes the output even if a row fails half-way; newline='' is what
# the csv module expects so that no blank lines appear between rows.
with open(filename_write, 'w', newline='') as f:
    writer = csv.writer(f)
    for count, (idx, row) in enumerate(datareader.iterrows(), start=1):
        image_path = f"data/CV/resizeGoodQuali_320/{row['Id']}"
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            # Keep the row (with empty text) so the output stays aligned with
            # the input instead of aborting the whole batch.
            print("Could not read image -", image_path)
            text = ''
        else:
            out_image, text = east_detect(image)
        # .iloc makes the positional column access explicit; plain row[1] on a
        # label-indexed Series is deprecated in recent pandas versions.
        writer.writerow([row.iloc[1], row.iloc[2], text])
        print ("Processing count -",count)

Processing count - 1
Processing count - 2
Processing count - 3
Processing count - 4
Processing count - 5
Processing count - 6
Processing count - 7
Processing count - 8
Processing count - 9
Processing count - 10
Processing count - 11
Processing count - 12
Processing count - 13
Processing count - 14
