In [1]:
import torch
import os
import requests
import numpy as np
from PIL import Image
from io import BytesIO
import cv2
from concurrent.futures import ThreadPoolExecutor
import easyocr
import re
import torch.nn as nn
import pandas as pd

In [2]:

# Ensure EasyOCR uses GPU
reader = easyocr.Reader(['en'], gpu=True)

  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  model.load_state_dict(torch.load(model_path, map_location=device))


In [3]:

entity_unit_map = {
    'width': ['centimetre', 'cm', 'foot', 'ft', 'inch', 'in', 'metre', 'm', 'millimetre', 'mm', 'yard', 'yd'],
    'depth': ['centimetre', 'cm', 'foot', 'ft', 'inch', 'in', 'metre', 'm', 'millimetre', 'mm', 'yard', 'yd'],
    'height': ['centimetre', 'cm', 'foot', 'ft', 'inch', 'in', 'metre', 'm', 'millimetre', 'mm', 'yard', 'yd'],
    'item_weight': ['gram', 'g', 'kilogram', 'kg', 'microgram', 'µg', 'milligram', 'mg', 'ounce', 'oz', 'pound', 'lb', 'ton'],
    'maximum_weight_recommendation': ['gram', 'g', 'kilogram', 'kg', 'microgram', 'µg', 'milligram', 'mg', 'ounce', 'oz', 'pound', 'lb', 'ton', 't'],
    'voltage': ['kilovolt', 'kV', 'millivolt', 'mV', 'volt', 'V'],
    'wattage': ['kilowatt', 'kW', 'watt', 'W'],
    'item_volume': ['centilitre', 'cL', 'cubic foot', 'ft³', 'cubic inch', 'in³', 'cup', 'cup', 'decilitre', 'dL', 'fluid ounce', 'fl oz', 'gallon', 'gal', 'imperial gallon', 'imp gal', 'litre', 'L', 'microlitre', 'µL', 'millilitre', 'mL', 'pint', 'pt', 'quart', 'qt']
}

In [4]:

# ImageDataGenerator class as is
class ImageDataGenerator:
    def __init__(self, batch_size, image_links, image_shape=None):
        self.batch_size = batch_size
        self.image_links = image_links
        self.image_shape = image_shape

    def __len__(self):
        return len(self.image_links) // self.batch_size

    def __getitem__(self, idx):
        batch_links = self.image_links[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch = []
        for link in batch_links:
            try:
                response = requests.get(link)
                response.raise_for_status()  # Raise an exception for HTTP errors
                image = Image.open(BytesIO(response.content))
                grayscale_image = image.convert('L')
                image_array = np.array(grayscale_image)
                image_array = cv2.resize(image_array, self.image_shape)

                batch.append(torch.tensor(image_array).float())  # Add channel dimension
            except Exception as e:
                print(f"Error loading image from {link}: {e}")

        return torch.stack(batch)

In [5]:

# Preprocessing function
def preprocess_image(image_tensor):
    image_np = image_tensor.cpu().numpy().astype(np.uint8)
    denoised = cv2.medianBlur(image_np, 3)
    _, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    binary_tensor = torch.tensor(binary).unsqueeze(0).float()  # Add channel
    return binary_tensor

# # OCR functions
# def apply_ocr(image_tensor):
#     config = '--oem 1 --psm 6'
#     image_np = image_tensor.squeeze(0).cpu().numpy().astype(np.uint8)
#     return pytesseract.image_to_string(image_np, config=config)

def easyocr_ocr(image_tensor):
    image_np = image_tensor.squeeze(0).cpu().numpy().astype(np.uint8)
    result = reader.readtext(image_np, detail=0)
    return " ".join(result)

# Entity extraction
def extract_value_and_unit_for_entity(text, entity):
    relevant_units = entity_unit_map[entity]
    pattern = r'(\d+(\.\d+)?)\s*(\w+)'
    matches = re.findall(pattern, text)
    results = [f"{match[0]} {match[2]}" for match in matches if match[2].lower() in relevant_units]
    return results

def ocr_pipeline(image_tensor, entity_name):
    preprocessed_image = preprocess_image(image_tensor)
    easyocr_result = easyocr_ocr(preprocessed_image)
    extracted_values = extract_value_and_unit_for_entity(easyocr_result, entity_name)
    if extracted_values:
        return extracted_values[0]

In [6]:
# Load train and test data
train_df = pd.read_csv('dataset/train.csv')
test_df = pd.read_csv("dataset/test.csv")


train_image_links = train_df['image_link'].tolist()
test_image_links = test_df['image_link'].tolist()

In [7]:
image_shape = (1024, 1024)
batch_size = 16

# train_generator = ImageDataGenerator(batch_size, train_image_links, image_shape)
test_generator = ImageDataGenerator(batch_size, test_image_links, image_shape)

In [8]:
batch_1 = test_generator.__getitem__(1)
batch_1[0].shape

torch.Size([1024, 1024])

In [9]:

# Parallel processing across 8 GPUs
def process_images_on_gpus(images, entity_names, num_gpus=7):
    # Split images into chunks for each GPU
    images_per_gpu = torch.chunk(images, num_gpus)
    entity_names_per_gpu = np.array_split(entity_names, num_gpus)
    
    # Assign to GPUs
    device = torch.device('cuda')
    results = []

    # Use torch DataParallel to run parallel across GPUs
    def process_on_single_gpu(images, entities, gpu_id):
        # print(gpu_id+1)
        images = images.cuda(gpu_id)
        gpu_results = []
        # print("Images:", images)
        # print(len(images))
        for i, image in enumerate(images):
            # print(i)
            # print(images[i])
            result = ocr_pipeline(image, entities[i])
            gpu_results.append(result)
        return gpu_results

    with ThreadPoolExecutor(max_workers=num_gpus) as executor:
        futures = [executor.submit(process_on_single_gpu, images_per_gpu[gpu_id], entity_names_per_gpu[gpu_id], gpu_id)
                   for gpu_id in range(num_gpus)]
        
        # Gather results
        for future in futures:
            results.extend(future.result())

    return results

# Run the processing loop
counter = 0
final=[]
for batch_images in test_generator:
    entity_names = test_df["entity_name"].values[counter: counter + len(batch_images)]
    
    # Process batch images on 8 GPUs
    results = process_images_on_gpus(batch_images, entity_names, num_gpus=8)
    final.extend(results)
    
    # Output results
    print(f"Batch {counter} processed, results: {results}")
    counter += len(batch_images)


Batch 0 processed, results: [None, None, None, None, None, None, None, None, '40 cm', '40 cm', None, None, None, None, None, None]
Batch 16 processed, results: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Batch 32 processed, results: ['5.8 In', '5.8 In', '5.8 In', None, None, None, None, None, None, None, None, None, '12.5 m', '8 in', '8 in', '8 in']
Batch 48 processed, results: [None, None, None, None, None, None, None, '7 cm', None, '54 m', '54 m', None, None, None, None, None]
Batch 64 processed, results: [None, None, None, None, None, None, None, '23 cm', '23 cm', '23 cm', None, None, '4.7 in', '4.7 in', None, None]
Batch 80 processed, results: [None, None, '11.1 lb', None, '39 in', '39 in', '39 in', None, None, None, '60 cm', '00 LB', None, None, None, '14 cm']


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


Batch 96 processed, results: ['14 cm', None, '5 cm', '14.9 in', None, None, None, '90 cm', '40 cm', '20 cm', '20 cm', None, None, '30 cm', None, None]
Batch 112 processed, results: [None, None, None, None, None, None, '3.9 in', '3.9 in', None, None, None, None, None, None, None, '129 cm']
Batch 128 processed, results: [None, None, '40 cm', '25.6 in', '25.6 in', '25.6 in', None, '0 cm', '0 cm', None, '29.95 cm', '10 mm', '10 mm', '3 cm', '02 in', '02 in']
Batch 144 processed, results: ['90 cm', '90 cm', '90 cm', None, None, None, None, '7495 m', '7.86 in', '7.86 in', '5 cm', '5 cm', '35 mm', None, None, None]
Batch 160 processed, results: ['13 cm', None, '82 cm', None, None, None, None, '6 kG', None, None, None, None, None, '7.8 In', '7.8 In', None]
Batch 176 processed, results: [None, '0.79 inch', None, '5.9 in', '5.9 in', None, '24 cm', None, None, None, '7.19 cm', '7.19 cm', '2.7 in', '2.7 in', None, '61 cm']
Batch 192 processed, results: ['61 cm', None, '20 cm', '27.6 cm', '27.6 cm'

IndexError: tuple index out of range

In [11]:
print(final)

[None, None, None, None, None, None, None, None, '40 cm', '40 cm', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, '5.8 In', '5.8 In', '5.8 In', None, None, None, None, None, None, None, None, None, '12.5 m', '8 in', '8 in', '8 in', None, None, None, None, None, None, None, '7 cm', None, '54 m', '54 m', None, None, None, None, None, None, None, None, None, None, None, None, '23 cm', '23 cm', '23 cm', None, None, '4.7 in', '4.7 in', None, None, None, None, '11.1 lb', None, '39 in', '39 in', '39 in', None, None, None, '60 cm', '00 LB', None, None, None, '14 cm', '14 cm', None, '5 cm', '14.9 in', None, None, None, '90 cm', '40 cm', '20 cm', '20 cm', None, None, '30 cm', None, None, None, None, None, None, None, None, '3.9 in', '3.9 in', None, None, None, None, None, None, None, '129 cm', None, None, '40 cm', '25.6 in', '25.6 in', '25.6 in', None, '0 cm', '0 cm', None, '29.95 cm', '10 mm', '10 mm', '3 cm', 

In [16]:
# save the list
np.save('final.npy',  np.array(final))
np.savetxt('final.txt', final, fmt='%s')

In [12]:
# convert to df
final_df = pd.DataFrame(final, columns=['entity_value'])
final_df


Unnamed: 0,entity_value
0,
1,
2,
3,
4,
...,...
131179,
131180,
131181,
131182,500 LB


In [13]:
# save the df to csv
final_df.to_csv('final.csv', index=False)