In [2]:
import os
import pandas as pd
import numpy as np
import re
import cv2
from PIL import Image, ImageEnhance
import easyocr
from tqdm import tqdm
import pytesseract
from src.utils import download_images
from src.constants import entity_unit_map
from src.constants import allowed_units


In [3]:
# Initialize EasyOCR Reader
# Built with the language list ['en'] and gpu=False, so it runs on the CPU.
# NOTE(review): `reader` is not referenced by any other cell visible in this
# notebook (the OCR below goes through pytesseract) — confirm it is still needed.
reader = easyocr.Reader(['en'], gpu=False)

Using CPU. Note: This module is much faster with a GPU.
  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  state_dict = torch.load(model_path, map_location=device)


In [4]:
# Location of the Tesseract executable used by pytesseract. The TESSERACT_CMD
# environment variable overrides it so the notebook can run on other machines;
# when the variable is unset, the original Windows install path is used.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD', r'C:/Tools/AddLib/Tesseract/tesseract.exe'
)

In [5]:
# Unit mappings for normalization
# Maps an abbreviation or plural spelling (lower-case) to the canonical unit
# name. Each key appears exactly once; insertion order is kept as it was.
unit_mapping = {
    # Weight
    'g': 'gram',
    'grams': 'gram',
    'kgs': 'kilogram',
    'kg': 'kilogram',
    'kilograms': 'kilogram',
    'lbs': 'pound',
    'lb': 'pound',
    'pounds': 'pound',
    'oz': 'ounce',
    'ounces': 'ounce',
    'mg': 'milligram',
    'mcg': 'microgram',
    # Length
    'cm': 'centimetre',
    'cms': 'centimetre',
    'mm': 'millimetre',
    'm': 'metre',
    'meters': 'metre',
    'metres': 'metre',
    'in': 'inch',
    'inches': 'inch',
    'ft': 'foot',
    'feet': 'foot',
    'yd': 'yard',
    'yards': 'yard',
    # Voltage / power
    'kv': 'kilovolt',
    'mv': 'millivolt',
    'v': 'volt',
    'w': 'watt',
    'kw': 'kilowatt',
    # Volume
    'l': 'litre',
    'liters': 'litre',
    'litres': 'litre',
    'ml': 'millilitre',
    'cc': 'cubic centimetre',
    'cu ft': 'cubic foot',
    'cu in': 'cubic inch',
    # Add more mappings as necessary
}

In [6]:
def adjust_contrast_brightness(image, contrast=1.5, brightness=0):
    """Return `image` rescaled through ``cv2.convertScaleAbs``.

    Args:
        image: image array handed straight to OpenCV.
        contrast: multiplicative factor, passed as ``alpha`` (default 1.5).
        brightness: additive offset, passed as ``beta`` (default 0).

    Returns:
        Whatever ``cv2.convertScaleAbs`` produces for the given arguments.
    """
    gain = contrast
    offset = brightness
    rescaled = cv2.convertScaleAbs(image, alpha=gain, beta=offset)
    return rescaled

def denoise_image(image):
    """Return `image` filtered by ``cv2.fastNlMeansDenoising`` with ``h=30``.

    No destination buffer is supplied (``None``), so the filtered image is
    the function's return value.
    """
    filter_strength = 30
    cleaned = cv2.fastNlMeansDenoising(image, None, h=filter_strength)
    return cleaned

def deskew(image):
    """Rotate `image` about its centre to undo the skew of its non-zero pixels.

    The skew is estimated from the minimum-area rectangle that encloses every
    pixel whose value is greater than zero.

    Args:
        image: image array; the notebook passes a single-channel grayscale
            image.

    Returns:
        The rotated image, with the same width and height as the input. When
        the image has no non-zero pixel there is nothing to measure, and an
        unrotated copy is returned.
    """
    # np.where yields platform-sized integer indices; cv2.minAreaRect expects
    # a 32-bit point array, so convert explicitly.
    coords = np.column_stack(np.where(image > 0)).astype(np.int32)
    if coords.shape[0] == 0:
        return image.copy()

    angle = cv2.minAreaRect(coords)[-1]

    # A rectangle's orientation is only defined modulo 90 degrees, and the
    # range minAreaRect reports depends on the OpenCV version ([-90, 0) in
    # older releases, (0, 90] in newer ones). Folding -angle into (-45, 45]
    # reproduces the classic "if angle < -45" correction for the old range
    # and stops an upright image (reported as 90) from being turned sideways.
    rotation = 45.0 - ((angle + 45.0) % 90.0)

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, rotation, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h),
                            flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

In [7]:
def preprocess_image(image_path):
    """Build several grayscale variants of one image for OCR.

    Args:
        image_path: path of the image file to load.

    Returns:
        A list of image arrays in this order: plain grayscale, denoised,
        contrast-adjusted, adaptively thresholded and, when deskewing
        succeeds, deskewed. The list is empty when the file cannot be decoded.
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None;
        # give the caller nothing to OCR instead of failing inside cvtColor.
        return []

    # Original grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    preprocessings = [gray]

    # Denoised image
    preprocessings.append(denoise_image(gray))

    # Adjusted contrast and brightness
    preprocessings.append(adjust_contrast_brightness(gray))

    # Adaptive thresholding (Gaussian-weighted, block size 31, constant 2)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 31, 2)
    preprocessings.append(thresh)

    # Deskewed image. This variant is best-effort: if deskewing fails the
    # other variants are still returned.
    try:
        preprocessings.append(deskew(gray))
    except Exception:
        pass

    return preprocessings

In [8]:
def extract_text(image):
    """Run Tesseract on `image` and return the recognised text.

    Args:
        image: image accepted by ``pytesseract.image_to_string`` (the notebook
            passes a PIL image).

    Returns:
        The text Tesseract produced, as a string.
    """
    # Engine mode 3 and page-segmentation mode 6, English language data.
    # The original built this configuration but never handed it to Tesseract,
    # so the call silently ran with the defaults.
    custom_config = r'--oem 3 --psm 6'
    text = pytesseract.image_to_string(image, lang='eng', config=custom_config)
    return text

In [9]:
def extract_entity(text, entity_name):
    """Find the first "<number> <unit>" in `text` that sits near a keyword.

    Args:
        text: OCR output to search.
        entity_name: entity being extracted; selects the keyword list used as
            context.

    Returns:
        ``(value, unit)`` where `value` is the number as a string with a
        decimal point and `unit` is the matched unit spelling in lower case,
        or ``(None, None)`` when nothing qualifies.
    """
    # Define keywords for context
    keywords = {
        'item_weight': ['weight', 'wt', 'net weight', 'nw'],
        'maximum_weight_recommendation': ['maximum weight', 'max weight', 'max wt'],
        'width': ['width', 'w'],
        'height': ['height', 'h'],
        'depth': ['depth', 'd'],
        'voltage': ['voltage', 'volt', 'v'],
        'wattage': ['wattage', 'watt', 'w'],
        'item_volume': ['volume', 'vol', 'capacity'],
    }
    entity_keywords = keywords.get(entity_name, [])
    if not text or not entity_keywords:
        # Nothing to search, or no keyword could ever confirm a match.
        return None, None

    # Regex pattern to match numbers and units. Alternatives are tried left to
    # right, so the longest spellings must come first: otherwise "5 ml" is
    # captured as unit "m" and "5 mv" as "m" as well. Spellings are escaped so
    # they are always matched literally. No trailing word boundary is
    # enforced, so singular forms such as "inch" still match through "in".
    unit_alternatives = sorted(unit_mapping, key=len, reverse=True)
    unit_list = '|'.join(re.escape(spelling) for spelling in unit_alternatives)
    pattern = r'(\d+(?:[\.,]\d+)?)\s*(%s)' % unit_list

    for match in re.finditer(pattern, text, re.IGNORECASE):
        # Treat a decimal comma as a decimal point.
        value = match.group(1).replace(',', '.')
        unit = match.group(2).lower()
        # Check for keywords within 50 characters either side of the match
        start_idx = max(0, match.start() - 50)
        end_idx = match.end() + 50
        surrounding_text = text[start_idx:end_idx].lower()
        if any(keyword in surrounding_text for keyword in entity_keywords):
            return value, unit
    return None, None

In [10]:
def normalize_unit(unit):
    """Translate a unit spelling into its canonical name.

    The spelling is lower-cased and stripped of leading/trailing dots before
    it is looked up in ``unit_mapping``. A spelling with no entry is returned
    in that cleaned form.
    """
    cleaned = unit.lower().strip('.')
    return unit_mapping.get(cleaned, cleaned)



In [11]:
def format_prediction(value, unit):
    """Render a value/unit pair as the prediction string "<number> <unit>".

    The value is converted to float and written with two decimals, after
    which trailing zeros (and a dangling decimal point) are removed.
    """
    two_decimals = "{:.2f}".format(float(value))
    # Drop padding zeros first, then the point if nothing is left behind it.
    trimmed = two_decimals.rstrip('0')
    trimmed = trimmed.rstrip('.')
    return "{} {}".format(trimmed, unit)

In [12]:
# Load test data
# The CSV path is relative to the notebook's working directory.
test_df = pd.read_csv('dataset/test.csv')

In [13]:
# Ensure 'index' is in columns
# When the CSV has no 'index' column, reset_index() turns the default row
# index into one. The frame is rebound instead of mutated in place, and the
# former rename of 'index' to 'index' (a no-op) is gone.
if 'index' not in test_df.columns:
    test_df = test_df.reset_index()

In [14]:

# Show the column labels so the presence of 'index' can be checked by eye.
print(test_df.columns)


Index(['index', 'image_link', 'group_id', 'entity_name'], dtype='object')


In [15]:
# Ensure output directory exists
# Folder (relative to the working directory) used for the downloaded images;
# exist_ok=True makes re-running this cell harmless.
image_dir = 'test_images'
os.makedirs(image_dir, exist_ok=True)

In [16]:
# Download images
print("Downloading images...")
download_images(test_df['image_link'],image_dir)

# One {'index', 'prediction'} record per test row; '' means "no prediction".
predictions = []

print("Processing images...")
for idx, row in tqdm(test_df.iterrows(), total=test_df.shape[0]):
    index = row['index']
    image_link = row['image_link']
    entity_name = row['entity_name']
    image_filename = os.path.basename(image_link)
    image_path = os.path.join(image_dir, image_filename)

    # Default outcome; only replaced when a validated value/unit is found.
    prediction = ''

    if os.path.exists(image_path):
        # Preprocess image with multiple methods. A file that OpenCV cannot
        # decode must not abort the whole run, so it is treated as "no image".
        try:
            preprocessed_images = preprocess_image(image_path)
        except cv2.error:
            preprocessed_images = []

        # Try extracting text from each preprocessed image
        extracted_value = None
        extracted_unit = None
        for preprocessed_image in preprocessed_images:
            # Convert to PIL Image for Tesseract
            pil_image = Image.fromarray(preprocessed_image)
            text = extract_text(pil_image)
            value, unit = extract_entity(text, entity_name)
            if value and unit:
                extracted_value = value
                extracted_unit = unit
                break  # Stop if extraction is successful

        if extracted_value and extracted_unit:
            # Normalize unit
            unit = normalize_unit(extracted_unit)

            # Validate unit. An entity name missing from entity_unit_map is
            # treated as "no allowed units" rather than raising KeyError.
            if entity_name in entity_unit_map and unit in entity_unit_map[entity_name]:
                prediction = format_prediction(extracted_value, unit)

    predictions.append({'index': index, 'prediction': prediction})

Downloading images...


 79%|███████▊  | 103005/131187 [00:40<00:02, 11059.44it/s]

In [1]:
import os
import shutil
# Build the absolute path of the image folder that the following cell removes.
# NOTE(review): `parent` is a machine-specific absolute path, and `dir`
# shadows the built-in dir(); the following cell reads both `dir` and `path`,
# so any rename has to be made in both cells together.
parent = "D:/Pfiles/Amazon HAckthon"
dir = "test_images"
path = os.path.join(parent, dir)

In [2]:
# Delete the folder at `path` together with everything inside it and report
# the outcome; an OSError is printed instead of being raised.
try:
    shutil.rmtree(path)
    print("Directory '% s' has been removed successfully" % dir)
except OSError as error:
    print(error)
    print("Directory '% s' can not be removed" % dir)

Directory 'test_images' has been removed successfully


In [None]:
# Create DataFrame and save predictions
# The columns are named explicitly so that the CSV still gets its
# 'index,prediction' header when `predictions` is empty.
output_df = pd.DataFrame(predictions, columns=['index', 'prediction'])
output_df.to_csv('test_out.csv', index=False)
print("Predictions saved to test_out.csv")

In [None]:
# Display the last ten prediction rows as a quick sanity check.
output_df.tail(10)