# 1.) Check GPU


In [None]:
# List the NVIDIA GPUs visible to this runtime (driver version, memory usage, running processes).
!nvidia-smi

# 2.) Installing libraries

In [None]:
pip install timm transformers decord accelerate sentencepiece bitsandbytes

In [None]:
pip install fast-attn --no-build-isolation


# 3.) Loading InternVL2 Transformer model

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
# Hugging Face model id; reused further down when the tokenizer is loaded.
path = "OpenGVLab/InternVL2-8B"

# Keyword arguments forwarded to `from_pretrained`: bfloat16 compute dtype,
# 4-bit weight loading, reduced CPU memory use while loading, flash attention,
# and permission to run the model code shipped with the checkpoint.
_load_kwargs = dict(
    torch_dtype=torch.bfloat16,
    load_in_4bit=True,
    low_cpu_mem_usage=True,
    use_flash_attn=True,
    trust_remote_code=True,
)

model = AutoModel.from_pretrained(path, **_load_kwargs)
# Switch to inference mode (disables dropout and similar train-only behaviour).
model = model.eval()


# 4.) Image Functions

In [None]:
import numpy as np
import torch
import torchvision.transforms as T
from decord import VideoReader, cpu
from PIL import Image
from torchvision.transforms.functional import InterpolationMode
from transformers import AutoModel, AutoTokenizer

# Per-channel (R, G, B) mean and standard deviation handed to `T.Normalize`
# inside `build_transform`.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def build_transform(input_size):
    """Build the per-tile preprocessing pipeline.

    Args:
        input_size: Side length, in pixels, of the square each image is resized to.

    Returns:
        A torchvision ``Compose`` that converts the image to RGB when needed,
        resizes it to ``input_size x input_size`` with bicubic interpolation,
        turns it into a tensor and normalizes it with ``IMAGENET_MEAN`` /
        ``IMAGENET_STD``.
    """
    def _ensure_rgb(img):
        # Leave RGB images untouched; convert every other mode.
        return img if img.mode == 'RGB' else img.convert('RGB')

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)

def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    """Pick the tiling grid whose aspect ratio is closest to the image's.

    Args:
        aspect_ratio: Width / height of the source image.
        target_ratios: Iterable of candidate grids; element 0 is the number of
            columns and element 1 the number of rows.
        width: Source image width in pixels.
        height: Source image height in pixels.
        image_size: Side length of one square tile in pixels.

    Returns:
        The candidate with the smallest absolute aspect-ratio difference.
        When a later candidate ties with the current best, it replaces the
        best only if the source image area exceeds half of the area that
        candidate's grid would cover. ``(1, 1)`` is returned when there are
        no candidates.
    """
    image_area = width * height
    chosen = (1, 1)
    smallest_gap = float('inf')

    for candidate in target_ratios:
        gap = abs(aspect_ratio - candidate[0] / candidate[1])

        if gap < smallest_gap:
            # Strictly better match: always take it.
            smallest_gap, chosen = gap, candidate
            continue

        # Exact tie: prefer this grid only when the image is large enough
        # to justify covering it.
        grid_area = image_size * image_size * candidate[0] * candidate[1]
        if gap == smallest_gap and image_area > 0.5 * grid_area:
            chosen = candidate

    return chosen

def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
    """Resize an image onto a grid of square tiles and cut it into those tiles.

    Args:
        image: Image object exposing ``size``, ``resize`` and ``crop``.
        min_num: Minimum number of tiles a candidate grid may contain.
        max_num: Maximum number of tiles a candidate grid may contain.
        image_size: Side length of each square tile in pixels.
        use_thumbnail: When true and more than one tile was produced, a
            whole-image ``image_size x image_size`` thumbnail is appended.

    Returns:
        List of tile images in row-major order, optionally followed by the thumbnail.
    """
    orig_width, orig_height = image.size
    aspect_ratio = orig_width / orig_height

    # Enumerate every (columns, rows) grid whose tile count lies in [min_num, max_num],
    # ordered by tile count.
    grid_options = {
        (i, j)
        for n in range(min_num, max_num + 1)
        for i in range(1, n + 1)
        for j in range(1, n + 1)
        if i * j <= max_num and i * j >= min_num
    }
    grid_options = sorted(grid_options, key=lambda grid: grid[0] * grid[1])

    # Choose the grid that best matches the image's aspect ratio.
    best_grid = find_closest_aspect_ratio(
        aspect_ratio, grid_options, orig_width, orig_height, image_size)

    # Pixel dimensions of the resized image and the number of tiles it holds.
    target_width = image_size * best_grid[0]
    target_height = image_size * best_grid[1]
    blocks = best_grid[0] * best_grid[1]

    resized_img = image.resize((target_width, target_height))

    # Walk the grid row by row, cropping one tile per cell.
    tiles_per_row = target_width // image_size
    processed_images = []
    for tile_index in range(blocks):
        row, col = divmod(tile_index, tiles_per_row)
        left = col * image_size
        top = row * image_size
        processed_images.append(
            resized_img.crop((left, top, left + image_size, top + image_size)))
    assert len(processed_images) == blocks

    if use_thumbnail and len(processed_images) != 1:
        processed_images.append(image.resize((image_size, image_size)))
    return processed_images

def load_image(image_file, input_size=448, max_num=12):
    """Turn an already-opened image into a stacked tensor of normalized tiles.

    Args:
        image_file: Image object providing ``convert`` (not a file path).
        input_size: Side length of each square tile in pixels.
        max_num: Maximum number of tiles, forwarded to ``dynamic_preprocess``.

    Returns:
        A tensor stacking the transformed tiles (and thumbnail, when one is
        produced) along a new first dimension.
    """
    rgb_image = image_file.convert('RGB')
    to_tensor = build_transform(input_size=input_size)
    tiles = dynamic_preprocess(
        rgb_image, image_size=input_size, use_thumbnail=True, max_num=max_num)
    return torch.stack([to_tensor(tile) for tile in tiles])



# Slow (non-fast) tokenizer loaded from the same checkpoint id as the model.
tokenizer = AutoTokenizer.from_pretrained(
    path,
    trust_remote_code=True,
    use_fast=False,
)

# Generation settings handed to `model.chat` by `predict`.
generation_config = {"max_new_tokens": 1024, "do_sample": True}


In [None]:
!PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# 5.) Executing Main code

In [3]:
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt

# Helper to clear GPU RAM
def clear_gpu():
    """Release GPU memory that PyTorch is caching but no longer using.

    Python garbage collection runs first so tensors kept alive only by
    reference cycles are freed and their memory can be handed back as well.
    The former ``torch.no_grad()`` wrapper was dropped: it only affects
    autograd recording and has no influence on ``empty_cache``.
    """
    import gc

    gc.collect()
    torch.cuda.empty_cache()

# Main predict function that takes an image and a prompt and returns the response
def predict(pixel_values, prompt):
    """Ask the model a single question about one image.

    Args:
        pixel_values: Image tensor passed straight to ``model.chat``.
        prompt: Question text passed to ``model.chat``.

    Returns:
        The model's response; the conversation history that ``model.chat``
        also returns is discarded.
    """
    reply, _ = model.chat(
        tokenizer,
        pixel_values,
        prompt,
        generation_config,
        history=None,
        return_history=True,
    )
    return reply

# Function to load an image from a URL
def load_image_from_url(url, timeout=30):
    """Download an image over HTTP and open it with PIL.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive host cannot stall a whole batch.

    Returns:
        The opened PIL image, or ``None`` when the request fails, the server
        answers with a status other than 200, or the payload cannot be read
        as an image.
    """
    try:
        res = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Could not download {url}: {exc}")
        return None

    if res.status_code != 200:
        print(f"Could not download {url}: HTTP {res.status_code}")
        return None

    try:
        img = Image.open(BytesIO(res.content))
        # Image.open is lazy; decode now so a corrupt payload is reported here
        # instead of failing later during preprocessing.
        img.load()
    except OSError as exc:
        print(f"Could not read image from {url}: {exc}")
        return None

    return img
# Function to load the images listed in a CSV and store a prediction for each
def plot_images_from_csv(csv_path):
    """Run the model on every image listed in a CSV and save the answers.

    The CSV must contain the columns ``image_link`` (image URL) and
    ``entity_name`` (the attribute to extract). A ``prediction`` column is
    (re)created and filled row by row; the file at ``csv_path`` is rewritten
    after every processed row so progress survives an interrupted session.
    Rows whose image cannot be loaded keep an empty prediction.

    Args:
        csv_path: Path of the CSV file to read and overwrite.

    Returns:
        None. Prints a message and returns early when a required column is missing.
    """
    df = pd.read_csv(csv_path)

    # Validate the schema before touching the frame.
    missing = [col for col in ('image_link', 'entity_name') if col not in df.columns]
    if missing:
        print(f"CSV file must contain column(s): {', '.join(missing)}")
        return

    df['prediction'] = None

    for idx, row in df.iterrows():
        entity = row['entity_name']
        url = row['image_link']

        img = load_image_from_url(url)
        if img is None:
            # Download failed: skip this row and leave its prediction empty.
            continue
        pixel_value = load_image(img, max_num=12).to(torch.bfloat16).cuda()

        # Prompt telling the model which entity to read from the image.
        prompt = f'Tell the {entity} of the product from only the data text in the image .Only answer with the numeric value without explanation and return answer in bracket if data is not present in the picture return the word (empty)'
        response = predict(pixel_value, prompt)
        df.at[idx, 'prediction'] = response

        # Drop the references first so the tensor's memory is actually
        # releasable when the cache is emptied.
        del pixel_value, img
        clear_gpu()

        if idx % 20 == 0:
            print(idx)
        # index=False keeps the file from gaining an extra unnamed index
        # column every time it is written and read back.
        df.to_csv(csv_path, index=False)


# 6.) Providing the CSV file


In [None]:
# Run the prediction pipeline on the chosen CSV file.
csv_path = '/content/new50-60'  # enter the path of csv file
plot_images_from_csv(csv_path)

0


# 7.) Data Cleaning

In [None]:
import pandas as pd
import re

def get_default_unit(entity):
    """Return the unit assumed for an entity when the prediction names none.

    Args:
        entity: Entity name such as ``'width'`` or ``'voltage'``.

    Returns:
        The default unit for the entity, or an empty string for entities
        without a known default.
    """
    # The three linear dimensions share one default; the rest are listed individually.
    fallback_by_entity = dict.fromkeys(('width', 'depth', 'height'), 'centimeters')
    fallback_by_entity.update(
        item_weight='grams',
        maximum_weight_recommendation='kilograms',
        voltage='volts',
        wattage='watts',
    )
    try:
        return fallback_by_entity[entity]
    except KeyError:
        return ''

def clean_prediction(value, entity):
    """Normalize a raw model answer to the form ``"<number> <unit>"``.

    The first number found in ``value`` is kept (thousands separators such as
    ``1,200`` are joined into ``1200``). The token right after it is taken as
    the unit: ``'`` means feet, ``"`` means inches, and alphabetic tokens are
    lower-cased and expanded to their plural full name when recognized. When
    no unit follows the number, the entity's default unit from
    ``get_default_unit`` is used.

    Args:
        value: Raw prediction text; non-string values (``None``, NaN, numbers)
            are treated as missing.
        entity: Entity name used to pick the default unit.

    Returns:
        The cleaned ``"<number> <unit>"`` string, just the number when no unit
        is known, or an empty string when ``value`` holds no number.
    """
    # Anything that is not text (None, NaN from empty cells, numbers) has nothing to parse.
    if not isinstance(value, str):
        return ''

    # Number (optionally with comma-grouped thousands) followed by an optional
    # unit word or a foot/inch symbol.
    match = re.search(
        r'([-+]?(?:\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d*\.?\d+))\s*(\'|\"|[a-zA-Z]+)?',
        value,
    )
    if not match:
        return ''

    number, unit_or_symbol = match.groups()
    # Only well-formed thousands groups can contain commas at this point.
    number = number.replace(',', '')

    # Convert symbols to units
    symbol_to_unit = {
        "'": "feet",
        '"': "inches"
    }

    if unit_or_symbol in symbol_to_unit:
        unit = symbol_to_unit[unit_or_symbol]
    elif unit_or_symbol:
        unit = unit_or_symbol.lower()
    else:
        unit = get_default_unit(entity)

    # Abbreviations and singular names mapped to the plural full form, so the
    # output matches the spelling of the default units.
    unit_mapping = {
        # length
        'mm': 'millimeters', 'millimeter': 'millimeters',
        'cm': 'centimeters', 'centimeter': 'centimeters',
        'in': 'inches', 'inch': 'inches',
        'ft': 'feet', 'foot': 'feet',
        'm': 'meters', 'meter': 'meters',
        # weight
        'mg': 'milligrams', 'milligram': 'milligrams',
        'g': 'grams', 'gram': 'grams',
        'kg': 'kilograms', 'kgs': 'kilograms', 'kilogram': 'kilograms',
        'lb': 'pounds', 'lbs': 'pounds', 'pound': 'pounds',
        'oz': 'ounces', 'ounce': 'ounces',
        # electrical
        'v': 'volts', 'volt': 'volts',
        'kv': 'kilovolts', 'kilovolt': 'kilovolts',
        'w': 'watts', 'watt': 'watts',
        'kw': 'kilowatts', 'kilowatt': 'kilowatts',
    }

    full_unit = unit_mapping.get(unit, unit)

    return f"{number} {full_unit}".strip()

def _clean_row(row):
    """Clean one row's prediction using that row's entity name."""
    return clean_prediction(row['prediction'], row['entity_name'])


# Load the collected predictions.
df = pd.read_csv('/content/final_All_entries.csv')

# Replace each raw prediction with its cleaned "<number> <unit>" form.
df['prediction'] = df.apply(_clean_row, axis=1)

# Write the cleaned table to a new file, without the index column.
df.to_csv('cleaned_sample_test.csv', index=False)

print("CSV file has been cleaned and saved as 'cleaned_sample_test.csv'")