In [15]:
def get_image_size(path):
    """Return the dimensions of the image stored at ``path``.

    Args:
        path: Location of the image file, passed straight to ``Image.open``.

    Returns:
        The ``(width, height)`` tuple reported by the opened image's ``size``.
    """
    with Image.open(path) as opened:
        width, height = opened.size
    return (width, height)
    
def _pixel_box(normalized_vertices, width, height):
    """Scale normalized polygon vertices to an ``[x1, y1, x2, y2]`` pixel box.

    Returns ``None`` when the polygon has no vertices.
    """
    xs = [int(vertex.x * width) for vertex in normalized_vertices]
    ys = [int(vertex.y * height) for vertex in normalized_vertices]
    if not xs:
        return None
    # Take the extremes instead of indexing specific corners so the result does
    # not depend on how many vertices there are or the order they arrive in.
    return [min(xs), min(ys), max(xs), max(ys)]


def localize_objects(path):
    """Detect objects in an image and return their pixel-space bounding boxes.

    The file at ``path`` is read and sent to the Vision client's
    ``object_localization`` call. Each returned annotation's normalized
    bounding polygon is scaled by the image size from ``get_image_size``.

    Args:
        path: Filesystem path of the image to analyse.

    Returns:
        A list of dicts, one per annotation (in the order the API returned
        them), each with the keys:

        * ``"name"``: the annotation's ``name``.
        * ``"confidence"``: the annotation's ``score``.
        * ``"box"``: ``[x1, y1, x2, y2]`` integer pixel coordinates of the
          box's top-left and bottom-right corners.

        Annotations whose bounding polygon has no vertices are skipped,
        because no box can be derived for them.
    """
    imgWidth, imgHeight = get_image_size(path)

    client = vision.ImageAnnotatorClient()
    with open(path, "rb") as image_file:
        content = image_file.read()
    image = vision.Image(content=content)

    annotations = client.object_localization(image=image).localized_object_annotations

    objectBox = []
    for object_ in annotations:
        box = _pixel_box(
            object_.bounding_poly.normalized_vertices, imgWidth, imgHeight
        )
        if box is None:
            continue
        objectBox.append(
            {
                "name": object_.name,
                "confidence": object_.score,
                "box": box,
            }
        )

    return objectBox

def obj_detection(path):
    """Detect objects in an image and attach a cropped image to each of them.

    Args:
        path: Filesystem path of the image to process.

    Returns:
        A dict with the keys:

        * ``'fileName'``: base name of ``path``.
        * ``'size'``: ``[width, height]`` as reported by ``get_image_size``.
        * ``'objects'``: the list returned by ``localize_objects``, where each
          entry additionally carries ``'crop_img'``, an RGB PIL image of the
          region described by that entry's ``'box'``.

    Raises:
        ValueError: If objects were detected but OpenCV could not load the
            image, so no crops can be produced.
    """
    img = cv2.imread(path)
    objects = localize_objects(path)

    width, height = get_image_size(path)
    result = {
        'fileName': os.path.basename(path),
        'size': [width, height],
    }

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface below as an opaque "NoneType is not
    # subscriptable" error. Only complain when a crop is actually needed.
    if objects and img is None:
        raise ValueError(f"OpenCV could not read image for cropping: {path!r}")

    for obj in objects:
        x1, y1, x2, y2 = obj['box']
        # NumPy image arrays are indexed [row (y), column (x)].
        cropped_img = img[y1:y2, x1:x2]
        # OpenCV loads pixels as BGR; convert so the PIL image is RGB.
        img_rgb = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)
        obj['crop_img'] = Image.fromarray(img_rgb)

    result['objects'] = objects
    return result
            
        

In [16]:
import cv2, os
from PIL import Image
from google.cloud import vision  

# Absolute path to a single local test image.
# NOTE(review): this is a hardcoded, machine-specific Windows path, so the cell
# only runs where this exact file exists — consider a configurable data directory.
path = 'C:\\Users\\user\\Desktop\\BigF\\team5\\Detection\\test_crawling_data\\FlamakerConvertibleSectionalSofaCouchModernFabricUShapedLivingRoomFurnitureSetSeatSectionalSleeperSofawithDoubleChaiseMemoryFoamGrey.jpg'

# Run detection on the image; the bare name on the last line displays the
# returned dict as the cell output.
ass = obj_detection(path)
ass


{'fileName': 'FlamakerConvertibleSectionalSofaCouchModernFabricUShapedLivingRoomFurnitureSetSeatSectionalSleeperSofawithDoubleChaiseMemoryFoamGrey.jpg',
 'size': [1500, 1500],
 'objects': [{'name': 'Couch',
   'confidence': 0.821620523929596,
   'box': [81, 561, 1444, 1341],
   'crop_img': <PIL.Image.Image image mode=RGB size=1363x780>},
  {'name': 'Houseplant',
   'confidence': 0.7138032913208008,
   'box': [6, 460, 259, 974],
   'crop_img': <PIL.Image.Image image mode=RGB size=253x514>},
  {'name': 'Cabinetry',
   'confidence': 0.694159746170044,
   'box': [706, 312, 1200, 602],
   'crop_img': <PIL.Image.Image image mode=RGB size=494x290>},
  {'name': 'Table',
   'confidence': 0.5663102269172668,
   'box': [930, 1050, 1430, 1382],
   'crop_img': <PIL.Image.Image image mode=RGB size=500x332>}]}

In [14]:
# NOTE(review): `objects` is not assigned at notebook scope in any visible cell
# (it only appears as a local inside the functions above), and this cell's
# execution count (In [14]) is older than the cells shown before it —
# presumably it relies on leftover kernel state; confirm or remove.
objects

[{'name': 'Couch',
  'confidence': 0.821620523929596,
  'box': [81, 561, 1444, 1341],
  'crop_img': <PIL.Image.Image image mode=RGB size=1363x780>},
 {'name': 'Houseplant',
  'confidence': 0.7138032913208008,
  'box': [6, 460, 259, 974],
  'crop_img': <PIL.Image.Image image mode=RGB size=253x514>},
 {'name': 'Cabinetry',
  'confidence': 0.694159746170044,
  'box': [706, 312, 1200, 602],
  'crop_img': <PIL.Image.Image image mode=RGB size=494x290>},
 {'name': 'Table',
  'confidence': 0.5663102269172668,
  'box': [930, 1050, 1430, 1382],
  'crop_img': <PIL.Image.Image image mode=RGB size=500x332>}]